correldata

Read/write vectors of correlated data from/to a csv file.

These data are stored in a dictionary, whose values are numpy arrays with elements which may be strings, floats, or floats with associated uncertainties as defined in the uncertainties library.

  1"""
  2Read/write vectors of correlated data from/to a csv file.
  3
  4These data are stored in a dictionary, whose values are numpy arrays
  5with elements which may be strings, floats, or floats with associated uncertainties
  6as defined in the [uncertainties](https://pypi.org/project/uncertainties) library.
  7"""
  8
  9
 10__author__    = 'Mathieu Daëron'
 11__contact__   = 'mathieu@daeron.fr'
 12__copyright__ = 'Copyright (c) 2024 Mathieu Daëron'
 13__license__   = 'MIT License - https://opensource.org/licenses/MIT'
 14__date__      = '2024-11-02'
 15__version__   = '1.6.0'
 16
 17
 18import os as _os
 19import numpy as _np
 20import uncertainties as _uc
 21
 22from typing import Callable, Hashable, Any
 23from uncertainties.unumpy import nominal_values as nv
 24
 25nv = nv
 26"""Alias for [`uncertainties.unumpy.nominal_values()`](https://pythonhosted.org/uncertainties/numpy_guide.html#uncertainties-and-nominal-values)"""
 27
 28class uarray(_np.ndarray):
 29
 30	__doc__ = """
 31	1-D [ndarray](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html)
 32	of [UFloat](https://pythonhosted.org/uncertainties/tech_guide.html) values
 33	"""
 34
 35	def __new__(cls, a):
 36		obj = _np.asarray(a).view(cls)
 37		return obj
 38	
 39	@property
 40	def nv(self):
 41		"""Return the array of nominal values (read-only)."""
 42		return _uc.unumpy.nominal_values(_np.array(self))
 43
 44	@property
 45	def se(self):
 46		"""Return the array of standard errors (read-only)"""
 47		return _uc.unumpy.std_devs(_np.array(self))
 48
 49	@property
 50	def correl(self):
 51		"""Return the correlation matrix of the array elements (read-only)"""
 52		return _np.array(_uc.correlation_matrix(self))
 53
 54	@property
 55	def covar(self):
 56		"""Return the covariance matrix of the array elements (read-only)"""
 57		return _np.array(_uc.covariance_matrix(self))
 58	
 59	@property
 60	def mahalanobis(self):
 61		"""Return the squared Mahalanobis distance from zero of the array (read-only)"""
 62		flatself = self.n.flatten().reshape((1, self.size))
 63		return (flatself @ _np.linalg.inv(self.covar) @ flatself.T)[0,0]
 64	
 65	n = nv
 66	"Alias for `uarray.nv`"
 67	
 68	s = se
 69	"Alias for `uarray.se`"
 70	
 71	cor = correl
 72	"Alias for `uarray.correl`"
 73	
 74	cov = covar
 75	"Alias for `uarray.covar`"
 76	
 77	m = mahalanobis
 78	"Alias for `uarray.mahalanobis`"
 79
 80
 81def is_symmetric_positive_semidefinite(M: _np.ndarray) -> bool:
 82	'''
 83	Test whether 2-D array `M` is symmetric and positive semidefinite.
 84	'''
 85	ev = _np.linalg.eigvals(M)
 86	return (
 87		_np.allclose(M, M.T) # M is symmetric
 88		and _np.all(
 89			(ev > 0) | _np.isclose(ev, 0)
 90		) # all eignevalues are either real and strictly positive or close to zero
 91	)
 92
 93
 94def smart_type(s: str) -> (int | float | str):
 95	'''
 96	Tries to convert string `s` to an `int`, or to an `float` if that fails.
 97	If both fail, return the original string unchanged.
 98	'''
 99	try: return int(s)
100	except: pass
101	try: return float(s)
102	except: pass
103	return s
104
105
def read_data(data: str, sep: str = ',', validate_covar: bool = True):
	'''
	Read correlated data from a CSV-like string.

	Column names are interpreted in the following way:
	* In most cases, each column is converted to a dict value, with the corresponding
	dict key being the column's label.
	* Columns whose label starts with `SE` are interpreted as specifying the standard
	error for the latest preceding data column.
	* Columns whose label starts with `correl` are interpreted as specifying the
	correlation matrix for the latest preceding data column. In that case, column labels
	are ignored for the rest of the columns belonging to this matrix.
	* Columns whose label starts with `covar` are interpreted as specifying the
	covariance matrix for the latest preceding data column. In that case, column labels
	are ignored for the rest of the columns belonging to this matrix.
	* `SE`, `correl`, and `covar` may be specified for any arbitrary variable other than
	the latest preceding data column, by adding an underscore followed by the variable's
	label (ex: `SE_foo`, `correl_bar`, `covar_baz`).
	* `correl` and `covar` may also be specified for any pair of variables, by adding an
	underscore followed by the two variable labels, joined by a second underscore
	(ex: `correl_foo_bar`, `covar_X_Y`). The elements of the first and second variables
	correspond, respectively, to the lines and columns of this matrix.
	* Exceptions will be raised, for any given variable:
		- when specifying both `covar` and any combination of (`SE`, `correl`)
		- when specifying `correl` without `SE`
	* Blank lines are ignored, and so are unlabeled columns which do not belong
	to a `correl` or `covar` matrix.

	**Arguments**
	- `data`: a CSV-like string
	- `sep`: the CSV separator
	- `validate_covar`: whether to check that the overall covariance matrix
	is symmetric and positive semidefinite. Specifying `validate_covar = False`
	bypasses this computationally expensive step.

	**Returns**

	A dict mapping each data column's label to an array of its values: a plain numpy
	array for columns without uncertainties, a `uarray` for the others. An empty dict
	is returned if `data` contains no non-blank line.

	**Raises**
	- `KeyError` when too much or not enough uncertainty information is specified,
	or when an unlabeled `SE`, `correl` or `covar` column precedes any data column
	- `numpy.linalg.LinAlgError` when `validate_covar` is true and the complete
	covariance matrix is not symmetric positive-semidefinite

	**Example**
	```py
	import correldata
	data  = """
	Sample, Tacid,  D47,   SE,         correl,,,  D48, covar,,,          correl_D47_D48
	   FOO,   90., .245, .005,      1, 0.5, 0.5, .145,  4e-4, 1e-4, 1e-4, 0.5,   0,   0
	   BAR,   90., .246, .005,    0.5,   1, 0.5, .146,  1e-4, 4e-4, 1e-4,   0, 0.5,   0
	   BAZ,   90., .247, .005,    0.5, 0.5,   1, .147,  1e-4, 1e-4, 4e-4,   0,   0, 0.5
	"""[1:-1]
	print(correldata.read_data(data))

	# yields:
	#
	# > {
	#     'Sample': array(['FOO', 'BAR', 'BAZ'], dtype='<U3'),
	#     'Tacid': array([90., 90., 90.]),
	#     'D47': uarray([0.245+/-0.004999999999999998, 0.246+/-0.004999999999999997, 0.247+/-0.005], dtype=object),
	#     'D48': uarray([0.145+/-0.019999999999999993, 0.146+/-0.019999999999999993, 0.147+/-0.019999999999999997], dtype=object)
	#   }
	```
	'''

	# Split into rows, skipping blank lines (such as the empty last "line" left by a
	# trailing newline), which would otherwise be miscounted as a data row.
	rows = [l.split(sep) for l in data.split('\n') if l.strip()]
	if not rows:
		return {}

	# Column labels are kept as strings: converting a numeric-looking label with
	# smart_type() would break the str.startswith() tests below.
	header = [e.strip() for e in rows[0]]
	body = [[smart_type(e.strip()) for e in r] for r in rows[1:]]
	N = len(body)

	# Always advance by at least one column, even when there are no data rows (N = 0)
	width = max(N, 1)

	values, se, correl, covar = {}, {}, {}, {}
	oldfield = None # label of the latest data column encountered so far

	def column(j):
		# the single column at position j, as an array
		return _np.array([l[j] for l in body])

	def matrix(j):
		# the N columns starting at position j, as a 2-D array
		return _np.array([l[j:j+N] for l in body])

	def latest(field):
		# the data column which an unlabeled SE/correl/covar column refers to
		if oldfield is None:
			raise KeyError(f'Not enough information: "{field}" is specified before any data column.')
		return oldfield

	j = 0
	while j < len(header):
		field = header[j]
		if field == '':
			# unlabeled column outside of any matrix: nothing to attach it to
			j += 1
		elif field == 'SE':
			se[latest(field)] = column(j)
			j += 1
		elif field.startswith('SE_'):
			se[field[3:]] = column(j)
			j += 1
		elif field == 'correl':
			correl[latest(field)] = matrix(j)
			j += width
		elif field.startswith('correl_'):
			correl[field[7:]] = matrix(j)
			j += width
		elif field == 'covar':
			covar[latest(field)] = matrix(j)
			j += width
		elif field.startswith('covar_'):
			covar[field[6:]] = matrix(j)
			j += width
		else:
			values[field] = column(j)
			oldfield = field
			j += 1

	# Set aside the columns without any uncertainty information
	nakedvalues = {}
	for k in list(values):
		if (
			k not in se
			and k not in correl
			and k not in covar
		):
			nakedvalues[k] = values.pop(k)

	# Check that each variable is neither over- nor under-specified
	for x in values:
		if x in covar:
			if x in se:
				raise KeyError(f'Too much information: both SE and covar are specified for variable "{x}".')
			if x in correl:
				raise KeyError(f'Too much information: both correl and covar are specified for variable "{x}".')
		if x in correl:
			if x not in se:
				raise KeyError(f'Not enough information: correl is specified without SE for variable "{x}".')

	# Convert correlation matrices to covariance matrices
	for x in correl:
		if x in values:
			covar[x] = _np.diag(se[x]) @ correl[x] @ _np.diag(se[x])
		else:
			# x may be a pair of variables, labeled as "x1_x2"
			for x1 in values:
				for x2 in values:
					if x == f'{x1}_{x2}':
						if x1 in se:
							se1 = se[x1]
						else:
							if x1 in covar:
								se1 = _np.diag(covar[x1])**0.5
							else:
								raise KeyError(f'Not enough information: correl_{x} is specified without SE for variable "{x1}".')
						if x2 in se:
							se2 = se[x2]
						else:
							if x2 in covar:
								se2 = _np.diag(covar[x2])**0.5
							else:
								raise KeyError(f'Not enough information: correl_{x} is specified without SE for variable "{x2}".')

						covar[x] = _np.diag(se1) @ correl[x] @ _np.diag(se2)

	# Variables with SE but no correl have a diagonal covariance matrix
	for x in se:
		if x in values and x not in correl:
			covar[x] = _np.diag(se[x]**2)

	# For each pair (j1, j2) with a covariance matrix, also store the transposed (j2, j1) matrix
	for k in list(covar):
		if k not in values:
			for j1 in values:
				for j2 in values:
					if k == f'{j1}_{j2}':
						covar[f'{j2}_{j1}'] = covar[f'{j1}_{j2}'].T

	# Nothing to correlate: all columns are plain data
	if not values:
		return nakedvalues

	# Assemble the values of all variables into one vector, with the complete
	# covariance matrix built from N-by-N blocks
	X = _np.array([_ for k in values for _ in values[k]])
	CM = _np.zeros((X.size, X.size))
	for i, vi in enumerate(values):
		for j, vj in enumerate(values):
			if vi == vj:
				if vi in covar:
					CM[N*i:N*i+N,N*j:N*j+N] = covar[vi]
			else:
				if f'{vi}_{vj}' in covar:
					CM[N*i:N*i+N,N*j:N*j+N] = covar[f'{vi}_{vj}']

	# Scaling factors used to also test a rescaled version of CM, dividing rows and
	# columns by the corresponding SE (zero SE values are left unscaled)
	s = _np.diag(CM)**.5
	s[s==0] = 1.
	invs = _np.diag(s**-1)

	if (
		validate_covar
		and not (
			is_symmetric_positive_semidefinite(CM)
			or is_symmetric_positive_semidefinite(invs @ CM @ invs)
		)
	):
		raise _np.linalg.LinAlgError('The complete covariance matrix is not symmetric positive-semidefinite.')

	corvalues = uarray(_uc.correlated_values(X, CM))

	allvalues = nakedvalues

	for i, x in enumerate(values):
		allvalues[x] = corvalues[i*N:i*N+N]

	return allvalues
285
286
def read_data_from_file(filename: str | _os.PathLike, **kwargs):
	'''
	Load correlated data from a CSV file.

	The whole file is read as text and parsed by `read_data()`,
	whose result is returned.

	**Arguments**
	- `filename`: `str` or path to the file to read from
	- `kwargs`: passed to correldata.read_data()
	'''
	with open(filename) as fid:
		contents = fid.read()
		return read_data(contents, **kwargs)
297
298
299def f2s(
300	x: Any,
301	f: (str | Callable | dict),
302	k: Hashable = None,
303	fb: (str | Callable) = 'z.6g',
304) -> str:
305	'''
306	Format `x` according to format `f`
307	
308	* If `f` is a string, return `f'{x:{f}}'`
309	* If `f` is a callable, return `f(x)`
310	* If `f` is a dict and optional argument `k` is a hashable,
311	  return f2s(x, f[k]), otherwise return f2s(x, fb)
312	'''
313	if isinstance (x, str):
314		return x
315	if isinstance (f, str):
316		return f'{x:{f}}'
317	if isinstance (f, Callable):
318		return f(x)
319	if isinstance (f, dict):
320		if k in f:
321			return f2s(x, f[k])
322		if isinstance (fb, str):
323			return f'{x:{fb}}'
324		if isinstance (fb, Callable):
325			return fb(x)
326	raise TypeError(f'f2s() formatting argument f = {repr(f)} is neither a string nor a dict nor a callable.')
327	
328
329
def data_string(
	data: dict,
	sep: str = ',',
	include_fields: list = None,
	exclude_fields: list = None,
	float_format: (str | dict | Callable) = 'z.6g',
	correl_format: (str | dict | Callable) = 'z.6f',
	default_float_format: (str | Callable) = 'z.6g',
	default_correl_format: (str | Callable) = 'z.6f',
	show_nv: bool = True,
	show_se: bool = True,
	show_correl: bool = True,
	show_mixed_correl: bool = True,
	align: str = '>',
	atol: float = 1e-12,
	rtol: float = 1e-12,
):
	'''
	Generate CSV-like string from correlated data

	**Arguments**
	- `data`: dict of arrays with strings, floats or correlated data
	- `sep`: the CSV separator
	- `include_fields`: subset of fields to write; if `None`, write all fields
	- `exclude_fields`: subset of fields to ignore (takes precedence over `include_fields`);
	  to exclude only the SE for field `foo`, include `SE_foo`; same goes for `correl_foo`;
	  if `None`, no field is excluded
	- `float_format`: formatting for float values. May be a string (ex: `'z.3f'`), a callable
	  returning the formatted string (ex: `lambda x: f'{x:.2f}' if x else '0'`), or a dictionary
	  of strings and/or callables, with dict keys corresponding to different fields
	  (ex: `{'foo': '.2e', 'bar': (lambda x: str(x))}`).
	- `correl_format`: same as `float_format`, but applies to correlation matrix elements
	- `default_float_format`: only used when `float_format` is a dict; in that case, fields
	  missing from `float_format.keys()` will use `default_float_format` instead.
	- `default_correl_format`: same as `default_float_format`, but applies to `correl_format`
	- `show_nv`: show nominal values
	- `show_se`: show standard errors
	- `show_correl`: show correlations for any given field (ex: `correl_X`)
	- `show_mixed_correl`: show correlations between different fields (ex: `correl_X_Y`)
	- `align`: right-align (`>`), left-align (`<`), or don't align (empty string) CSV values
	- `atol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html)
	  when deciding whether a matrix is equal to the identity matrix or to the zero matrix
	- `rtol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html)
	  when deciding whether a matrix is equal to the identity matrix or to the zero matrix

	**Returns**

	The CSV-like string, with one line for the column labels followed by one line per
	element of the data arrays, without a trailing newline.

	**Example**

	```py
	from correldata import _uc
	from correldata import _np
	from correldata import *

	X = uarray(_uc.correlated_values([1., 2., 3.], _np.eye(3)*0.09))
	Y = uarray(_uc.correlated_values([4., 5., 6.], _np.eye(3)*0.16))

	data = dict(X=X, Y=Y, Z=X+Y)

	print(data_string(data, float_format = 'z.1f', correl_format = 'z.1f'))

	# yields:
	#
	#   X, SE_X,   Y, SE_Y,   Z, SE_Z, correl_X_Z,    ,    , correl_Y_Z,    ,
	# 1.0,  0.3, 4.0,  0.4, 5.0,  0.5,        0.6, 0.0, 0.0,        0.8, 0.0, 0.0
	# 2.0,  0.3, 5.0,  0.4, 7.0,  0.5,        0.0, 0.6, 0.0,        0.0, 0.8, 0.0
	# 3.0,  0.3, 6.0,  0.4, 9.0,  0.5,        0.0, 0.0, 0.6,        0.0, 0.0, 0.8
	```
	'''
	if include_fields is None:
		include_fields = [_ for _ in data]
	if exclude_fields is None:
		exclude_fields = []

	# Each element of cols is one output column: its label followed by its formatted values
	cols, ufields = [], []
	for f in include_fields:
		if f in exclude_fields:
			continue
		if isinstance(data[f], uarray):
			ufields.append(f)
			N = data[f].size
			if show_nv:
				cols.append([f] + [f2s(_, float_format, f, default_float_format) for _ in data[f].n])
			if show_se and (f'SE_{f}' not in exclude_fields):
				cols.append([f'SE_{f}'] + [f2s(_, float_format, f, default_float_format) for _ in data[f].s])
			if show_correl and (f'correl_{f}' not in exclude_fields):
				CM = _uc.correlation_matrix(data[f])
				# only write the correlation matrix if it differs from the identity matrix
				if not _np.allclose(CM, _np.eye(N), atol = atol, rtol = rtol):
					for i in range(N):
						cols.append(
							# only the first column of the matrix is labeled
							['' if i else f'correl_{f}']
							+ [
								f2s(
									CM[i,j],
									correl_format,
									f,
									default_correl_format,
								)
								for j in range(N)
							]
						)
		elif show_nv:
			cols.append([f] + [f2s(_, float_format, f, default_float_format) for _ in data[f]])

	if show_mixed_correl:
		for i in range(len(ufields)):
			for j in range(i):
				if f'correl_{ufields[i]}_{ufields[j]}' in exclude_fields or f'correl_{ufields[j]}_{ufields[i]}' in exclude_fields:
					continue
				# Block of the joint correlation matrix between the elements of
				# ufields[i] and those of ufields[j].
				# NOTE(review): N is the size of the last uarray field processed above;
				# this assumes all uarray fields have the same size - TODO confirm.
				CM = _uc.correlation_matrix((*data[ufields[i]], *data[ufields[j]]))[:N, -N:]
				# only write the mixed correlation matrix if it differs from the zero matrix
				if not _np.allclose(CM, _np.zeros((N, N)), atol = atol, rtol = rtol):
					for k in range(N):
						cols.append(
							['' if k else f'correl_{ufields[j]}_{ufields[i]}']
							+ [
								f2s(
									CM[k,l],
									correl_format,
									# NOTE(review): f is left over from the loop over include_fields
									# above (i.e. the last included field), so a dict correl_format
									# is looked up with that key here - confirm this is intended.
									f,
									default_correl_format,
								)
								for l in range(N)
							]
						)

	# Transpose the list of columns into a list of lines
	lines = list(map(list, zip(*cols)))

	if align:
		# pad each value to the width of the longest element of its column
		lengths = [max(len(e) for e in l) for l in cols]
		for l in lines:
			for k,ln in enumerate(lengths):
				l[k] = f'{l[k]:{align}{ln}s}'
		return '\n'.join([(sep+' ').join(l) for l in lines])

	return '\n'.join([sep.join(l) for l in lines])
460
461
def save_data_to_file(data, filename, **kwargs):
	'''
	Write correlated data to a CSV file.

	The file is opened for writing, then filled with the string generated by
	`data_string()`; the value returned by the file's `write()` method is returned.

	**Arguments**
	- `data`: dict of arrays with strings, floats or correlated data
	- `filename`: `str` or path to the file to write to
	- `kwargs`: passed to correldata.data_string()
	'''
	with open(filename, 'w') as fid:
		txt = data_string(data, **kwargs)
		return fid.write(txt)
475
476
def as_uarray(
	X: (uarray | _np.ndarray | _uc.UFloat | float),
	Xse: (_np.ndarray | float | None) = None,
	CM: (_np.ndarray | None) = None,
) -> uarray:
	"""
	Convert the input to an uarray. If the input is a single float or
	[UFloat](https://pythonhosted.org/uncertainties/tech_guide.html),
	yields an uarray of size 1.

	**Arguments**
	* `X`: nominal value(s); lists and tuples are converted to arrays
	* `CM`: covariance matrix of X; not needed if elements of X are of type
		[`UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html)
		or if `Xse` is specified.
	* `Xse`: SE of X (either one value per element of X, or a single value
		applying to all elements); not needed if elements of X are of type
		[`UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html)
		or if `CM` is specified.

	If neither `CM` nor `Xse` are specified, assume SE = 0.

	**Raises**
	* `ValueError` if both `CM` and `Xse` are specified for values which are not
		already of type `UFloat`
	* `TypeError` if `X` is of an unsupported type
	"""

	if isinstance(X, uarray):
		return X

	if isinstance(X, (list, tuple)):
		X = _np.asarray(X)

	if isinstance(X, _np.ndarray):
		# Values which already carry their uncertainties: Xse and CM are not used
		if _np.all([isinstance(_, _uc.UFloat) for _ in X]):
			return uarray(X)

		X = X.astype(float)

		if CM is not None:
			if Xse is not None: raise ValueError('Too much information: Xse is redundant because CM is already specified.')
		else:
			if Xse is None:
				Xse = X * 0
			else:
				# accept a single SE value applying to all elements of X
				Xse = _np.broadcast_to(_np.asarray(Xse, dtype = float), X.shape)

			CM = _np.diag(Xse**2)

		return uarray(_uc.correlated_values(X, CM))

	if isinstance(X, _uc.UFloat):
		return uarray([X])

	if isinstance(X, (float, int, _np.number)):

		if CM is not None:
			if Xse is not None: raise ValueError('Too much information: Xse is redundant because CM is already specified.')
			Xse = _np.asarray(CM)[0,0]**0.5
		elif Xse is None:
			# as documented: without CM nor Xse, assume SE = 0
			Xse = 0.

		return uarray([_uc.ufloat(X, Xse)])

	raise TypeError(f'Cannot convert X ({type(X)}) to an uarray.')
529
530
def as_pair_of_uarrays(
	X: (uarray | _np.ndarray | _uc.UFloat | float),
	Y: (uarray | _np.ndarray | _uc.UFloat | float),
	Xse: (_np.ndarray | float | None) = None,
	Yse: (_np.ndarray | float | None) = None,
	CM: (_np.ndarray | None) = None,
) -> tuple[uarray, uarray]:
	"""
	Convert the input to a pair of uarrays.

	**Arguments**
	* `X`: x values
	* `Y`: y values
	* `CM`: covariance matrix of `(*X, *Y)`; not needed if elements of X and Y are of type
		[`uncertainties.UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html)
		or if (`Xse`, `Yse`) are specified.
	* `Xse`, `Yse`: SE of X and Y (either one value per element, or a single value
		applying to all elements); not needed if elements of X and Y are of type
		[`uncertainties.UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html)
		or if `CM` is specified.

	If `X` and `Y` are arrays and neither `CM`, `Xse` nor `Yse` are specified, assume SE = 0.
	If `X` and `Y` are single numbers, either `CM` or both of (`Xse`, `Yse`) must be specified.

	**Returns**

	A tuple of two uarrays, corresponding to `X` and `Y` respectively.

	**Raises**
	* `TypeError` if `X` and `Y` are not of the same kind, or are of an unsupported type
	* `ValueError` if `CM` is specified together with `Xse` or `Yse`, or if `X` and `Y` are
		single numbers and neither `CM` nor (`Xse`, `Yse`) are specified
	"""

	_scalars = (float, int, _np.number)

	# X and Y must be of the same type, except that any two UFloat values (whose concrete
	# classes may differ, e.g. for independent variables vs derived quantities) or any two
	# plain numbers (e.g. an int and a float) are accepted together.
	if type(X) is not type(Y) and not (
		(isinstance(X, _uc.UFloat) and isinstance(Y, _uc.UFloat))
		or (isinstance(X, _scalars) and isinstance(Y, _scalars))
	):
		raise TypeError(f'X ({type(X)}) and Y ({type(Y)}) must have the same type.')

	if isinstance(X, uarray):
		return (X, Y)

	if isinstance(X, (list, tuple)):
		X, Y = _np.asarray(X), _np.asarray(Y)

	if isinstance(X, _np.ndarray):
		# Values which already carry their uncertainties: Xse, Yse and CM are not used
		if (
			_np.all([isinstance(_, _uc.UFloat) for _ in X])
			and
			_np.all([isinstance(_, _uc.UFloat) for _ in Y])
		):
			return uarray(X), uarray(Y)

		X = X.astype(float)
		Y = Y.astype(float)

		if CM is not None:
			if Xse is not None: raise ValueError('Too much information: Xse is redundant because CM is already specified.')
			if Yse is not None: raise ValueError('Too much information: Yse is redundant because CM is already specified.')

			# X and Y are jointly correlated according to CM
			XY = uarray(_uc.correlated_values([*X, *Y], CM))
			return XY[:X.size], XY[X.size:]

		# Without CM, X and Y are independent of each other, with diagonal covariance matrices
		if Xse is None:
			Xse = X * 0
		else:
			# accept a single SE value applying to all elements of X
			Xse = _np.broadcast_to(_np.asarray(Xse, dtype = float), X.shape)
		if Yse is None:
			Yse = Y * 0
		else:
			Yse = _np.broadcast_to(_np.asarray(Yse, dtype = float), Y.shape)

		CMx = _np.diag(Xse**2)
		CMy = _np.diag(Yse**2)
		return uarray(_uc.correlated_values(X, CMx)), uarray(_uc.correlated_values(Y, CMy))

	if isinstance(X, _uc.UFloat):
		return uarray([X]), uarray([Y])

	if isinstance(X, _scalars):

		if CM is not None:
			if Xse is not None: raise ValueError('Too much information: Xse is redundant because CM is already specified.')
			if Yse is not None: raise ValueError('Too much information: Yse is redundant because CM is already specified.')

		if CM is None:
			if Xse is None: raise ValueError('Not enough information: specify either CM or Xse.')
			if Yse is None: raise ValueError('Not enough information: specify either CM or Yse.')

			CM = _np.diag([Xse, Yse])**2

		XY = uarray(_uc.correlated_values([X, Y], CM))
		return XY[:1], XY[1:]

	raise TypeError(f'Cannot convert X and Y ({type(X)}) to a pair of uarrays.')
def nv(arr):
81def nominal_values(arr):
82    """
83    Return the nominal values of the numbers in NumPy array arr.
84
85    Elements that are not numbers with uncertainties (derived from a
86    class from this module) are passed through untouched (because a
87    numpy.array can contain numbers with uncertainties and pure floats
88    simultaneously).
89
90    If arr is of type unumpy.matrix, the returned array is a
91    numpy.matrix, because the resulting matrix does not contain
92    numbers with uncertainties.
93    """
94
95    return unumpy_to_numpy_matrix(to_nominal_values(arr))
class uarray(numpy.ndarray):
29class uarray(_np.ndarray):
30
31	__doc__ = """
32	1-D [ndarray](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html)
33	of [UFloat](https://pythonhosted.org/uncertainties/tech_guide.html) values
34	"""
35
36	def __new__(cls, a):
37		obj = _np.asarray(a).view(cls)
38		return obj
39	
40	@property
41	def nv(self):
42		"""Return the array of nominal values (read-only)."""
43		return _uc.unumpy.nominal_values(_np.array(self))
44
45	@property
46	def se(self):
47		"""Return the array of standard errors (read-only)"""
48		return _uc.unumpy.std_devs(_np.array(self))
49
50	@property
51	def correl(self):
52		"""Return the correlation matrix of the array elements (read-only)"""
53		return _np.array(_uc.correlation_matrix(self))
54
55	@property
56	def covar(self):
57		"""Return the covariance matrix of the array elements (read-only)"""
58		return _np.array(_uc.covariance_matrix(self))
59	
60	@property
61	def mahalanobis(self):
62		"""Return the squared Mahalanobis distance from zero of the array (read-only)"""
63		flatself = self.n.flatten().reshape((1, self.size))
64		return (flatself @ _np.linalg.inv(self.covar) @ flatself.T)[0,0]
65	
66	n = nv
67	"Alias for `uarray.nv`"
68	
69	s = se
70	"Alias for `uarray.se`"
71	
72	cor = correl
73	"Alias for `uarray.correl`"
74	
75	cov = covar
76	"Alias for `uarray.covar`"
77	
78	m = mahalanobis
79	"Alias for `uarray.mahalanobis`"

1-D ndarray of UFloat values

nv
40	@property
41	def nv(self):
42		"""Return the array of nominal values (read-only)."""
43		return _uc.unumpy.nominal_values(_np.array(self))

Return the array of nominal values (read-only).

se
45	@property
46	def se(self):
47		"""Return the array of standard errors (read-only)"""
48		return _uc.unumpy.std_devs(_np.array(self))

Return the array of standard errors (read-only)

correl
50	@property
51	def correl(self):
52		"""Return the correlation matrix of the array elements (read-only)"""
53		return _np.array(_uc.correlation_matrix(self))

Return the correlation matrix of the array elements (read-only)

covar
55	@property
56	def covar(self):
57		"""Return the covariance matrix of the array elements (read-only)"""
58		return _np.array(_uc.covariance_matrix(self))

Return the covariance matrix of the array elements (read-only)

mahalanobis
60	@property
61	def mahalanobis(self):
62		"""Return the squared Mahalanobis distance from zero of the array (read-only)"""
63		flatself = self.n.flatten().reshape((1, self.size))
64		return (flatself @ _np.linalg.inv(self.covar) @ flatself.T)[0,0]

Return the squared Mahalanobis distance from zero of the array (read-only)

n
40	@property
41	def nv(self):
42		"""Return the array of nominal values (read-only)."""
43		return _uc.unumpy.nominal_values(_np.array(self))

Alias for uarray.nv

s
45	@property
46	def se(self):
47		"""Return the array of standard errors (read-only)"""
48		return _uc.unumpy.std_devs(_np.array(self))

Alias for uarray.se

cor
50	@property
51	def correl(self):
52		"""Return the correlation matrix of the array elements (read-only)"""
53		return _np.array(_uc.correlation_matrix(self))

Alias for uarray.correl

cov
55	@property
56	def covar(self):
57		"""Return the covariance matrix of the array elements (read-only)"""
58		return _np.array(_uc.covariance_matrix(self))

Alias for uarray.covar

m
60	@property
61	def mahalanobis(self):
62		"""Return the squared Mahalanobis distance from zero of the array (read-only)"""
63		flatself = self.n.flatten().reshape((1, self.size))
64		return (flatself @ _np.linalg.inv(self.covar) @ flatself.T)[0,0]
Inherited Members
numpy.ndarray
dumps
dump
all
any
argmax
argmin
argpartition
argsort
astype
byteswap
choose
clip
compress
conj
conjugate
copy
cumprod
cumsum
diagonal
dot
fill
flatten
getfield
item
max
mean
min
nonzero
partition
prod
put
ravel
repeat
reshape
resize
round
searchsorted
setfield
setflags
sort
squeeze
std
sum
swapaxes
take
tobytes
tofile
tolist
tostring
trace
transpose
var
view
to_device
ndim
flags
shape
strides
data
itemsize
size
nbytes
base
dtype
real
imag
flat
ctypes
T
mT
ptp
newbyteorder
itemset
device
def is_symmetric_positive_semidefinite(M: numpy.ndarray) -> bool:
82def is_symmetric_positive_semidefinite(M: _np.ndarray) -> bool:
83	'''
84	Test whether 2-D array `M` is symmetric and positive semidefinite.
85	'''
86	ev = _np.linalg.eigvals(M)
87	return (
88		_np.allclose(M, M.T) # M is symmetric
89		and _np.all(
90			(ev > 0) | _np.isclose(ev, 0)
91		) # all eignevalues are either real and strictly positive or close to zero
92	)

Test whether 2-D array M is symmetric and positive semidefinite.

def smart_type(s: str) -> int | float | str:
 95def smart_type(s: str) -> (int | float | str):
 96	'''
 97	Tries to convert string `s` to an `int`, or to an `float` if that fails.
 98	If both fail, return the original string unchanged.
 99	'''
100	try: return int(s)
101	except: pass
102	try: return float(s)
103	except: pass
104	return s

Try to convert string s to an int, or to a float if that fails. If both fail, return the original string unchanged.

def read_data(data: str, sep: str = ',', validate_covar: bool = True):
107def read_data(data: str, sep: str = ',', validate_covar: bool = True):
108	'''
109	Read correlated data from a CSV-like string.
110	
111	Column names are interpreted in the following way:
112	* In most cases, each columns is converted to a dict value, with the corresponding
113	dict key being the column's label.
114	* Columns whose label starts with `SE` are interpreted as specifying the standard
115	error for the latest preceding data column.
116	* Columns whose label starts with `correl` are interpreted as specifying the
117	correlation matrix for the latest preceding data column. In that case, column labels
118	are ignored for the rest of the columns belonging to this matrix.
119	* Columns whose label starts with `covar` are interpreted as specifying the
120	covariance matrix for the latest preceding data column. In that case, column labels
121	are ignored for the rest of the columns belonging to this matrix.
122	* `SE`, `correl`, and `covar` may be specified for any arbitrary variable other than
123	the latest preceding data column, by adding an underscore followed by the variable's
124	label (ex: `SE_foo`, `correl_bar`, `covar_baz`).
125	* `correl`, and `covar` may also be specified for any pair of variable, by adding an
126	underscore followed by the two variable labels, joined by a second underscore
127	(ex: `correl_foo_bar`, `covar_X_Y`). The elements of the first and second variables
128	correspond, respectively, to the lines and columns of this matrix.
129	* Exceptions will be raised, for any given variable:
130		- when specifying both `covar` and any combination of (`SE`, `correl`)
131		- when specifying `correl` without `SE`
132
133	**Arguments**
134	- `data`: a CSV-like string
135	- `sep`: the CSV separator
136	- `validate_covar`: whether to check that the overall covariance matrix
137	is symmetric and positive semidefinite. Specifying `validate_covar = False`
138	bypasses this computationally expensive step.
139	
140	**Example**
141	```py
142	import correldata
143	data  = """
144	Sample, Tacid,  D47,   SE,         correl,,,  D48, covar,,,          correl_D47_D48
145	   FOO,   90., .245, .005,      1, 0.5, 0.5, .145,  4e-4, 1e-4, 1e-4, 0.5,   0,   0
146	   BAR,   90., .246, .005,    0.5,   1, 0.5, .146,  1e-4, 4e-4, 1e-4,   0, 0.5,   0
147	   BAZ,   90., .247, .005,    0.5, 0.5,   1, .147,  1e-4, 1e-4, 4e-4,   0,   0, 0.5
148	"""[1:-1]
149	print(correldata.read_data(data))
150	
151	# yields:
152	# 
153	# > {
154	#     'Sample': array(['FOO', 'BAR', 'BAZ'], dtype='<U3'),
155	#     'Tacid': array([90., 90., 90.]),
156	#     'D47': uarray([0.245+/-0.004999999999999998, 0.246+/-0.004999999999999997, 0.247+/-0.005], dtype=object),
157	#     'D48': uarray([0.145+/-0.019999999999999993, 0.146+/-0.019999999999999993, 0.147+/-0.019999999999999997], dtype=object)
158	#   }
159	```
160	'''
161
162	data = [[smart_type(e.strip()) for e in l.split(sep)] for l in data.split('\n')]
163	N = len(data) - 1
164
165	values, se, correl, covar = {}, {}, {}, {}
166	j = 0
167	while j < len(data[0]):
168		field = data[0][j]
169		if not (
170			field.startswith('SE_')
171			or field.startswith('correl_')
172			or field.startswith('covar_')
173			or field == 'SE'
174			or field == 'correl'
175			or field == 'covar'
176			or len(field) == 0
177		):
178			values[field] = _np.array([l[j] for l in data[1:]])
179			j += 1
180			oldfield = field
181		elif field.startswith('SE_'):
182			se[field[3:]] = _np.array([l[j] for l in data[1:]])
183			j += 1
184		elif field == 'SE':
185			se[oldfield] = _np.array([l[j] for l in data[1:]])
186			j += 1
187		elif field.startswith('correl_'):
188			correl[field[7:]] = _np.array([l[j:j+N] for l in data[1:]])
189			j += N
190		elif field == 'correl':
191			correl[oldfield] = _np.array([l[j:j+N] for l in data[1:]])
192			j += N
193		elif field.startswith('covar_'):
194			covar[field[6:]] = _np.array([l[j:j+N] for l in data[1:]])
195			j += N
196		elif field == 'covar':
197			covar[oldfield] = _np.array([l[j:j+N] for l in data[1:]])
198			j += N
199
200	nakedvalues = {}
201	for k in [_ for _ in values]:
202		if (
203			k not in se
204			and k not in correl
205			and k not in covar
206		):
207			nakedvalues[k] = values.pop(k)
208
209	for x in values:
210		if x in covar:
211			if x in se:
212				raise KeyError(f'Too much information: both SE and covar are specified for variable "{x}".')
213			if x in correl:
214				raise KeyError(f'Too much information: both correl and covar are specified for variable "{x}".')
215		if x in correl:
216			if x not in se:
217				raise KeyError(f'Not enough information: correl is specified without SE for variable "{x}".')
218
219	for x in correl:
220		if x in values:
221			covar[x] = _np.diag(se[x]) @ correl[x] @ _np.diag(se[x])
222		else:
223			for x1 in values:
224				for x2 in values:
225					if x == f'{x1}_{x2}':
226						if x1 in se:
227							se1 = se[x1]
228						else:
229							if x1 in covar:
230								se1 = _np.diag(covar[x1])**0.5
231							else:
232								raise KeyError(f'Not enough information: correl_{x} is specified without SE for variable "{x1}".')
233						if x2 in se:
234							se2 = se[x2]
235						else:
236							if x2 in covar:
237								se2 = _np.diag(covar[x2])**0.5
238							else:
239								raise KeyError(f'Not enough information: correl_{x} is specified without SE for variable "{x1}".')
240
241						covar[x] = _np.diag(se1) @ correl[x] @ _np.diag(se2)
242
243	for x in se:
244		if x in values and x not in correl:
245			covar[x] = _np.diag(se[x]**2)
246
247	for k in [_ for _ in covar]:
248		if k not in values:
249			for j1 in values:
250				for j2 in values:
251					if k == f'{j1}_{j2}':
252						covar[f'{j2}_{j1}'] = covar[f'{j1}_{j2}'].T
253
254	X = _np.array([_ for k in values for _ in values[k]])
255	CM = _np.zeros((X.size, X.size))
256	for i, vi in enumerate(values):
257		for j, vj in enumerate(values):
258			if vi == vj:
259				if vi in covar:
260					CM[N*i:N*i+N,N*j:N*j+N] = covar[vi]
261			else:
262				if f'{vi}_{vj}' in covar:
263					CM[N*i:N*i+N,N*j:N*j+N] = covar[f'{vi}_{vj}']
264
265	s = _np.diag(CM)**.5
266	s[s==0] = 1.
267	invs = _np.diag(s**-1)
268
269	if (
270		validate_covar
271		and not (
272			is_symmetric_positive_semidefinite(CM)
273			or is_symmetric_positive_semidefinite(invs @ CM @ invs)
274		)
275	):
276		raise _np.linalg.LinAlgError('The complete covariance matrix is not symmetric positive-semidefinite.')
277
278	corvalues = uarray(_uc.correlated_values(X, CM))
279
280	allvalues = nakedvalues
281
282	for i, x in enumerate(values):
283		allvalues[x] = corvalues[i*N:i*N+N]
284
285	return allvalues

Read correlated data from a CSV-like string.

Column names are interpreted in the following way:

  • In most cases, each column is converted to a dict value, with the corresponding dict key being the column's label.
  • Columns whose label starts with SE are interpreted as specifying the standard error for the latest preceding data column.
  • Columns whose label starts with correl are interpreted as specifying the correlation matrix for the latest preceding data column. In that case, column labels are ignored for the rest of the columns belonging to this matrix.
  • Columns whose label starts with covar are interpreted as specifying the covariance matrix for the latest preceding data column. In that case, column labels are ignored for the rest of the columns belonging to this matrix.
  • SE, correl, and covar may be specified for any arbitrary variable other than the latest preceding data column, by adding an underscore followed by the variable's label (ex: SE_foo, correl_bar, covar_baz).
  • correl and covar may also be specified for any pair of variables, by adding an underscore followed by the two variable labels, joined by a second underscore (ex: correl_foo_bar, covar_X_Y). The elements of the first and second variables correspond, respectively, to the lines and columns of this matrix.
  • Exceptions will be raised, for any given variable:
    • when specifying both covar and any combination of (SE, correl)
    • when specifying correl without SE

Arguments

  • data: a CSV-like string
  • sep: the CSV separator
  • validate_covar: whether to check that the overall covariance matrix is symmetric and positive semidefinite. Specifying validate_covar = False bypasses this computationally expensive step.

Example

import correldata
data  = """
Sample, Tacid,  D47,   SE,         correl,,,  D48, covar,,,          correl_D47_D48
   FOO,   90., .245, .005,      1, 0.5, 0.5, .145,  4e-4, 1e-4, 1e-4, 0.5,   0,   0
   BAR,   90., .246, .005,    0.5,   1, 0.5, .146,  1e-4, 4e-4, 1e-4,   0, 0.5,   0
   BAZ,   90., .247, .005,    0.5, 0.5,   1, .147,  1e-4, 1e-4, 4e-4,   0,   0, 0.5
"""[1:-1]
print(correldata.read_data(data))

# yields:
# 
# > {
#     'Sample': array(['FOO', 'BAR', 'BAZ'], dtype='<U3'),
#     'Tacid': array([90., 90., 90.]),
#     'D47': uarray([0.245+/-0.004999999999999998, 0.246+/-0.004999999999999997, 0.247+/-0.005], dtype=object),
#     'D48': uarray([0.145+/-0.019999999999999993, 0.146+/-0.019999999999999993, 0.147+/-0.019999999999999997], dtype=object)
#   }
def read_data_from_file(filename: str | os.PathLike, **kwargs):
def read_data_from_file(filename: str | _os.PathLike, **kwargs):
	'''
	Load correlated data stored in a CSV file.

	The whole file is read as text and handed over to `read_data()`.

	**Arguments**
	- `filename`: `str` or path to the file to read from
	- `kwargs`: forwarded unchanged to correldata.read_data()

	**Returns** whatever `read_data()` returns for the file's contents.
	'''
	with open(filename) as source:
		contents = source.read()
	return read_data(contents, **kwargs)

Read correlated data from a CSV file.

Arguments

  • filename: str or path to the file to read from
  • kwargs: passed to read_data()
def f2s( x: Any, f: Union[str, Callable, dict], k: Hashable = None, fb: Union[str, Callable] = 'z.6g') -> str:
300def f2s(
301	x: Any,
302	f: (str | Callable | dict),
303	k: Hashable = None,
304	fb: (str | Callable) = 'z.6g',
305) -> str:
306	'''
307	Format `x` according to format `f`
308	
309	* If `f` is a string, return `f'{x:{f}}'`
310	* If `f` is a callable, return `f(x)`
311	* If `f` is a dict and optional argument `k` is a hashable,
312	  return f2s(x, f[k]), otherwise return f2s(x, fb)
313	'''
314	if isinstance (x, str):
315		return x
316	if isinstance (f, str):
317		return f'{x:{f}}'
318	if isinstance (f, Callable):
319		return f(x)
320	if isinstance (f, dict):
321		if k in f:
322			return f2s(x, f[k])
323		if isinstance (fb, str):
324			return f'{x:{fb}}'
325		if isinstance (fb, Callable):
326			return fb(x)
327	raise TypeError(f'f2s() formatting argument f = {repr(f)} is neither a string nor a dict nor a callable.')

Format x according to format f

  • If f is a string, return f'{x:{f}}'
  • If f is a callable, return f(x)
  • If f is a dict and optional argument k is a hashable, return f2s(x, f[k]), otherwise return f2s(x, fb)
def data_string( data: dict, sep: str = ',', include_fields: list = None, exclude_fields: list = [], float_format: Union[str, dict, Callable] = 'z.6g', correl_format: Union[str, dict, Callable] = 'z.6f', default_float_format: Union[str, Callable] = 'z.6g', default_correl_format: Union[str, Callable] = 'z.6f', show_nv: bool = True, show_se: bool = True, show_correl: bool = True, show_mixed_correl: bool = True, align: str = '>', atol: float = 1e-12, rtol: float = 1e-12):
def data_string(
	data: dict,
	sep: str = ',',
	include_fields: list = None,
	exclude_fields: list = [],
	float_format: (str | dict | Callable) = 'z.6g',
	correl_format: (str | dict | Callable) = 'z.6f',
	default_float_format: (str | Callable) = 'z.6g',
	default_correl_format: (str | Callable) = 'z.6f',
	show_nv: bool = True,
	show_se: bool = True,
	show_correl: bool = True,
	show_mixed_correl: bool = True,
	align: str = '>',
	atol: float = 1e-12,
	rtol: float = 1e-12,
):
	'''
	Generate CSV-like string from correlated data

	**Arguments**
	- `data`: dict of arrays with strings, floats or correlated data
	- `sep`: the CSV separator
	- `include_fields`: subset of fields to write; if `None`, write all fields
	- `exclude_fields`: subset of fields to ignore (takes precedence over `include_fields`);
	  to exclude only the SE for field `foo`, include `SE_foo`; same goes for `correl_foo`
	  and for mixed correlations (`correl_foo_bar`)
	- `float_format`: formatting for float values. May be a string (ex: `'z.3f'`), a callable
	  (ex: `lambda x: '.2f' if x else '0'`), or a dictionary of strings and/or callables, with dict keys
	  corresponding to different fields (ex: `{'foo': '.2e', 'bar': (lambda x: str(x))}`).
	- `correl_format`: same as `float_format`, but applies to correlation matrix elements.
	  When it is a dict, the correlation matrix of field `foo` is looked up under key `foo`,
	  and a mixed correlation matrix is looked up under its own column label
	  (ex: `correl_foo_bar`).
	- `default_float_format`: only used when `float_format` is a dict; in that case, fields
	  missing from `float_format.keys()` will use `default_float_format` instead.
	- `default_correl_format`: same as `default_float_format`, but applies to `correl_format`
	- `show_nv`: show nominal values
	- `show_se`: show standard errors
	- `show_correl`: show correlations for any given field (ex: `correl_X`)
	- `show_mixed_correl`: show correlations between different fields (ex: `correl_X_Y`)
	- `align`: right-align (`>`), left-align (`<`), or don't align (empty string) CSV values
	- `atol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html)
	  when deciding whether a matrix is equal to the identity matrix or to the zero matrix
	- `rtol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html)
	  when deciding whether a matrix is equal to the identity matrix or to the zero matrix

	**Returns** the CSV-like text, one line per record, preceded by a header line.


	**Example**

	```py
	from correldata import _uc
	from correldata import _np
	from correldata import *

	X = uarray(_uc.correlated_values([1., 2., 3.], _np.eye(3)*0.09))
	Y = uarray(_uc.correlated_values([4., 5., 6.], _np.eye(3)*0.16))

	data = dict(X=X, Y=Y, Z=X+Y)

	print(data_string(data, float_format = 'z.1f', correl_format = 'z.1f'))

	# yields:
	# 
	#   X, SE_X,   Y, SE_Y,   Z, SE_Z, correl_X_Z,    ,    , correl_Y_Z,    ,    
	# 1.0,  0.3, 4.0,  0.4, 5.0,  0.5,        0.6, 0.0, 0.0,        0.8, 0.0, 0.0
	# 2.0,  0.3, 5.0,  0.4, 7.0,  0.5,        0.0, 0.6, 0.0,        0.0, 0.8, 0.0
	# 3.0,  0.3, 6.0,  0.4, 9.0,  0.5,        0.0, 0.0, 0.6,        0.0, 0.0, 0.8
	```
	'''
	# NOTE: the mutable default of exclude_fields is only ever read, never mutated.
	if include_fields is None:
		include_fields = list(data)

	# Each element of cols is one CSV column: [header, value, value, ...]
	cols, ufields = [], []
	for f in include_fields:
		if f in exclude_fields:
			continue
		if isinstance(data[f], uarray):
			ufields.append(f)
			N = data[f].size
			if show_nv:
				cols.append([f] + [f2s(_, float_format, f, default_float_format) for _ in data[f].n])
			if show_se and (f'SE_{f}' not in exclude_fields):
				cols.append([f'SE_{f}'] + [f2s(_, float_format, f, default_float_format) for _ in data[f].s])
			if show_correl and (f'correl_{f}' not in exclude_fields):
				CM = _uc.correlation_matrix(data[f])
				# An identity correlation matrix carries no information: omit it.
				if not _np.allclose(CM, _np.eye(N), atol = atol, rtol = rtol):
					for i in range(N):
						# Only the first column of the matrix carries the label.
						cols.append(
							['' if i else f'correl_{f}']
							+ [
								f2s(CM[i,j], correl_format, f, default_correl_format)
								for j in range(N)
							]
						)
		elif show_nv:
			cols.append([f] + [f2s(_, float_format, f, default_float_format) for _ in data[f]])

	if show_mixed_correl:
		for i, fi in enumerate(ufields):
			for fj in ufields[:i]:
				if f'correl_{fi}_{fj}' in exclude_fields or f'correl_{fj}_{fi}' in exclude_fields:
					continue
				label = f'correl_{fj}_{fi}'
				# Use the sizes of this very pair rather than a value left over
				# from the loop above.
				Ni, Nj = data[fi].size, data[fj].size
				# Off-diagonal block of the joint correlation matrix:
				# CM[k,l] is the correlation between fi[k] and fj[l].
				CM = _uc.correlation_matrix((*data[fi], *data[fj]))[:Ni, Ni:]
				# An all-zero block means the two fields are uncorrelated: omit it.
				if not _np.allclose(CM, _np.zeros((Ni, Nj)), atol = atol, rtol = rtol):
					for k in range(Ni):
						# Column k of the CSV block lists fj down the lines, so that
						# lines correspond to fj and columns to fi, as the label says.
						cols.append(
							['' if k else label]
							+ [
								# Key the format lookup on this block's own label
								# instead of a stale field name from the loop above.
								f2s(CM[k,l], correl_format, label, default_correl_format)
								for l in range(Nj)
							]
						)

	# Transpose columns into lines.
	lines = list(map(list, zip(*cols)))

	if align:
		# Pad every cell to the width of the widest cell in its column.
		lengths = [max([len(e) for e in l]) for l in cols]
		for l in lines:
			for k,ln in enumerate(lengths):
				l[k] = f'{l[k]:{align}{ln}s}'
		return '\n'.join([(sep+' ').join(l) for l in lines])

	return '\n'.join([sep.join(l) for l in lines])

Generate CSV-like string from correlated data

Arguments

  • data: dict of arrays with strings, floats or correlated data
  • sep: the CSV separator
  • include_fields: subset of fields to write; if None, write all fields
  • exclude_fields: subset of fields to ignore (takes precedence over include_fields); to exclude only the SE for field foo, include SE_foo; same goes for correl_foo
  • float_format: formatting for float values. May be a string (ex: 'z.3f'), a callable (ex: lambda x: '.2f' if x else '0'), or a dictionary of strings and/or callables, with dict keys corresponding to different fields (ex: {'foo': '.2e', 'bar': (lambda x: str(x))}).
  • correl_format: same as float_format, but applies to correlation matrix elements
  • default_float_format: only used when float_format is a dict; in that case, fields missing from float_format.keys() will use default_float_format instead.
  • default_correl_format: same as default_float_format, but applies to correl_format
  • show_nv: show nominal values
  • show_se: show standard errors
  • show_correl: show correlations for any given field (ex: correl_X)
  • show_mixed_correl: show correlations between different fields (ex: correl_X_Y)
  • align: right-align (>), left-align (<), or don't align (empty string) CSV values
  • atol: passed to numpy.allclose() when deciding whether a matrix is equal to the identity matrix or to the zero matrix
  • rtol: passed to numpy.allclose() when deciding whether a matrix is equal to the identity matrix or to the zero matrix

Example

from correldata import _uc
from correldata import _np
from correldata import *

X = uarray(_uc.correlated_values([1., 2., 3.], _np.eye(3)*0.09))
Y = uarray(_uc.correlated_values([4., 5., 6.], _np.eye(3)*0.16))

data = dict(X=X, Y=Y, Z=X+Y)

print(data_string(data, float_format = 'z.1f', correl_format = 'z.1f'))

# yields:
# 
#   X, SE_X,   Y, SE_Y,   Z, SE_Z, correl_X_Z,    ,    , correl_Y_Z,    ,    
# 1.0,  0.3, 4.0,  0.4, 5.0,  0.5,        0.6, 0.0, 0.0,        0.8, 0.0, 0.0
# 2.0,  0.3, 5.0,  0.4, 7.0,  0.5,        0.0, 0.6, 0.0,        0.0, 0.8, 0.0
# 3.0,  0.3, 6.0,  0.4, 9.0,  0.5,        0.0, 0.0, 0.6,        0.0, 0.0, 0.8
def save_data_to_file(data, filename, **kwargs):
def save_data_to_file(data, filename, **kwargs):
	'''
	Write correlated data to a CSV file.

	**Arguments**
	- `data`: dict of arrays with strings, floats or correlated data
	- `filename`: `str` or path to the file to write to
	- `kwargs`: passed to correldata.data_string()

	**Returns** the value returned by the file object's `write()` call.
	'''
	# Build the text before opening the file: opening in 'w' mode truncates it,
	# so a formatting error must not be allowed to wipe out an existing file.
	txt = data_string(data, **kwargs)
	with open(filename, 'w') as fid:
		return fid.write(txt)

Write correlated data to a CSV file.

Arguments

  • data: dict of arrays with strings, floats or correlated data
  • filename: str or path to the file to write to
  • kwargs: passed to data_string()
def as_uarray( X: uarray | numpy.ndarray | uncertainties.core.AffineScalarFunc | float, Xse: numpy.ndarray | float | None = None, CM: numpy.ndarray | None = None) -> uarray:
def as_uarray(
	X: (uarray | _np.ndarray | _uc.UFloat | float),
	Xse: (_np.ndarray | float | None) = None,
	CM: (_np.ndarray | None) = None,
) -> uarray:
	"""
	Convert the input to an uarray. If the input is a single float or
	[UFloat](https://pythonhosted.org/uncertainties/tech_guide.html),
	yields an uarray of size 1.

	**Arguments**
	* `X`: nominal value(s)
	* `CM`: covariance matrix of X; not needed if elements of X are of type
		[`UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html)
		or if `Xse` is specified.
	* `Xse`: SE of X; not needed if elements of X are of type
		[`UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html)
		or if `CM` is specified. When `X` is an array, a single value
		is applied to all elements.

	If neither `CM` nor `Xse` are specified, assume SE = 0.

	`Xse` and `CM` are ignored when `X` already carries its own uncertainties
	(uarray, array of UFloat values, or single UFloat).

	**Raises**
	* `ValueError` if both `Xse` and `CM` are specified for float input.
	* `TypeError` if `X` is of an unsupported type.
	"""

	if isinstance(X, uarray):
		return X

	if isinstance(X, _np.ndarray):
		if _np.all([isinstance(_, _uc.UFloat) for _ in X]):
			return uarray(X)

		X = X.astype(float)

		if CM is not None:
			if Xse is not None: raise ValueError('Too much information: Xse is redundant because CM is already specified.')
		else:
			if Xse is None:
				Xse = X * 0
			# Accept a scalar SE as well as one SE per element.
			Xse = _np.broadcast_to(_np.asarray(Xse, dtype = float), X.shape)
			# Uncorrelated elements: diagonal covariance matrix of variances.
			CM = _np.diag(Xse)**2

		return uarray(_uc.correlated_values(X, CM))

	if isinstance(X, _uc.UFloat):
		return uarray([X])

	if isinstance(X, (float, int)):

		if CM is not None:
			if Xse is not None: raise ValueError('Too much information: Xse is redundant because CM is already specified.')
			# 1x1 covariance matrix: SE is the square root of the variance.
			Xse = CM[0,0]**0.5
		elif Xse is None:
			# Documented default: no uncertainty information means SE = 0.
			Xse = 0.

		return uarray([_uc.ufloat(X, Xse)])

	# Fail loudly rather than silently returning None.
	raise TypeError(f'as_uarray() cannot convert X of type {type(X)} to an uarray.')

Convert the input to an uarray. If the input is a single float or UFloat, yields an uarray of size 1.

Arguments

  • X: nominal value(s)
  • CM: covariance matrix of X; not needed if elements of X are of type UFloat or if Xse is specified.
  • Xse: SE of X; not needed if elements of X are of type UFloat or if CM is specified.

If neither CM nor Xse are specified, assume SE = 0.

def as_pair_of_uarrays( X: uarray | numpy.ndarray | uncertainties.core.AffineScalarFunc | float, Y: uarray | numpy.ndarray | uncertainties.core.AffineScalarFunc | float, Xse: numpy.ndarray | float | None = None, Yse: numpy.ndarray | float | None = None, CM: numpy.ndarray | None = None) -> uarray:
def as_pair_of_uarrays(
	X: (uarray | _np.ndarray | _uc.UFloat | float),
	Y: (uarray | _np.ndarray | _uc.UFloat | float),
	Xse: (_np.ndarray | float | None) = None,
	Yse: (_np.ndarray | float | None) = None,
	CM: (_np.ndarray | None) = None,
) -> tuple[uarray, uarray]:
	"""
	Convert the input to a pair of uarrays.

	**Arguments**
	* `X`: x values
	* `Y`: y values
	* `CM`: covariance matrix of `(*X, *Y)`; not needed if elements of X and Y are of type
		[`uncertainties.UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html)
		or if (`Xse`, `Yse`) are specified.
	* `Xse`, `Yse`: SE of X and Y; not needed if elements of X and Y are of type
		[`uncertainties.UFloat`](https://pythonhosted.org/uncertainties/tech_guide.html)
		or if `CM` is specified. When X and Y are arrays, a single value
		is applied to all elements of the corresponding array.

	When X and Y are arrays of floats and neither `CM`, `Xse` nor `Yse` are
	specified, assume SE = 0. When X and Y are single floats, either `CM` or
	both `Xse` and `Yse` must be specified.

	`Xse`, `Yse` and `CM` are ignored when X and Y already carry their own
	uncertainties (uarrays, arrays of UFloat values, or single UFloats).

	**Returns** a pair `(X, Y)` of uarrays.

	**Raises**
	* `TypeError` if X and Y are not of the same type, or are of an unsupported type.
	* `ValueError` if `Xse` or `Yse` is specified together with `CM`, or if
		X and Y are single floats and neither `CM` nor (`Xse`, `Yse`) are specified.
	"""

	if type(X) is not type(Y):
		raise TypeError(f'X ({type(X)}) and Y ({type(Y)}) must have the same type.')

	if isinstance(X, uarray):
		return (X, Y)

	if isinstance(X, _np.ndarray):
		if (
			_np.all([isinstance(_, _uc.UFloat) for _ in X])
			and
			_np.all([isinstance(_, _uc.UFloat) for _ in Y])
		):
			return uarray(X), uarray(Y)

		X = X.astype(float)
		Y = Y.astype(float)

		if CM is not None:
			if Xse is not None: raise ValueError('Too much information: Xse is redundant because CM is already specified.')
			if Yse is not None: raise ValueError('Too much information: Yse is redundant because CM is already specified.')

			# Joint covariance matrix: build all values at once, then split.
			XY = uarray(_uc.correlated_values([*X, *Y], CM))
			return XY[:X.size], XY[X.size:]

		if Xse is None:
			Xse = X * 0
		if Yse is None:
			Yse = Y * 0

		# Accept a scalar SE as well as one SE per element.
		Xse = _np.broadcast_to(_np.asarray(Xse, dtype = float), X.shape)
		Yse = _np.broadcast_to(_np.asarray(Yse, dtype = float), Y.shape)

		# No covariance information: X and Y are built independently,
		# each with a diagonal covariance matrix of variances.
		CMx = _np.diag(Xse)**2
		CMy = _np.diag(Yse)**2
		return uarray(_uc.correlated_values(X, CMx)), uarray(_uc.correlated_values(Y, CMy))

	if isinstance(X, _uc.UFloat):
		return uarray([X]), uarray([Y])

	if isinstance(X, (float, int)):

		if CM is not None:
			if Xse is not None: raise ValueError('Too much information: Xse is redundant because CM is already specified.')
			if Yse is not None: raise ValueError('Too much information: Yse is redundant because CM is already specified.')

		if CM is None:
			if Xse is None: raise ValueError('Not enough information: specify either CM or Xse.')
			if Yse is None: raise ValueError('Not enough information: specify either CM or Yse.')

			CM = _np.diag([Xse, Yse])**2

		XY = uarray(_uc.correlated_values([X, Y], CM))
		return XY[:1], XY[1:]

	# Fail loudly rather than silently returning None.
	raise TypeError(f'as_pair_of_uarrays() cannot convert X and Y of type {type(X)} to uarrays.')

Convert the input to a pair of uarrays.

Arguments

  • X: x values
  • Y: y values
  • CM: covariance matrix of (*X, *Y); not needed if elements of X and Y are of type uncertainties.UFloat or if (Xse, Yse) are specified.
  • Xse, Yse: SE of X and Y; not needed if elements of X and Y are of type uncertainties.UFloat or if CM is specified.

If neither CM, Xse nor Yse are specified, assume SE = 0.