correldata

Read/write vectors of correlated data from/to a csv file.

These data are stored in a dictionary, whose values are numpy arrays with elements which may be strings, floats, or floats with associated uncertainties as defined in the uncertainties library.

  1"""
  2Read/write vectors of correlated data from/to a csv file.
  3
  4These data are stored in a dictionary, whose values are numpy arrays
  5with elements which may be strings, floats, or floats with associated uncertainties
  6as defined in the [uncertainties](https://pypi.org/project/uncertainties) library.
  7"""
  8
  9
 10__author__    = 'Mathieu Daëron'
 11__contact__   = 'mathieu@daeron.fr'
 12__copyright__ = 'Copyright (c) 2024 Mathieu Daëron'
 13__license__   = 'MIT License - https://opensource.org/licenses/MIT'
 14__date__      = '2024-10-08'
 15__version__   = '1.0.2'
 16
 17
 18import os as _os
 19import numpy as _np
 20import uncertainties as _uc
 21
 22
 23class _correl_array(_np.ndarray):
 24
 25    def __new__(cls, a):
 26        obj = _np.asarray(a).view(cls)
 27        return obj
 28    
 29    n = property(fget = _np.vectorize(lambda x : x.n))
 30    s = property(fget = _np.vectorize(lambda x : x.s))
 31
 32
 33def is_symmetric_positive_semidefinite(M: _np.ndarray) -> bool:
 34	'''
 35	Test whether 2-D array `M` is symmetric and positive semidefinite.
 36	'''
 37	return _np.all(_np.linalg.eigvals(M) >= 0) and _np.all(M - M.T == 0)
 38
 39
 40def smart_type(x: str):
 41	'''
 42	Tries to convert string `x` to a float if it includes a decimal point, or
 43	to an integer if it does not. If both attempts fail, return the original
 44	string unchanged.
 45	'''
 46	try:
 47		y = float(x)
 48	except ValueError:
 49		return x
 50	if y % 1 == 0 and '.' not in x:
 51		return int(y)
 52	return y
 53
 54
 55def read_data(data: str, sep: str = ',', validate_covar: bool = True):
 56	'''
 57	Read correlated data from a CSV-like string.
 58	
 59	Column names are interpreted in the following way:
 60	* In most cases, each columns is converted to a dict value, with the corresponding
 61	dict key being the column's label.
 62	* Columns whose label starts with `SE` are interpreted as specifying the standard
 63	error for the latest preceding data column.
 64	* Columns whose label starts with `correl` are interpreted as specifying the
 65	correlation matrix for the latest preceding data column. In that case, column labels
 66	are ignored for the rest of the columns belonging to this matrix.
 67	* Columns whose label starts with `covar` are interpreted as specifying the
 68	covariance matrix for the latest preceding data column. In that case, column labels
 69	are ignored for the rest of the columns belonging to this matrix.
 70	* `SE`, `correl`, and `covar` may be specified for any arbitrary variable other than
 71	the latest preceding data column, by adding an underscore followed by the variable's
 72	label (ex: `SE_foo`, `correl_bar`, `covar_baz`).
 73	* `correl`, and `covar` may also be specified for any pair of variable, by adding an
 74	underscore followed by the two variable labels, joined by a second underscore
 75	(ex: `correl_foo_bar`, `covar_X_Y`). The elements of the first and second variables
 76	correspond, respectively, to the lines and columns of this matrix.
 77	* Exceptions will be raised, for any given variable:
 78		- when specifying both `covar` and any combination of (`SE`, `correl`)
 79		- when specifying `correl` without `SE`
 80
 81	**Arguments**
 82	- `data`: a CSV-like string
 83	- `sep`: the CSV separator
 84	- `validate_covar`: whether to check that the overall covariance matrix
 85	is symmetric and positive semidefinite. Specifying `validate_covar = False`
 86	bypasses this computationally expensive step.
 87	
 88	**Example**
 89	```py
 90	import correldata
 91	data  = """
 92	Sample, Tacid,  D47,   SE,         correl,,,  D48, covar,,,          correl_D47_D48
 93	   FOO,   90., .245, .005,      1, 0.5, 0.5, .145,  4e-4, 1e-4, 1e-4, 0.5,   0,   0
 94	   BAR,   90., .246, .005,    0.5,   1, 0.5, .146,  1e-4, 4e-4, 1e-4,   0, 0.5,   0
 95	   BAZ,   90., .247, .005,    0.5, 0.5,   1, .147,  1e-4, 1e-4, 4e-4,   0,   0, 0.5
 96	"""[1:-1]
 97	print(correldata.read_data(data))
 98	
 99	# yields:
100	# 
101	# > {
102	#     'Sample': array(['FOO', 'BAR', 'BAZ'], dtype='<U3'),
103	#     'Tacid': array([90., 90., 90.]),
104	#     'D47': _correl_array([0.245+/-0.004999999999999998, 0.246+/-0.004999999999999997, 0.247+/-0.005], dtype=object),
105	#     'D48': _correl_array([0.145+/-0.019999999999999993, 0.146+/-0.019999999999999993, 0.147+/-0.019999999999999997], dtype=object)
106	#   }
107	```
108	'''
109
110	data = [[smart_type(e.strip()) for e in l.split(sep)] for l in data.split('\n')]
111	N = len(data) - 1
112
113	values, se, correl, covar = {}, {}, {}, {}
114	j = 0
115	while j < len(data[0]):
116		field = data[0][j]
117		if not (
118			field.startswith('SE_')
119			or field.startswith('correl_')
120			or field.startswith('covar_')
121			or field == 'SE'
122			or field == 'correl'
123			or field == 'covar'
124			or len(field) == 0
125		):
126			values[field] = _np.array([l[j] for l in data[1:]])
127			j += 1
128			oldfield = field
129		elif field.startswith('SE_'):
130			se[field[3:]] = _np.array([l[j] for l in data[1:]])
131			j += 1
132		elif field == 'SE':
133			se[oldfield] = _np.array([l[j] for l in data[1:]])
134			j += 1
135		elif field.startswith('correl_'):
136			correl[field[7:]] = _np.array([l[j:j+N] for l in data[1:]])
137			j += N
138		elif field == 'correl':
139			correl[oldfield] = _np.array([l[j:j+N] for l in data[1:]])
140			j += N
141		elif field.startswith('covar_'):
142			covar[field[6:]] = _np.array([l[j:j+N] for l in data[1:]])
143			j += N
144		elif field == 'covar':
145			covar[oldfield] = _np.array([l[j:j+N] for l in data[1:]])
146			j += N
147
148	nakedvalues = {}
149	for k in [_ for _ in values]:
150		if (
151			k not in se
152			and k not in correl
153			and k not in covar
154		):
155			nakedvalues[k] = values.pop(k)
156
157	for x in values:
158		if x in covar:
159			if x in se:
160				raise KeyError(f'Too much information: both SE and covar are specified for variable "{x}".')
161			if x in correl:
162				raise KeyError(f'Too much information: both correl and covar are specified for variable "{x}".')
163		if x in correl:
164			if x not in se:
165				raise KeyError(f'Not enough information: correl is specified without SE for variable "{x}".')
166
167	for x in correl:
168		if x in values:
169			covar[x] = _np.diag(se[x]) @ correl[x] @ _np.diag(se[x])
170		else:
171			for x1 in values:
172				for x2 in values:
173					if x == f'{x1}_{x2}':
174						if x1 in se:
175							se1 = se[x1]
176						else:
177							if x1 in covar:
178								se1 = _np.diag(covar[x1])**0.5
179							else:
180								raise KeyError(f'Not enough information: correl_{x} is specified without SE for variable "{x1}".')
181						if x2 in se:
182							se2 = se[x2]
183						else:
184							if x2 in covar:
185								se2 = _np.diag(covar[x2])**0.5
186							else:
187								raise KeyError(f'Not enough information: correl_{x} is specified without SE for variable "{x1}".')
188
189						covar[x] = _np.diag(se1) @ correl[x] @ _np.diag(se2)
190
191	for x in se:
192		if x in values and x not in correl:
193			covar[x] = _np.diag(se[x]**2)
194
195	for k in [_ for _ in covar]:
196		if k not in values:
197			for j1 in values:
198				for j2 in values:
199					if k == f'{j1}_{j2}':
200						covar[f'{j2}_{j1}'] = covar[f'{j1}_{j2}'].T
201
202	X = _np.array([_ for k in values for _ in values[k]])
203	CM = _np.zeros((X.size, X.size))
204	for i, vi in enumerate(values):
205		for j, vj in enumerate(values):
206			if vi == vj:
207				if vi in covar:
208					CM[N*i:N*i+N,N*j:N*j+N] = covar[vi]
209			else:
210				if f'{vi}_{vj}' in covar:
211					CM[N*i:N*i+N,N*j:N*j+N] = covar[f'{vi}_{vj}']
212
213	if validate_covar and not is_symmetric_positive_semidefinite(CM):
214		raise _np.linalg.LinAlgError('The complete covariance matrix is not symmetric positive-semidefinite.')
215
216	corvalues = _correl_array(_uc.correlated_values(X, CM))
217
218	allvalues = nakedvalues
219
220	for i, x in enumerate(values):
221		allvalues[x] = corvalues[i*N:i*N+N]
222
223	return allvalues
224
225
def read_data_from_file(filename: str | _os.PathLike, **kwargs):
	'''
	Read correlated data from a CSV file.

	**Arguments**
	- `filename`: `str` or path to the file to read from
	- `kwargs`: passed to correldata.read_data()
	'''
	with open(filename) as fid:
		contents = fid.read()
	return read_data(contents, **kwargs)
236
def data_string(
	data: dict,
	sep: str = ',',
	float_fmt: str = 'zg',
	max_correl_precision: int = 9,
	fields: list | None = None,
	align: str = '>',
	atol: float = 1e-12,
	rtol: float = 1e-12,
) -> str:
	'''
	Generate CSV-like string from correlated data

	**Arguments**
	- `data`: dict of arrays with strings, floats or correlated data
	- `sep`: the CSV separator
	- `float_fmt`: formatting string for float values
	- `max_correl_precision`: number of post-decimal digits for correlation values
	- `fields`: subset of fields to write; if `None`, write all fields
	- `align`: right-align (`>`), left-align (`<`), or don't align (empty string) CSV values
	- `atol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html),
	- `rtol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html),
	'''
	if fields is None:
		fields = [_ for _ in data]
	# Build one list per output column; transpose into rows at the end.
	cols, ufields = [], []
	for f in fields:
		if isinstance(data[f], _correl_array):
			ufields.append(f)
			# NOTE(review): N is reassigned per correlated field and reused in the
			# cross-correlation loop below — assumes all _correl_array fields
			# have the same size; confirm against callers.
			N = data[f].size
			# The 'z' format option coerces -0.0 to 0.0 (PEP 682, Python 3.11+).
			cols.append([f] + [f'{_.n:{float_fmt}}' for _ in data[f]])
			cols.append([f'SE_{f}'] + [f'{_.s:{float_fmt}}' for _ in data[f]])
			CM = _uc.correlation_matrix(data[f])
			# Only write the correlation block when it differs from the identity.
			if not _np.allclose(CM, _np.eye(N), atol = atol, rtol = rtol):
				for i in range(N):
					# Header label only on the first matrix column; near-zero entries
					# are clamped to 0, and trailing zeros stripped after formatting.
					cols.append(['' if i else f'correl_{f}'] + [f'{CM[i,j] if abs(CM[i,j]) > atol else 0:z.{max_correl_precision}f}'.rstrip('0') for j in range(N)])

		else:
			# Plain (uncorrelated) column: stringify as-is.
			cols.append([f] + [str(_) for _ in data[f]])

	# Cross-correlation blocks between each ordered pair of correlated fields.
	for i in range(len(ufields)):
		for j in range(i):
			# Upper-right N x N block of the joint correlation matrix.
			CM = _uc.correlation_matrix((*data[ufields[i]], *data[ufields[j]]))[:N,N:]
			# NOTE(review): comparison against the identity (not zeros) means an
			# all-zero cross-correlation block would still be written — confirm intended.
			if not _np.allclose(CM, _np.eye(N), atol = atol, rtol = rtol):
				for k in range(N):
					cols.append(['' if k else f'correl_{ufields[i]}_{ufields[j]}'] + [f'{CM[k,l] if abs(CM[k,l]) > atol else 0:z.{max_correl_precision}f}'.rstrip('0') for l in range(N)])

	# Transpose columns into rows.
	lines = list(map(list, zip(*cols)))

	if align:
		# Pad every cell to its column's max width, then join with "sep + space".
		lengths = [max([len(e) for e in l]) for l in cols]
		for l in lines:
			for k,ln in enumerate(lengths):
				l[k] = f'{l[k]:{align}{ln}s}'
		return '\n'.join([(sep+' ').join(l) for l in lines])

	return '\n'.join([sep.join(l) for l in lines])
294
295
296
def save_data_to_file(data, filename, **kwargs):
	'''
	Write correlated data to a CSV file.

	**Arguments**
	- `data`: dict of arrays with strings, floats or correlated data
	- `filename`: `str` or path to the file to write to
	- `kwargs`: passed to correldata.data_string()
	'''
	with open(filename, 'w') as fid:
		written = fid.write(data_string(data, **kwargs))
	return written
def is_symmetric_positive_semidefinite(M: numpy.ndarray) -> bool:
34def is_symmetric_positive_semidefinite(M: _np.ndarray) -> bool:
35	'''
36	Test whether 2-D array `M` is symmetric and positive semidefinite.
37	'''
38	return _np.all(_np.linalg.eigvals(M) >= 0) and _np.all(M - M.T == 0)

Test whether 2-D array M is symmetric and positive semidefinite.

def smart_type(x: str):
41def smart_type(x: str):
42	'''
43	Tries to convert string `x` to a float if it includes a decimal point, or
44	to an integer if it does not. If both attempts fail, return the original
45	string unchanged.
46	'''
47	try:
48		y = float(x)
49	except ValueError:
50		return x
51	if y % 1 == 0 and '.' not in x:
52		return int(y)
53	return y

Tries to convert string x to a float if it includes a decimal point, or to an integer if it does not. If both attempts fail, return the original string unchanged.

def read_data(data: str, sep: str = ',', validate_covar: bool = True):
 56def read_data(data: str, sep: str = ',', validate_covar: bool = True):
 57	'''
 58	Read correlated data from a CSV-like string.
 59	
 60	Column names are interpreted in the following way:
 61	* In most cases, each columns is converted to a dict value, with the corresponding
 62	dict key being the column's label.
 63	* Columns whose label starts with `SE` are interpreted as specifying the standard
 64	error for the latest preceding data column.
 65	* Columns whose label starts with `correl` are interpreted as specifying the
 66	correlation matrix for the latest preceding data column. In that case, column labels
 67	are ignored for the rest of the columns belonging to this matrix.
 68	* Columns whose label starts with `covar` are interpreted as specifying the
 69	covariance matrix for the latest preceding data column. In that case, column labels
 70	are ignored for the rest of the columns belonging to this matrix.
 71	* `SE`, `correl`, and `covar` may be specified for any arbitrary variable other than
 72	the latest preceding data column, by adding an underscore followed by the variable's
 73	label (ex: `SE_foo`, `correl_bar`, `covar_baz`).
 74	* `correl`, and `covar` may also be specified for any pair of variable, by adding an
 75	underscore followed by the two variable labels, joined by a second underscore
 76	(ex: `correl_foo_bar`, `covar_X_Y`). The elements of the first and second variables
 77	correspond, respectively, to the lines and columns of this matrix.
 78	* Exceptions will be raised, for any given variable:
 79		- when specifying both `covar` and any combination of (`SE`, `correl`)
 80		- when specifying `correl` without `SE`
 81
 82	**Arguments**
 83	- `data`: a CSV-like string
 84	- `sep`: the CSV separator
 85	- `validate_covar`: whether to check that the overall covariance matrix
 86	is symmetric and positive semidefinite. Specifying `validate_covar = False`
 87	bypasses this computationally expensive step.
 88	
 89	**Example**
 90	```py
 91	import correldata
 92	data  = """
 93	Sample, Tacid,  D47,   SE,         correl,,,  D48, covar,,,          correl_D47_D48
 94	   FOO,   90., .245, .005,      1, 0.5, 0.5, .145,  4e-4, 1e-4, 1e-4, 0.5,   0,   0
 95	   BAR,   90., .246, .005,    0.5,   1, 0.5, .146,  1e-4, 4e-4, 1e-4,   0, 0.5,   0
 96	   BAZ,   90., .247, .005,    0.5, 0.5,   1, .147,  1e-4, 1e-4, 4e-4,   0,   0, 0.5
 97	"""[1:-1]
 98	print(correldata.read_data(data))
 99	
100	# yields:
101	# 
102	# > {
103	#     'Sample': array(['FOO', 'BAR', 'BAZ'], dtype='<U3'),
104	#     'Tacid': array([90., 90., 90.]),
105	#     'D47': _correl_array([0.245+/-0.004999999999999998, 0.246+/-0.004999999999999997, 0.247+/-0.005], dtype=object),
106	#     'D48': _correl_array([0.145+/-0.019999999999999993, 0.146+/-0.019999999999999993, 0.147+/-0.019999999999999997], dtype=object)
107	#   }
108	```
109	'''
110
111	data = [[smart_type(e.strip()) for e in l.split(sep)] for l in data.split('\n')]
112	N = len(data) - 1
113
114	values, se, correl, covar = {}, {}, {}, {}
115	j = 0
116	while j < len(data[0]):
117		field = data[0][j]
118		if not (
119			field.startswith('SE_')
120			or field.startswith('correl_')
121			or field.startswith('covar_')
122			or field == 'SE'
123			or field == 'correl'
124			or field == 'covar'
125			or len(field) == 0
126		):
127			values[field] = _np.array([l[j] for l in data[1:]])
128			j += 1
129			oldfield = field
130		elif field.startswith('SE_'):
131			se[field[3:]] = _np.array([l[j] for l in data[1:]])
132			j += 1
133		elif field == 'SE':
134			se[oldfield] = _np.array([l[j] for l in data[1:]])
135			j += 1
136		elif field.startswith('correl_'):
137			correl[field[7:]] = _np.array([l[j:j+N] for l in data[1:]])
138			j += N
139		elif field == 'correl':
140			correl[oldfield] = _np.array([l[j:j+N] for l in data[1:]])
141			j += N
142		elif field.startswith('covar_'):
143			covar[field[6:]] = _np.array([l[j:j+N] for l in data[1:]])
144			j += N
145		elif field == 'covar':
146			covar[oldfield] = _np.array([l[j:j+N] for l in data[1:]])
147			j += N
148
149	nakedvalues = {}
150	for k in [_ for _ in values]:
151		if (
152			k not in se
153			and k not in correl
154			and k not in covar
155		):
156			nakedvalues[k] = values.pop(k)
157
158	for x in values:
159		if x in covar:
160			if x in se:
161				raise KeyError(f'Too much information: both SE and covar are specified for variable "{x}".')
162			if x in correl:
163				raise KeyError(f'Too much information: both correl and covar are specified for variable "{x}".')
164		if x in correl:
165			if x not in se:
166				raise KeyError(f'Not enough information: correl is specified without SE for variable "{x}".')
167
168	for x in correl:
169		if x in values:
170			covar[x] = _np.diag(se[x]) @ correl[x] @ _np.diag(se[x])
171		else:
172			for x1 in values:
173				for x2 in values:
174					if x == f'{x1}_{x2}':
175						if x1 in se:
176							se1 = se[x1]
177						else:
178							if x1 in covar:
179								se1 = _np.diag(covar[x1])**0.5
180							else:
181								raise KeyError(f'Not enough information: correl_{x} is specified without SE for variable "{x1}".')
182						if x2 in se:
183							se2 = se[x2]
184						else:
185							if x2 in covar:
186								se2 = _np.diag(covar[x2])**0.5
187							else:
188								raise KeyError(f'Not enough information: correl_{x} is specified without SE for variable "{x1}".')
189
190						covar[x] = _np.diag(se1) @ correl[x] @ _np.diag(se2)
191
192	for x in se:
193		if x in values and x not in correl:
194			covar[x] = _np.diag(se[x]**2)
195
196	for k in [_ for _ in covar]:
197		if k not in values:
198			for j1 in values:
199				for j2 in values:
200					if k == f'{j1}_{j2}':
201						covar[f'{j2}_{j1}'] = covar[f'{j1}_{j2}'].T
202
203	X = _np.array([_ for k in values for _ in values[k]])
204	CM = _np.zeros((X.size, X.size))
205	for i, vi in enumerate(values):
206		for j, vj in enumerate(values):
207			if vi == vj:
208				if vi in covar:
209					CM[N*i:N*i+N,N*j:N*j+N] = covar[vi]
210			else:
211				if f'{vi}_{vj}' in covar:
212					CM[N*i:N*i+N,N*j:N*j+N] = covar[f'{vi}_{vj}']
213
214	if validate_covar and not is_symmetric_positive_semidefinite(CM):
215		raise _np.linalg.LinAlgError('The complete covariance matrix is not symmetric positive-semidefinite.')
216
217	corvalues = _correl_array(_uc.correlated_values(X, CM))
218
219	allvalues = nakedvalues
220
221	for i, x in enumerate(values):
222		allvalues[x] = corvalues[i*N:i*N+N]
223
224	return allvalues

Read correlated data from a CSV-like string.

Column names are interpreted in the following way:

  • In most cases, each column is converted to a dict value, with the corresponding dict key being the column's label.
  • Columns whose label starts with SE are interpreted as specifying the standard error for the latest preceding data column.
  • Columns whose label starts with correl are interpreted as specifying the correlation matrix for the latest preceding data column. In that case, column labels are ignored for the rest of the columns belonging to this matrix.
  • Columns whose label starts with covar are interpreted as specifying the covariance matrix for the latest preceding data column. In that case, column labels are ignored for the rest of the columns belonging to this matrix.
  • SE, correl, and covar may be specified for any arbitrary variable other than the latest preceding data column, by adding an underscore followed by the variable's label (ex: SE_foo, correl_bar, covar_baz).
  • correl and covar may also be specified for any pair of variables, by adding an underscore followed by the two variable labels, joined by a second underscore (ex: correl_foo_bar, covar_X_Y). The elements of the first and second variables correspond, respectively, to the lines and columns of this matrix.
  • Exceptions will be raised, for any given variable:
    • when specifying both covar and any combination of (SE, correl)
    • when specifying correl without SE

Arguments

  • data: a CSV-like string
  • sep: the CSV separator
  • validate_covar: whether to check that the overall covariance matrix is symmetric and positive semidefinite. Specifying validate_covar = False bypasses this computationally expensive step.

Example

import correldata
data  = """
Sample, Tacid,  D47,   SE,         correl,,,  D48, covar,,,          correl_D47_D48
   FOO,   90., .245, .005,      1, 0.5, 0.5, .145,  4e-4, 1e-4, 1e-4, 0.5,   0,   0
   BAR,   90., .246, .005,    0.5,   1, 0.5, .146,  1e-4, 4e-4, 1e-4,   0, 0.5,   0
   BAZ,   90., .247, .005,    0.5, 0.5,   1, .147,  1e-4, 1e-4, 4e-4,   0,   0, 0.5
"""[1:-1]
print(read_data(data))

# yields:
# 
# > {
#     'Sample': array(['FOO', 'BAR', 'BAZ'], dtype='<U3'),
#     'Tacid': array([90., 90., 90.]),
#     'D47': _correl_array([0.245+/-0.004999999999999998, 0.246+/-0.004999999999999997, 0.247+/-0.005], dtype=object),
#     'D48': _correl_array([0.145+/-0.019999999999999993, 0.146+/-0.019999999999999993, 0.147+/-0.019999999999999997], dtype=object)
#   }
def read_data_from_file(filename: str | os.PathLike, **kwargs):
227def read_data_from_file(filename: str | _os.PathLike, **kwargs):
228	'''
229	Read correlated data from a CSV file.
230
231	**Arguments**
232	- `filename`: `str` or path to the file to read from
233	- `kwargs`: passed to correldata.read_data()
234	'''
235	with open(filename) as fid:
236		return read_data(fid.read(), **kwargs)

Read correlated data from a CSV file.

Arguments

  • filename: str or path to the file to read from
  • kwargs: passed to read_data()
def data_string( data: dict, sep: str = ',', float_fmt: str = 'zg', max_correl_precision: int = 9, fields: list = None, align: str = '>', atol: float = 1e-12, rtol: float = 1e-12):
238def data_string(
239	data: dict,
240	sep: str = ',',
241	float_fmt: str = 'zg',
242	max_correl_precision: int = 9,
243	fields: list = None,
244	align: str = '>',
245	atol: float = 1e-12,
246	rtol: float = 1e-12,
247):
248	'''
249	Generate CSV-like string from correlated data
250
251	**Arguments**
252	- `data`: dict of arrays with strings, floats or correlated data
253	- `sep`: the CSV separator
254	- `float_fmt`: formatting string for float values
255	- `max_correl_precision`: number of post-decimal digits for correlation values
256	- `fields`: subset of fields to write; if `None`, write all fields
257	- `align`: right-align (`>`), left-align (`<`), or don't align (empty string) CSV values
258	- `atol`: passed to _np.allclose(),
259	- `rtol`: passed to [numpy.allclose()](https://numpy.org/doc/stable/reference/generated/numpy.allclose.html),
260	'''
261	if fields is None:
262		fields = [_ for _ in data]
263	cols, ufields = [], []
264	for f in fields:
265		if isinstance(data[f], _correl_array):
266			ufields.append(f)
267			N = data[f].size
268			cols.append([f] + [f'{_.n:{float_fmt}}' for _ in data[f]])
269			cols.append([f'SE_{f}'] + [f'{_.s:{float_fmt}}' for _ in data[f]])
270			CM = _uc.correlation_matrix(data[f])
271			if not _np.allclose(CM, _np.eye(N), atol = atol, rtol = rtol):
272				for i in range(N):
273					cols.append(['' if i else f'correl_{f}'] + [f'{CM[i,j] if abs(CM[i,j]) > atol else 0:z.{max_correl_precision}f}'.rstrip('0') for j in range(N)])
274
275		else:
276			cols.append([f] + [str(_) for _ in data[f]])
277
278	for i in range(len(ufields)):
279		for j in range(i):
280			CM = _uc.correlation_matrix((*data[ufields[i]], *data[ufields[j]]))[:N,N:]
281			if not _np.allclose(CM, _np.eye(N), atol = atol, rtol = rtol):
282				for k in range(N):
283					cols.append(['' if k else f'correl_{ufields[i]}_{ufields[j]}'] + [f'{CM[k,l] if abs(CM[k,l]) > atol else 0:z.{max_correl_precision}f}'.rstrip('0') for l in range(N)])
284
285	lines = list(map(list, zip(*cols)))
286
287	if align:
288		lengths = [max([len(e) for e in l]) for l in cols]
289		for l in lines:
290			for k,ln in enumerate(lengths):
291				l[k] = f'{l[k]:{align}{ln}s}'
292		return '\n'.join([(sep+' ').join(l) for l in lines])
293
294	return '\n'.join([sep.join(l) for l in lines])

Generate CSV-like string from correlated data

Arguments

  • data: dict of arrays with strings, floats or correlated data
  • sep: the CSV separator
  • float_fmt: formatting string for float values
  • max_correl_precision: number of post-decimal digits for correlation values
  • fields: subset of fields to write; if None, write all fields
  • align: right-align (>), left-align (<), or don't align (empty string) CSV values
  • atol: passed to numpy.allclose()
  • rtol: passed to numpy.allclose()
def save_data_to_file(data, filename, **kwargs):
298def save_data_to_file(data, filename, **kwargs):
299	'''
300	Write correlated data to a CSV file.
301
302	**Arguments**
303	- `data`: dict of arrays with strings, floats or correlated data
304	- `filename`: `str` or path to the file to write to
305	- `kwargs`: passed to correldata.data_string()
306	'''
307	with open(filename, 'w') as fid:
308		return fid.write(data_string(data, **kwargs))

Write correlated data to a CSV file.

Arguments

  • data: dict of arrays with strings, floats or correlated data
  • filename: str or path to the file to write to
  • kwargs: passed to data_string()