Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/numpy/core/records.py : 13%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Record Arrays
3=============
4Record arrays expose the fields of structured arrays as properties.
6Most commonly, ndarrays contain elements of a single type, e.g. floats,
7integers, bools etc. However, it is possible for elements to be combinations
8of these using structured types, such as::
10 >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', np.int64), ('y', np.float64)])
11 >>> a
12 array([(1, 2.), (1, 2.)], dtype=[('x', '<i8'), ('y', '<f8')])
14Here, each element consists of two fields: x (an int), and y (a float).
15This is known as a structured array. The different fields are analogous
16to columns in a spread-sheet. The different fields can be accessed as
17one would a dictionary::
19 >>> a['x']
20 array([1, 1])
22 >>> a['y']
23 array([2., 2.])
25Record arrays allow us to access fields as properties::
27 >>> ar = np.rec.array(a)
29 >>> ar.x
30 array([1, 1])
32 >>> ar.y
33 array([2., 2.])
35"""
36import os
37import warnings
38from collections import Counter, OrderedDict
40from . import numeric as sb
41from . import numerictypes as nt
42from numpy.compat import (
43 isfileobj, os_fspath, contextlib_nullcontext
44)
45from numpy.core.overrides import set_module
46from .arrayprint import get_printoptions
# All of the functions allow formats to be a dtype
# Names exported as this module's public API.
__all__ = ['record', 'recarray', 'format_parser']

# Local alias for sb.ndarray; used below as the base class of recarray and
# as the target type of several ``view`` calls.
ndarray = sb.ndarray
54_byteorderconv = {'b':'>',
55 'l':'<',
56 'n':'=',
57 'B':'>',
58 'L':'<',
59 'N':'=',
60 'S':'s',
61 's':'s',
62 '>':'>',
63 '<':'<',
64 '=':'=',
65 '|':'|',
66 'I':'|',
67 'i':'|'}
# formats regular expression
# allows multidimension spec with a tuple syntax in front
# of the letter code '(2,3)f4' and ' ( 2 , 3 ) f4 '
# are equally allowed
# NOTE(review): no regular expression is defined in the visible source, so
# the comment above may be a leftover -- confirm before relying on it.

# Module-level alias for nt.typeDict.
numfmt = nt.typeDict
76# taken from OrderedDict recipes in the Python documentation
77# https://docs.python.org/3.3/library/collections.html#ordereddict-examples-and-recipes
78class _OrderedCounter(Counter, OrderedDict):
79 """Counter that remembers the order elements are first encountered"""
81 def __repr__(self):
82 return '%s(%r)' % (self.__class__.__name__, OrderedDict(self))
84 def __reduce__(self):
85 return self.__class__, (OrderedDict(self),)
def find_duplicate(list):
    """Return the elements of `list` that occur more than once.

    Every duplicated element is reported a single time, in the order in
    which it is first encountered in the input.
    """
    tally = _OrderedCounter(list)
    duplicated = []
    for element, occurrences in tally.items():
        if occurrences > 1:
            duplicated.append(element)
    return duplicated
@set_module('numpy')
class format_parser:
    """
    Class to convert formats, names, titles description to a dtype.

    After constructing the format_parser object, the dtype attribute is
    the converted data-type:
    ``dtype = format_parser(formats, names, titles).dtype``

    Attributes
    ----------
    dtype : dtype
        The converted data-type.

    Parameters
    ----------
    formats : str or list of str
        The format description, either specified as a string with
        comma-separated format descriptions in the form ``'f8, i4, a5'``, or
        a list of format description strings  in the form
        ``['f8', 'i4', 'a5']``.
    names : str or list/tuple of str
        The field names, either specified as a comma-separated string in the
        form ``'col1, col2, col3'``, or as a list or tuple of strings in the
        form ``['col1', 'col2', 'col3']``.
        An empty list can be used, in that case default field names
        ('f0', 'f1', ...) are used.
    titles : sequence
        Sequence of title strings. An empty list can be used to leave titles
        out.
    aligned : bool, optional
        If True, align the fields by padding as the C-compiler would.
        Default is False.
    byteorder : str, optional
        If specified, all the fields will be changed to the
        provided byte-order.  Otherwise, the default byte-order is
        used. For all available string specifiers, see `dtype.newbyteorder`.

    See Also
    --------
    dtype, typename, sctype2char

    Examples
    --------
    >>> np.format_parser(['<f8', '<i4', '<a5'], ['col1', 'col2', 'col3'],
    ...                  ['T1', 'T2', 'T3']).dtype
    dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'), (('T3', 'col3'), 'S5')])

    `names` and/or `titles` can be empty lists. If `titles` is an empty list,
    titles will simply not appear. If `names` is empty, default field names
    will be used.

    >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
    ...                  []).dtype
    dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '<S5')])
    >>> np.format_parser(['<f8', '<i4', '<a5'], [], []).dtype
    dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', 'S5')])

    """

    def __init__(self, formats, names, titles, aligned=False, byteorder=None):
        # The three steps depend on each other's results and must run in
        # this order: formats fix the field count, names/titles are cut or
        # padded to that count, and only then can the dtype be assembled.
        self._parseFormats(formats, aligned)
        self._setfieldnames(names, titles)
        self._createdtype(byteorder)

    def _parseFormats(self, formats, aligned=False):
        """Parse the field formats.

        Sets ``_f_formats`` (per-field dtypes), ``_offsets`` (per-field byte
        offsets) and ``_nfields`` (number of fields).

        Raises
        ------
        ValueError
            If `formats` is None.
        """

        if formats is None:
            raise ValueError("Need formats argument")
        if isinstance(formats, list):
            # Give every entry a placeholder name so that the list can be
            # handed to the dtype constructor as (name, format) pairs.
            dtype = sb.dtype(
                [('f{}'.format(i), format_) for i, format_ in enumerate(formats)],
                aligned,
            )
        else:
            dtype = sb.dtype(formats, aligned)
        fields = dtype.fields
        if fields is None:
            # A format without fields is wrapped into a one-field structure.
            dtype = sb.dtype([('f1', dtype)], aligned)
            fields = dtype.fields
        keys = dtype.names
        self._f_formats = [fields[key][0] for key in keys]
        self._offsets = [fields[key][1] for key in keys]
        self._nfields = len(keys)

    def _setfieldnames(self, names, titles):
        """Convert input field names into a list and assign it to ``_names``.

        Also sets ``_titles``. Both lists end up with exactly ``_nfields``
        entries: surplus entries are dropped, missing names are generated
        and missing titles are filled with None.

        Raises
        ------
        NameError
            If `names` is truthy but neither a string nor a list/tuple.
        ValueError
            If the resulting field names contain duplicates.
        """

        if names:
            if isinstance(names, str):
                names = names.split(',')
            elif not isinstance(names, (list, tuple)):
                # isinstance (rather than an exact type comparison) so that
                # list and tuple subclasses are accepted as well.
                raise NameError("illegal input names %s" % repr(names))

            self._names = [n.strip() for n in names[:self._nfields]]
        else:
            self._names = []

        # if the names are not specified, they will be assigned as
        #  "f0, f1, f2,..."
        # if not enough names are specified, they will be assigned as "f[n],
        # f[n+1],..." etc. where n is the number of specified names..."
        self._names += ['f%d' % i for i in range(len(self._names),
                                                 self._nfields)]
        # check for redundant names
        _dup = find_duplicate(self._names)
        if _dup:
            raise ValueError("Duplicate field names: %s" % _dup)

        if titles:
            self._titles = [n.strip() for n in titles[:self._nfields]]
        else:
            self._titles = []
            titles = []

        # Pad with None so that every field has a title slot.
        if self._nfields > len(titles):
            self._titles += [None] * (self._nfields - len(titles))

    def _createdtype(self, byteorder):
        """Assemble the final dtype and store it in the ``dtype`` attribute.

        When `byteorder` is not None, its first character is looked up in
        ``_byteorderconv`` and the result is applied through
        ``dtype.newbyteorder``.
        """
        dtype = sb.dtype({
            'names': self._names,
            'formats': self._f_formats,
            'offsets': self._offsets,
            'titles': self._titles,
        })
        if byteorder is not None:
            byteorder = _byteorderconv[byteorder[0]]
            dtype = dtype.newbyteorder(byteorder)

        self.dtype = dtype
class record(nt.void):
    """A data-type scalar that allows field access as attribute lookup.
    """

    # manually set name and module so that this class's type shows up
    # as numpy.record when printed
    __name__ = 'record'
    __module__ = 'numpy'

    def __repr__(self):
        """Return the repr; under the '1.13' legacy print mode use ``str``."""
        if get_printoptions()['legacy'] == '1.13':
            return self.__str__()
        return super().__repr__()

    def __str__(self):
        """Return the str; under '1.13' legacy mode format ``self.item()``."""
        if get_printoptions()['legacy'] == '1.13':
            return str(self.item())
        return super().__str__()

    def __getattribute__(self, attr):
        """Look up `attr` as a regular attribute first, then as a field name.

        Raises
        ------
        AttributeError
            If `attr` is neither an attribute of the scalar nor a field.
        """
        if attr in ('setfield', 'getfield', 'dtype'):
            return nt.void.__getattribute__(self, attr)
        try:
            return nt.void.__getattribute__(self, attr)
        except AttributeError:
            pass
        # ``fields`` is None for a dtype without fields; treat that as "no
        # fields" so the error raised below names the missing attribute.
        fielddict = nt.void.__getattribute__(self, 'dtype').fields or {}
        res = fielddict.get(attr, None)
        if res:
            obj = self.getfield(*res[:2])
            # if it has fields return a record,
            # otherwise return the object
            try:
                dt = obj.dtype
            except AttributeError:
                # happens if field is Object type
                return obj
            if dt.names is not None:
                return obj.view((self.__class__, obj.dtype))
            return obj
        else:
            raise AttributeError("'record' object has no "
                                 "attribute '%s'" % attr)

    def __setattr__(self, attr, val):
        """Assign to the field named `attr`, or to an existing attribute.

        Raises
        ------
        AttributeError
            If `attr` is one of 'setfield', 'getfield' or 'dtype', or if it
            is neither a field name nor an existing attribute.
        """
        if attr in ('setfield', 'getfield', 'dtype'):
            raise AttributeError("Cannot set '%s' attribute" % attr)
        fielddict = nt.void.__getattribute__(self, 'dtype').fields or {}
        res = fielddict.get(attr, None)
        if res:
            return self.setfield(val, *res[:2])

        # Test for existence with a sentinel rather than by truthiness, so
        # that an attribute whose current value is falsy (None, 0, an empty
        # tuple, ...) is still recognised as existing.
        missing = object()
        if getattr(self, attr, missing) is not missing:
            return nt.void.__setattr__(self, attr, val)
        raise AttributeError("'record' object has no "
                             "attribute '%s'" % attr)

    def __getitem__(self, indx):
        """Index the scalar; structured results are returned as records."""
        obj = nt.void.__getitem__(self, indx)

        # copy behavior of record.__getattribute__,
        if isinstance(obj, nt.void) and obj.dtype.names is not None:
            return obj.view((self.__class__, obj.dtype))
        else:
            # return a single element
            return obj

    def pprint(self):
        """Pretty-print all fields.

        Returns
        -------
        str
            One ``name: value`` line per field, with the names right-aligned
            to the longest field name. Empty when there are no fields.
        """
        names = self.dtype.names
        # ``default=0`` keeps a record without any field from raising
        # ValueError in max(); the result is then an empty string.
        maxlen = max((len(name) for name in names), default=0)
        fmt = '%% %ds: %%s' % maxlen
        rows = [fmt % (name, getattr(self, name)) for name in names]
        return "\n".join(rows)
310# The recarray is almost identical to a standard array (which supports
311# named fields already) The biggest difference is that it can use
312# attribute-lookup to find the fields and it is constructed using
313# a record.
315# If byteorder is given it forces a particular byteorder on all
316# the fields (and any subfields)
class recarray(ndarray):
    """Construct an ndarray that allows field access using attributes.

    Arrays may have data-types containing fields, analogous
    to columns in a spread sheet.  An example is ``[(x, int), (y, float)]``,
    where each entry in the array is a pair of ``(int, float)``.  Normally,
    these attributes are accessed using dictionary lookups such as ``arr['x']``
    and ``arr['y']``.  Record arrays allow the fields to be accessed as members
    of the array, using ``arr.x`` and ``arr.y``.

    Parameters
    ----------
    shape : tuple
        Shape of output array.
    dtype : data-type, optional
        The desired data-type.  By default, the data-type is determined
        from `formats`, `names`, `titles`, `aligned` and `byteorder`.
    formats : list of data-types, optional
        A list containing the data-types for the different columns, e.g.
        ``['i4', 'f8', 'i4']``.  `formats` does *not* support the new
        convention of using types directly, i.e. ``(int, float, int)``.
        Note that `formats` must be a list, not a tuple.
        Given that `formats` is somewhat limited, we recommend specifying
        `dtype` instead.
    names : tuple of str, optional
        The name of each column, e.g. ``('x', 'y', 'z')``.
    buf : buffer, optional
        By default, a new array is created of the given shape and data-type.
        If `buf` is specified and is an object exposing the buffer interface,
        the array will use the memory from the existing buffer.  In this case,
        the `offset` and `strides` keywords are available.

    Other Parameters
    ----------------
    titles : tuple of str, optional
        Aliases for column names.  For example, if `names` were
        ``('x', 'y', 'z')`` and `titles` is
        ``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then
        ``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``.
    byteorder : {'<', '>', '='}, optional
        Byte-order for all fields.
    aligned : bool, optional
        Align the fields in memory as the C-compiler would.
    strides : tuple of ints, optional
        Buffer (`buf`) is interpreted according to these strides (strides
        define how many bytes each array element, row, column, etc.
        occupy in memory).
    offset : int, optional
        Start reading buffer (`buf`) from this offset onwards.
    order : {'C', 'F'}, optional
        Row-major (C-style) or column-major (Fortran-style) order.

    Returns
    -------
    rec : recarray
        Empty array of the given shape and type.

    See Also
    --------
    rec.fromrecords : Construct a record array from data.
    record : fundamental data-type for `recarray`.
    format_parser : determine a data-type from formats, names, titles.

    Notes
    -----
    This constructor can be compared to ``empty``: it creates a new record
    array but does not fill it with data.  To create a record array from data,
    use one of the following methods:

    1. Create a standard ndarray and convert it to a record array,
       using ``arr.view(np.recarray)``
    2. Use the `buf` keyword.
    3. Use `np.rec.fromrecords`.

    Examples
    --------
    Create an array with two fields, ``x`` and ``y``:

    >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i8')])
    >>> x
    array([(1., 2), (3., 4)], dtype=[('x', '<f8'), ('y', '<i8')])

    >>> x['x']
    array([1., 3.])

    View the array as a record array:

    >>> x = x.view(np.recarray)

    >>> x.x
    array([1., 3.])

    >>> x.y
    array([2, 4])

    Create a new, empty record array:

    >>> np.recarray((2,),
    ... dtype=[('x', int), ('y', float), ('z', int)]) #doctest: +SKIP
    rec.array([(-1073741821, 1.2249118382103472e-301, 24547520),
           (3471280, 1.2134086255804012e-316, 0)],
          dtype=[('x', '<i4'), ('y', '<f8'), ('z', '<i4')])

    """

    # manually set name and module so that this class's type shows
    # up as "numpy.recarray" when printed
    __name__ = 'recarray'
    __module__ = 'numpy'

    def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None,
                formats=None, names=None, titles=None,
                byteorder=None, aligned=False, order='C'):
        """Allocate the array, optionally on top of the buffer `buf`.

        `dtype` takes precedence; `formats`, `names`, `titles`, `aligned`
        and `byteorder` are only consulted when `dtype` is None.
        """

        if dtype is not None:
            descr = sb.dtype(dtype)
        else:
            descr = format_parser(formats, names, titles, aligned, byteorder).dtype

        if buf is None:
            self = ndarray.__new__(subtype, shape, (record, descr), order=order)
        else:
            self = ndarray.__new__(subtype, shape, (record, descr),
                                   buffer=buf, offset=offset,
                                   strides=strides, order=order)
        return self

    def __array_finalize__(self, obj):
        if self.dtype.type is not record and self.dtype.names is not None:
            # if self.dtype is not np.record, invoke __setattr__ which will
            # convert it to a record if it is a void dtype.
            self.dtype = self.dtype

    def __getattribute__(self, attr):
        """Return the attribute `attr`, falling back to the field `attr`.

        Raises
        ------
        AttributeError
            If `attr` is neither an attribute nor a field name. The failed
            field lookup is attached as the exception's cause.
        """
        # See if ndarray has this attr, and return it if so. (note that this
        # means a field with the same name as an ndarray attr cannot be
        # accessed by attribute).
        try:
            return object.__getattribute__(self, attr)
        except AttributeError:  # attr must be a fieldname
            pass

        # look for a field with this name
        fielddict = ndarray.__getattribute__(self, 'dtype').fields
        try:
            res = fielddict[attr][:2]
        except (TypeError, KeyError) as err:
            # TypeError: the dtype has no fields at all (fielddict is None);
            # KeyError: there are fields, but none with this name.
            raise AttributeError(
                "recarray has no attribute %s" % attr) from err
        obj = self.getfield(*res)

        # At this point obj will always be a recarray, since (see
        # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
        # non-structured, convert it to an ndarray. Then if obj is structured
        # with void type convert it to the same dtype.type (eg to preserve
        # numpy.record type if present), since nested structured fields do not
        # inherit type. Don't do this for non-void structures though.
        if obj.dtype.names is not None:
            if issubclass(obj.dtype.type, nt.void):
                return obj.view(dtype=(self.dtype.type, obj.dtype))
            return obj
        else:
            return obj.view(ndarray)

    # Save the dictionary.
    # If the attr is a field name and not in the saved dictionary
    # Undo any "setting" of the attribute and do a setfield
    # Thus, you can't create attributes on-the-fly that are field names.
    def __setattr__(self, attr, val):
        """Set the attribute `attr`, or the field `attr` if it names one.

        Assigning a structured void dtype to ``dtype`` converts it to a
        record dtype first.
        """

        # Automatically convert (void) structured types to records
        # (but not non-void structures, subarrays, or non-structured voids)
        # NOTE(review): ``val.type`` assumes `val` is already a dtype
        # instance when attr == 'dtype' -- confirm against callers.
        if attr == 'dtype' and issubclass(val.type, nt.void) and val.names is not None:
            val = sb.dtype((record, val))

        newattr = attr not in self.__dict__
        try:
            ret = object.__setattr__(self, attr, val)
        except Exception:
            # A failed plain assignment is only tolerated when `attr` names
            # a field; in that case fall through to setfield below.
            fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
            if attr not in fielddict:
                raise
        else:
            fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
            if attr not in fielddict:
                return ret
            if newattr:
                # We just added this one or this setattr worked on an
                # internal attribute.
                try:
                    object.__delattr__(self, attr)
                except Exception:
                    return ret
        try:
            res = fielddict[attr][:2]
        except (TypeError, KeyError) as err:
            raise AttributeError(
                "record array has no attribute %s" % attr) from err
        return self.setfield(val, *res)

    def __getitem__(self, indx):
        """Index the array, keeping record semantics on structured results."""
        obj = super().__getitem__(indx)

        # copy behavior of getattr, except that here
        # we might also be returning a single element
        if isinstance(obj, ndarray):
            if obj.dtype.names is not None:
                obj = obj.view(type(self))
                if issubclass(obj.dtype.type, nt.void):
                    return obj.view(dtype=(self.dtype.type, obj.dtype))
                return obj
            else:
                return obj.view(type=ndarray)
        else:
            # return a single element
            return obj

    def __repr__(self):
        """Return a ``rec.array(...)`` or ``array(...).view(...)`` repr."""

        repr_dtype = self.dtype
        if self.dtype.type is record or not issubclass(self.dtype.type, nt.void):
            # If this is a full record array (has numpy.record dtype),
            # or if it has a scalar (non-void) dtype with no records,
            # represent it using the rec.array function. Since rec.array
            # converts dtype to a numpy.record for us, convert back
            # to non-record before printing
            if repr_dtype.type is record:
                repr_dtype = sb.dtype((nt.void, repr_dtype))
            prefix = "rec.array("
            fmt = 'rec.array(%s,%sdtype=%s)'
        else:
            # otherwise represent it using np.array plus a view
            # This should only happen if the user is playing
            # strange games with dtypes.
            prefix = "array("
            fmt = 'array(%s,%sdtype=%s).view(numpy.recarray)'

        # get data/shape string. logic taken from numeric.array_repr
        if self.size > 0 or self.shape == (0,):
            lst = sb.array2string(
                self, separator=', ', prefix=prefix, suffix=',')
        else:
            # show zero-length shape unless it is (0,)
            lst = "[], shape=%s" % (repr(self.shape),)

        lf = '\n'+' '*len(prefix)
        if get_printoptions()['legacy'] == '1.13':
            lf = ' ' + lf  # trailing space
        return fmt % (lst, lf, repr_dtype)

    def field(self, attr, val=None):
        """Get or set one field of the array.

        Parameters
        ----------
        attr : str or int
            Field name, or the position of the field in ``dtype.names``.
        val : optional
            When None (the default) the field is returned; otherwise `val`
            is written into the field.

        Returns
        -------
        The field data when `val` is None (viewed as a plain ndarray unless
        the field is itself structured), else the result of ``setfield``.
        """
        if isinstance(attr, int):
            names = ndarray.__getattribute__(self, 'dtype').names
            attr = names[attr]

        fielddict = ndarray.__getattribute__(self, 'dtype').fields

        res = fielddict[attr][:2]

        if val is None:
            obj = self.getfield(*res)
            if obj.dtype.names is not None:
                return obj
            return obj.view(ndarray)
        else:
            return self.setfield(val, *res)
584def _deprecate_shape_0_as_None(shape):
585 if shape == 0:
586 warnings.warn(
587 "Passing `shape=0` to have the shape be inferred is deprecated, "
588 "and in future will be equivalent to `shape=(0,)`. To infer "
589 "the shape and suppress this warning, pass `shape=None` instead.",
590 FutureWarning, stacklevel=3)
591 return None
592 else:
593 return shape
def fromarrays(arrayList, dtype=None, shape=None, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    """Build a record array whose fields are the given (flat) list of arrays.

    Parameters
    ----------
    arrayList : list or tuple
        Array-like objects (lists, tuples, ndarrays, ...), one per field.
    dtype : data-type, optional
        Structured dtype describing all fields. Takes precedence over
        `formats`.
    shape : int or tuple of ints, optional
        Shape of the resulting array. When omitted it is taken from
        ``arrayList[0]``.
    formats, names, titles, aligned, byteorder :
        Used only when `dtype` is ``None``; they are forwarded to
        `numpy.format_parser` to construct a dtype. When `formats` is also
        ``None`` the dtypes of the input arrays are used.

    Returns
    -------
    np.recarray
        Record array consisting of given arrayList columns.

    Raises
    ------
    ValueError
        If the number of fields differs from the number of arrays, or if an
        array's shape does not match the target shape.

    Examples
    --------
    >>> x1=np.array([1,2,3,4])
    >>> x2=np.array(['a','dd','xyz','12'])
    >>> x3=np.array([1.1,2,3,4])
    >>> r = np.core.records.fromarrays([x1,x2,x3],names='a,b,c')
    >>> print(r[1])
    (2, 'dd', 2.0) # may vary
    >>> x1[1]=34
    >>> r.a
    array([1, 2, 3, 4])

    >>> x1 = np.array([1, 2, 3, 4])
    >>> x2 = np.array(['a', 'dd', 'xyz', '12'])
    >>> x3 = np.array([1.1, 2, 3,4])
    >>> r = np.core.records.fromarrays(
    ...     [x1, x2, x3],
    ...     dtype=np.dtype([('a', np.int32), ('b', 'S3'), ('c', np.float32)]))
    >>> r
    rec.array([(1, b'a', 1.1), (2, b'dd', 2. ), (3, b'xyz', 3. ),
               (4, b'12', 4. )],
              dtype=[('a', '<i4'), ('b', 'S3'), ('c', '<f4')])
    """

    columns = [sb.asarray(item) for item in arrayList]

    # NumPy 1.19.0, 2020-01-01
    shape = _deprecate_shape_0_as_None(shape)

    if shape is None:
        shape = columns[0].shape
    elif isinstance(shape, int):
        shape = (shape,)

    if dtype is not None:
        descr = sb.dtype(dtype)
    else:
        if formats is None:
            # No explicit description at all: take each column's own dtype.
            formats = [column.dtype for column in columns]
        descr = format_parser(formats, names, titles, aligned, byteorder).dtype
    field_names = descr.names

    # Determine shape from data-type.
    if len(descr) != len(columns):
        raise ValueError("mismatch between the number of fields "
                         "and the number of arrays")

    # Trailing dimensions that belong to the first field's sub-array shape
    # are not part of the record array's own shape.
    inner_ndim = len(descr[0].shape)
    if inner_ndim > 0:
        shape = shape[:-inner_ndim]

    for position, column in enumerate(columns):
        field_ndim = descr[position].ndim
        outer_shape = column.shape[:column.ndim - field_ndim]
        if outer_shape != shape:
            raise ValueError("array-shape mismatch in array %d" % position)

    result = recarray(shape, descr)

    # populate the record array (makes a copy)
    for field_name, column in zip(field_names, columns):
        result[field_name] = column

    return result
def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
                titles=None, aligned=False, byteorder=None):
    """Build a recarray from a sequence of records (rows).

    Parameters
    ----------
    recList : sequence
        data in the same field may be heterogeneous - they will be promoted
        to the highest data type.
    dtype : data-type, optional
        valid dtype for all arrays
    shape : int or tuple of ints, optional
        shape of each array.
    formats, names, titles, aligned, byteorder :
        If `dtype` is ``None``, these arguments are passed to
        `numpy.format_parser` to construct a dtype. See that function for
        detailed documentation.

        If both `formats` and `dtype` are None, then this will auto-detect
        formats. Use list of tuples rather than list of lists for faster
        processing.

    Returns
    -------
    np.recarray
        record array consisting of given recList rows.

    Examples
    --------
    >>> r=np.core.records.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],
    ... names='col1,col2,col3')
    >>> print(r[0])
    (456, 'dbe', 1.2)
    >>> r.col1
    array([456,   2])
    >>> r.col2
    array(['dbe', 'de'], dtype='<U3')
    >>> import pickle
    >>> pickle.loads(pickle.dumps(r))
    rec.array([(456, 'dbe', 1.2), (  2, 'de', 1.3)],
              dtype=[('col1', '<i8'), ('col2', '<U3'), ('col3', '<f8')])
    """

    if formats is None and dtype is None:  # slower
        # Auto-detection: split the rows into one array per column and let
        # fromarrays work out the field types.
        rows = sb.array(recList, dtype=object)
        column_arrays = []
        for col in range(rows.shape[-1]):
            column_arrays.append(sb.array(rows[..., col].tolist()))
        return fromarrays(column_arrays, formats=formats, shape=shape,
                          names=names, titles=titles, aligned=aligned,
                          byteorder=byteorder)

    if dtype is None:
        descr = format_parser(formats, names, titles, aligned, byteorder).dtype
    else:
        descr = sb.dtype((record, dtype))

    try:
        retval = sb.array(recList, dtype=descr)
    except (TypeError, ValueError):
        # Direct conversion failed; fall back to filling a 1-d record array
        # row by row, converting every row to a tuple.
        # NumPy 1.19.0, 2020-01-01
        shape = _deprecate_shape_0_as_None(shape)
        if shape is None:
            shape = len(recList)
        if isinstance(shape, int):
            shape = (shape,)
        if len(shape) > 1:
            raise ValueError("Can only deal with 1-d array.")
        filled = recarray(shape, descr)
        for row in range(filled.size):
            filled[row] = tuple(recList[row])
        # list of lists instead of list of tuples ?
        # 2018-02-07, 1.14.1
        warnings.warn(
            "fromrecords expected a list of tuples, may have received a list "
            "of lists instead. In the future that will raise an error",
            FutureWarning, stacklevel=2)
        return filled
    else:
        if shape is not None and retval.shape != shape:
            retval.shape = shape
        return retval.view(recarray)
def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    r"""Create a record array on top of a buffer of binary data.

    Note that despite the name of this function it does not accept `str`
    instances.

    Parameters
    ----------
    datastring : bytes-like
        Buffer of binary data
    dtype : data-type, optional
        Valid dtype for all arrays
    shape : int or tuple of ints, optional
        Shape of each array. When ``None`` or ``-1``, the number of records
        is computed from the buffer length, `offset` and the item size.
    offset : int, optional
        Position in the buffer to start reading from.
    formats, names, titles, aligned, byteorder :
        If `dtype` is ``None``, these arguments are passed to
        `numpy.format_parser` to construct a dtype. See that function for
        detailed documentation.


    Returns
    -------
    np.recarray
        Record array view into the data in datastring. This will be readonly
        if `datastring` is readonly.

    Raises
    ------
    TypeError
        If neither `dtype` nor `formats` is given.

    See Also
    --------
    numpy.frombuffer

    Examples
    --------
    >>> a = b'\x01\x02\x03abc'
    >>> np.core.records.fromstring(a, dtype='u1,u1,u1,S3')
    rec.array([(1, 2, 3, b'abc')],
            dtype=[('f0', 'u1'), ('f1', 'u1'), ('f2', 'u1'), ('f3', 'S3')])

    >>> grades_dtype = [('Name', (np.str_, 10)), ('Marks', np.float64),
    ...                 ('GradeLevel', np.int32)]
    >>> grades_array = np.array([('Sam', 33.3, 3), ('Mike', 44.4, 5),
    ...                         ('Aadi', 66.6, 6)], dtype=grades_dtype)
    >>> np.core.records.fromstring(grades_array.tobytes(), dtype=grades_dtype)
    rec.array([('Sam', 33.3, 3), ('Mike', 44.4, 5), ('Aadi', 66.6, 6)],
            dtype=[('Name', '<U10'), ('Marks', '<f8'), ('GradeLevel', '<i4')])

    >>> s = '\x01\x02\x03abc'
    >>> np.core.records.fromstring(s, dtype='u1,u1,u1,S3')
    Traceback (most recent call last)
       ...
    TypeError: a bytes-like object is required, not 'str'
    """

    if dtype is None and formats is None:
        raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")

    if dtype is None:
        descr = format_parser(formats, names, titles, aligned, byteorder).dtype
    else:
        descr = sb.dtype(dtype)

    # NumPy 1.19.0, 2020-01-01
    shape = _deprecate_shape_0_as_None(shape)

    if shape in (None, -1):
        # Use as many whole records as fit behind `offset`.
        usable_bytes = len(datastring) - offset
        shape = usable_bytes // descr.itemsize

    return recarray(shape, descr, buf=datastring, offset=offset)
def get_remaining_size(fd):
    """Return the number of bytes between the current position and the end.

    The total size comes from ``os.fstat`` on ``fd.fileno()``. If `fd` has
    no ``fileno`` attribute, the size of the file at ``fd.name`` is used
    instead.
    """
    try:
        descriptor = fd.fileno()
    except AttributeError:
        total = os.path.getsize(fd.name)
    else:
        total = os.fstat(descriptor).st_size
    return total - fd.tell()
def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
             names=None, titles=None, aligned=False, byteorder=None):
    """Read binary file data into a record array.

    Parameters
    ----------
    fd : str or file type
        If file is a string or a path-like object then that file is opened,
        else it is assumed to be a file object. The file object must
        support random access (i.e. it must have tell and seek methods).
    dtype : data-type, optional
        valid dtype for all arrays
    shape : int or tuple of ints, optional
        shape of each array. A single ``-1`` entry is replaced by the count
        that fits into the bytes remaining in the file.
    offset : int, optional
        Position in the file to start reading from.
    formats, names, titles, aligned, byteorder :
        If `dtype` is ``None``, these arguments are passed to
        `numpy.format_parser` to construct a dtype. See that function for
        detailed documentation

    Returns
    -------
    np.recarray
        record array consisting of data enclosed in file.

    Raises
    ------
    TypeError
        If neither `dtype` nor `formats` is given.
    ValueError
        If the file holds fewer bytes than the requested shape needs.
    IOError
        If fewer bytes than expected were read.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> a = np.empty(10,dtype='f8,i4,a5')
    >>> a[5] = (0.5,10,'abcde')
    >>>
    >>> fd=TemporaryFile()
    >>> a = a.newbyteorder('<')
    >>> a.tofile(fd)
    >>>
    >>> _ = fd.seek(0)
    >>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10,
    ... byteorder='<')
    >>> print(r[5])
    (0.5, 10, 'abcde')
    >>> r.shape
    (10,)
    """

    if dtype is None and formats is None:
        raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")

    # NumPy 1.19.0, 2020-01-01
    shape = _deprecate_shape_0_as_None(shape)

    if shape is None:
        shape = (-1,)
    elif isinstance(shape, int):
        shape = (shape,)

    # An already opened file is wrapped in a null context manager; anything
    # else is treated as a path and opened for binary reading.
    if isfileobj(fd):
        ctx = contextlib_nullcontext(fd)
    else:
        ctx = open(os_fspath(fd), 'rb')

    with ctx as fd:
        if offset > 0:
            # whence=1: seek relative to the current position
            fd.seek(offset, 1)
        available = get_remaining_size(fd)

        if dtype is None:
            descr = format_parser(
                formats, names, titles, aligned, byteorder).dtype
        else:
            descr = sb.dtype(dtype)
        itemsize = descr.itemsize

        count = sb.array(shape).prod(dtype=nt.intp)
        signed_bytes = count * itemsize
        if signed_bytes < 0:
            # A negative product means the shape contains a -1 placeholder;
            # fill it in from the number of bytes that are left.
            resolved = list(shape)
            resolved[resolved.index(-1)] = available // -signed_bytes
            shape = tuple(resolved)
            count = sb.array(shape).prod(dtype=nt.intp)

        nbytes = count * itemsize

        if nbytes > available:
            raise ValueError(
                "Not enough bytes left in file for specified shape and type")

        # create the array
        result = recarray(shape, descr)
        nbytesread = fd.readinto(result.data)
        if nbytesread != nbytes:
            raise IOError("Didn't read as many bytes as expected")

    return result
def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
          names=None, titles=None, aligned=False, byteorder=None, copy=True):
    """Construct a record array from a wide-variety of objects.

    The kind of `obj` selects how the record array is built:

    * ``None`` -- a new `recarray` of the given `shape` (required).
    * ``bytes`` -- handled by `fromstring`.
    * list or tuple -- handled by `fromrecords` when its first element is a
      tuple or list, otherwise by `fromarrays`.
    * `recarray` -- viewed with `dtype` if it differs, copied if `copy`.
    * open file object -- handled by `fromfile`.
    * `ndarray` -- viewed with `dtype` if it differs, copied if `copy`,
      then viewed as a `recarray`.
    * anything else -- must expose ``__array_interface__`` as a dict; it is
      converted to an array and viewed as a `recarray`.

    Parameters
    ----------
    obj : any
        Input object, see above.
    dtype : data-type, optional
        Data type of the result. Takes precedence over `formats`.
    shape : int or tuple of ints, optional
        Shape forwarded to the selected constructor.
    offset : int, optional
        Offset forwarded to the buffer- and file-based constructors.
    strides : tuple of ints, optional
        Strides; only used when `obj` is ``None``.
    formats, names, titles, aligned, byteorder :
        Used to build the dtype through `format_parser` when `dtype` is
        ``None`` and `formats` is given.
    copy : bool, optional
        Whether to copy `recarray` / `ndarray` inputs. Defaults to True.

    Returns
    -------
    np.recarray
        Record array created from the specified object.

    Raises
    ------
    ValueError
        If `obj` is ``None``, a `str` or an open file and neither `dtype`
        nor `formats` is given; if `obj` is ``None`` and `shape` is
        missing; or if the type of `obj` is not supported.
    """

    no_description = formats is None and dtype is None
    if no_description and (
            obj is None or isinstance(obj, str) or isfileobj(obj)):
        raise ValueError("Must define formats (or dtype) if object is "
                         "None, string, or an open file")

    # `kwds` is only populated when no dtype could be determined here, so
    # that the delegated constructor receives the raw description keywords.
    kwds = {}
    if dtype is not None:
        dtype = sb.dtype(dtype)
    elif formats is not None:
        dtype = format_parser(formats, names, titles,
                              aligned, byteorder).dtype
    else:
        kwds = dict(formats=formats, names=names, titles=titles,
                    aligned=aligned, byteorder=byteorder)

    if obj is None:
        if shape is None:
            raise ValueError("Must define a shape if obj is None")
        return recarray(shape, dtype, buf=obj, offset=offset, strides=strides)

    if isinstance(obj, bytes):
        return fromstring(obj, dtype, shape=shape, offset=offset, **kwds)

    if isinstance(obj, (list, tuple)):
        if isinstance(obj[0], (tuple, list)):
            return fromrecords(obj, dtype=dtype, shape=shape, **kwds)
        return fromarrays(obj, dtype=dtype, shape=shape, **kwds)

    # recarray must be tested before ndarray, since it is a subclass of it.
    if isinstance(obj, recarray):
        needs_view = dtype is not None and (obj.dtype != dtype)
        new = obj.view(dtype) if needs_view else obj
        return new.copy() if copy else new

    if isfileobj(obj):
        return fromfile(obj, dtype=dtype, shape=shape, offset=offset)

    if isinstance(obj, ndarray):
        needs_view = dtype is not None and (obj.dtype != dtype)
        new = obj.view(dtype) if needs_view else obj
        if copy:
            new = new.copy()
        return new.view(recarray)

    interface = getattr(obj, "__array_interface__", None)
    if interface is None or not isinstance(interface, dict):
        raise ValueError("Unknown input type")
    obj = sb.array(obj)
    if dtype is not None and (obj.dtype != dtype):
        obj = obj.view(dtype)
    return obj.view(recarray)