Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/arrays/numpy_.py : 36%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import numbers
2from typing import Union
4import numpy as np
5from numpy.lib.mixins import NDArrayOperatorsMixin
7from pandas._libs import lib
8from pandas.compat.numpy import function as nv
9from pandas.util._decorators import Appender
10from pandas.util._validators import validate_fillna_kwargs
12from pandas.core.dtypes.dtypes import ExtensionDtype
13from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
14from pandas.core.dtypes.inference import is_array_like
15from pandas.core.dtypes.missing import isna
17from pandas import compat
18from pandas.core import nanops
19from pandas.core.algorithms import searchsorted, take, unique
20from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
21from pandas.core.construction import extract_array
22from pandas.core.indexers import check_array_indexer
23from pandas.core.missing import backfill_1d, pad_1d
class PandasDtype(ExtensionDtype):
    """
    A Pandas ExtensionDtype for NumPy dtypes.

    .. versionadded:: 0.24.0

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    dtype : numpy.dtype
    """

    _metadata = ("_dtype",)

    def __init__(self, dtype):
        # Normalise whatever was passed (string, scalar type, np.dtype)
        # to a proper np.dtype instance and cache its scalar type.
        dtype = np.dtype(dtype)
        self._dtype = dtype
        self._type = dtype.type

    def __repr__(self) -> str:
        return f"PandasDtype({self.name!r})"

    @property
    def numpy_dtype(self):
        """The NumPy dtype this PandasDtype wraps."""
        return self._dtype

    @property
    def name(self):
        # e.g. "int64", "object"
        return self._dtype.name

    @property
    def type(self):
        # The NumPy scalar type, e.g. np.int64.
        return self._type

    @property
    def kind(self):
        # Single-character NumPy kind code ("i", "f", "O", ...).
        return self._dtype.kind

    @property
    def itemsize(self):
        """The element size of this data-type object."""
        return self._dtype.itemsize

    @property
    def _is_numeric(self):
        # bool, int, uint, float, complex; excludes object, str, void.
        return self.kind in "biufc"

    @property
    def _is_boolean(self):
        return self.kind == "b"

    @classmethod
    def construct_from_string(cls, string):
        """Construct a PandasDtype from a dtype string, e.g. 'int64'."""
        try:
            return cls(np.dtype(string))
        except TypeError as err:
            raise TypeError(
                f"Cannot construct a 'PandasDtype' from '{string}'"
            ) from err

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return PandasArray
class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin):
    """
    A pandas ExtensionArray for NumPy data.

    .. versionadded:: 0.24.0

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    values : ndarray
        The NumPy ndarray to wrap. Must be 1-dimensional.
    copy : bool, default False
        Whether to copy `values`.

    Attributes
    ----------
    None

    Methods
    -------
    None
    """

    # If you're wondering why pd.Series(cls) doesn't put the array in an
    # ExtensionBlock, search for `ABCPandasArray`. We check for
    # that _typ to ensure that users don't unnecessarily use EAs inside
    # pandas internals, which turns off things like block consolidation.
    _typ = "npy_extension"
    __array_priority__ = 1000
    # The wrapped 1-d ndarray (set in __init__).
    _ndarray: np.ndarray

    # ------------------------------------------------------------------------
    # Constructors

    def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False):
        # Unwrap another PandasArray so we never nest wrappers.
        if isinstance(values, type(self)):
            values = values._ndarray
        if not isinstance(values, np.ndarray):
            raise ValueError(
                f"'values' must be a NumPy array, not {type(values).__name__}"
            )

        if values.ndim != 1:
            raise ValueError("PandasArray must be 1-dimensional.")

        if copy:
            values = values.copy()

        self._ndarray = values
        self._dtype = PandasDtype(values.dtype)

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """Construct from a sequence of scalars (ExtensionArray API)."""
        if isinstance(dtype, PandasDtype):
            dtype = dtype._dtype

        result = np.asarray(scalars, dtype=dtype)
        # np.asarray may return the input unchanged; honour copy=True then.
        if copy and result is scalars:
            result = result.copy()
        return cls(result)

    @classmethod
    def _from_factorized(cls, values, original):
        return cls(values)

    @classmethod
    def _concat_same_type(cls, to_concat):
        return cls(np.concatenate(to_concat))

    # ------------------------------------------------------------------------
    # Data

    @property
    def dtype(self):
        return self._dtype

    # ------------------------------------------------------------------------
    # NumPy Array Interface

    def __array__(self, dtype=None) -> np.ndarray:
        return np.asarray(self._ndarray, dtype=dtype)

    _HANDLED_TYPES = (np.ndarray, numbers.Number)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        # Lightly modified version of
        # https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/\
        # numpy.lib.mixins.NDArrayOperatorsMixin.html
        # The primary modification is not boxing scalar return values
        # in PandasArray, since pandas' ExtensionArrays are 1-d.
        out = kwargs.get("out", ())
        for x in inputs + out:
            # Only support operations with instances of _HANDLED_TYPES.
            # Use PandasArray instead of type(self) for isinstance to
            # allow subclasses that don't override __array_ufunc__ to
            # handle PandasArray objects.
            if not isinstance(x, self._HANDLED_TYPES + (PandasArray,)):
                return NotImplemented

        # Defer to the implementation of the ufunc on unwrapped values.
        inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs)
        if out:
            kwargs["out"] = tuple(
                x._ndarray if isinstance(x, PandasArray) else x for x in out
            )
        result = getattr(ufunc, method)(*inputs, **kwargs)

        if type(result) is tuple and len(result):
            # multiple return values
            if not lib.is_scalar(result[0]):
                # re-box array-like results
                return tuple(type(self)(x) for x in result)
            else:
                # but not scalar reductions
                return result
        elif method == "at":
            # no return value
            return None
        else:
            # one return value
            if not lib.is_scalar(result):
                # re-box array-like results, but not scalar reductions
                result = type(self)(result)
            return result

    # ------------------------------------------------------------------------
    # Pandas ExtensionArray Interface

    def __getitem__(self, item):
        if isinstance(item, type(self)):
            item = item._ndarray

        item = check_array_indexer(self, item)

        result = self._ndarray[item]
        # Scalar keys give scalar results; array-like keys get re-boxed.
        if not lib.is_scalar(item):
            result = type(self)(result)
        return result

    def __setitem__(self, key, value):
        value = extract_array(value, extract_numpy=True)

        key = check_array_indexer(self, key)
        scalar_value = lib.is_scalar(value)

        if not scalar_value:
            # Coerce array-like values to our dtype before assignment.
            value = np.asarray(value, dtype=self._ndarray.dtype)

        self._ndarray[key] = value

    def __len__(self) -> int:
        return len(self._ndarray)

    @property
    def nbytes(self) -> int:
        return self._ndarray.nbytes

    def isna(self):
        return isna(self._ndarray)

    def fillna(self, value=None, method=None, limit=None):
        """Fill NA values with `value`, or propagate via `method`."""
        # TODO(_values_for_fillna): remove this
        value, method = validate_fillna_kwargs(value, method)

        mask = self.isna()

        if is_array_like(value):
            if len(value) != len(self):
                # BUG FIX: the original message had a stray leading space in
                # the second fragment, rendering "Got (n)  expected m".
                raise ValueError(
                    f"Length of 'value' does not match. Got ({len(value)}) "
                    f"expected {len(self)}"
                )
            value = value[mask]

        if mask.any():
            if method is not None:
                func = pad_1d if method == "pad" else backfill_1d
                new_values = func(self._ndarray, limit=limit, mask=mask)
                new_values = self._from_sequence(new_values, dtype=self.dtype)
            else:
                # fill with value
                new_values = self.copy()
                new_values[mask] = value
        else:
            # Nothing to fill; still return a copy, never self.
            new_values = self.copy()
        return new_values

    def take(self, indices, allow_fill=False, fill_value=None):
        if fill_value is None:
            # Primarily for subclasses
            fill_value = self.dtype.na_value
        result = take(
            self._ndarray, indices, allow_fill=allow_fill, fill_value=fill_value
        )
        return type(self)(result)

    def copy(self):
        return type(self)(self._ndarray.copy())

    def _values_for_argsort(self):
        return self._ndarray

    def _values_for_factorize(self):
        # -1 is the sentinel value used for NA positions during factorize.
        return self._ndarray, -1

    def unique(self):
        return type(self)(unique(self._ndarray))

    # ------------------------------------------------------------------------
    # Reductions

    def _reduce(self, name, skipna=True, **kwargs):
        """Dispatch a named reduction ("sum", "mean", ...) to our methods."""
        meth = getattr(self, name, None)
        if meth:
            return meth(skipna=skipna, **kwargs)
        else:
            msg = f"'{type(self).__name__}' does not implement reduction '{name}'"
            raise TypeError(msg)

    # The numpy-compatible signatures below accept (but validate away) the
    # extra numpy keyword arguments so np.sum(arr) etc. work via nv.validate_*.

    def any(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_any((), dict(out=out, keepdims=keepdims))
        return nanops.nanany(self._ndarray, axis=axis, skipna=skipna)

    def all(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_all((), dict(out=out, keepdims=keepdims))
        return nanops.nanall(self._ndarray, axis=axis, skipna=skipna)

    def min(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_min((), dict(out=out, keepdims=keepdims))
        return nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)

    def max(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_max((), dict(out=out, keepdims=keepdims))
        return nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)

    def sum(
        self,
        axis=None,
        dtype=None,
        out=None,
        keepdims=False,
        initial=None,
        skipna=True,
        min_count=0,
    ):
        nv.validate_sum(
            (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial)
        )
        return nanops.nansum(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )

    def prod(
        self,
        axis=None,
        dtype=None,
        out=None,
        keepdims=False,
        initial=None,
        skipna=True,
        min_count=0,
    ):
        nv.validate_prod(
            (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial)
        )
        return nanops.nanprod(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )

    def mean(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
        nv.validate_mean((), dict(dtype=dtype, out=out, keepdims=keepdims))
        return nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)

    def median(
        self, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True
    ):
        nv.validate_median(
            (), dict(out=out, overwrite_input=overwrite_input, keepdims=keepdims)
        )
        return nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)

    def std(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="std"
        )
        return nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)

    def var(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="var"
        )
        return nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)

    def sem(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="sem"
        )
        return nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)

    def kurt(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="kurt"
        )
        return nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)

    def skew(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="skew"
        )
        return nanops.nanskew(self._ndarray, axis=axis, skipna=skipna)

    # ------------------------------------------------------------------------
    # Additional Methods

    def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default):
        """Convert to an ndarray, optionally replacing NA with `na_value`."""
        result = np.asarray(self._ndarray, dtype=dtype)

        # Copy before mutating with na_value, or when the caller asked for
        # a copy but asarray handed back our own buffer.
        if (copy or na_value is not lib.no_default) and result is self._ndarray:
            result = result.copy()

        if na_value is not lib.no_default:
            result[self.isna()] = na_value

        return result

    @Appender(ExtensionArray.searchsorted.__doc__)
    def searchsorted(self, value, side="left", sorter=None):
        return searchsorted(self.to_numpy(), value, side=side, sorter=sorter)

    # ------------------------------------------------------------------------
    # Ops

    def __invert__(self):
        return type(self)(~self._ndarray)

    @classmethod
    def _create_arithmetic_method(cls, op):
        # Factory producing the __add__/__sub__/... dunders installed by
        # _add_arithmetic_ops / _add_comparison_ops.
        def arithmetic_method(self, other):
            if isinstance(other, (ABCIndexClass, ABCSeries)):
                # Let Index/Series handle alignment and boxing.
                return NotImplemented

            elif isinstance(other, cls):
                other = other._ndarray

            with np.errstate(all="ignore"):
                result = op(self._ndarray, other)

            if op is divmod:
                a, b = result
                return cls(a), cls(b)

            return cls(result)

        return compat.set_function_name(arithmetic_method, f"__{op.__name__}__", cls)

    # Comparisons reuse the same factory; op output (bool ndarray) is boxed too.
    _create_comparison_method = _create_arithmetic_method
# Install the arithmetic/comparison dunder methods on PandasArray; the
# ExtensionOpsMixin hooks call _create_arithmetic_method /
# _create_comparison_method for each operator.
PandasArray._add_arithmetic_ops()
PandasArray._add_comparison_ops()