Package MFnetCDF4_classic :: Module MFnetCDF4_classic

1 """
2 Module for reading multi-file netCDF Datasets, making variables
3 spanning multiple files appear as if they were in one file.
4
5 Datasets must be in C{NETCDF4_CLASSIC, NETCDF3_CLASSIC or NETCDF3_64BIT}
6 format (C{NETCDF4} Datasets won't work).
7
8 Adapted from U{pycdf <http://pysclint.sourceforge.net/pycdf>} by Andre Gosselin.
9
10 Example usage:
11
12 >>> import MFnetCDF4_classic, netCDF4_classic, numpy
13 >>> # create a series of netCDF files with a variable sharing
14 >>> # the same unlimited dimension.
15 >>> for nfile in range(10):
16 >>> f = netCDF4_classic.Dataset('mftest'+repr(nfile)+'.nc','w')
17 >>> f.createDimension('x',None)
18 >>> x = f.createVariable('x','i',('x',))
19 >>> x[0:10] = numpy.arange(nfile*10,10*(nfile+1))
20 >>> f.close()
21 >>> # now read all those files in at once, in one Dataset.
22 >>> f = MFnetCDF4_classic.Dataset('mftest*nc')
23 >>> print f.variables['x'][:]
24 [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
25 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
26 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
27 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99]
28 """

import netCDF4_classic
import numpy
from glob import glob

__version__ = "0.6"

class Dataset(netCDF4_classic.Dataset):
    """
    Class for reading a multi-file netCDF dataset.
    """

42 """
43 Open a Dataset spanning multiple files, making it look as if it was a
44 single file. Variables in the list of files that share the same unlimited
45 dimension are aggregated.
46
47 Adapted from U{pycdf <http://pysclint.sourceforge.net/pycdf>} by Andre Gosselin.
48
49 Usage:
50
51 nc = MFnetCDF4_classic.Dataset(files, check=False)
52
53 @param files: either a sequence of netCDF files or a string with a
54 wildcard (converted to a sorted list of files using glob) The first file
55 in the list will become the "master" file, defining all the record
56 variables (variables with an unlimited dimension) which may span
57 subsequent files. Attribute access returns attributes only from "master"
58 file. The files are always opened in read-only mode.
59
60 @param check: True if you want to do consistency checking to ensure the
61 correct variables structure for all of the netcdf files. Checking makes
62 the initialization of the MFnetCDF4_classic instance much slower. Default is
63 False.
64 """
        if isinstance(files, str):
            files = sorted(glob(files))

        master = files[0]
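
        # Open the master file; its metadata (global attributes, dimensions
        # and record variables) defines what the aggregated Dataset exposes.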
        cdfm = netCDF4_classic.Dataset(master)
        for name, value in cdfm.__dict__.items():
            self.__dict__[name] = value
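
        # Make sure the master defines an unlimited dimension.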
        unlimDimId = None
        for dimname, dim in cdfm.dimensions.items():
            if dim.isunlimited():
                unlimDimId = dim
                unlimDimName = dimname
        if unlimDimId is None:
            raise IOError("master dataset %s does not have an unlimited dimension" % master)
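
        # Get info on all record variables defined in the master and make
        # sure there is at least one.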
        masterRecVar = {}
        for vName, v in cdfm.variables.items():
            dims = v.dimensions
            shape = v.shape
            type = v.dtype
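            # A variable counts as a record variable only if it has at least
            # one dimension and its leading dimension is the unlimited one
            # (this guards against scalar variables).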
            if (len(dims) > 0 and unlimDimName == dims[0]):
                masterRecVar[vName] = (dims, shape, type)
        if len(masterRecVar) == 0:
            raise IOError("master dataset %s does not have any record variable" % master)

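        # Bookkeeping for the file set:
        #   cdf       - list of Dataset instances, one per file
        #   cdfVLen   - length of the unlimited dimension in each file
        #   cdfRecVar - for each record variable name, the list of the
        #               corresponding Variable instances, one per file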
        cdf = [cdfm]
        self._cdf = cdf
        cdfVLen = [len(unlimDimId)]
        cdfRecVar = {}
        for v in masterRecVar.keys():
            cdfRecVar[v] = [cdfm.variables[v]]

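        # Open each remaining file in read-only mode and, if requested,
        # verify that it defines the same record variables as the master,
        # with the same dimension names, shape (apart from the unlimited
        # dimension) and data type.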
        for f in files[1:]:
            part = netCDF4_classic.Dataset(f)
            varInfo = part.variables
            for v in masterRecVar.keys():
                if check:
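                    # Make sure the master record variable is also defined
                    # in this file.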
                    if v not in varInfo.keys():
                        raise IOError("record variable %s not defined in %s" % (v, f))
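
                    # Make sure it is a record variable in this file as well.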
                    vInst = part.variables[v]
                    if not part.dimensions[vInst.dimensions[0]].isunlimited():
                        raise IOError("variable %s is not a record var inside %s" % (v, f))

                    masterDims, masterShape, masterType = masterRecVar[v][:3]
                    extDims = varInfo[v].dimensions
                    extShape = varInfo[v].shape
                    extType = varInfo[v].dtype
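
                    # Check that the dimension names are identical.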
                    if masterDims != extDims:
                        raise IOError("variable %s : dimensions mismatch between "
                                      "master %s (%s) and extension %s (%s)" %
                                      (v, master, masterDims, f, extDims))
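
                    # Check that the ranks match and that the dimension
                    # lengths match, except along the unlimited dimension,
                    # which may of course differ from file to file.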
                    if len(masterShape) != len(extShape):
                        raise IOError("variable %s : rank mismatch between "
                                      "master %s (%s) and extension %s (%s)" %
                                      (v, master, len(masterShape), f, len(extShape)))
                    if masterShape[1:] != extShape[1:]:
                        raise IOError("variable %s : shape mismatch between "
                                      "master %s (%s) and extension %s (%s)" %
                                      (v, master, masterShape, f, extShape))
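
                    # Make sure the data types are identical.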
                    if masterType != extType:
                        raise IOError("variable %s : data type mismatch between "
                                      "master %s (%s) and extension %s (%s)" %
                                      (v, master, masterType, f, extType))

                    cdfRecVar[v].append(vInst)
                else:
                    vInst = part.variables[v]
                    cdfRecVar[v].append(vInst)

            cdf.append(part)
            cdfVLen.append(len(part.dimensions[unlimDimName]))
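
        # Attach the aggregation bookkeeping to the instance; the local
        # __setattr__ below stores these directly in __dict__.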
        self._files = files
        self._cdfVLen = cdfVLen
        self._cdfTLen = reduce(lambda x, y: x + y, cdfVLen)
        self._cdfRecVar = cdfRecVar
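
        # Replace the unlimited dimension and the record variables with
        # proxy objects that span the whole file set.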
        self._dims = cdfm.dimensions
        for dimname, dim in self._dims.items():
            if dim.isunlimited():
                self._dims[dimname] = _Dimension(dimname, dim, self._cdfVLen, self._cdfTLen)
        self._vars = cdfm.variables
        for varname, var in self._vars.items():
            if varname in self._cdfRecVar.keys():
                self._vars[varname] = _Variable(self, varname, var, unlimDimName)
        self._file_format = []
        for dset in self._cdf:
            self._file_format.append(dset.file_format)

192 """override base class attribute creation"""
193 self.__dict__[name] = value
194
    def __getattribute__(self, name):
        if name in ['variables', 'dimensions', 'file_format']:
            if name == 'dimensions': return self._dims
            if name == 'variables': return self._vars
            if name == 'file_format': return self._file_format
        else:
            return netCDF4_classic.Dataset.__getattribute__(self, name)

    def ncattrs(self):
        return self._cdf[0].__dict__.keys()

    def close(self):
        for dset in self._cdf:
            dset.close()

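# Proxy for the unlimited dimension: its length is the total length of the
# unlimited dimension summed over all files in the set.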
class _Dimension(object):
    def __init__(self, dimname, dim, dimlens, dimtotlen):
        self.dimlens = dimlens
        self.dimtotlen = dimtotlen
    def __len__(self):
        return self.dimtotlen
    def isunlimited(self):
        return True

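# Proxy for a record variable: reads are dispatched to the per-file Variable
# instances and the results are concatenated along the unlimited dimension.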
class _Variable(object):
    def __init__(self, dset, varname, var, recdimname):
        self.dimensions = var.dimensions
        self._dset = dset
        self._mastervar = var
        self._recVar = dset._cdfRecVar[varname]
        self._recdimname = recdimname
        self._recLen = dset._cdfVLen
        self.dtype = var.dtype
        # copy attributes from the master variable.
        for name, value in var.__dict__.items():
            self.__dict__[name] = value
    def ncattrs(self):
        return self._mastervar.__dict__.keys()
    def __getattr__(self, name):
        if name == 'shape': return self._shape()
        return self.__dict__[name]
    def _shape(self):
        recdimlen = len(self._dset.dimensions[self._recdimname])
        return (recdimlen,) + self._mastervar.shape[1:]
242 """Get records from a concatenated set of variables."""
243
244 nv = len(self._recLen)
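
        # Parse the slicing expression (handles a possible Ellipsis) into
        # per-dimension start, count and stride lists.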
        start, count, stride = netCDF4_classic._buildStartCountStride(elem, self.shape, self.dimensions, self._dset)

        count = [abs(cnt) for cnt in count]
        if (numpy.array(stride) < 0).any():
            raise IndexError('negative strides not allowed when slicing MFVariable Variable instance')
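
        # Start, stop and step along the first (unlimited) dimension.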
        sta = start[0]
        step = stride[0]
        stop = sta + count[0] * step
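
        # Describe the concatenated records: for record i of the aggregated
        # variable, idx[i] is its index inside its own file and vid[i] is the
        # index of the file (variable) it comes from.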
        idx = []
        vid = []
        for n in range(nv):
            k = self._recLen[n]
            idx.extend(range(k))
            vid.extend([n] * k)
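
        # Pair the two lists and slice the pairs along the first dimension.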
        lst = zip(idx, vid).__getitem__(slice(sta, stop, step))
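
        # Rebuild the slicing expression for dimensions 1 and beyond.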
        newSlice = [slice(None, None, None)]
        for n in range(1, len(start)):
            newSlice.append(slice(start[n],
                                  start[n] + count[n] * stride[n], stride[n]))
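
        # Apply the slicing expression to each per-file variable in turn,
        # collecting the extracted records in a list of arrays.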
        lstArr = []
        for n in range(nv):
            idx = [i for i, numv in lst if numv == n]
            if idx:
                newSlice[0] = slice(idx[0], idx[-1] + 1, step)
                lstArr.append(netCDF4_classic.Variable.__getitem__(self._recVar[n], tuple(newSlice)))
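
        # Concatenate the per-file pieces and return them as a single array.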
        if lstArr:
            lstArr = numpy.concatenate(lstArr)
        return numpy.squeeze(lstArr)