
Source Code for Module MFnetCDF4_classic.MFnetCDF4_classic

  1  """ 
  2  Module for reading multi-file netCDF Datasets, making variables 
  3  spanning multiple files appear as if they were in one file. 
  4   
  5  Datasets must be in C{NETCDF4_CLASSIC, NETCDF3_CLASSIC or NETCDF3_64BIT} 
  6  format (C{NETCDF4} Datasets won't work). 
  7   
  8  Adapted from U{pycdf <http://pysclint.sourceforge.net/pycdf>} by Andre Gosselin. 
  9   
 10  Example usage: 
 11   
 12  >>> import MFnetCDF4_classic, netCDF4_classic, numpy 
 13  >>> # create a series of netCDF files with a variable sharing 
 14  >>> # the same unlimited dimension. 
 15  >>> for nfile in range(10): 
 16  >>>     f = netCDF4_classic.Dataset('mftest'+repr(nfile)+'.nc','w') 
 17  >>>     f.createDimension('x',None) 
 18  >>>     x = f.createVariable('x','i',('x',)) 
 19  >>>     x[0:10] = numpy.arange(nfile*10,10*(nfile+1)) 
 20  >>>     f.close() 
 21  >>> # now read all those files in at once, in one Dataset. 
 22  >>> f = MFnetCDF4_classic.Dataset('mftest*nc') 
 23  >>> print f.variables['x'][:] 
 24  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 
 25   25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 
 26   50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 
 27   75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99] 
 28  """ 
 29   
 30  import netCDF4_classic 
 31  import numpy 
 32  from glob import glob 
 33   
 34  __version__ = "0.6" 
 35   
class Dataset(netCDF4_classic.Dataset):
    """
    Class for reading a multi-file netCDF dataset.
    """

    def __init__(self, files, check=False):
        """
        Open a Dataset spanning multiple files, making it look as if it were a
        single file. Variables in the list of files that share the same unlimited
        dimension are aggregated.

        Adapted from U{pycdf <http://pysclint.sourceforge.net/pycdf>} by Andre Gosselin.

        Usage:

        nc = MFnetCDF4_classic.Dataset(files, check=False)

        @param files: either a sequence of netCDF files or a string with a
        wildcard (converted to a sorted list of files using glob). The first file
        in the list will become the "master" file, defining all the record
        variables (variables with an unlimited dimension) which may span
        subsequent files. Attribute access returns attributes only from the
        "master" file. The files are always opened in read-only mode.

        @param check: True if you want to do consistency checking to ensure the
        correct variable structure for all of the netcdf files. Checking makes
        the initialization of the MFnetCDF4_classic instance much slower.
        Default is False.
        """

        # Open the master file in the base class, so that the CDFMF instance
        # can be used like a CDF instance.
        if isinstance(files, str):
            files = sorted(glob(files))

        master = files[0]

        # Open the master again, this time as a classic CDF instance. This will
        # avoid calling methods of the CDFMF subclass when querying the master file.
        cdfm = netCDF4_classic.Dataset(master)
        # copy attributes from master.
        for name, value in cdfm.__dict__.items():
            self.__dict__[name] = value

        # Make sure the master defines an unlimited dimension.
        unlimDimId = None
        for dimname, dim in cdfm.dimensions.items():
            if dim.isunlimited():
                unlimDimId = dim
                unlimDimName = dimname
        if unlimDimId is None:
            raise IOError("master dataset %s does not have an unlimited dimension" % master)

        # Get info on all record variables defined in the master.
        # Make sure the master defines at least one record variable.
        masterRecVar = {}
        for vName, v in cdfm.variables.items():
            dims = v.dimensions
            shape = v.shape
            type = v.dtype
            # Be careful: we may deal with a scalar (dimensionless) variable.
            # The unlimited dimension always occupies index 0.
            if (len(dims) > 0 and unlimDimName == dims[0]):
                masterRecVar[vName] = (dims, shape, type)
        if len(masterRecVar) == 0:
            raise IOError("master dataset %s does not have any record variable" % master)

        # Create the following:
        #   cdf       list of Dataset instances
        #   cdfVLen   list of unlimited dimension lengths in each CDF instance
        #   cdfRecVar dictionary indexed by the record var names; each key holds
        #             a list of the corresponding Variable instances, one for each
        #             cdf file of the file set
        cdf = [cdfm]
        self._cdf = cdf        # store this now, because the dim() method needs it
        cdfVLen = [len(unlimDimId)]
        cdfRecVar = {}
        for v in masterRecVar.keys():
            cdfRecVar[v] = [cdfm.variables[v]]

        # Open each remaining file in read-only mode.
        # Make sure each file defines the same record variables as the master
        # and that the variables are defined in the same way (name, shape and type).
        for f in files[1:]:
            part = netCDF4_classic.Dataset(f)
            varInfo = part.variables
            for v in masterRecVar.keys():
                if check:
                    # Make sure the master rec var is also defined here.
                    if v not in varInfo.keys():
                        raise IOError("record variable %s not defined in %s" % (v, f))

                    # Make sure it is a record var.
                    vInst = part.variables[v]
                    if not part.dimensions[vInst.dimensions[0]].isunlimited():
                        raise IOError("variable %s is not a record var inside %s" % (v, f))

                    masterDims, masterShape, masterType = masterRecVar[v][:3]
                    extDims = varInfo[v].dimensions
                    extShape = varInfo[v].shape
                    extType = varInfo[v].dtype
                    # Check that dimension names are identical.
                    if masterDims != extDims:
                        raise IOError("variable %s : dimensions mismatch between "
                                      "master %s (%s) and extension %s (%s)" %
                                      (v, master, masterDims, f, extDims))

                    # Check that the ranks are identical, and the dimension lengths are
                    # identical (except for that of the unlimited dimension, which of
                    # course may vary).
                    if len(masterShape) != len(extShape):
                        raise IOError("variable %s : rank mismatch between "
                                      "master %s (%s) and extension %s (%s)" %
                                      (v, master, len(masterShape), f, len(extShape)))
                    if masterShape[1:] != extShape[1:]:
                        raise IOError("variable %s : shape mismatch between "
                                      "master %s (%s) and extension %s (%s)" %
                                      (v, master, masterShape, f, extShape))

                    # Check that the data types are identical.
                    if masterType != extType:
                        raise IOError("variable %s : data type mismatch between "
                                      "master %s (%s) and extension %s (%s)" %
                                      (v, master, masterType, f, extType))

                    # Everything ok.
                    cdfRecVar[v].append(vInst)
                else:
                    # No consistency checking -- assume this is ok.
                    vInst = part.variables[v]
                    cdfRecVar[v].append(vInst)

            cdf.append(part)
            cdfVLen.append(len(part.dimensions[unlimDimName]))

        # Attach attributes to the MFnetCDF4_classic.Dataset instance.
        # A local __setattr__() method is required for them.
        self._files = files        # list of cdf file names in the set
        self._cdfVLen = cdfVLen    # list of unlimited lengths
        self._cdfTLen = reduce(lambda x, y: x + y, cdfVLen)  # total length
        self._cdfRecVar = cdfRecVar  # dictionary of Variable instances for all
                                     # the record variables
        self._dims = cdfm.dimensions
        for dimname, dim in self._dims.items():
            if dim.isunlimited():
                self._dims[dimname] = _Dimension(dimname, dim, self._cdfVLen, self._cdfTLen)
        self._vars = cdfm.variables
        for varname, var in self._vars.items():
            if varname in self._cdfRecVar.keys():
                self._vars[varname] = _Variable(self, varname, var, unlimDimName)
        self._file_format = []
        for dset in self._cdf:
            self._file_format.append(dset.file_format)

    def __setattr__(self, name, value):
        """override base class attribute creation"""
        self.__dict__[name] = value

    def __getattribute__(self, name):
        if name in ['variables','dimensions','file_format']:
            if name == 'dimensions': return self._dims
            if name == 'variables': return self._vars
            if name == 'file_format': return self._file_format
        else:
            return netCDF4_classic.Dataset.__getattribute__(self, name)

    def ncattrs(self):
        return self._cdf[0].__dict__.keys()

    def close(self):
        for dset in self._cdf:
            dset.close()

class _Dimension(object):
    def __init__(self, dimname, dim, dimlens, dimtotlen):
        self.dimlens = dimlens
        self.dimtotlen = dimtotlen
    def __len__(self):
        return self.dimtotlen
    def isunlimited(self):
        return True

class _Variable(object):
    def __init__(self, dset, varname, var, recdimname):
        self.dimensions = var.dimensions
        self._dset = dset
        self._mastervar = var
        self._recVar = dset._cdfRecVar[varname]
        self._recdimname = recdimname
        self._recLen = dset._cdfVLen
        self.dtype = var.dtype
        # copy attributes from master.
        for name, value in var.__dict__.items():
            self.__dict__[name] = value
    def typecode(self):
        return self.dtype
    def ncattrs(self):
        return self._mastervar.__dict__.keys()
    def __getattr__(self, name):
        if name == 'shape': return self._shape()
        return self.__dict__[name]
    def _shape(self):
        recdimlen = len(self._dset.dimensions[self._recdimname])
        return (recdimlen,) + self._mastervar.shape[1:]
    def __getitem__(self, elem):
242 """Get records from a concatenated set of variables.""" 243 # Number of variables making up the MFVariable.Variable. 244 nv = len(self._recLen) 245 # Parse the slicing expression, needed to properly handle 246 # a possible ellipsis. 247 start, count, stride = netCDF4_classic._buildStartCountStride(elem, self.shape, self.dimensions, self._dset) 248 # make sure count=-1 becomes count=1 249 count = [abs(cnt) for cnt in count] 250 if (numpy.array(stride) < 0).any(): 251 raise IndexError('negative strides not allowed when slicing MFVariable Variable instance') 252 # Start, stop and step along 1st dimension, eg the unlimited 253 # dimension. 254 sta = start[0] 255 step = stride[0] 256 stop = sta + count[0] * step 257 258 # Build a list representing the concatenated list of all records in 259 # the MFVariable variable set. The list is composed of 2-elem lists 260 # each holding: 261 # the record index inside the variables, from 0 to n 262 # the index of the Variable instance to which each record belongs 263 idx = [] # list of record indices 264 vid = [] # list of Variable indices 265 for n in range(nv): 266 k = self._recLen[n] # number of records in this variable 267 idx.extend(range(k)) 268 vid.extend([n] * k) 269 270 # Merge the two lists to get a list of 2-elem lists. 271 # Slice this list along the first dimension. 272 lst = zip(idx, vid).__getitem__(slice(sta, stop, step)) 273 274 # Rebuild the slicing expression for dimensions 1 and ssq. 275 newSlice = [slice(None, None, None)] 276 for n in range(1, len(start)): # skip dimension 0 277 newSlice.append(slice(start[n], 278 start[n] + count[n] * stride[n], stride[n])) 279 280 # Apply the slicing expression to each var in turn, extracting records 281 # in a list of arrays. 282 lstArr = [] 283 for n in range(nv): 284 # Get the list of indices for variable 'n'. 285 idx = [i for i,numv in lst if numv == n] 286 if idx: 287 # Rebuild slicing expression for dimension 0. 288 newSlice[0] = slice(idx[0], idx[-1] + 1, step) 289 # Extract records from the var, and append them to a list 290 # of arrays. 291 lstArr.append(netCDF4_classic.Variable.__getitem__(self._recVar[n], tuple(newSlice))) 292 293 # Return the extracted records as a unified array. 294 if lstArr: 295 lstArr = numpy.concatenate(lstArr) 296 return numpy.squeeze(lstArr)
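
The module docstring shows the wildcard form of Dataset; as a complementary illustration, here is a minimal usage sketch (not part of the module) that passes an explicit, ordered list of files and turns on the per-file consistency checks. It assumes the mftest*.nc files from the docstring example already exist on disk.

import MFnetCDF4_classic

# explicit, ordered list of files instead of a wildcard string
files = ['mftest%d.nc' % n for n in range(10)]
# check=True verifies that every file defines the same record variables
# (name, shape and dtype) as the master file; slower, but safer.
nc = MFnetCDF4_classic.Dataset(files, check=True)

# dimensions, variables and attributes are served from the "master" (first)
# file; the unlimited dimension reports the total length across all files.
print len(nc.dimensions['x'])      # 100
print nc.variables['x'][45:55]     # a slice spanning two of the files
nc.close()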