Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""List of Lists sparse matrix class 

2""" 

3 

4__docformat__ = "restructuredtext en" 

5 

6__all__ = ['lil_matrix', 'isspmatrix_lil'] 

7 

8from bisect import bisect_left 

9 

10import numpy as np 

11 

12from .base import spmatrix, isspmatrix 

13from ._index import IndexMixin, INT_TYPES, _broadcast_arrays 

14from .sputils import (getdtype, isshape, isscalarlike, upcast_scalar, 

15 get_index_dtype, check_shape, check_reshape_kwargs, 

16 asmatrix) 

17from . import _csparsetools 

18 

19 

20class lil_matrix(spmatrix, IndexMixin): 

21 """Row-based list of lists sparse matrix 

22 

23 This is a structure for constructing sparse matrices incrementally. 

24 Note that inserting a single item can take linear time in the worst case; 

25 to construct a matrix efficiently, make sure the items are pre-sorted by 

26 index, per row. 

27 

28 This can be instantiated in several ways: 

29 lil_matrix(D) 

30 with a dense matrix or rank-2 ndarray D 

31 

32 lil_matrix(S) 

33 with another sparse matrix S (equivalent to S.tolil()) 

34 

35 lil_matrix((M, N), [dtype]) 

36 to construct an empty matrix with shape (M, N) 

37 dtype is optional, defaulting to dtype='d'. 

38 

39 Attributes 

40 ---------- 

41 dtype : dtype 

42 Data type of the matrix 

43 shape : 2-tuple 

44 Shape of the matrix 

45 ndim : int 

46 Number of dimensions (this is always 2) 

47 nnz 

48 Number of stored values, including explicit zeros 

49 data 

50 LIL format data array of the matrix 

51 rows 

52 LIL format row index array of the matrix 

53 

54 Notes 

55 ----- 

56 

57 Sparse matrices can be used in arithmetic operations: they support 

58 addition, subtraction, multiplication, division, and matrix power. 

59 

60 Advantages of the LIL format 

61 - supports flexible slicing 

62 - changes to the matrix sparsity structure are efficient 

63 

64 Disadvantages of the LIL format 

65 - arithmetic operations LIL + LIL are slow (consider CSR or CSC) 

66 - slow column slicing (consider CSC) 

67 - slow matrix vector products (consider CSR or CSC) 

68 

69 Intended Usage 

70 - LIL is a convenient format for constructing sparse matrices 

71 - once a matrix has been constructed, convert to CSR or 

72 CSC format for fast arithmetic and matrix vector operations 

73 - consider using the COO format when constructing large matrices 

74 

75 Data Structure 

76 - An array (``self.rows``) of rows, each of which is a sorted 

77 list of column indices of non-zero elements. 

78 - The corresponding nonzero values are stored in similar 

79 fashion in ``self.data``. 

80 

81 

82 """ 

83 format = 'lil' 

84 

85 def __init__(self, arg1, shape=None, dtype=None, copy=False): 

86 spmatrix.__init__(self) 

87 self.dtype = getdtype(dtype, arg1, default=float) 

88 

89 # First get the shape 

90 if isspmatrix(arg1): 

91 if isspmatrix_lil(arg1) and copy: 

92 A = arg1.copy() 

93 else: 

94 A = arg1.tolil() 

95 

96 if dtype is not None: 

97 A = A.astype(dtype, copy=False) 

98 

99 self._shape = check_shape(A.shape) 

100 self.dtype = A.dtype 

101 self.rows = A.rows 

102 self.data = A.data 

103 elif isinstance(arg1,tuple): 

104 if isshape(arg1): 

105 if shape is not None: 

106 raise ValueError('invalid use of shape parameter') 

107 M, N = arg1 

108 self._shape = check_shape((M, N)) 

109 self.rows = np.empty((M,), dtype=object) 

110 self.data = np.empty((M,), dtype=object) 

111 for i in range(M): 

112 self.rows[i] = [] 

113 self.data[i] = [] 

114 else: 

115 raise TypeError('unrecognized lil_matrix constructor usage') 

116 else: 

117 # assume A is dense 

118 try: 

119 A = asmatrix(arg1) 

120 except TypeError: 

121 raise TypeError('unsupported matrix type') 

122 else: 

123 from .csr import csr_matrix 

124 A = csr_matrix(A, dtype=dtype).tolil() 

125 

126 self._shape = check_shape(A.shape) 

127 self.dtype = A.dtype 

128 self.rows = A.rows 

129 self.data = A.data 

130 

131 def __iadd__(self,other): 

132 self[:,:] = self + other 

133 return self 

134 

135 def __isub__(self,other): 

136 self[:,:] = self - other 

137 return self 

138 

139 def __imul__(self,other): 

140 if isscalarlike(other): 

141 self[:,:] = self * other 

142 return self 

143 else: 

144 return NotImplemented 

145 

146 def __itruediv__(self,other): 

147 if isscalarlike(other): 

148 self[:,:] = self / other 

149 return self 

150 else: 

151 return NotImplemented 

152 

153 # Whenever the dimensions change, empty lists should be created for each 

154 # row 

155 

156 def getnnz(self, axis=None): 

157 if axis is None: 

158 return sum([len(rowvals) for rowvals in self.data]) 

159 if axis < 0: 

160 axis += 2 

161 if axis == 0: 

162 out = np.zeros(self.shape[1], dtype=np.intp) 

163 for row in self.rows: 

164 out[row] += 1 

165 return out 

166 elif axis == 1: 

167 return np.array([len(rowvals) for rowvals in self.data], dtype=np.intp) 

168 else: 

169 raise ValueError('axis out of bounds') 

170 

171 def count_nonzero(self): 

172 return sum(np.count_nonzero(rowvals) for rowvals in self.data) 

173 

174 getnnz.__doc__ = spmatrix.getnnz.__doc__ 

175 count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__ 

176 

177 def __str__(self): 

178 val = '' 

179 for i, row in enumerate(self.rows): 

180 for pos, j in enumerate(row): 

181 val += " %s\t%s\n" % (str((i, j)), str(self.data[i][pos])) 

182 return val[:-1] 

183 

184 def getrowview(self, i): 

185 """Returns a view of the 'i'th row (without copying). 

186 """ 

187 new = lil_matrix((1, self.shape[1]), dtype=self.dtype) 

188 new.rows[0] = self.rows[i] 

189 new.data[0] = self.data[i] 

190 return new 

191 

192 def getrow(self, i): 

193 """Returns a copy of the 'i'th row. 

194 """ 

195 M, N = self.shape 

196 if i < 0: 

197 i += M 

198 if i < 0 or i >= M: 

199 raise IndexError('row index out of bounds') 

200 new = lil_matrix((1, N), dtype=self.dtype) 

201 new.rows[0] = self.rows[i][:] 

202 new.data[0] = self.data[i][:] 

203 return new 

204 

205 def __getitem__(self, key): 

206 # Fast path for simple (int, int) indexing. 

207 if (isinstance(key, tuple) and len(key) == 2 and 

208 isinstance(key[0], INT_TYPES) and 

209 isinstance(key[1], INT_TYPES)): 

210 # lil_get1 handles validation for us. 

211 return self._get_intXint(*key) 

212 # Everything else takes the normal path. 

213 return IndexMixin.__getitem__(self, key) 

214 

215 def _asindices(self, idx, N): 

216 # LIL routines handle bounds-checking for us, so don't do it here. 

217 try: 

218 x = np.asarray(idx) 

219 except (ValueError, TypeError, MemoryError): 

220 raise IndexError('invalid index') 

221 if x.ndim not in (1, 2): 

222 raise IndexError('Index dimension must be <= 2') 

223 return x 

224 

225 def _get_intXint(self, row, col): 

226 v = _csparsetools.lil_get1(self.shape[0], self.shape[1], self.rows, 

227 self.data, row, col) 

228 return self.dtype.type(v) 

229 

230 def _get_sliceXint(self, row, col): 

231 row = range(*row.indices(self.shape[0])) 

232 return self._get_row_ranges(row, slice(col, col+1)) 

233 

234 def _get_arrayXint(self, row, col): 

235 return self._get_row_ranges(row, slice(col, col+1)) 

236 

237 def _get_intXslice(self, row, col): 

238 return self._get_row_ranges((row,), col) 

239 

240 def _get_sliceXslice(self, row, col): 

241 row = range(*row.indices(self.shape[0])) 

242 return self._get_row_ranges(row, col) 

243 

244 def _get_arrayXslice(self, row, col): 

245 return self._get_row_ranges(row, col) 

246 

247 def _get_intXarray(self, row, col): 

248 row = np.array(row, dtype=col.dtype, ndmin=1) 

249 return self._get_columnXarray(row, col) 

250 

251 def _get_sliceXarray(self, row, col): 

252 row = np.arange(*row.indices(self.shape[0])) 

253 return self._get_columnXarray(row, col) 

254 

255 def _get_columnXarray(self, row, col): 

256 # outer indexing 

257 row, col = _broadcast_arrays(row[:,None], col) 

258 return self._get_arrayXarray(row, col) 

259 

260 def _get_arrayXarray(self, row, col): 

261 # inner indexing 

262 i, j = map(np.atleast_2d, _prepare_index_for_memoryview(row, col)) 

263 new = lil_matrix(i.shape, dtype=self.dtype) 

264 _csparsetools.lil_fancy_get(self.shape[0], self.shape[1], 

265 self.rows, self.data, 

266 new.rows, new.data, 

267 i, j) 

268 return new 

269 

270 def _get_row_ranges(self, rows, col_slice): 

271 """ 

272 Fast path for indexing in the case where column index is slice. 

273 

274 This gains performance improvement over brute force by more 

275 efficient skipping of zeros, by accessing the elements 

276 column-wise in order. 

277 

278 Parameters 

279 ---------- 

280 rows : sequence or range 

281 Rows indexed. If range, must be within valid bounds. 

282 col_slice : slice 

283 Columns indexed 

284 

285 """ 

286 j_start, j_stop, j_stride = col_slice.indices(self.shape[1]) 

287 col_range = range(j_start, j_stop, j_stride) 

288 nj = len(col_range) 

289 new = lil_matrix((len(rows), nj), dtype=self.dtype) 

290 

291 _csparsetools.lil_get_row_ranges(self.shape[0], self.shape[1], 

292 self.rows, self.data, 

293 new.rows, new.data, 

294 rows, 

295 j_start, j_stop, j_stride, nj) 

296 

297 return new 

298 

299 def _set_intXint(self, row, col, x): 

300 _csparsetools.lil_insert(self.shape[0], self.shape[1], self.rows, 

301 self.data, row, col, x) 

302 

303 def _set_arrayXarray(self, row, col, x): 

304 i, j, x = map(np.atleast_2d, _prepare_index_for_memoryview(row, col, x)) 

305 _csparsetools.lil_fancy_set(self.shape[0], self.shape[1], 

306 self.rows, self.data, 

307 i, j, x) 

308 

309 def _set_arrayXarray_sparse(self, row, col, x): 

310 # Special case: full matrix assignment 

311 if (x.shape == self.shape and 

312 isinstance(row, slice) and row == slice(None) and 

313 isinstance(col, slice) and col == slice(None)): 

314 x = lil_matrix(x, dtype=self.dtype) 

315 self.rows = x.rows 

316 self.data = x.data 

317 return 

318 # Fall back to densifying x 

319 x = np.asarray(x.toarray(), dtype=self.dtype) 

320 x, _ = _broadcast_arrays(x, row) 

321 self._set_arrayXarray(row, col, x) 

322 

323 def __setitem__(self, key, x): 

324 # Fast path for simple (int, int) indexing. 

325 if (isinstance(key, tuple) and len(key) == 2 and 

326 isinstance(key[0], INT_TYPES) and 

327 isinstance(key[1], INT_TYPES)): 

328 x = self.dtype.type(x) 

329 if x.size > 1: 

330 raise ValueError("Trying to assign a sequence to an item") 

331 return self._set_intXint(key[0], key[1], x) 

332 # Everything else takes the normal path. 

333 IndexMixin.__setitem__(self, key, x) 

334 

335 def _mul_scalar(self, other): 

336 if other == 0: 

337 # Multiply by zero: return the zero matrix 

338 new = lil_matrix(self.shape, dtype=self.dtype) 

339 else: 

340 res_dtype = upcast_scalar(self.dtype, other) 

341 

342 new = self.copy() 

343 new = new.astype(res_dtype) 

344 # Multiply this scalar by every element. 

345 for j, rowvals in enumerate(new.data): 

346 new.data[j] = [val*other for val in rowvals] 

347 return new 

348 

349 def __truediv__(self, other): # self / other 

350 if isscalarlike(other): 

351 new = self.copy() 

352 # Divide every element by this scalar 

353 for j, rowvals in enumerate(new.data): 

354 new.data[j] = [val/other for val in rowvals] 

355 return new 

356 else: 

357 return self.tocsr() / other 

358 

359 def copy(self): 

360 M, N = self.shape 

361 new = lil_matrix(self.shape, dtype=self.dtype) 

362 # This is ~14x faster than calling deepcopy() on rows and data. 

363 _csparsetools.lil_get_row_ranges(M, N, self.rows, self.data, 

364 new.rows, new.data, range(M), 

365 0, N, 1, N) 

366 return new 

367 

368 copy.__doc__ = spmatrix.copy.__doc__ 

369 

370 def reshape(self, *args, **kwargs): 

371 shape = check_shape(args, self.shape) 

372 order, copy = check_reshape_kwargs(kwargs) 

373 

374 # Return early if reshape is not required 

375 if shape == self.shape: 

376 if copy: 

377 return self.copy() 

378 else: 

379 return self 

380 

381 new = lil_matrix(shape, dtype=self.dtype) 

382 

383 if order == 'C': 

384 ncols = self.shape[1] 

385 for i, row in enumerate(self.rows): 

386 for col, j in enumerate(row): 

387 new_r, new_c = np.unravel_index(i * ncols + j, shape) 

388 new[new_r, new_c] = self[i, j] 

389 elif order == 'F': 

390 nrows = self.shape[0] 

391 for i, row in enumerate(self.rows): 

392 for col, j in enumerate(row): 

393 new_r, new_c = np.unravel_index(i + j * nrows, shape, order) 

394 new[new_r, new_c] = self[i, j] 

395 else: 

396 raise ValueError("'order' must be 'C' or 'F'") 

397 

398 return new 

399 

400 reshape.__doc__ = spmatrix.reshape.__doc__ 

401 

402 def resize(self, *shape): 

403 shape = check_shape(shape) 

404 new_M, new_N = shape 

405 M, N = self.shape 

406 

407 if new_M < M: 

408 self.rows = self.rows[:new_M] 

409 self.data = self.data[:new_M] 

410 elif new_M > M: 

411 self.rows = np.resize(self.rows, new_M) 

412 self.data = np.resize(self.data, new_M) 

413 for i in range(M, new_M): 

414 self.rows[i] = [] 

415 self.data[i] = [] 

416 

417 if new_N < N: 

418 for row, data in zip(self.rows, self.data): 

419 trunc = bisect_left(row, new_N) 

420 del row[trunc:] 

421 del data[trunc:] 

422 

423 self._shape = shape 

424 

425 resize.__doc__ = spmatrix.resize.__doc__ 

426 

427 def toarray(self, order=None, out=None): 

428 d = self._process_toarray_args(order, out) 

429 for i, row in enumerate(self.rows): 

430 for pos, j in enumerate(row): 

431 d[i, j] = self.data[i][pos] 

432 return d 

433 

434 toarray.__doc__ = spmatrix.toarray.__doc__ 

435 

436 def transpose(self, axes=None, copy=False): 

437 return self.tocsr(copy=copy).transpose(axes=axes, copy=False).tolil(copy=False) 

438 

439 transpose.__doc__ = spmatrix.transpose.__doc__ 

440 

441 def tolil(self, copy=False): 

442 if copy: 

443 return self.copy() 

444 else: 

445 return self 

446 

447 tolil.__doc__ = spmatrix.tolil.__doc__ 

448 

449 def tocsr(self, copy=False): 

450 from .csr import csr_matrix 

451 

452 M, N = self.shape 

453 if M == 0 or N == 0: 

454 return csr_matrix((M, N), dtype=self.dtype) 

455 

456 # construct indptr array 

457 if M*N <= np.iinfo(np.int32).max: 

458 # fast path: it is known that 64-bit indexing will not be needed. 

459 idx_dtype = np.int32 

460 indptr = np.empty(M + 1, dtype=idx_dtype) 

461 indptr[0] = 0 

462 _csparsetools.lil_get_lengths(self.rows, indptr[1:]) 

463 np.cumsum(indptr, out=indptr) 

464 nnz = indptr[-1] 

465 else: 

466 idx_dtype = get_index_dtype(maxval=N) 

467 lengths = np.empty(M, dtype=idx_dtype) 

468 _csparsetools.lil_get_lengths(self.rows, lengths) 

469 nnz = lengths.sum() 

470 idx_dtype = get_index_dtype(maxval=max(N, nnz)) 

471 indptr = np.empty(M + 1, dtype=idx_dtype) 

472 indptr[0] = 0 

473 np.cumsum(lengths, dtype=idx_dtype, out=indptr[1:]) 

474 

475 indices = np.empty(nnz, dtype=idx_dtype) 

476 data = np.empty(nnz, dtype=self.dtype) 

477 _csparsetools.lil_flatten_to_array(self.rows, indices) 

478 _csparsetools.lil_flatten_to_array(self.data, data) 

479 

480 # init csr matrix 

481 return csr_matrix((data, indices, indptr), shape=self.shape) 

482 

483 tocsr.__doc__ = spmatrix.tocsr.__doc__ 

484 

485 

486def _prepare_index_for_memoryview(i, j, x=None): 

487 """ 

488 Convert index and data arrays to form suitable for passing to the 

489 Cython fancy getset routines. 

490 

491 The conversions are necessary since to (i) ensure the integer 

492 index arrays are in one of the accepted types, and (ii) to ensure 

493 the arrays are writable so that Cython memoryview support doesn't 

494 choke on them. 

495 

496 Parameters 

497 ---------- 

498 i, j 

499 Index arrays 

500 x : optional 

501 Data arrays 

502 

503 Returns 

504 ------- 

505 i, j, x 

506 Re-formatted arrays (x is omitted, if input was None) 

507 

508 """ 

509 if i.dtype > j.dtype: 

510 j = j.astype(i.dtype) 

511 elif i.dtype < j.dtype: 

512 i = i.astype(j.dtype) 

513 

514 if not i.flags.writeable or i.dtype not in (np.int32, np.int64): 

515 i = i.astype(np.intp) 

516 if not j.flags.writeable or j.dtype not in (np.int32, np.int64): 

517 j = j.astype(np.intp) 

518 

519 if x is not None: 

520 if not x.flags.writeable: 

521 x = x.copy() 

522 return i, j, x 

523 else: 

524 return i, j 

525 

526 

527def isspmatrix_lil(x): 

528 """Is x of lil_matrix type? 

529 

530 Parameters 

531 ---------- 

532 x 

533 object to check for being a lil matrix 

534 

535 Returns 

536 ------- 

537 bool 

538 True if x is a lil matrix, False otherwise 

539 

540 Examples 

541 -------- 

542 >>> from scipy.sparse import lil_matrix, isspmatrix_lil 

543 >>> isspmatrix_lil(lil_matrix([[5]])) 

544 True 

545 

546 >>> from scipy.sparse import lil_matrix, csr_matrix, isspmatrix_lil 

547 >>> isspmatrix_lil(csr_matrix([[5]])) 

548 False 

549 """ 

550 return isinstance(x, lil_matrix)