Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import textwrap 

2from typing import List, Set 

3 

4from pandas._libs import NaT, lib 

5 

6import pandas.core.common as com 

7from pandas.core.indexes.base import ( 

8 Index, 

9 InvalidIndexError, 

10 _new_Index, 

11 ensure_index, 

12 ensure_index_from_sequences, 

13) 

14from pandas.core.indexes.category import CategoricalIndex 

15from pandas.core.indexes.datetimes import DatetimeIndex 

16from pandas.core.indexes.interval import IntervalIndex 

17from pandas.core.indexes.multi import MultiIndex 

18from pandas.core.indexes.numeric import ( 

19 Float64Index, 

20 Int64Index, 

21 NumericIndex, 

22 UInt64Index, 

23) 

24from pandas.core.indexes.period import PeriodIndex 

25from pandas.core.indexes.range import RangeIndex 

26from pandas.core.indexes.timedeltas import TimedeltaIndex 

27 

# Warning text about sorting when the non-concatenation axis is not aligned.
# The literal is indented for readability; textwrap.dedent strips the common
# leading whitespace, so the resulting message starts at column 0.
_sort_msg = textwrap.dedent(
    """\
    Sorting because non-concatenation axis is not aligned. A future version
    of pandas will change to not sort by default.

    To accept the future behavior, pass 'sort=False'.

    To retain the current behavior and silence the warning, pass 'sort=True'.
    """
)

38 

39 

# Names exported by this module: the Index classes and helpers imported at the
# top of the file, followed by the functions defined below.
__all__ = [
    "Index",
    "MultiIndex",
    "NumericIndex",
    "Float64Index",
    "Int64Index",
    "CategoricalIndex",
    "IntervalIndex",
    "RangeIndex",
    "UInt64Index",
    "InvalidIndexError",
    "TimedeltaIndex",
    "PeriodIndex",
    "DatetimeIndex",
    "_new_Index",
    "NaT",
    "ensure_index",
    "ensure_index_from_sequences",
    "get_objs_combined_axis",
    "union_indexes",
    "get_consensus_names",
    "all_indexes_same",
]

63 

64 

def get_objs_combined_axis(
    objs, intersect: bool = False, axis=0, sort: bool = True
) -> Index:
    """
    Combine the indexes that a collection of objects carry on one axis.

    The index of every object along ``axis`` is fetched with ``_get_axis``
    and the collected indexes are handed to ``_get_combined_index``, which
    returns their intersection or their union depending on ``intersect``.

    Parameters
    ----------
    objs : list
        Series or DataFrame objects, may be mix of the two.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        The axis to extract indexes from; passed to each object's
        ``_get_axis``.
    sort : bool, default True
        Whether the result index should come out sorted or not.

    Returns
    -------
    Index
    """
    return _get_combined_index(
        [obj._get_axis(axis) for obj in objs], intersect=intersect, sort=sort
    )

91 

92 

def _get_distinct_objs(objs: List[Index]) -> List[Index]:
    """
    Drop repeated references to the same object from "objs".

    Two entries count as duplicates only when they are the very same object
    (same ``id``); equal-but-separate objects are both kept. The first
    occurrence of each object is retained, so the input order is preserved.
    """
    seen_ids: Set[int] = set()
    distinct = []
    for candidate in objs:
        key = id(candidate)
        if key in seen_ids:
            continue
        seen_ids.add(key)
        distinct.append(candidate)
    return distinct

105 

106 

def _get_combined_index(
    indexes: List[Index], intersect: bool = False, sort: bool = False
) -> Index:
    """
    Return the union or intersection of indexes.

    Repeated references to the same object are dropped first. With nothing
    left an empty Index is returned; with a single entry that entry is used
    as-is. Otherwise the entries are intersected pairwise (``intersect``) or
    combined with ``union_indexes``.

    Parameters
    ----------
    indexes : list of Index or list objects
        When intersect=True, do not accept list of lists.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    sort : bool, default False
        Whether the result index should come out sorted or not.

    Returns
    -------
    Index
    """
    # TODO: handle index names!
    distinct = _get_distinct_objs(indexes)
    count = len(distinct)

    if count == 0:
        combined = Index([])
    elif count == 1:
        combined = distinct[0]
    elif intersect:
        combined = distinct[0]
        for other in distinct[1:]:
            combined = combined.intersection(other)
    else:
        combined = ensure_index(union_indexes(distinct, sort=sort))

    if not sort:
        return combined

    try:
        return combined.sort_values()
    except TypeError:
        # Best effort: if sort_values raises TypeError, hand back the
        # unsorted result instead of failing.
        return combined

147 

148 

def union_indexes(indexes, sort=True) -> Index:
    """
    Return the union of indexes.

    The behavior of sort and names is not consistent: ``sort`` is applied
    whenever the inputs are merged as lists, but it is not forwarded on the
    ``union`` / ``union_many`` path, nor when all inputs are equal Index
    objects (the first one is returned, possibly renamed).

    Parameters
    ----------
    indexes : list of Index or list objects
    sort : bool, default True
        Whether the result index should come out sorted or not.

    Returns
    -------
    Index

    Raises
    ------
    AssertionError
        If ``indexes`` is empty.
    """
    if len(indexes) == 0:
        raise AssertionError("Must have at least 1 Index to union")
    if len(indexes) == 1:
        result = indexes[0]
        if isinstance(result, list):
            # Honor ``sort`` for a lone list too, matching what the
            # multi-list path below does via fast_unique_multiple_list.
            result = Index(sorted(result) if sort else result)
        return result

    indexes, kind = _sanitize_and_check(indexes)

    def _unique_indices(inds) -> Index:
        """
        Convert indexes to lists and concatenate them, removing duplicates.

        The final dtype is inferred.

        Parameters
        ----------
        inds : list of Index or list objects

        Returns
        -------
        Index
        """

        def conv(i):
            # Index objects are turned into plain lists; lists pass through.
            if isinstance(i, Index):
                i = i.tolist()
            return i

        as_lists = [conv(i) for i in inds]
        return Index(lib.fast_unique_multiple_list(as_lists, sort=sort))

    if kind == "special":
        result = indexes[0]

        if hasattr(result, "union_many"):
            # DatetimeIndex
            return result.union_many(indexes[1:])
        else:
            for other in indexes[1:]:
                result = result.union(other)
            return result
    elif kind == "array":
        index = indexes[0]
        for other in indexes[1:]:
            if not index.equals(other):
                # At least one input differs: fall back to a list-based merge.
                return _unique_indices(indexes)

        # All inputs are equal; reuse the first one, adopting the consensus
        # name when it differs from the name the first index carries.
        name = get_consensus_names(indexes)[0]
        if name != index.name:
            index = index._shallow_copy(name=name)
        return index
    else:  # kind='list'
        return _unique_indices(indexes)

219 

220 

def _sanitize_and_check(indexes):
    """
    Classify a collection of indexes and convert stray lists to Index.

    Cases:

    - [list, list, ...]: Return ([list, list, ...], 'list') unchanged.
    - [list, Index, ...]: Every non-Index entry is passed through
        ``com.try_sort`` and wrapped in an Index; the result is then
        classified like the all-Index case using the original (non-list)
        types.
    - [Index, Index, ...]: Return ([Index, Index, ...], TYPE)
        TYPE = 'array' if every entry is exactly of type Index,
        'special' otherwise.

    Parameters
    ----------
    indexes : list of Index or list objects

    Returns
    -------
    sanitized_indexes : list of Index or list objects
    type : {'list', 'array', 'special'}
    """
    kinds = {type(index) for index in indexes}

    if list in kinds:
        if len(kinds) == 1:
            # Nothing but plain lists: hand them back untouched.
            return indexes, "list"
        indexes = [
            x if isinstance(x, Index) else Index(com.try_sort(x)) for x in indexes
        ]
        kinds.discard(list)

    if kinds == {Index}:
        return indexes, "array"
    return indexes, "special"

258 

259 

def get_consensus_names(indexes):
    """
    Give a consensus 'names' to indexes.

    Only indexes with at least one non-None entry in ``names`` take part.
    If they all agree on a single ``names`` value, that value is returned;
    otherwise the result is a list of None with one entry per level of the
    first index.

    Parameters
    ----------
    indexes : list of Index objects

    Returns
    -------
    list
        A list representing the consensus 'names' found.
    """
    # names are stored as tuples so they can live in a set; the winning
    # entry is converted back to a list on return
    distinct_names = {
        tuple(idx.names) for idx in indexes if com.any_not_none(*idx.names)
    }
    if len(distinct_names) != 1:
        return [None] * indexes[0].nlevels
    (agreed,) = distinct_names
    return list(agreed)

282 

283 

def all_indexes_same(indexes):
    """
    Determine if all indexes contain the same elements.

    Every index after the first is compared against the first one with
    ``equals``.

    Parameters
    ----------
    indexes : list of Index objects

    Returns
    -------
    bool
        True if all indexes contain the same elements, False otherwise.
    """
    reference = indexes[0]
    return all(reference.equals(other) for other in indexes[1:])