Coverage for src/extratools_core/jsontools.py: 77%

132 statements  

« prev     ^ index     » next       coverage.py v7.8.1, created at 2025-06-24 04:41 -0700

1import json 

2import re 

3from csv import DictWriter 

4from io import StringIO 

5from pathlib import Path 

6from re import Match, Pattern 

7from types import NoneType 

8from typing import Any, TypedDict 

9 

10from toolz.itertoolz import groupby 

11 

# A JSON object decoded into Python: string keys mapping to arbitrary JSON values.
type JsonDict = dict[str, Any]

# Collections of JSON objects: keyed by an identifying string, or kept as a plain list.
type DictOfJsonDicts = dict[str, JsonDict]
type ListOfJsonDicts = list[JsonDict]

17 

class DictOfJsonDictsDiffUpdate(TypedDict):
    """Old/new value pair for a key whose record changed between two dicts."""

    # The record as it appears in the old collection.
    old: JsonDict
    # The record as it appears in the new collection.
    new: JsonDict

21 

22 

class DictOfJsonDictsDiff(TypedDict):
    """Result of diffing two dicts of JSON dicts, keyed by record key."""

    # Entries whose key is present only in the old dict.
    deletes: dict[str, JsonDict]
    # Entries whose key is present only in the new dict.
    inserts: dict[str, JsonDict]
    # Entries present in both but with differing values (old/new pairs).
    updates: dict[str, DictOfJsonDictsDiffUpdate]

27 

28 

class ListOfJsonDictsDiff(TypedDict):
    """Result of diffing two lists of JSON dicts by content."""

    # Dicts present only in the old list.
    deletes: list[JsonDict]
    # Dicts present only in the new list.
    inserts: list[JsonDict]

32 

33 

34def flatten(data: Any) -> Any: 

35 def flatten_rec(data: Any, path: str) -> None: 

36 if isinstance(data, dict): 

37 for k, v in data.items(): 

38 flatten_rec(v, path + (f".{k}" if path else k)) 

39 elif isinstance(data, list): 

40 for i, v in enumerate(data): 

41 flatten_rec(v, path + f"[{i}]") 

42 else: 

43 flatten_dict[path or "."] = data 

44 

45 flatten_dict: JsonDict = {} 

46 flatten_rec(data, "") 

47 return flatten_dict 

48 

49 

50def json_to_csv( 

51 data: DictOfJsonDicts | ListOfJsonDicts, 

52 /, 

53 csv_path: Path | str | None = None, 

54 *, 

55 key_field_name: str = "_key", 

56) -> str: 

57 if isinstance(data, dict): 

58 data = [ 

59 { 

60 # In case there is already a key field in each record, 

61 # the new key field will be overwritten. 

62 # It is okay though as the existing key field is likely 

63 # serving the purpose of containing keys. 

64 key_field_name: key, 

65 **value, 

66 } 

67 for key, value in data.items() 

68 ] 

69 

70 fields: set[str] = set() 

71 for record in data: 

72 fields.update(record.keys()) 

73 

74 sio = StringIO() 

75 

76 writer = DictWriter(sio, fieldnames=fields) 

77 writer.writeheader() 

78 writer.writerows(data) 

79 

80 csv_str: str = sio.getvalue() 

81 

82 if csv_path: 

83 Path(csv_path).write_text(csv_str) 

84 

85 return csv_str 

86 

87 

88def dict_of_json_dicts_diff( 

89 old: DictOfJsonDicts, 

90 new: DictOfJsonDicts, 

91) -> DictOfJsonDictsDiff: 

92 inserts: dict[str, JsonDict] = {} 

93 updates: dict[str, DictOfJsonDictsDiffUpdate] = {} 

94 

95 for new_key, new_value in new.items(): 

96 old_value: dict[str, Any] | None = old.get(new_key, None) 

97 if old_value is None: 

98 inserts[new_key] = new_value 

99 elif json.dumps(old_value) != json.dumps(new_value): 

100 updates[new_key] = { 

101 "old": old_value, 

102 "new": new_value, 

103 } 

104 

105 deletes: dict[str, JsonDict] = { 

106 old_key: old_value 

107 for old_key, old_value in old.items() 

108 if old_key not in new 

109 } 

110 

111 return { 

112 "deletes": deletes, 

113 "inserts": inserts, 

114 "updates": updates, 

115 } 

116 

117 

118def list_of_json_dicts_diff( 

119 old: ListOfJsonDicts, 

120 new: ListOfJsonDicts, 

121) -> ListOfJsonDictsDiff: 

122 old_dict: DictOfJsonDicts = { 

123 json.dumps(d): d 

124 for d in old 

125 } 

126 new_dict: DictOfJsonDicts = { 

127 json.dumps(d): d 

128 for d in new 

129 } 

130 

131 inserts: list[JsonDict] = [ 

132 new_value 

133 for new_key, new_value in new_dict.items() 

134 if new_key not in old_dict 

135 ] 

136 deletes: list[JsonDict] = [ 

137 old_value 

138 for old_key, old_value in old_dict.items() 

139 if old_key not in new_dict 

140 ] 

141 

142 return { 

143 "deletes": deletes, 

144 "inserts": inserts, 

145 } 

146 

147 

148def merge_json( 

149 *values: Any, 

150 concat_lists: bool = True, 

151) -> Any: 

152 def merge_json_dicts(*jds: JsonDict) -> JsonDict: 

153 groups: dict[str, list[JsonDict]] = groupby( 

154 lambda kv_tuple: kv_tuple[0], 

155 ( 

156 kv_tuple 

157 for jd in jds 

158 for kv_tuple in jd.items() 

159 ), 

160 ) 

161 

162 return { 

163 key: merge_json( 

164 *[value for _, value in kv_tuples], 

165 concat_lists=concat_lists, 

166 ) 

167 for key, kv_tuples in groups.items() 

168 } 

169 

170 first_value_type: type | None = None 

171 

172 not_none_values = [] 

173 

174 for value in values: 

175 value_type: type = type(value) 

176 if value_type is NoneType: 

177 continue 

178 

179 if first_value_type is None: 

180 first_value_type = value_type 

181 elif first_value_type != value_type: 

182 raise ValueError 

183 

184 not_none_values.append(value) 

185 

186 if first_value_type is None or first_value_type is NoneType: 

187 return None 

188 

189 if first_value_type is dict: 

190 return merge_json_dicts(*not_none_values) 

191 

192 if first_value_type is list and concat_lists: 

193 return [ 

194 item 

195 for value in not_none_values 

196 for item in value 

197 ] 

198 

199 return not_none_values[-1] 

200 

201 

# Matches one path step — either ".field" or "[index]" — and captures
# whatever follows it in "remaining" for recursive consumption.
__PATH_PATTERN: Pattern = re.compile(
    r"(?:\.(?P<field>\w+)|\[(?P<index>[0-9]+)\])(?P<remaining>.*)",
)


def get_by_path(data: Any, path: str) -> Any:
    """Resolve a ".field"/"[index]" path expression against nested data.

    Raises:
        ValueError: if `path` is not a well-formed path expression.
        LookupError: if the path does not exist in `data`.
    """
    parsed: Match | None = __PATH_PATTERN.fullmatch(path)
    if parsed is None:
        raise ValueError

    field = parsed.group("field")
    index = parsed.group("index")

    child: Any
    try:
        if field:
            if not isinstance(data, dict):
                raise LookupError

            child = data[field]
        elif index:
            if not isinstance(data, list):
                raise LookupError

            child = data[int(index)]
        else:
            # This should be unreachable
            raise NotImplementedError
    except (IndexError, KeyError) as e:
        # Normalize missing-key/out-of-range failures to LookupError.
        raise LookupError from e

    rest: str = parsed.group("remaining")
    return get_by_path(child, rest) if rest else child

233 

234 

def set_by_path(data: Any, path: str, value: Any) -> None:
    """Assign `value` at a ".field"/"[index]" path inside nested data.

    When more path remains after a missing dict field, an intermediate
    dict is created for it. List positions are never created.

    Raises:
        ValueError: if `path` is not a well-formed path expression.
        LookupError: if the path cannot be traversed in `data`.
    """
    parsed: Match | None = __PATH_PATTERN.fullmatch(path)
    if parsed is None:
        raise ValueError

    rest: str = parsed.group("remaining")

    try:
        if field := parsed.group("field"):
            if not isinstance(data, dict):
                raise LookupError

            if rest:
                # Auto-create an intermediate dict for a missing field
                # so deeper assignments can proceed.
                if field not in data:
                    data[field] = {}
                set_by_path(data[field], rest, value)
            else:
                data[field] = value
        elif raw_index := parsed.group("index"):
            if not isinstance(data, list):
                raise LookupError

            pos = int(raw_index)
            if rest:
                set_by_path(data[pos], rest, value)
            else:
                data[pos] = value
        else:
            # This should be unreachable
            raise NotImplementedError
    except (IndexError, KeyError) as e:
        # Normalize missing-key/out-of-range failures to LookupError.
        raise LookupError from e