Coverage for src/extratools_core/jsontools.py: 77%
132 statements
« prev ^ index » next coverage.py v7.8.1, created at 2025-06-24 04:41 -0700
1import json
2import re
3from csv import DictWriter
4from io import StringIO
5from pathlib import Path
6from re import Match, Pattern
7from types import NoneType
8from typing import Any, TypedDict
10from toolz.itertoolz import groupby
# A JSON object: string keys mapping to arbitrary JSON-compatible values.
type JsonDict = dict[str, Any]

# JSON objects keyed by an identifying string.
type DictOfJsonDicts = dict[str, JsonDict]
# A sequence of JSON objects.
type ListOfJsonDicts = list[JsonDict]
class DictOfJsonDictsDiffUpdate(TypedDict):
    """Before/after pair for a record whose content changed between snapshots."""

    old: JsonDict  # record content in the old snapshot
    new: JsonDict  # record content in the new snapshot
class DictOfJsonDictsDiff(TypedDict):
    """Diff between two dicts of JSON dicts, keyed by each record's key."""

    deletes: dict[str, JsonDict]  # records present only in the old dict
    inserts: dict[str, JsonDict]  # records present only in the new dict
    updates: dict[str, DictOfJsonDictsDiffUpdate]  # present in both, content differs
class ListOfJsonDictsDiff(TypedDict):
    """Diff between two lists of JSON dicts (membership only, no updates)."""

    deletes: list[JsonDict]  # records present only in the old list
    inserts: list[JsonDict]  # records present only in the new list
def flatten(data: Any) -> Any:
    """Flatten nested JSON data into a single-level dict.

    Dict fields are joined with ``.`` and list positions rendered as
    ``[i]``; e.g. ``{"a": [{"b": 1}]}`` becomes ``{"a[0].b": 1}``.
    A bare scalar at the root is stored under the key ``"."``.
    """
    result: JsonDict = {}

    def walk(node: Any, prefix: str) -> None:
        if isinstance(node, dict):
            for key, child in node.items():
                walk(child, f"{prefix}.{key}" if prefix else key)
        elif isinstance(node, list):
            for pos, item in enumerate(node):
                walk(item, f"{prefix}[{pos}]")
        else:
            # Leaf value: record it under the accumulated path.
            result[prefix if prefix else "."] = node

    walk(data, "")
    return result
def json_to_csv(
    data: DictOfJsonDicts | ListOfJsonDicts,
    /,
    csv_path: Path | str | None = None,
    *,
    key_field_name: str = "_key",
) -> str:
    """Serialize a dict or list of JSON dicts to CSV text.

    For dict input, each record gains a column named ``key_field_name``
    holding its key. If ``csv_path`` is given, the CSV text is also written
    to that file. Returns the CSV text.
    """
    if isinstance(data, dict):
        data = [
            {
                # In case there is already a key field in each record,
                # the new key field will be overwritten.
                # It is okay though as the existing key field is likely
                # serving the purpose of containing keys.
                key_field_name: key,
                **value,
            }
            for key, value in data.items()
        ]

    # Collect every field name across records in first-seen order.
    # A plain set would make the column order nondeterministic across
    # runs (hash randomization); an insertion-ordered dict keeps the
    # output stable and reproducible.
    fields: dict[str, None] = {}
    for record in data:
        fields.update(dict.fromkeys(record))

    sio = StringIO()

    writer = DictWriter(sio, fieldnames=list(fields))
    writer.writeheader()
    writer.writerows(data)

    csv_str: str = sio.getvalue()

    if csv_path:
        Path(csv_path).write_text(csv_str)

    return csv_str
def dict_of_json_dicts_diff(
    old: DictOfJsonDicts,
    new: DictOfJsonDicts,
) -> DictOfJsonDictsDiff:
    """Diff two dicts of JSON dicts keyed by string.

    Returns the records present only in ``old`` (deletes), only in ``new``
    (inserts), and present in both but with different content (updates).
    """
    inserts: dict[str, JsonDict] = {}
    updates: dict[str, DictOfJsonDictsDiffUpdate] = {}

    for new_key, new_value in new.items():
        old_value: JsonDict | None = old.get(new_key, None)
        if old_value is None:
            inserts[new_key] = new_value
        # sort_keys canonicalizes the serialization: without it, dicts that
        # are equal but whose keys were inserted in a different order would
        # serialize differently and be falsely reported as updates.
        elif json.dumps(old_value, sort_keys=True) != json.dumps(new_value, sort_keys=True):
            updates[new_key] = {
                "old": old_value,
                "new": new_value,
            }

    deletes: dict[str, JsonDict] = {
        old_key: old_value
        for old_key, old_value in old.items()
        if old_key not in new
    }

    return {
        "deletes": deletes,
        "inserts": inserts,
        "updates": updates,
    }
def list_of_json_dicts_diff(
    old: ListOfJsonDicts,
    new: ListOfJsonDicts,
) -> ListOfJsonDictsDiff:
    """Diff two lists of JSON dicts by content (order-insensitive).

    Each dict is identified by its canonical JSON serialization, so
    duplicate dicts within one list collapse into a single entry.
    """
    # sort_keys canonicalizes the serialization: without it, equal dicts
    # with different key insertion order would get different identity keys
    # and be falsely reported as a delete plus an insert.
    old_dict: DictOfJsonDicts = {
        json.dumps(d, sort_keys=True): d
        for d in old
    }
    new_dict: DictOfJsonDicts = {
        json.dumps(d, sort_keys=True): d
        for d in new
    }

    inserts: list[JsonDict] = [
        new_value
        for new_key, new_value in new_dict.items()
        if new_key not in old_dict
    ]
    deletes: list[JsonDict] = [
        old_value
        for old_key, old_value in old_dict.items()
        if old_key not in new_dict
    ]

    return {
        "deletes": deletes,
        "inserts": inserts,
    }
def merge_json(
    *values: Any,
    concat_lists: bool = True,
) -> Any:
    """Recursively merge JSON values.

    ``None`` values are skipped. All remaining values must share one type,
    otherwise ``ValueError`` is raised. Dicts are merged key by key
    (recursively); lists are concatenated when ``concat_lists`` is true;
    any other type resolves to the last non-``None`` value.
    """
    def merge_json_dicts(*dicts: JsonDict) -> JsonDict:
        # Group every key's values across all dicts, preserving
        # first-seen key order, then merge each group recursively.
        grouped: dict[str, list[Any]] = {}
        for d in dicts:
            for key, value in d.items():
                grouped.setdefault(key, []).append(value)

        return {
            key: merge_json(*group, concat_lists=concat_lists)
            for key, group in grouped.items()
        }

    first_type: type | None = None
    non_none_values: list[Any] = []

    for value in values:
        if value is None:
            continue

        value_type = type(value)
        if first_type is None:
            first_type = value_type
        elif first_type is not value_type:
            raise ValueError(
                f"cannot merge values of different types: "
                f"{first_type.__name__} and {value_type.__name__}",
            )

        non_none_values.append(value)

    # No non-None value at all (including zero arguments).
    if first_type is None:
        return None

    if first_type is dict:
        return merge_json_dicts(*non_none_values)

    if first_type is list and concat_lists:
        return [
            item
            for value in non_none_values
            for item in value
        ]

    return non_none_values[-1]
# One path step: either ".field" or "[index]", with the untouched rest of
# the path captured in "remaining".
__PATH_PATTERN: Pattern = re.compile(r"(?:\.(?P<field>\w+)|\[(?P<index>[0-9]+)\])(?P<remaining>.*)")


def get_by_path(data: Any, path: str) -> Any:
    """Fetch the value at ``path`` (e.g. ``".users[0].name"``) in nested JSON data.

    Raises ``ValueError`` if ``path`` is malformed, and ``LookupError`` if a
    field/index is absent or the current node has the wrong container type.
    """
    match = __PATH_PATTERN.fullmatch(path)
    if match is None:
        raise ValueError

    field = match.group("field")
    index = match.group("index")

    child: Any
    try:
        if field:
            if not isinstance(data, dict):
                raise LookupError

            child = data[field]
        elif index:
            if not isinstance(data, list):
                raise LookupError

            child = data[int(index)]
        else:
            # Unreachable: fullmatch guarantees one of the two groups matched.
            raise NotImplementedError
    except (KeyError, IndexError) as e:
        raise LookupError from e

    remaining = match.group("remaining")
    return get_by_path(child, remaining) if remaining else child
def set_by_path(data: Any, path: str, value: Any) -> None:
    """Set ``value`` at ``path`` (e.g. ``".users[0].name"``) in nested JSON data.

    Missing intermediate dict fields are created as empty dicts; missing
    list positions are never created. Raises ``ValueError`` for a malformed
    path and ``LookupError`` when a step cannot be resolved.
    """
    match = __PATH_PATTERN.fullmatch(path)
    if match is None:
        raise ValueError

    remaining = match.group("remaining")
    field = match.group("field")
    index_str = match.group("index")

    try:
        if field:
            if not isinstance(data, dict):
                raise LookupError

            if remaining:
                # Auto-create intermediate containers as dicts only.
                if field not in data:
                    data[field] = {}
                set_by_path(data[field], remaining, value)
            else:
                data[field] = value
        elif index_str:
            if not isinstance(data, list):
                raise LookupError

            pos = int(index_str)
            if remaining:
                set_by_path(data[pos], remaining, value)
            else:
                data[pos] = value
        else:
            # Unreachable: fullmatch guarantees one of the two groups matched.
            raise NotImplementedError
    except (KeyError, IndexError) as e:
        raise LookupError from e