Coverage for src/edwh_restic_plugin/repositories/__init__.py: 51%

199 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-10 20:54 +0100

1import abc 

2import datetime 

3import heapq 

4import importlib 

5import importlib.util 

6import io 

7import os 

8import re 

9import sys 

10import typing 

11from collections import OrderedDict, defaultdict 

12from pathlib import Path 

13 

14import invoke 

15from invoke import Context 

16from print_color import print # fixme: replace with termcolor 

17from tqdm import tqdm 

18from typing_extensions import NotRequired 

19 

20from ..env import DOTENV, check_env, read_dotenv 

21from ..helpers import camel_to_snake, fix_tags 

22 

# Folder that is searched for the `<verb>_<target>*` script files (see Repository.get_scripts).
# The path is relative, so it is resolved against the current working directory.
DEFAULT_BACKUP_FOLDER = Path("captain-hooks")

25 

26 

class Repository(abc.ABC):
    """
    Abstract base class for a restic repository.

    Subclasses have to provide `uri`, `setup` and `prepare_for_restic`; this base class
    implements the shared init/backup/restore/check/snapshot logic on top of those.
    """

    # _targets: a list of file and directory paths that should be included in the backup.
    _targets = [".env", "./backup"]
    # _excluded: a list of file and directory paths that should be excluded from the backup.
    _excluded = [
        ".git",
        ".idea",
        "backups",
        "*.pyc",
        "*.bak",
        "../",
        "./..",
        "errors",
        "sessions",
        "__pycache__",
    ]

    _env_path: Path
    env_config: dict[str, str]

    def __init__(self, env_path: Path = DOTENV) -> None:
        """
        Load the dotenv file at `env_path` and merge its contents into `os.environ`.

        Args:
        - env_path (Path): location of the dotenv file to read. Defaults to DOTENV.
        """
        super().__init__()
        print("repo init", self)
        self._env_path = env_path
        self.env_config = env = read_dotenv(env_path)
        os.environ |= env
        self._restichostname = env.get("RESTICHOSTNAME")  # or None if it is not there

    @property
    def uri(self):
        """Return the prefix required for restic to indicate the protocol, for example sftp:hostname:"""
        raise NotImplementedError("Prefix unknown in base class")

    @abc.abstractmethod
    def setup(self):
        """Ensure that the settings are in the .env file"""
        raise NotImplementedError("Setup undefined")

    @abc.abstractmethod
    def prepare_for_restic(self, c):
        """
        Prepare everything restic needs before a command is run against this repository.

        Implementations should probably start with:
            env = read_dotenv(DOTENV)
            os.environ.update(env)
        """
        raise NotImplementedError("Prepare for restic undefined")

    def check_env(
        self,
        key: str,
        default: str | None,
        comment: str,
        prefix: str | None = None,
        suffix: str | None = None,
        postfix: str | None = None,
        path: Path | None = None,
    ):
        """
        Forward to the module-level `check_env` helper, defaulting `path` to this repository's dotenv file.

        All arguments are passed through unchanged; only a falsy `path` is replaced by `self._env_path`.
        """
        return check_env(
            key=key,
            default=default,
            comment=comment,
            prefix=prefix,
            suffix=suffix,
            postfix=postfix,
            path=path or self._env_path,
        )

    def configure(self, c):
        """Configure the backup environment variables."""
        self.prepare_for_restic(c)
        print("configure")
        # First, make sure restic is up-to-date (warn=True: a failing self-update is not fatal)
        c.run("sudo restic self-update", hide=True, warn=True)
        # This is the command used to configure the environment variables properly.
        c.run(f"restic init --repository-version 2 -r {self.uri}")

    @property
    def hostarg(self):
        """Return the host argument for restic command (empty string if RESTICHOSTNAME is not set)."""
        return f" --host {self._restichostname} " if self._restichostname else ""

    @property
    def targets(self):
        """Return the target files and directories for the backup, separated by spaces."""
        return " ".join(self._targets)

    @property
    def excluded(self):
        """
        Return the excluded files and directories for the backup, joined by ' --exclude '.

        NOTE(review): the first entry gets no leading '--exclude'; presumably the caller
        prepends that flag itself - verify against the scripts that use this value.
        """
        return " --exclude ".join(self._excluded)

    @staticmethod
    def get_snapshot_from(stdout: str) -> str | None:
        """
        Parses the stdout from a Restic command to extract the snapshot ID.

        Args:
        - stdout (str): The stdout output from a Restic command.

        Returns:
        - The ID of the last 'snapshot <id> saved' occurrence, or None if there is none.
        """
        snapshots_ids = re.findall(r"snapshot (.*?) saved", stdout)
        return snapshots_ids[-1] if snapshots_ids else None

    @staticmethod
    def get_scripts(target, verb):
        """Retrieves the scripts that contain a restic command and returns them to 'execute_files' to execute them.

        Exits the process with status 255 when no matching script is found.

        Args:
        - target (str): target is a string that specifies the target of the backup, can be a file, stream, directory,
        or any other object that needs to be backed up.
        - verb (str): is also a string that specifies the action to be performed on the target.
        For example, the verb could be "backup" or "restore". The verb is used in combination with the target to
        search for the backup script files that contain the restic command.
        """
        # get files by verb and target. EXAMPLE backup_files_*.sh
        files = [str(file) for file in DEFAULT_BACKUP_FOLDER.glob(f"{verb}_{target}*")]
        # check if no files are found
        if not files:
            print("no files found with target:", target)
            sys.exit(255)

        return files

    def execute_files(
        self,
        c: Context,
        target: str,
        verb: str,
        verbose: bool,
        message: str | None = None,
        snapshot: str = "latest",
    ):
        """
        Executes the backup scripts retrieved by 'get_scripts' function.

        After all scripts ran, the process exits with the highest non-zero script status code (if any).

        Args:
        - verbose (bool): A flag indicating whether to display verbose output.
        - target (str): The target of the backup.
        - verb (str): The verb associated with the backup.
        - message (str, optional): The message to be associated with the backup.
        If not provided, the current local time is used. Defaults to None.
        - snapshot (str, optional): The snapshot to be used for the backup. Defaults to "latest".
        """
        self.prepare_for_restic(c)

        # set snapshot available in environment for sh files
        os.environ["SNAPSHOT"] = snapshot

        # Here you can make a message that you will see in the snapshots list
        if message is None:
            # If no message is provided, use the current local time as the backup message
            message = f"{datetime.datetime.now()} localtime"

        # set MSG in environment for sh files
        os.environ["MSG"] = message

        # get files by target and verb. see self.get_scripts for more info
        files = self.get_scripts(target, verb)

        snapshots_created = []
        file_codes = []
        # run all backup/restore files
        for file in tqdm(files):
            if verbose:
                print("\033[1m running", file, "\033[0m")

            print(f"{file} output:")
            # run the script with pty=True; when it exits unexpectedly,
            # keep the result attached to the exception so its stdout and exit code can still be used
            try:
                ran_script: invoke.runners.Result = c.run(file, hide=True, pty=True)
            except invoke.exceptions.UnexpectedExit as e:
                ran_script = e.result
                file_codes.append(e.result.exited)
            else:
                file_codes.append(0)

            if verbose:
                print(f"{file} output:")
                if ran_script.stdout:
                    print(f"stdout:{ran_script.stdout}")
                else:
                    print("no output found!")

            # may be None when the script output contains no 'snapshot ... saved' line
            created_snapshot = self.get_snapshot_from(ran_script.stdout)
            snapshots_created.append(created_snapshot)

        # send message with backup. see message for more info
        # also if a tag in tags is None it will be removed by fix_tags
        if verb != "restore":
            tags = fix_tags(["message", *snapshots_created])
            c.run(
                f"restic {self.hostarg} -r {self.uri} backup --tag {','.join(tags)} --stdin --stdin-filename message",
                in_stream=io.StringIO(message),
                hide=True,
            )

        print("\n\nfile status codes:")

        for file, status_code in zip(files, file_codes):
            if status_code == 0:
                print(file, tag="success", tag_color="green")
            else:
                print("in", file, tag="failure", tag_color="red")

        # propagate the actual worst exit code of the scripts (not merely 'something failed')
        worst_status_code = max(file_codes, default=0)
        if worst_status_code > 0:
            sys.exit(worst_status_code)

    def backup(self, c, verbose: bool, target: str, message: str):
        """
        Backs up the specified target.

        Args:
        - verbose (bool): A flag indicating whether to display verbose output.
        - target (str): The target of the backup (e.g. 'files', 'stream'; default is all types).
        - message (str): The message to be associated with the backup.
        """
        self.execute_files(c, target, "backup", verbose, message)

    def restore(self, c, verbose: bool, target: str, snapshot: str = "latest"):
        """
        Restores the specified target using the specified snapshot (the latest one by default).

        Args:
        - verbose (bool): A flag indicating whether to display verbose output.
        - target (str): The target of the restore.
        - snapshot (str, optional): The snapshot to be used for the restore. Defaults to "latest".
        """
        self.execute_files(c, target, "restore", verbose, snapshot=snapshot)

    def check(self, c):
        """
        Checks the integrity of the backup repository.
        """
        self.prepare_for_restic(c)
        c.run(f"restic {self.hostarg} -r {self.uri} check --read-data")

    def snapshot(self, c: Context, tags: list[str] | None = None, n: int = 2, verbose: bool = False):
        """
        a list of all the backups with a message

        The restic snapshot listing is fetched, and for every 'message' snapshot the stored message is dumped
        and appended as ' : [message]' to the line of the snapshot it belongs to.

        Args:
        - tags (list, optional): A list of tags to use for the snapshot. Defaults to None (files and stream).
        - n (int, optional): The number of latest snapshots to show. Defaults to 2.
        - verbose (bool): Show more info about what's happening?

        Returns:
        None. This function only prints the output to the console.
        """
        # choose to see only the files or the stream snapshots
        if tags is None:
            tags = ["files", "stream"]

        self.prepare_for_restic(c)
        tag_args = "--tag " + " --tag ".join(tags) if tags else ""
        command = f"restic {self.hostarg} -r {self.uri} snapshots --latest {n} {tag_args} -c"
        if verbose:
            print("$", command, file=sys.stderr)

        stdout = c.run(
            command,
            hide=True,
        ).stdout

        if verbose:
            print(stdout, file=sys.stderr)

        # 8-character ids at the start of a line identify the snapshot rows
        snapshot_lines = re.findall(r"^([0-9a-z]{8})\s", stdout, re.MULTILINE)
        # the last word on a snapshot row is treated as its main tag
        main_tag_per_snapshot = {
            snapshot: re.findall(rf"^{snapshot}.*?(\w*)$", stdout, re.MULTILINE)
            for snapshot in snapshot_lines
        }

        message_snapshot_per_snapshot = defaultdict(list)  # key is source, value is snapshot containing the message
        for snapshot, possible_tag_names in main_tag_per_snapshot.items():
            tag_name = possible_tag_names[0]
            if tag_name != "message":
                continue
            for _, is_message_for_snapshot_id in re.findall(rf"\n{snapshot}.*(\n\s+(.*)\n)+", stdout):
                message_snapshot_per_snapshot[is_message_for_snapshot_id].append(snapshot)

        for snapshot, message_snapshots in message_snapshot_per_snapshot.items():
            # print all Restic messages
            command = f"restic {self.hostarg} -r {self.uri} dump {message_snapshots[0]} --tag message message"
            if verbose:
                print("$", command, file=sys.stderr)

            restore_output = c.run(
                command,
                hide=True,
                warn=True,
            ).stdout

            if verbose:
                print(restore_output, file=sys.stderr)

            message = restore_output.strip()
            # A callable replacement is used so that backslashes in the (user supplied) message
            # are inserted literally instead of being interpreted as regex escapes/group references.
            stdout = re.sub(
                rf"\n{re.escape(snapshot)}(.*)\n",
                lambda match: f"\n{snapshot}{match.group(1)} : [{message}]\n",
                stdout,
            )

        if verbose:
            print("---\n", file=sys.stderr)

        print(stdout)

    # noop gt, lt etc methods
    # (these make *instances* comparable without ever ordering one before the other;
    # they do not affect comparisons between the classes themselves)

    def __gt__(self, other):
        return False

    def __lt__(self, other):
        return False

343 

344 

class RepositoryRegistration(typing.TypedDict):
    """Settings that are stored together with a registered Repository class."""

    # primary name of the repository; `register` derives one from the class name when none is given
    short_name: str | None
    # optional extra names that should point to the same Repository class
    aliases: NotRequired[tuple[str, ...]]
    # optional ordering value; `RepositoryRegistrations.push` maps negative values to a very high number
    priority: NotRequired[int]

349 

350 

class RepositoryRegistrations:
    """
    Registry of Repository classes, ordered by priority (lowest number first).

    Entries are `(priority, repository class, settings)` tuples. Entries with the same priority
    keep the order in which they were pushed.
    """

    def __init__(self):
        # _queue is for internal use only!
        # external api should use .queue !!!
        # It is always kept sorted by priority (a sorted list is also a valid heap).
        self._queue: list[tuple[int, typing.Type["Repository"], "RepositoryRegistration"]] = []
        # aliases stores a reference for each name to the Repo class
        self._aliases: dict[str, typing.Type["Repository"]] = {}

    def push(self, repo: typing.Type["Repository"], settings: "RepositoryRegistration") -> None:
        """
        Add `repo` to the registry under its short name and all of its aliases.

        Args:
        - repo: the Repository class to register.
        - settings: registration settings; a missing or negative 'priority' sorts the entry
          after every entry with an explicit (non-negative) priority.
        """
        priority = settings.get("priority", -1)
        if priority < 0:
            priority = sys.maxsize - priority  # very high int

        self._queue.append((priority, repo, settings))
        # Sort on the priority only: the sort is stable (ties keep insertion order) and
        # the classes/settings are never compared, which would raise a TypeError.
        self._queue.sort(key=lambda entry: entry[0])

        self._aliases[settings["short_name"]] = repo
        # 'aliases' is optional, so fall back to an empty tuple
        for alias in settings.get("aliases") or ():
            self._aliases[alias] = repo

    @property
    def queue(self):
        """Return the registered entries; when empty, first import the sibling modules so they can register."""
        if not self._queue:
            self._find_items()

        return self._queue

    def clear(self):
        """Forget all registered repositories and aliases."""
        self._queue = []
        self._aliases = {}

    def to_sorted_list(self):
        """Return the registered Repository classes ordered by priority."""
        # No extra sorting needed here: push() keeps the queue sorted
        return list(self)

    def to_ordered_dict(self) -> OrderedDict[str, typing.Type["Repository"]]:
        """Return a mapping of short name to Repository class, ordered by priority."""
        ordered_dict = OrderedDict()
        for _, item, settings in self.queue:
            ordered_dict[settings["short_name"]] = item
        return ordered_dict

    def __iter__(self) -> typing.Generator[typing.Type["Repository"], None, None]:
        """Iterate over the registered Repository classes in priority order."""
        return (item[1] for item in self.queue)

    def __bool__(self):
        """Return True when at least one repository is registered (may trigger discovery)."""
        return bool(self.queue)

    def _find_items(self) -> None:
        """Import every non-dunder module next to this file."""
        # import all registrations in this folder, so @register adds them to _queue
        package_path = Path(__file__).resolve().parent

        for file_path in package_path.glob("*.py"):
            pkg = file_path.stem
            if not pkg.startswith("__"):
                importlib.import_module(f".{pkg}", package=__name__)

404 

405 

def register(
    short_name: str = None,
    aliases: tuple[str, ...] = (),
    priority: int = -1,
    # **settings: Unpack[RepositoryRegistration]  # <- not really supported yet!
) -> typing.Callable[[typing.Type[Repository]], typing.Type[Repository]]:
    """
    Class decorator factory that adds a Repository subclass to the module-level `registrations`.

    Args:
    - short_name (str, optional): name to register under; when empty, the snake_cased class name
      without a trailing '_repository' is used.
    - aliases (tuple of str, optional): additional names for the same class.
    - priority (int, optional): ordering value passed on to the registry. Defaults to -1.

    Returns:
    A decorator that registers the class and returns it unchanged.

    Raises:
    - SyntaxError: when used as a bare `@register` (without parentheses).
    - TypeError: (from the decorator) when the decorated object is not a Repository subclass.
    """
    # a bare `@register` passes the decorated class in as `short_name`
    if isinstance(short_name, type):
        raise SyntaxError("Please call @register() with parentheses!")

    def decorator(repo_cls: typing.Type[Repository]) -> typing.Type[Repository]:
        is_repository_class = isinstance(repo_cls, type) and issubclass(repo_cls, Repository)
        if not is_repository_class:
            raise TypeError(f"Decorated class {repo_cls} must be a subclass of Repository!")

        # fall back to a name derived from the class name
        name = short_name
        if not name:
            name = camel_to_snake(repo_cls.__name__).removesuffix("_repository")

        registration: RepositoryRegistration = {
            "short_name": name,
            "aliases": aliases,
            "priority": priority,
        }
        registrations.push(repo_cls, registration)
        return repo_cls

    return decorator

429 

430 

# Module-level registry instance; the `register` decorator pushes Repository classes into it.
registrations = RepositoryRegistrations()