Coverage for src/pdfbaker/document.py: 89%

123 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-20 14:42 +1200

1"""PDFBakerDocument class. 

2 

3Document-level processing, variants, custom bake modules. 

4 

5Delegates the jobs of rendering and converting to its pages, 

6combines and compresses the result and reports back to its baker. 

7""" 

8 

9import importlib 

10import os 

11from pathlib import Path 

12from typing import Any 

13 

14from .config import ( 

15 PDFBakerConfiguration, 

16 deep_merge, 

17 render_config, 

18) 

19from .errors import ( 

20 ConfigurationError, 

21 PDFBakerError, 

22 PDFCombineError, 

23 PDFCompressionError, 

24) 

25from .logging import LoggingMixin 

26from .page import PDFBakerPage 

27from .pdf import ( 

28 combine_pdfs, 

29 compress_pdf, 

30) 

31 

# Defaults that get merged into the base configuration of every document.
# Each directory defaults to a folder of the same name, relative to the
# document's config file.
DEFAULT_DOCUMENT_CONFIG = {
    "directories": {name: name for name in ("pages", "templates", "images")},
}

# Config file looked up when a document is given as a directory.
DEFAULT_DOCUMENT_CONFIG_FILE = "config.yaml"

41 

42__all__ = ["PDFBakerDocument"] 

43 

44 

45class PDFBakerDocument(LoggingMixin): 

46 """A document being processed.""" 

47 

48 class Configuration(PDFBakerConfiguration): 

49 """PDFBaker document-specific configuration.""" 

50 

def __init__(
    self,
    document: "PDFBakerDocument",
    base_config: "PDFBakerConfiguration",  # type: ignore # noqa: F821
    config_path: Path,
) -> None:
    """Load and validate the configuration of a single document.

    Args:
        document: The document owning this configuration (used for logging)
        base_config: The PDFBaker configuration to merge with
        config_path: The document configuration (YAML file), or a
            directory containing the default config file

    Raises:
        ConfigurationError: If the document defines neither "pages"
            nor "variants"
    """
    self.document = document

    # A directory lends its own name to the document, a file its stem
    points_to_dir = config_path.is_dir()
    self.name = config_path.name if points_to_dir else config_path.stem
    if points_to_dir:
        config_path = config_path / DEFAULT_DOCUMENT_CONFIG_FILE

    merged_base = deep_merge(base_config, DEFAULT_DOCUMENT_CONFIG)

    self.document.log_trace_section(
        "Loading document configuration: %s", config_path
    )
    super().__init__(merged_base, config_path)
    self.document.log_trace(self.pretty())

    directories = self["directories"]
    self.bake_path = directories["config"] / "bake.py"
    self.build_dir = directories["build"] / self.name
    self.dist_dir = directories["dist"] / self.name

    # The "pages" may be defined in the variants rather than
    # the document itself (when different variants have different pages)
    if "pages" not in self:
        if "variants" not in self:
            self.document.log_warning(
                f'Document "{self.name}" has neither "pages" nor "variants"'
            )
            raise ConfigurationError(
                f'Cannot determine pages of document "{self.name}"'
            )
        # A variant not defining pages will fail to process
        self.document.log_debug(
            'Pages of document "%s" will be determined per variant',
            self.name,
        )

    # Actual pages will be determined during processing
    self.pages = []

101 

def determine_pages(self, config: dict[str, Any]) -> list[Path]:
    """Determine pages for the given (document/variant) configuration.

    Each entry of ``config["pages"]`` is either a mapping with a "path"
    key, resolved relative to the config directory, or a plain name,
    resolved relative to the pages directory. A resolved page without a
    file suffix gets ".yaml" appended.

    Args:
        config: The document or variant configuration to read pages from

    Returns:
        The resolved page configuration paths. The same list is also
        stored in ``self.pages``.

    Raises:
        ConfigurationError: If the configuration has no "pages"
    """
    if "pages" not in config:
        raise ConfigurationError(f'Cannot determine pages for "{self.name}"')

    pages = []
    for page_spec in config["pages"]:
        if isinstance(page_spec, dict) and "path" in page_spec:
            # Path was specified: relative to this config file
            page = self.resolve_path(
                page_spec["path"], directory=config["directories"]["config"]
            )
        else:
            # Only name was specified: relative to the pages directory
            page = self.resolve_path(
                page_spec, directory=config["directories"]["pages"]
            )
        if not page.suffix:
            page = page.with_suffix(".yaml")
        pages.append(page)

    self.pages = pages
    # Honour the declared return type; callers may still read self.pages
    return pages

122 

def __init__(
    self,
    baker: "PDFBaker",  # type: ignore # noqa: F821
    base_config: dict[str, Any],
    config_path: Path,
) -> None:
    """Set up a document and load its configuration.

    Args:
        baker: The baker this document belongs to
        base_config: The configuration the document's own gets merged with
        config_path: The document's config file or directory
    """
    super().__init__()
    self.baker = baker
    # The configuration logs through this document while loading
    self.config = self.Configuration(
        document=self, base_config=base_config, config_path=config_path
    )

137 

138 def process_document(self) -> tuple[Path | list[Path] | None, str | None]: 

139 """Process the document - use custom bake module if it exists. 

140 

141 Returns: 

142 Tuple of (pdf_files, error_message) where: 

143 - pdf_files is a Path or list of Paths to the created PDF 

144 files, or None if creation failed 

145 FIXME: could have created SOME PDF files 

146 - error_message is a string describing the error, or None if successful 

147 """ 

148 self.log_info_section('Processing document "%s"...', self.config.name) 

149 

150 self.config.build_dir.mkdir(parents=True, exist_ok=True) 

151 self.config.dist_dir.mkdir(parents=True, exist_ok=True) 

152 

153 try: 

154 if self.config.bake_path.exists(): 

155 return self._process_with_custom_bake(self.config.bake_path), None 

156 return self.process(), None 

157 except PDFBakerError as exc: 

158 return None, str(exc) 

159 

160 def _process_with_custom_bake(self, bake_path: Path) -> Path | list[Path]: 

161 """Process document using custom bake module.""" 

162 try: 

163 spec = importlib.util.spec_from_file_location( 

164 f"documents.{self.config.name}.bake", bake_path 

165 ) 

166 if spec is None or spec.loader is None: 

167 raise PDFBakerError( 

168 f"Failed to load bake module for document {self.config.name}" 

169 ) 

170 module = importlib.util.module_from_spec(spec) 

171 spec.loader.exec_module(module) 

172 return module.process_document(document=self) 

173 except Exception as exc: 

174 raise PDFBakerError( 

175 f"Failed to process document with custom bake: {exc}" 

176 ) from exc 

177 

def process(self) -> Path | list[Path]:
    """Run the standard processing for this document.

    Returns:
        A single Path for a document without variants, otherwise a list
        with one Path per variant (in the order the variants are defined)
    """
    if "variants" not in self.config:
        # Single PDF document
        single_config = render_config(self.config)
        return self._finalize(self._process_pages(single_config), single_config)

    # Multiple PDF documents, one per variant
    created = []
    for variant in self.config["variants"]:
        self.log_info_subsection('Processing variant "%s"...', variant["name"])
        merged = deep_merge(self.config, variant)
        merged["variant"] = variant
        rendered = render_config(merged)
        variant_pages = self._process_pages(rendered)
        created.append(self._finalize(variant_pages, rendered))
    return created

197 

def _process_pages(self, config: dict[str, Any]) -> list[Path]:
    """Process every page of the given configuration.

    Args:
        config: The rendered document or variant configuration

    Returns:
        The result of each page's ``process()``, in page order
    """
    self.config.determine_pages(config)
    page_paths = self.config.pages
    self.log_debug_subsection("Pages to process:")
    self.log_debug(page_paths)

    # Page numbers are 1-based
    return [
        PDFBakerPage(
            document=self,
            page_number=number,
            base_config=config,
            config_path=page_path,
        ).process()
        for number, page_path in enumerate(page_paths, start=1)
    ]

214 

def _finalize(self, pdf_files: list[Path], doc_config: dict[str, Any]) -> Path:
    """Combine PDF pages and optionally compress.

    The pages are combined in the build directory. The result ends up
    in the dist directory, either compressed (when "compress_pdf" is
    set in the configuration and compression succeeds) or moved as is.

    Args:
        pdf_files: The PDF files of the individual pages
        doc_config: The document or variant configuration; its
            "filename" determines the name of the created PDF

    Returns:
        Path of the final PDF in the dist directory

    Raises:
        PDFBakerError: If combining the pages fails
    """
    import shutil

    self.log_debug_subsection("Finalizing document...")
    self.log_debug("Combining PDF pages...")
    pdf_name = f"{doc_config['filename']}.pdf"
    try:
        combined_pdf = combine_pdfs(pdf_files, self.config.build_dir / pdf_name)
    except PDFCombineError as exc:
        raise PDFBakerError(f"Failed to combine PDFs: {exc}") from exc

    output_path = self.config.dist_dir / pdf_name

    compressed = False
    if doc_config.get("compress_pdf", False):
        self.log_debug("Compressing PDF document...")
        try:
            compress_pdf(combined_pdf, output_path)
        except PDFCompressionError as exc:
            # Best effort: fall back to the uncompressed PDF
            self.log_warning(
                "Compression failed, using uncompressed PDF: %s",
                exc,
            )
        else:
            compressed = True
            self.log_info("PDF compressed successfully")

    if not compressed:
        # shutil.move instead of os.rename: build and dist may live on
        # different filesystems, and the target may already exist from
        # a previous run (which os.rename refuses on Windows).
        shutil.move(combined_pdf, output_path)

    self.log_info("Created %s", output_path.name)
    return output_path

245 

246 def teardown(self) -> None: 

247 """Clean up build directory after processing.""" 

248 self.log_debug_subsection( 

249 "Tearing down build directory: %s", self.config.build_dir 

250 ) 

251 if self.config.build_dir.exists(): 

252 self.log_debug("Removing files in build directory...") 

253 for file_path in self.config.build_dir.iterdir(): 

254 if file_path.is_file(): 

255 file_path.unlink() 

256 

257 try: 

258 self.log_debug("Removing build directory...") 

259 self.config.build_dir.rmdir() 

260 except OSError: 

261 self.log_warning("Build directory not empty - not removing")