Coverage for src/pdfbaker/document.py: 89%
123 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-20 14:42 +1200
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-20 14:42 +1200
1"""PDFBakerDocument class.
3Document-level processing, variants, custom bake modules.
5Delegates the jobs of rendering and converting to its pages,
6combines and compresses the result and reports back to its baker.
7"""
9import importlib
10import os
11from pathlib import Path
12from typing import Any
14from .config import (
15 PDFBakerConfiguration,
16 deep_merge,
17 render_config,
18)
19from .errors import (
20 ConfigurationError,
21 PDFBakerError,
22 PDFCombineError,
23 PDFCompressionError,
24)
25from .logging import LoggingMixin
26from .page import PDFBakerPage
27from .pdf import (
28 combine_pdfs,
29 compress_pdf,
30)
32DEFAULT_DOCUMENT_CONFIG = {
33 # Default to directories relative to the config file
34 "directories": {
35 "pages": "pages",
36 "templates": "templates",
37 "images": "images",
38 },
39}
40DEFAULT_DOCUMENT_CONFIG_FILE = "config.yaml"
42__all__ = ["PDFBakerDocument"]
class PDFBakerDocument(LoggingMixin):
    """A document being processed.

    Renders its pages (optionally once per variant), combines the page PDFs
    into one file, optionally compresses it and places the result in the
    document's dist directory. A ``bake.py`` next to the document
    configuration takes over processing when present.
    """

    class Configuration(PDFBakerConfiguration):
        """PDFBaker document-specific configuration."""

        def __init__(
            self,
            document: "PDFBakerDocument",
            base_config: "PDFBakerConfiguration",  # type: ignore # noqa: F821
            config_path: Path,
        ) -> None:
            """Initialize document configuration.

            Args:
                document: The document this configuration belongs to
                    (its logging methods are used while loading)
                base_config: The PDFBaker configuration to merge with
                config_path: The document configuration (YAML file), or a
                    directory containing the default configuration file

            Raises:
                ConfigurationError: If the configuration defines neither
                    "pages" nor "variants"
            """
            self.document = document

            # A directory is named after itself and must contain the
            # default config file; a file is named after its stem.
            if config_path.is_dir():
                self.name = config_path.name
                config_path = config_path / DEFAULT_DOCUMENT_CONFIG_FILE
            else:
                self.name = config_path.stem

            base_config = deep_merge(base_config, DEFAULT_DOCUMENT_CONFIG)

            self.document.log_trace_section(
                "Loading document configuration: %s", config_path
            )
            super().__init__(base_config, config_path)
            self.document.log_trace(self.pretty())

            self.bake_path = self["directories"]["config"] / "bake.py"
            self.build_dir = self["directories"]["build"] / self.name
            self.dist_dir = self["directories"]["dist"] / self.name

            # The "pages" may be defined in the variants rather than
            # the document itself (when different variants have different pages)
            if "pages" not in self:
                if "variants" in self:
                    # A variant not defining pages will fail to process
                    self.document.log_debug(
                        'Pages of document "%s" will be determined per variant',
                        self.name,
                    )
                else:
                    self.document.log_warning(
                        f'Document "{self.name}" has neither "pages" nor "variants"'
                    )
                    raise ConfigurationError(
                        f'Cannot determine pages of document "{self.name}"'
                    )

            # Actual pages will be determined during processing
            self.pages = []

        def determine_pages(self, config: dict[str, Any]) -> list[Path]:
            """Determine pages for the given (document/variant) configuration.

            The result is stored in ``self.pages`` and also returned.

            Args:
                config: The document or variant configuration to read the
                    "pages" and "directories" entries from

            Returns:
                The resolved page configuration paths, in configured order

            Raises:
                ConfigurationError: If ``config`` has no "pages" entry
            """
            if "pages" not in config:
                raise ConfigurationError(f'Cannot determine pages for "{self.name}"')

            directories = config["directories"]
            pages = []
            for page_spec in config["pages"]:
                if isinstance(page_spec, dict) and "path" in page_spec:
                    # Path was specified: relative to this config file
                    page = self.resolve_path(
                        page_spec["path"], directory=directories["config"]
                    )
                else:
                    # Only name was specified: relative to the pages directory
                    page = self.resolve_path(
                        page_spec, directory=directories["pages"]
                    )
                if not page.suffix:
                    page = page.with_suffix(".yaml")
                pages.append(page)

            self.pages = pages
            return pages

    def __init__(
        self,
        baker: "PDFBaker",  # type: ignore # noqa: F821
        base_config: dict[str, Any],
        config_path: Path,
    ):
        """Initialize a document.

        Args:
            baker: The baker this document belongs to
            base_config: The configuration to merge the document's own
                configuration with
            config_path: The document configuration file, or a directory
                containing it
        """
        super().__init__()
        self.baker = baker
        self.config = self.Configuration(
            document=self,
            base_config=base_config,
            config_path=config_path,
        )

    def process_document(self) -> tuple[Path | list[Path] | None, str | None]:
        """Process the document - use custom bake module if it exists.

        Creates the build and dist directories if necessary.

        Returns:
            Tuple of (pdf_files, error_message) where:
            - pdf_files is a Path or list of Paths to the created PDF
              files, or None if creation failed
              FIXME: could have created SOME PDF files
            - error_message is a string describing the error, or None if successful
        """
        self.log_info_section('Processing document "%s"...', self.config.name)

        self.config.build_dir.mkdir(parents=True, exist_ok=True)
        self.config.dist_dir.mkdir(parents=True, exist_ok=True)

        try:
            if self.config.bake_path.exists():
                return self._process_with_custom_bake(self.config.bake_path), None
            return self.process(), None
        except PDFBakerError as exc:
            # Report the failure to the caller instead of propagating it
            return None, str(exc)

    def _process_with_custom_bake(self, bake_path: Path) -> Path | list[Path]:
        """Process document using custom bake module.

        Args:
            bake_path: Path of the Python file to load; its
                ``process_document(document=...)`` function is called

        Returns:
            Whatever the module's ``process_document`` returns

        Raises:
            PDFBakerError: If the module cannot be loaded or anything goes
                wrong while executing it
        """
        # "import importlib" alone does not guarantee that the "util"
        # submodule is loaded, so import it explicitly.
        import importlib.util  # pylint: disable=import-outside-toplevel

        try:
            spec = importlib.util.spec_from_file_location(
                f"documents.{self.config.name}.bake", bake_path
            )
            if spec is None or spec.loader is None:
                raise PDFBakerError(
                    f"Failed to load bake module for document {self.config.name}"
                )
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            return module.process_document(document=self)
        except Exception as exc:
            # Boundary to user-supplied code: surface any failure as a
            # PDFBakerError so that process_document() can report it.
            raise PDFBakerError(
                f"Failed to process document with custom bake: {exc}"
            ) from exc

    def process(self) -> Path | list[Path]:
        """Process document using standard processing.

        Returns:
            A list with one PDF path per variant if the configuration
            defines "variants", otherwise the path of the single PDF
        """
        if "variants" in self.config:
            # Multiple PDF documents
            pdf_files = []
            for variant in self.config["variants"]:
                self.log_info_subsection('Processing variant "%s"...', variant["name"])
                variant_config = deep_merge(self.config, variant)
                # Expose the variant's own settings under the "variant" key
                variant_config["variant"] = variant
                variant_config = render_config(variant_config)
                page_pdfs = self._process_pages(variant_config)
                pdf_files.append(self._finalize(page_pdfs, variant_config))
            return pdf_files

        # Single PDF document
        doc_config = render_config(self.config)
        page_pdfs = self._process_pages(doc_config)
        return self._finalize(page_pdfs, doc_config)

    def _process_pages(self, config: dict[str, Any]) -> list[Path]:
        """Process pages with given configuration.

        Args:
            config: The rendered document or variant configuration

        Returns:
            The results of processing each page, in page order
        """
        self.config.determine_pages(config)
        self.log_debug_subsection("Pages to process:")
        self.log_debug(self.config.pages)

        pdf_files = []
        for page_num, page_config in enumerate(self.config.pages, start=1):
            page = PDFBakerPage(
                document=self,
                page_number=page_num,
                base_config=config,
                config_path=page_config,
            )
            pdf_files.append(page.process())

        return pdf_files

    def _finalize(self, pdf_files: list[Path], doc_config: dict[str, Any]) -> Path:
        """Combine PDF pages and optionally compress.

        Args:
            pdf_files: The page PDFs to combine, in order
            doc_config: The rendered configuration; "filename" names the
                output and a truthy "compress_pdf" enables compression

        Returns:
            Path of the final PDF in the dist directory

        Raises:
            PDFBakerError: If the pages cannot be combined
        """
        self.log_debug_subsection("Finalizing document...")
        self.log_debug("Combining PDF pages...")
        pdf_name = f"{doc_config['filename']}.pdf"
        try:
            combined_pdf = combine_pdfs(pdf_files, self.config.build_dir / pdf_name)
        except PDFCombineError as exc:
            raise PDFBakerError(f"Failed to combine PDFs: {exc}") from exc

        output_path = self.config.dist_dir / pdf_name

        compressed = False
        if doc_config.get("compress_pdf", False):
            self.log_debug("Compressing PDF document...")
            try:
                compress_pdf(combined_pdf, output_path)
            except PDFCompressionError as exc:
                # Best effort: fall back to the uncompressed PDF below
                self.log_warning(
                    "Compression failed, using uncompressed PDF: %s",
                    exc,
                )
            else:
                compressed = True
                self.log_info("PDF compressed successfully")

        if not compressed:
            # os.replace() overwrites an existing destination on every
            # platform, whereas os.rename() fails on Windows if the output
            # of a previous run is still there.
            os.replace(combined_pdf, output_path)

        self.log_info("Created %s", output_path.name)
        return output_path

    def teardown(self) -> None:
        """Clean up build directory after processing.

        Removes the files directly inside the build directory and then the
        directory itself. If anything else is left in it (e.g. a
        subdirectory), the directory is kept and a warning is logged.
        """
        build_dir = self.config.build_dir
        self.log_debug_subsection("Tearing down build directory: %s", build_dir)
        if not build_dir.exists():
            # Nothing to clean up
            return

        self.log_debug("Removing files in build directory...")
        for file_path in build_dir.iterdir():
            if file_path.is_file():
                file_path.unlink()

        self.log_debug("Removing build directory...")
        try:
            build_dir.rmdir()
        except OSError:
            self.log_warning("Build directory not empty - not removing")