Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# vim: sw=4:expandtab:foldmethod=marker 

2# 

3# Copyright (c) 2006, Mathieu Fenniak 

4# All rights reserved. 

5# 

6# Redistribution and use in source and binary forms, with or without 

7# modification, are permitted provided that the following conditions are 

8# met: 

9# 

10# * Redistributions of source code must retain the above copyright notice, 

11# this list of conditions and the following disclaimer. 

12# * Redistributions in binary form must reproduce the above copyright notice, 

13# this list of conditions and the following disclaimer in the documentation 

14# and/or other materials provided with the distribution. 

15# * The name of the author may not be used to endorse or promote products 

16# derived from this software without specific prior written permission. 

17# 

18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 

19# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 

20# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 

21# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 

22# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 

23# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 

24# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 

25# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 

26# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 

27# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 

28# POSSIBILITY OF SUCH DAMAGE. 

29 

30 

31""" 

32Implementation of generic PDF objects (dictionary, number, string, and so on) 

33""" 

34__author__ = "Mathieu Fenniak" 

35__author_email__ = "biziqe@mathieu.fenniak.net" 

36 

37import re 

38from .utils import readNonWhitespace, RC4_encrypt, skipOverComment 

39from .utils import b_, u_, chr_, ord_ 

40from .utils import PdfStreamError 

41import warnings 

42from . import filters 

43from . import utils 

44import decimal 

45import codecs 

46import sys 

47#import debugging 

48 

# Leading bytes that identify a non-numeric object. readObject() dispatches
# on the position of the peeked byte inside this string.
ObjectPrefix = b_('/<[tf(n%')

# Sign bytes: an object starting with one of these is read as a number
# without testing for an indirect reference.
NumberSigns = b_('+-')

# Matches "<digits> <digits> R" followed by one non-letter byte; used to
# tell an indirect reference apart from a plain number.
IndirectPattern = re.compile(b_(r"(\d+)\s+(\d+)\s+R[^a-zA-Z]"))

52 

53 

def readObject(stream, pdf):
    """Read the next PDF object from ``stream`` and return it.

    One byte is peeked (the stream is rewound afterwards) and its position
    in ``ObjectPrefix`` selects the reader: name, hex string or dictionary,
    array, boolean, literal string, null, or comment.  Any other byte is
    treated as the start of a number or an indirect reference.

    :param stream: seekable binary stream positioned at the object start.
    :param pdf: reader object handed through to the readers that need it.
    :raises PdfStreamError: if the stream ends inside a comment.
    """
    tok = stream.read(1)
    stream.seek(-1, 1)  # reset to start
    idx = ObjectPrefix.find(tok)
    if idx == 0:
        # name object
        return NameObject.readFromStream(stream, pdf)
    elif idx == 1:
        # hexadecimal string OR dictionary
        peek = stream.read(2)
        stream.seek(-2, 1)  # reset to start
        if peek == b_('<<'):
            return DictionaryObject.readFromStream(stream, pdf)
        else:
            return readHexStringFromStream(stream)
    elif idx == 2:
        # array object
        return ArrayObject.readFromStream(stream, pdf)
    elif idx == 3 or idx == 4:
        # boolean object
        return BooleanObject.readFromStream(stream)
    elif idx == 5:
        # string object
        return readStringFromStream(stream)
    elif idx == 6:
        # null object
        return NullObject.readFromStream(stream)
    elif idx == 7:
        # comment: skip to the end of the line, then read what follows
        while tok not in (b_('\r'), b_('\n')):
            tok = stream.read(1)
            if not tok:
                # Without this check a comment that runs to the end of the
                # stream would spin forever, since b'' never matches an EOL.
                raise PdfStreamError("Stream has ended unexpectedly")
        tok = readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    else:
        # number object OR indirect reference
        if tok in NumberSigns:
            # number
            return NumberObject.readFromStream(stream)
        peek = stream.read(20)
        stream.seek(-len(peek), 1)  # reset to start
        if IndirectPattern.match(peek) is not None:
            return IndirectObject.readFromStream(stream, pdf)
        else:
            return NumberObject.readFromStream(stream)

99 

100 

class PdfObject(object):
    """Common base class of the PDF object types defined in this module."""

    def getObject(self):
        """Return the object itself.

        Subclasses that stand in for another object override this to
        return the object they refer to.
        """
        return self

105 

106 

class NullObject(PdfObject):
    """The PDF ``null`` object."""

    def writeToStream(self, stream, encryption_key):
        """Write the keyword ``null`` to ``stream``.

        ``encryption_key`` is accepted for interface parity and not used.
        """
        stream.write(b_("null"))

    @staticmethod
    def readFromStream(stream):
        """Consume four bytes from ``stream`` and return a ``NullObject``.

        :raises utils.PdfReadError: if the four bytes are not ``null``.
        """
        if stream.read(4) == b_("null"):
            return NullObject()
        raise utils.PdfReadError("Could not read Null object")

117 

118 

class BooleanObject(PdfObject):
    """A PDF boolean; the Python value is kept in ``self.value``."""

    def __init__(self, value):
        self.value = value

    def writeToStream(self, stream, encryption_key):
        """Write ``true`` or ``false`` depending on the truth of ``value``.

        ``encryption_key`` is accepted for interface parity and not used.
        """
        stream.write(b_("true") if self.value else b_("false"))

    @staticmethod
    def readFromStream(stream):
        """Read ``true`` or ``false`` from ``stream``.

        :raises utils.PdfReadError: if the first four bytes are neither
            ``true`` nor ``fals``.
        """
        head = stream.read(4)
        if head == b_("true"):
            return BooleanObject(True)
        if head == b_("fals"):
            # Consume the fifth byte of "false"; its value is not checked.
            stream.read(1)
            return BooleanObject(False)
        raise utils.PdfReadError('Could not read Boolean object')

139 

140 

class ArrayObject(list, PdfObject):
    """A PDF array, stored as a Python list of PDF objects."""

    def writeToStream(self, stream, encryption_key):
        """Write ``[ item item ... ]``, delegating to each item's writer."""
        stream.write(b_("["))
        for data in self:
            stream.write(b_(" "))
            data.writeToStream(stream, encryption_key)
        stream.write(b_(" ]"))

    @staticmethod
    def readFromStream(stream, pdf):
        """Parse an array starting at the current position of ``stream``.

        :param stream: seekable binary stream positioned on ``[``.
        :param pdf: reader object passed through to ``readObject``.
        :returns: an ``ArrayObject`` holding the parsed items.
        :raises utils.PdfReadError: if the first byte is not ``[``.
        :raises PdfStreamError: if the stream ends before the closing ``]``.
        """
        arr = ArrayObject()
        if stream.read(1) != b_("["):
            raise utils.PdfReadError("Could not read array")
        while True:
            # skip leading whitespace
            tok = stream.read(1)
            while tok.isspace():
                tok = stream.read(1)
            if not tok:
                # At EOF nothing was read, so seeking back one byte would
                # re-read the previous byte instead of new input.
                raise PdfStreamError("Stream has ended unexpectedly")
            # check for array ending
            if tok == b_("]"):
                break
            # not the end: rewind so readObject sees the item's first byte
            stream.seek(-1, 1)
            arr.append(readObject(stream, pdf))
        return arr

169 

170 

class IndirectObject(PdfObject):
    """A reference (``<idnum> <generation> R``) to an object owned by ``pdf``."""

    def __init__(self, idnum, generation, pdf):
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def getObject(self):
        """Resolve the reference through ``self.pdf`` and return the target."""
        return self.pdf.getObject(self).getObject()

    def __repr__(self):
        return "IndirectObject(%r, %r)" % (self.idnum, self.generation)

    def __eq__(self, other):
        # isinstance() already rejects None, so no separate None test.
        return (
            isinstance(other, IndirectObject) and
            self.idnum == other.idnum and
            self.generation == other.generation and
            self.pdf is other.pdf
        )

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable on Python 3.
        # Hash the same fields __eq__ compares; the owning reader is
        # compared by identity, hence id().
        return hash((self.idnum, self.generation, id(self.pdf)))

    def writeToStream(self, stream, encryption_key):
        """Write ``<idnum> <generation> R``; ``encryption_key`` is unused."""
        stream.write(b_("%s %s R" % (self.idnum, self.generation)))

    @staticmethod
    def readFromStream(stream, pdf):
        """Parse ``<idnum> <generation> R`` from ``stream``.

        :raises PdfStreamError: if the stream ends inside either number.
        :raises utils.PdfReadError: if the numbers are not followed by ``R``.
        """
        idnum = b_("")
        while True:
            tok = stream.read(1)
            if not tok:
                # stream has truncated prematurely
                raise PdfStreamError("Stream has ended unexpectedly")
            if tok.isspace():
                break
            idnum += tok
        generation = b_("")
        while True:
            tok = stream.read(1)
            if not tok:
                # stream has truncated prematurely
                raise PdfStreamError("Stream has ended unexpectedly")
            if tok.isspace():
                if not generation:
                    # extra whitespace between the two numbers
                    continue
                break
            generation += tok
        r = readNonWhitespace(stream)
        if r != b_("R"):
            raise utils.PdfReadError("Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell()))
        return IndirectObject(int(idnum), int(generation), pdf)

224 

225 

class FloatObject(decimal.Decimal, PdfObject):
    """A PDF real number backed by ``decimal.Decimal``."""

    def __new__(cls, value="0", context=None):
        try:
            return decimal.Decimal.__new__(cls, utils.str_(value), context)
        except Exception:
            # Fall back to the plain string form of the value.  Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit propagate.
            return decimal.Decimal.__new__(cls, str(value))

    def __repr__(self):
        if self == self.to_integral():
            return str(self.quantize(decimal.Decimal(1)))
        else:
            # Standard formatting adds useless extraneous zeros; drop the
            # trailing ones.
            return ("%.5f" % self).rstrip('0')

    def as_numeric(self):
        """Return the value as a Python ``float`` parsed from ``repr(self)``."""
        return float(b_(repr(self)))

    def writeToStream(self, stream, encryption_key):
        """Write ``repr(self)``; ``encryption_key`` is unused."""
        stream.write(b_(repr(self)))

249 

250 

class NumberObject(int, PdfObject):
    """A PDF integer."""

    # Matches the first byte that cannot be part of a number.  The hyphen is
    # escaped: written as "+-." it forms a range that also admits ",".
    NumberPattern = re.compile(b_(r'[^+\-.0-9]'))
    ByteDot = b_(".")

    def __new__(cls, value):
        val = int(value)
        try:
            return int.__new__(cls, val)
        except OverflowError:
            return int.__new__(cls, 0)

    def as_numeric(self):
        """Return the value as a plain ``int`` parsed from ``repr(self)``."""
        return int(b_(repr(self)))

    def writeToStream(self, stream, encryption_key):
        """Write ``repr(self)``; ``encryption_key`` is unused."""
        stream.write(b_(repr(self)))

    @staticmethod
    def readFromStream(stream):
        """Read a numeric token and return a number object.

        The token is whatever ``utils.readUntilRegex`` returns for
        ``NumberPattern``.  A token containing ``.`` becomes a
        ``FloatObject``; anything else becomes a ``NumberObject``.
        """
        num = utils.readUntilRegex(stream, NumberObject.NumberPattern)
        if num.find(NumberObject.ByteDot) != -1:
            return FloatObject(num)
        else:
            return NumberObject(num)

275 

276 

277## 

278# Given a string (either a "str" or "unicode"), create a ByteStringObject or a 

279# TextStringObject to represent the string. 

def createStringObject(string):
    """Wrap ``string`` in the matching PDF string object.

    Text input yields a ``TextStringObject``.  Byte input is decoded as
    UTF-16 when it starts with the UTF-16BE BOM, otherwise through
    ``decode_pdfdocencoding``; the result records which path was taken.
    Bytes that fail to decode are returned as a ``ByteStringObject``.

    :raises TypeError: if ``string`` is neither text nor bytes.
    """
    if isinstance(string, utils.string_type):
        return TextStringObject(string)
    if not isinstance(string, utils.bytes_type):
        raise TypeError("createStringObject should have str or unicode arg")
    try:
        if string.startswith(codecs.BOM_UTF16_BE):
            text = TextStringObject(string.decode("utf-16"))
            text.autodetect_utf16 = True
        else:
            # Some strings are text, some are just byte arrays; the only
            # way to find out is to attempt the decode.
            text = TextStringObject(decode_pdfdocencoding(string))
            text.autodetect_pdfdocencoding = True
        return text
    except UnicodeDecodeError:
        return ByteStringObject(string)

301 

302 

def readHexStringFromStream(stream):
    """Read a ``<...>`` hex string and return it via ``createStringObject``.

    Non-whitespace bytes are collected in pairs and each pair is decoded as
    one hexadecimal byte.  A single leftover digit is padded with ``0``.

    :raises PdfStreamError: if the stream ends before ``>``.
    """
    stream.read(1)  # skip the opening "<"
    chars = []
    pair = b_("")
    while True:
        tok = readNonWhitespace(stream)
        if not tok:
            # stream has truncated prematurely
            raise PdfStreamError("Stream has ended unexpectedly")
        if tok == b_(">"):
            break
        pair += tok
        if len(pair) == 2:
            chars.append(chr(int(pair, base=16)))
            pair = b_("")
    if len(pair) == 1:
        # odd number of digits: the missing low nibble counts as zero
        pair += b_("0")
        chars.append(chr(int(pair, base=16)))
    return createStringObject(b_("".join(chars)))

323 

324 

def readStringFromStream(stream):
    """Read a ``(...)`` literal string and return it via ``createStringObject``.

    Nested, unescaped parentheses are tracked and kept in the result.
    Backslash escapes are translated; an escaped line break contributes
    nothing to the result.

    :raises PdfStreamError: if the stream ends before the closing ``)``.
    :raises utils.PdfReadError: on an unrecognised escape sequence.
    """
    tok = stream.read(1)  # skip the opening "("
    parens = 1
    txt = b_("")
    while True:
        tok = stream.read(1)
        if not tok:
            # stream has truncated prematurely
            raise PdfStreamError("Stream has ended unexpectedly")
        if tok == b_("("):
            parens += 1
        elif tok == b_(")"):
            parens -= 1
            if parens == 0:
                break
        elif tok == b_("\\"):
            tok = stream.read(1)
            if tok == b_("n"):
                tok = b_("\n")
            elif tok == b_("r"):
                tok = b_("\r")
            elif tok == b_("t"):
                tok = b_("\t")
            elif tok == b_("b"):
                tok = b_("\b")
            elif tok == b_("f"):
                tok = b_("\f")
            elif tok == b_("c"):
                # Kept as backslash + "c".  Spelled with an escaped
                # backslash because "\c" is an invalid escape sequence.
                tok = b_("\\c")
            elif tok in (b_("("), b_(")"), b_("/"), b_("\\"),
                         b_(" "), b_("%"), b_("<"), b_(">"), b_("["),
                         b_("]"), b_("#"), b_("_"), b_("&"), b_('$')):
                # Escaped delimiters, plus odd/unnecessary escape sequences
                # we have encountered: the character stands for itself.
                pass
            elif tok.isdigit():
                # "The number ddd may consist of one, two, or three
                # octal digits; high-order overflow shall be ignored.
                # Three octal digits shall be used, with leading zeros
                # as needed, if the next character of the string is also
                # a digit." (PDF reference 7.3.4.2, p 16)
                for _ in range(2):
                    ntok = stream.read(1)
                    if ntok.isdigit():
                        tok += ntok
                    else:
                        # Not part of the escape: push it back so the main
                        # loop handles it instead of silently dropping it.
                        if ntok:
                            stream.seek(-1, 1)
                        break
                # Mask to one byte so high-order overflow is ignored.
                tok = b_(chr(int(tok, base=8) & 0xFF))
            elif tok in b_("\n\r"):
                # This case is hit when a backslash followed by a line
                # break occurs.  If it's a multi-char EOL, consume the
                # second character:
                tok = stream.read(1)
                if tok not in b_("\n\r"):
                    stream.seek(-1, 1)
                # Then don't add anything to the actual string, since this
                # line break was escaped:
                tok = b_('')
            else:
                raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok)
        txt += tok
    return createStringObject(txt)

393 

394 

395## 

396# Represents a string object where the text encoding could not be determined. 

397# This occurs quite often, as the PDF spec doesn't provide an alternate way to 

398# represent strings -- for example, the encryption data stored in files (like 

399# /O) is clearly not text, but is still stored in a "String" object. 

class ByteStringObject(utils.bytes_type, PdfObject):
    """A PDF string kept as raw bytes."""

    # For compatibility with TextStringObject.original_bytes; returns self.
    original_bytes = property(lambda self: self)

    def writeToStream(self, stream, encryption_key):
        """Write the bytes as a ``<hex>`` string.

        When ``encryption_key`` is truthy the bytes are passed through
        ``RC4_encrypt`` before being hex-encoded.
        """
        payload = RC4_encrypt(encryption_key, self) if encryption_key else self
        stream.write(b_("<"))
        stream.write(utils.hexencode(payload))
        stream.write(b_(">"))

414 

415 

416## 

417# Represents a string object that has been decoded into a real unicode string. 

418# If read from a PDF document, this string appeared to match the 

419# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to 

420# occur. 

class TextStringObject(utils.string_type, PdfObject):
    """A PDF string that has been decoded into text."""

    # Set by createStringObject to record which decoding produced the text.
    autodetect_pdfdocencoding = False
    autodetect_utf16 = False

    # A text string may have been created where a byte string was expected
    # (autodetection guessed wrong); this property back-calculates the
    # encoded bytes in that case.
    original_bytes = property(lambda self: self.get_original_bytes())

    def get_original_bytes(self):
        """Re-encode the text using the decoding that was autodetected.

        :raises Exception: if neither autodetect flag is set.
        """
        if self.autodetect_utf16:
            return codecs.BOM_UTF16_BE + self.encode("utf-16be")
        if self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        raise Exception("no information about original bytes")

    def writeToStream(self, stream, encryption_key):
        """Write the string to ``stream``.

        The text is encoded with ``encode_pdfdocencoding``, falling back to
        BOM-prefixed UTF-16BE.  With a truthy ``encryption_key`` the encoded
        bytes are RC4-encrypted and written as a hex string; otherwise they
        are written between parentheses with octal escapes.
        """
        try:
            bytearr = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
        if encryption_key:
            encrypted = RC4_encrypt(encryption_key, bytearr)
            ByteStringObject(encrypted).writeToStream(stream, None)
            return
        stream.write(b_("("))
        for c in bytearr:
            if chr_(c).isalnum() or c == b_(' '):
                stream.write(b_(chr_(c)))
            else:
                # everything else goes out as a three-digit octal escape
                stream.write(b_("\\%03o" % ord_(c)))
        stream.write(b_(")"))

465 

466 

class NameObject(str, PdfObject):
    """A PDF name; the stored text includes the leading ``/``."""

    delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]"))
    surfix = b_("/")

    def writeToStream(self, stream, encryption_key):
        """Write the name as is; ``encryption_key`` is unused."""
        stream.write(b_(self))

    @staticmethod
    def readFromStream(stream, pdf):
        """Read a name from ``stream``.

        The name is the leading ``/`` plus whatever ``utils.readUntilRegex``
        returns for ``delimiterPattern``, decoded as UTF-8.

        :raises utils.PdfReadError: if the first byte is not ``/``, or if
            decoding fails while ``pdf.strict`` is true.  When
            ``pdf.strict`` is false a ``PdfReadWarning`` is issued and the
            undecoded name is used instead.
        """
        name = stream.read(1)
        if name != NameObject.surfix:
            raise utils.PdfReadError("name read error")
        name += utils.readUntilRegex(stream, NameObject.delimiterPattern,
                                     ignore_eof=True)
        try:
            return NameObject(name.decode('utf-8'))
        except (UnicodeEncodeError, UnicodeDecodeError):
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            if pdf.strict:
                raise utils.PdfReadError("Illegal character in Name Object")
            warnings.warn("Illegal character in Name Object", utils.PdfReadWarning)
            return NameObject(name)

495 

496 

class DictionaryObject(dict, PdfObject):
    """A PDF dictionary whose keys and values are PDF objects."""

    def raw_get(self, key):
        """Return the stored value for ``key`` without calling ``getObject``."""
        return dict.__getitem__(self, key)

    def __setitem__(self, key, value):
        if not isinstance(key, PdfObject):
            raise ValueError("key must be PdfObject")
        if not isinstance(value, PdfObject):
            raise ValueError("value must be PdfObject")
        return dict.__setitem__(self, key, value)

    def setdefault(self, key, value=None):
        if not isinstance(key, PdfObject):
            raise ValueError("key must be PdfObject")
        if not isinstance(value, PdfObject):
            raise ValueError("value must be PdfObject")
        return dict.setdefault(self, key, value)

    def __getitem__(self, key):
        return dict.__getitem__(self, key).getObject()

    ##
    # Retrieves XMP (Extensible Metadata Platform) data relevant to
    # this object, if available.
    # <p>
    # Stability: Added in v1.12, will exist for all future v1.x releases.
    # @return Returns a {@link #xmp.XmpInformation XmlInformation} instance
    # that can be used to access XMP metadata from the document.  Can also
    # return None if no metadata was found on the document root.
    def getXmpMetadata(self):
        metadata = self.get("/Metadata", None)
        if metadata is None:
            return None
        metadata = metadata.getObject()
        from . import xmp
        if not isinstance(metadata, xmp.XmpInformation):
            # Wrap once and store the wrapper so later calls reuse it.
            metadata = xmp.XmpInformation(metadata)
            self[NameObject("/Metadata")] = metadata
        return metadata

    ##
    # Read-only property that accesses the {@link
    # #DictionaryObject.getXmpMetadata getXmpMetadata} function.
    # <p>
    # Stability: Added in v1.12, will exist for all future v1.x releases.
    xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)

    def writeToStream(self, stream, encryption_key):
        """Write ``<<``, one ``key value`` pair per line, then ``>>``."""
        stream.write(b_("<<\n"))
        for key, value in list(self.items()):
            key.writeToStream(stream, encryption_key)
            stream.write(b_(" "))
            value.writeToStream(stream, encryption_key)
            stream.write(b_("\n"))
        stream.write(b_(">>"))

    @staticmethod
    def readFromStream(stream, pdf):
        """Parse a dictionary, and the stream body following it if present.

        :param stream: seekable binary stream positioned on ``<<``.
        :param pdf: reader object; ``pdf.strict`` decides whether a repeated
            key raises or only warns, and ``pdf.getObject`` resolves an
            indirect ``/Length``.
        :returns: the result of ``StreamObject.initializeFromDictionary``
            when a ``stream`` keyword follows the dictionary, otherwise a
            ``DictionaryObject``.
        :raises utils.PdfReadError: on a missing ``<<``, on a repeated key
            in strict mode, or when ``endstream`` cannot be located.
        :raises PdfStreamError: if the input ends inside the dictionary,
            the ``stream`` keyword is not followed by an end-of-line, or
            ``/Length`` is missing.
        """
        tmp = stream.read(2)
        if tmp != b_("<<"):
            raise utils.PdfReadError("Dictionary read error at byte %s: stream must begin with '<<'" % utils.hexStr(stream.tell()))
        data = {}
        while True:
            tok = readNonWhitespace(stream)
            if tok == b_('\x00'):
                continue
            elif tok == b_('%'):
                stream.seek(-1, 1)
                skipOverComment(stream)
                continue
            if not tok:
                # stream has truncated prematurely
                raise PdfStreamError("Stream has ended unexpectedly")

            if tok == b_(">"):
                # consume the second ">" of the closing ">>"
                stream.read(1)
                break
            stream.seek(-1, 1)
            key = readObject(stream, pdf)
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = readObject(stream, pdf)
            # The test is on truthiness, so a key whose stored value is
            # falsy is overwritten without being reported as a duplicate.
            if not data.get(key):
                data[key] = value
            elif pdf.strict:
                # multiple definitions of key not permitted
                raise utils.PdfReadError("Multiple definitions in dictionary at byte %s for key %s" \
                                         % (utils.hexStr(stream.tell()), key))
            else:
                warnings.warn("Multiple definitions in dictionary at byte %s for key %s" \
                              % (utils.hexStr(stream.tell()), key), utils.PdfReadWarning)

        pos = stream.tell()
        s = readNonWhitespace(stream)
        if s == b_('s') and stream.read(5) == b_('tream'):
            eol = stream.read(1)
            # odd PDF file output has spaces after 'stream' keyword but before EOL.
            # patch provided by Danial Sandler
            while eol == b_(' '):
                eol = stream.read(1)
            # Explicit raises instead of assert: these checks validate file
            # content and must not disappear under "python -O".
            if eol not in (b_("\n"), b_("\r")):
                raise PdfStreamError("'stream' keyword must be followed by an end-of-line marker")
            if eol == b_("\r"):
                # read \n after
                if stream.read(1) != b_('\n'):
                    stream.seek(-1, 1)
            # this is a stream object, not a dictionary
            if "/Length" not in data:
                raise PdfStreamError("Stream length not defined")
            length = data["/Length"]
            if isinstance(length, IndirectObject):
                # Remember the position and restore it after resolving the
                # reference.
                t = stream.tell()
                length = pdf.getObject(length)
                stream.seek(t, 0)
            data["__streamdata__"] = stream.read(length)
            e = readNonWhitespace(stream)
            ndstream = stream.read(8)
            if (e + ndstream) != b_("endstream"):
                # (sigh) - the odd PDF file has a length that is too long, so
                # we need to read backwards to find the "endstream" ending.
                # ReportLab (unknown version) generates files with this bug,
                # and Python users into PDF files tend to be our audience.
                # we need to do this to correct the streamdata and chop off
                # an extra character.
                pos = stream.tell()
                stream.seek(-10, 1)
                end = stream.read(9)
                if end == b_("endstream"):
                    # we found it by looking back one character further.
                    data["__streamdata__"] = data["__streamdata__"][:-1]
                else:
                    stream.seek(pos, 0)
                    raise utils.PdfReadError("Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell()))
        else:
            # no stream body: rewind to just after the dictionary
            stream.seek(pos, 0)
        if "__streamdata__" in data:
            return StreamObject.initializeFromDictionary(data)
        else:
            retval = DictionaryObject()
            retval.update(data)
            return retval

642 

643 

class TreeObject(DictionaryObject):
    """A dictionary whose children form a linked list.

    The parent holds ``/First``, ``/Last`` and ``/Count``; each child holds
    ``/Parent`` and, where applicable, ``/Prev`` and ``/Next``.
    """

    def __init__(self):
        DictionaryObject.__init__(self)

    def hasChildren(self):
        """Return True if the tree has a ``/First`` entry."""
        return '/First' in self

    def __iter__(self):
        return self.children()

    def children(self):
        """Yield the children from ``/First`` to ``/Last`` via ``/Next``."""
        # Generators must finish with "return": under PEP 479 a
        # StopIteration raised inside one is turned into RuntimeError.
        if not self.hasChildren():
            return

        child = self['/First']
        while True:
            yield child
            if child == self['/Last']:
                return
            child = child['/Next']

    def addChild(self, child, pdf):
        """Append ``child`` to the end of the child list.

        :param child: the object to add; it is resolved with ``getObject``.
        :param pdf: object whose ``getReference`` supplies the indirect
            references stored in the link entries.
        """
        childObj = child.getObject()
        child = pdf.getReference(childObj)
        assert isinstance(child, IndirectObject)

        if '/First' not in self:
            self[NameObject('/First')] = child
            self[NameObject('/Count')] = NumberObject(0)
            prev = None
        else:
            prev = self['/Last']

        self[NameObject('/Last')] = child
        self[NameObject('/Count')] = NumberObject(self[NameObject('/Count')] + 1)

        if prev is not None:
            prevRef = pdf.getReference(prev)
            assert isinstance(prevRef, IndirectObject)
            childObj[NameObject('/Prev')] = prevRef
            prev[NameObject('/Next')] = child

        parentRef = pdf.getReference(self)
        assert isinstance(parentRef, IndirectObject)
        childObj[NameObject('/Parent')] = parentRef

    def removeChild(self, child):
        """Unlink ``child`` from the tree and strip its link entries.

        :raises ValueError: if ``child`` has no ``/Parent``, belongs to a
            different tree, or cannot be found while walking the list.
        """
        childObj = child.getObject()

        if NameObject('/Parent') not in childObj:
            raise ValueError("Removed child does not appear to be a tree item")
        elif childObj[NameObject('/Parent')] != self:
            raise ValueError("Removed child is not a member of this tree")

        found = False
        prevRef = None
        prev = None
        curRef = self[NameObject('/First')]
        cur = curRef.getObject()
        lastRef = self[NameObject('/Last')]
        last = lastRef.getObject()
        while cur is not None:
            if cur == childObj:
                if prev is None:
                    if NameObject('/Next') in cur:
                        # Removing first tree node
                        nextRef = cur[NameObject('/Next')]
                        nextObj = nextRef.getObject()
                        del nextObj[NameObject('/Prev')]
                        self[NameObject('/First')] = nextRef
                        # Re-wrap: NumberObject - 1 is a plain int, which
                        # __setitem__ would reject.
                        self[NameObject('/Count')] = NumberObject(self[NameObject('/Count')] - 1)

                    else:
                        # Removing only tree node
                        assert self[NameObject('/Count')] == 1
                        del self[NameObject('/Count')]
                        del self[NameObject('/First')]
                        if NameObject('/Last') in self:
                            del self[NameObject('/Last')]
                else:
                    if NameObject('/Next') in cur:
                        # Removing middle tree node
                        nextRef = cur[NameObject('/Next')]
                        nextObj = nextRef.getObject()
                        nextObj[NameObject('/Prev')] = prevRef
                        prev[NameObject('/Next')] = nextRef
                        self[NameObject('/Count')] = NumberObject(self[NameObject('/Count')] - 1)
                    else:
                        # Removing last tree node
                        assert cur == last
                        del prev[NameObject('/Next')]
                        self[NameObject('/Last')] = prevRef
                        self[NameObject('/Count')] = NumberObject(self[NameObject('/Count')] - 1)
                found = True
                break

            prevRef = curRef
            prev = cur
            if NameObject('/Next') in cur:
                curRef = cur[NameObject('/Next')]
                cur = curRef.getObject()
            else:
                curRef = None
                cur = None

        if not found:
            raise ValueError("Removal couldn't find item in tree")

        del childObj[NameObject('/Parent')]
        if NameObject('/Next') in childObj:
            del childObj[NameObject('/Next')]
        if NameObject('/Prev') in childObj:
            del childObj[NameObject('/Prev')]

    def emptyTree(self):
        """Detach every child and remove the tree's own link entries."""
        # Collect the children first: the loop deletes the /Next entries
        # that children() follows to reach the next child.
        for child in list(self.children()):
            childObj = child.getObject()
            del childObj[NameObject('/Parent')]
            if NameObject('/Next') in childObj:
                del childObj[NameObject('/Next')]
            if NameObject('/Prev') in childObj:
                del childObj[NameObject('/Prev')]

        if NameObject('/Count') in self:
            del self[NameObject('/Count')]
        if NameObject('/First') in self:
            del self[NameObject('/First')]
        if NameObject('/Last') in self:
            del self[NameObject('/Last')]

773 

774 

class StreamObject(DictionaryObject):
    """A PDF stream: a dictionary plus a body held in ``self._data``."""

    def __init__(self):
        self._data = None
        self.decodedSelf = None

    def writeToStream(self, stream, encryption_key):
        """Write the dictionary followed by the ``stream``/``endstream`` body.

        ``/Length`` is added only while the dictionary is written.  When
        ``encryption_key`` is truthy the body is passed through
        ``RC4_encrypt``.
        """
        self[NameObject("/Length")] = NumberObject(len(self._data))
        try:
            DictionaryObject.writeToStream(self, stream, encryption_key)
        finally:
            # Remove the temporary entry even if writing fails part-way.
            del self["/Length"]
        stream.write(b_("\nstream\n"))
        data = self._data
        if encryption_key:
            data = RC4_encrypt(encryption_key, data)
        stream.write(data)
        stream.write(b_("\nendstream"))

    @staticmethod
    def initializeFromDictionary(data):
        """Build a stream object from a parsed dictionary.

        :param data: mapping containing ``__streamdata__`` and ``/Length``;
            both entries are deleted from it.
        :returns: an ``EncodedStreamObject`` when ``/Filter`` is present,
            otherwise a ``DecodedStreamObject``.
        """
        if "/Filter" in data:
            retval = EncodedStreamObject()
        else:
            retval = DecodedStreamObject()
        retval._data = data["__streamdata__"]
        del data["__streamdata__"]
        del data["/Length"]
        retval.update(data)
        return retval

    def flateEncode(self):
        """Return a new ``EncodedStreamObject`` with the body Flate-encoded.

        ``/FlateDecode`` is placed ahead of any filter this stream already
        declares.  This stream is left untouched.
        """
        if "/Filter" in self:
            f = self["/Filter"]
            if isinstance(f, ArrayObject):
                # Work on a copy: inserting into the existing array would
                # also change this stream's own /Filter entry.
                f = ArrayObject(f)
                f.insert(0, NameObject("/FlateDecode"))
            else:
                newf = ArrayObject()
                newf.append(NameObject("/FlateDecode"))
                newf.append(f)
                f = newf
        else:
            f = NameObject("/FlateDecode")
        retval = EncodedStreamObject()
        retval[NameObject("/Filter")] = f
        retval._data = filters.FlateDecode.encode(self._data)
        return retval

819 

820 

class DecodedStreamObject(StreamObject):
    """A stream whose body in ``_data`` is read and replaced directly."""

    def getData(self):
        """Return the stored body."""
        return self._data

    def setData(self, data):
        """Replace the stored body with ``data``."""
        self._data = data

827 

828 

class EncodedStreamObject(StreamObject):
    """A stream whose body must be decoded before use."""

    def __init__(self):
        self.decodedSelf = None

    def getData(self):
        """Return the decoded body, decoding it on first use.

        The decoded stream is kept in ``self.decodedSelf`` and reused by
        later calls.
        """
        # Compare with None: the cached object is a dict subclass and is
        # falsy when it carries no keys, which would force a re-decode on
        # every call.
        if self.decodedSelf is not None:
            # cached version of decoded object
            return self.decodedSelf.getData()

        # create decoded object
        decoded = DecodedStreamObject()
        decoded._data = filters.decodeStreamData(self)
        for key, value in list(self.items()):
            # entries describing the encoding do not apply to the result
            if key not in ("/Length", "/Filter", "/DecodeParms"):
                decoded[key] = value
        self.decodedSelf = decoded
        return decoded._data

    def setData(self, data):
        """Not supported; always raises ``utils.PdfReadError``."""
        raise utils.PdfReadError("Creating EncodedStreamObject is not currently supported")

850 

851 

class RectangleObject(ArrayObject):
    """
    This class is used to represent *page boxes* in PyPDF2. These boxes include:

        * :attr:`artBox <PyPDF2.pdf.PageObject.artBox>`
        * :attr:`bleedBox <PyPDF2.pdf.PageObject.bleedBox>`
        * :attr:`cropBox <PyPDF2.pdf.PageObject.cropBox>`
        * :attr:`mediaBox <PyPDF2.pdf.PageObject.mediaBox>`
        * :attr:`trimBox <PyPDF2.pdf.PageObject.trimBox>`
    """
    def __init__(self, arr):
        # a rectangle is exactly four numbers
        assert len(arr) == 4
        # wrap anything that is not already a number object
        numbers = [self.ensureIsNumber(x) for x in arr]
        ArrayObject.__init__(self, numbers)

    def ensureIsNumber(self, value):
        """Return ``value`` unchanged if it is a number object, else wrap it
        in a ``FloatObject``."""
        if isinstance(value, (NumberObject, FloatObject)):
            return value
        return FloatObject(value)

    def __repr__(self):
        return "RectangleObject(%s)" % repr(list(self))

    # The array is stored as [lower-left x, lower-left y,
    # upper-right x, upper-right y]; the remaining corners are derived.

    def getLowerLeft_x(self):
        return self[0]

    def getLowerLeft_y(self):
        return self[1]

    def getUpperRight_x(self):
        return self[2]

    def getUpperRight_y(self):
        return self[3]

    def getUpperLeft_x(self):
        return self.getLowerLeft_x()

    def getUpperLeft_y(self):
        return self.getUpperRight_y()

    def getLowerRight_x(self):
        return self.getUpperRight_x()

    def getLowerRight_y(self):
        return self.getLowerLeft_y()

    def getLowerLeft(self):
        return self.getLowerLeft_x(), self.getLowerLeft_y()

    def getLowerRight(self):
        return self.getLowerRight_x(), self.getLowerRight_y()

    def getUpperLeft(self):
        return self.getUpperLeft_x(), self.getUpperLeft_y()

    def getUpperRight(self):
        return self.getUpperRight_x(), self.getUpperRight_y()

    def setLowerLeft(self, value):
        x, y = [self.ensureIsNumber(v) for v in value]
        self[0] = x
        self[1] = y

    def setLowerRight(self, value):
        x, y = [self.ensureIsNumber(v) for v in value]
        self[2] = x
        self[1] = y

    def setUpperLeft(self, value):
        x, y = [self.ensureIsNumber(v) for v in value]
        self[0] = x
        self[3] = y

    def setUpperRight(self, value):
        x, y = [self.ensureIsNumber(v) for v in value]
        self[2] = x
        self[3] = y

    def getWidth(self):
        """Return upper-right x minus lower-left x."""
        return self.getUpperRight_x() - self.getLowerLeft_x()

    def getHeight(self):
        """Return upper-right y minus lower-left y."""
        return self.getUpperRight_y() - self.getLowerLeft_y()

    lowerLeft = property(getLowerLeft, setLowerLeft, None, None)
    """
    Property to read and modify the lower left coordinate of this box
    in (x,y) form.
    """
    lowerRight = property(getLowerRight, setLowerRight, None, None)
    """
    Property to read and modify the lower right coordinate of this box
    in (x,y) form.
    """
    upperLeft = property(getUpperLeft, setUpperLeft, None, None)
    """
    Property to read and modify the upper left coordinate of this box
    in (x,y) form.
    """
    upperRight = property(getUpperRight, setUpperRight, None, None)
    """
    Property to read and modify the upper right coordinate of this box
    in (x,y) form.
    """

950 

951 

class Field(TreeObject):
    """
    A class representing a field dictionary. This class is accessed through
    :meth:`getFields()<PyPDF2.PdfFileReader.getFields>`

    :param data: mapping from which the recognised field entries are copied.
    """
    def __init__(self, data):
        DictionaryObject.__init__(self)
        # Only the entries named here are carried over from ``data``;
        # entries that ``data`` does not have are silently skipped.
        for entry in ("/FT", "/Parent", "/Kids", "/T", "/TU", "/TM", "/Ff",
                      "/V", "/DV", "/AA"):
            try:
                self[NameObject(entry)] = data[entry]
            except KeyError:
                continue

    @property
    def fieldType(self):
        """
        Read-only property accessing the type of this field.
        """
        return self.get("/FT")

    @property
    def parent(self):
        """
        Read-only property accessing the parent of this field.
        """
        return self.get("/Parent")

    @property
    def kids(self):
        """
        Read-only property accessing the kids of this field.
        """
        return self.get("/Kids")

    @property
    def name(self):
        """
        Read-only property accessing the name of this field.
        """
        return self.get("/T")

    @property
    def altName(self):
        """
        Read-only property accessing the alternate name of this field.
        """
        return self.get("/TU")

    @property
    def mappingName(self):
        """
        Read-only property accessing the mapping name of this field. This
        name is used by PyPDF2 as a key in the dictionary returned by
        :meth:`getFields()<PyPDF2.PdfFileReader.getFields>`
        """
        return self.get("/TM")

    @property
    def flags(self):
        """
        Read-only property accessing the field flags, specifying various
        characteristics of the field (see Table 8.70 of the PDF 1.7 reference).
        """
        return self.get("/Ff")

    @property
    def value(self):
        """
        Read-only property accessing the value of this field. Format
        varies based on field type.
        """
        return self.get("/V")

    @property
    def defaultValue(self):
        """
        Read-only property accessing the default value of this field.
        """
        return self.get("/DV")

    @property
    def additionalActions(self):
        """
        Read-only property accessing the additional actions dictionary.
        This dictionary defines the field's behavior in response to trigger events.
        See Section 8.5.2 of the PDF 1.7 reference.
        """
        return self.get("/AA")

1022 

1023 

class Destination(TreeObject):
    """
    A class representing a destination within a PDF file.
    See section 8.2.1 of the PDF 1.6 reference.

    :param str title: Title of this destination.
    :param int page: Page number of this destination.
    :param str typ: How the destination is displayed.
    :param args: Additional arguments may be necessary depending on the type.
    :raises PdfReadError: If destination type is invalid.

    Valid ``typ`` arguments (see PDF spec for details):
         /Fit       No additional arguments
         /XYZ       [left] [top] [zoomFactor]
         /FitH      [top]
         /FitV      [left]
         /FitR      [left] [bottom] [right] [top]
         /FitB      No additional arguments
         /FitBH     [top]
         /FitBV     [left]
    """
    def __init__(self, title, page, typ, *args):
        DictionaryObject.__init__(self)
        self[NameObject("/Title")] = title
        self[NameObject("/Page")] = page
        self[NameObject("/Type")] = typ

        # from table 8.2 of the PDF 1.7 reference.
        # ``args`` is unpacked into locals first, so a wrong argument count
        # fails before any of the coordinate entries is stored.
        if typ == "/XYZ":
            left, top, zoom = args
            self[NameObject("/Left")] = left
            self[NameObject("/Top")] = top
            self[NameObject("/Zoom")] = zoom
        elif typ == "/FitR":
            left, bottom, right, top = args
            self[NameObject("/Left")] = left
            self[NameObject("/Bottom")] = bottom
            self[NameObject("/Right")] = right
            self[NameObject("/Top")] = top
        elif typ in ("/FitH", "/FitBH"):
            (top,) = args
            self[NameObject("/Top")] = top
        elif typ in ("/FitV", "/FitBV"):
            (left,) = args
            self[NameObject("/Left")] = left
        elif typ in ("/Fit", "/FitB"):
            # These types take no coordinates; any extra args are ignored.
            pass
        else:
            raise utils.PdfReadError("Unknown Destination Type: %r" % typ)

    def getDestArray(self):
        """
        Build the destination array: the raw page entry, the type, and then
        whichever of left/bottom/right/top/zoom are present, in that order.
        """
        elements = [self.raw_get('/Page'), self['/Type']]
        for key in ('/Left', '/Bottom', '/Right', '/Top', '/Zoom'):
            if key in self:
                elements.append(self[key])
        return ArrayObject(elements)

    def writeToStream(self, stream, encryption_key):
        """
        Write this destination to ``stream`` as a dictionary holding a
        ``/D`` destination array and a ``/S /GoTo`` entry.
        """
        stream.write(b_("<<\n"))
        NameObject('/D').writeToStream(stream, encryption_key)
        stream.write(b_(" "))
        self.getDestArray().writeToStream(stream, encryption_key)

        NameObject("/S").writeToStream(stream, encryption_key)
        stream.write(b_(" "))
        NameObject("/GoTo").writeToStream(stream, encryption_key)

        stream.write(b_("\n"))
        stream.write(b_(">>"))

    @property
    def title(self):
        """
        Read-only property accessing the destination title.

        :rtype: str
        """
        return self.get("/Title")

    @property
    def page(self):
        """
        Read-only property accessing the destination page number.

        :rtype: int
        """
        return self.get("/Page")

    @property
    def typ(self):
        """
        Read-only property accessing the destination type.

        :rtype: str
        """
        return self.get("/Type")

    @property
    def zoom(self):
        """
        Read-only property accessing the zoom factor.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Zoom", None)

    @property
    def left(self):
        """
        Read-only property accessing the left horizontal coordinate.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Left", None)

    @property
    def right(self):
        """
        Read-only property accessing the right horizontal coordinate.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Right", None)

    @property
    def top(self):
        """
        Read-only property accessing the top vertical coordinate.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Top", None)

    @property
    def bottom(self):
        """
        Read-only property accessing the bottom vertical coordinate.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Bottom", None)

1142 

1143 

class Bookmark(Destination):
    """
    A :class:`Destination` that is serialised as an outline-style entry:
    its title and tree links are written out, followed by a ``/Dest`` array.
    """
    def writeToStream(self, stream, encryption_key):
        """
        Write the entries this bookmark has among /Title, /Parent, /First,
        /Last, /Next and /Prev (one per line, using the raw stored values),
        then the ``/Dest`` destination array.
        """
        stream.write(b_("<<\n"))
        for name in ('/Title', '/Parent', '/First', '/Last', '/Next', '/Prev'):
            if name not in self:
                continue
            key = NameObject(name)
            key.writeToStream(stream, encryption_key)
            stream.write(b_(" "))
            self.raw_get(key).writeToStream(stream, encryption_key)
            stream.write(b_("\n"))
        NameObject('/Dest').writeToStream(stream, encryption_key)
        stream.write(b_(" "))
        self.getDestArray().writeToStream(stream, encryption_key)
        stream.write(b_("\n"))
        stream.write(b_(">>"))

1160 

1161 

def encode_pdfdocencoding(unicode_string):
    """
    Encode a text string using the PDFDocEncoding translation table.

    Each character is looked up in ``_pdfDocEncoding_rev`` and its code is
    converted with ``b_(chr(code))``; the pieces are concatenated into the
    result.

    :param unicode_string: the text to encode.
    :return: the concatenation of the encoded pieces (same type as
        ``b_('')``).
    :raises UnicodeEncodeError: if a character has no entry in the
        translation table.
    """
    pieces = []
    for c in unicode_string:
        try:
            pieces.append(b_(chr(_pdfDocEncoding_rev[c])))
        except KeyError:
            raise UnicodeEncodeError("pdfdocencoding", c, -1, -1,
                    "does not exist in translation table")
    # Join once at the end: growing a byte string with ``+=`` copies the
    # whole buffer on every iteration, which is quadratic for long input.
    return b_('').join(pieces)

1171 

1172 

def decode_pdfdocencoding(byte_array):
    """
    Decode a byte sequence using the PDFDocEncoding translation table.

    :param byte_array: iterable of bytes; each item is converted to a table
        index with ``ord_``.
    :return: the decoded text.
    :raises UnicodeDecodeError: if a byte maps to the table's "undefined"
        marker (U+0000).
    """
    undefined = u_('\u0000')
    decoded = []
    for b in byte_array:
        c = _pdfDocEncoding[ord_(b)]
        if c == undefined:
            raise UnicodeDecodeError("pdfdocencoding", utils.barray(b), -1, -1,
                    "does not exist in translation table")
        decoded.append(c)
    return u_('').join(decoded)

1182 

# PDFDocEncoding translation table: index = byte value, entry = the
# corresponding character.  U+0000 marks a byte with no defined character
# (decode_pdfdocencoding rejects it and it is left out of the reverse map).
#
# Codes 0x09, 0x0A and 0x0D are TAB, LINE FEED and CARRIAGE RETURN in
# PDFDocEncoding, so they are mapped here; leaving them undefined made any
# text containing a tab or line break impossible to encode or decode.
_pdfDocEncoding = (
  u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'),
  u_('\u0000'), u_('\u0009'), u_('\u000a'), u_('\u0000'), u_('\u0000'), u_('\u000d'), u_('\u0000'), u_('\u0000'),
  u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'),
  u_('\u02d8'), u_('\u02c7'), u_('\u02c6'), u_('\u02d9'), u_('\u02dd'), u_('\u02db'), u_('\u02da'), u_('\u02dc'),
  u_('\u0020'), u_('\u0021'), u_('\u0022'), u_('\u0023'), u_('\u0024'), u_('\u0025'), u_('\u0026'), u_('\u0027'),
  u_('\u0028'), u_('\u0029'), u_('\u002a'), u_('\u002b'), u_('\u002c'), u_('\u002d'), u_('\u002e'), u_('\u002f'),
  u_('\u0030'), u_('\u0031'), u_('\u0032'), u_('\u0033'), u_('\u0034'), u_('\u0035'), u_('\u0036'), u_('\u0037'),
  u_('\u0038'), u_('\u0039'), u_('\u003a'), u_('\u003b'), u_('\u003c'), u_('\u003d'), u_('\u003e'), u_('\u003f'),
  u_('\u0040'), u_('\u0041'), u_('\u0042'), u_('\u0043'), u_('\u0044'), u_('\u0045'), u_('\u0046'), u_('\u0047'),
  u_('\u0048'), u_('\u0049'), u_('\u004a'), u_('\u004b'), u_('\u004c'), u_('\u004d'), u_('\u004e'), u_('\u004f'),
  u_('\u0050'), u_('\u0051'), u_('\u0052'), u_('\u0053'), u_('\u0054'), u_('\u0055'), u_('\u0056'), u_('\u0057'),
  u_('\u0058'), u_('\u0059'), u_('\u005a'), u_('\u005b'), u_('\u005c'), u_('\u005d'), u_('\u005e'), u_('\u005f'),
  u_('\u0060'), u_('\u0061'), u_('\u0062'), u_('\u0063'), u_('\u0064'), u_('\u0065'), u_('\u0066'), u_('\u0067'),
  u_('\u0068'), u_('\u0069'), u_('\u006a'), u_('\u006b'), u_('\u006c'), u_('\u006d'), u_('\u006e'), u_('\u006f'),
  u_('\u0070'), u_('\u0071'), u_('\u0072'), u_('\u0073'), u_('\u0074'), u_('\u0075'), u_('\u0076'), u_('\u0077'),
  u_('\u0078'), u_('\u0079'), u_('\u007a'), u_('\u007b'), u_('\u007c'), u_('\u007d'), u_('\u007e'), u_('\u0000'),
  u_('\u2022'), u_('\u2020'), u_('\u2021'), u_('\u2026'), u_('\u2014'), u_('\u2013'), u_('\u0192'), u_('\u2044'),
  u_('\u2039'), u_('\u203a'), u_('\u2212'), u_('\u2030'), u_('\u201e'), u_('\u201c'), u_('\u201d'), u_('\u2018'),
  u_('\u2019'), u_('\u201a'), u_('\u2122'), u_('\ufb01'), u_('\ufb02'), u_('\u0141'), u_('\u0152'), u_('\u0160'),
  u_('\u0178'), u_('\u017d'), u_('\u0131'), u_('\u0142'), u_('\u0153'), u_('\u0161'), u_('\u017e'), u_('\u0000'),
  u_('\u20ac'), u_('\u00a1'), u_('\u00a2'), u_('\u00a3'), u_('\u00a4'), u_('\u00a5'), u_('\u00a6'), u_('\u00a7'),
  u_('\u00a8'), u_('\u00a9'), u_('\u00aa'), u_('\u00ab'), u_('\u00ac'), u_('\u0000'), u_('\u00ae'), u_('\u00af'),
  u_('\u00b0'), u_('\u00b1'), u_('\u00b2'), u_('\u00b3'), u_('\u00b4'), u_('\u00b5'), u_('\u00b6'), u_('\u00b7'),
  u_('\u00b8'), u_('\u00b9'), u_('\u00ba'), u_('\u00bb'), u_('\u00bc'), u_('\u00bd'), u_('\u00be'), u_('\u00bf'),
  u_('\u00c0'), u_('\u00c1'), u_('\u00c2'), u_('\u00c3'), u_('\u00c4'), u_('\u00c5'), u_('\u00c6'), u_('\u00c7'),
  u_('\u00c8'), u_('\u00c9'), u_('\u00ca'), u_('\u00cb'), u_('\u00cc'), u_('\u00cd'), u_('\u00ce'), u_('\u00cf'),
  u_('\u00d0'), u_('\u00d1'), u_('\u00d2'), u_('\u00d3'), u_('\u00d4'), u_('\u00d5'), u_('\u00d6'), u_('\u00d7'),
  u_('\u00d8'), u_('\u00d9'), u_('\u00da'), u_('\u00db'), u_('\u00dc'), u_('\u00dd'), u_('\u00de'), u_('\u00df'),
  u_('\u00e0'), u_('\u00e1'), u_('\u00e2'), u_('\u00e3'), u_('\u00e4'), u_('\u00e5'), u_('\u00e6'), u_('\u00e7'),
  u_('\u00e8'), u_('\u00e9'), u_('\u00ea'), u_('\u00eb'), u_('\u00ec'), u_('\u00ed'), u_('\u00ee'), u_('\u00ef'),
  u_('\u00f0'), u_('\u00f1'), u_('\u00f2'), u_('\u00f3'), u_('\u00f4'), u_('\u00f5'), u_('\u00f6'), u_('\u00f7'),
  u_('\u00f8'), u_('\u00f9'), u_('\u00fa'), u_('\u00fb'), u_('\u00fc'), u_('\u00fd'), u_('\u00fe'), u_('\u00ff')
)

# One entry per possible byte value.
assert len(_pdfDocEncoding) == 256

# Reverse map (character -> byte value) used by encode_pdfdocencoding.
# Undefined slots are skipped, and every defined character must be unique
# so that encoding is unambiguous.
_pdfDocEncoding_rev = {}
for i, char in enumerate(_pdfDocEncoding):
    if char == u_("\u0000"):
        continue
    assert char not in _pdfDocEncoding_rev
    _pdfDocEncoding_rev[char] = i