Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# vim: sw=4:expandtab:foldmethod=marker 

2# 

3# Copyright (c) 2006, Mathieu Fenniak 

4# All rights reserved. 

5# 

6# Redistribution and use in source and binary forms, with or without 

7# modification, are permitted provided that the following conditions are 

8# met: 

9# 

10# * Redistributions of source code must retain the above copyright notice, 

11# this list of conditions and the following disclaimer. 

12# * Redistributions in binary form must reproduce the above copyright notice, 

13# this list of conditions and the following disclaimer in the documentation 

14# and/or other materials provided with the distribution. 

15# * The name of the author may not be used to endorse or promote products 

16# derived from this software without specific prior written permission. 

17# 

18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 

19# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 

20# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 

21# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 

22# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 

23# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 

24# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 

25# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 

26# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 

27# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 

28# POSSIBILITY OF SUCH DAMAGE. 

29 

30 

31""" 

32Implementation of stream filters for PDF. 

33""" 

34__author__ = "Mathieu Fenniak" 

35__author_email__ = "biziqe@mathieu.fenniak.net" 

36 

37from .utils import PdfReadError, ord_, chr_ 

38from sys import version_info 

39if version_info < ( 3, 0 ): 

40 from cStringIO import StringIO 

41else: 

42 from io import StringIO 

43 import struct 

44 

# Provide module-level ``decompress``/``compress`` helpers.  zlib is used when
# it can be imported; otherwise the .NET System.IO.Compression classes are
# used instead (IronPython only).
try:
    import zlib

    def decompress(data):
        """Return ``data`` inflated with ``zlib.decompress``."""
        return zlib.decompress(data)

    def compress(data):
        """Return ``data`` deflated with ``zlib.compress``."""
        return zlib.compress(data)

except ImportError:
    # Unable to import zlib.  Attempt to use the System.IO.Compression
    # library from the .NET framework. (IronPython only)
    import System
    from System import IO, Collections, Array

    def _string_to_bytearr(buf):
        """Copy ``buf`` into a new .NET ``System.Byte`` array, one item per character."""
        retval = Array.CreateInstance(System.Byte, len(buf))
        for i in range(len(buf)):
            retval[i] = ord(buf[i])
        return retval

    def _bytearr_to_string(bytes):
        """Build a string holding ``chr()`` of every item of a .NET byte array."""
        retval = ""
        for i in range(bytes.Length):
            retval += chr(bytes[i])
        return retval

    def _read_bytes(stream):
        """Read ``stream`` to exhaustion in 2048-byte chunks and return the bytes read."""
        ms = IO.MemoryStream()
        buf = Array.CreateInstance(System.Byte, 2048)
        while True:
            bytes = stream.Read(buf, 0, buf.Length)
            # a read of zero bytes ends the loop
            if bytes == 0:
                break
            else:
                ms.Write(buf, 0, bytes)
        retval = ms.ToArray()
        ms.Close()
        return retval

    def decompress(data):
        """Inflate ``data`` through a .NET ``DeflateStream`` and return a string.

        NOTE(review): presumably DeflateStream expects raw deflate data with
        no zlib header/checksum, unlike ``zlib.decompress`` -- confirm on
        IronPython before relying on this fallback.
        """
        bytes = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        ms.Write(bytes, 0, bytes.Length)
        ms.Position = 0  # fseek 0
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
        bytes = _read_bytes(gz)
        retval = _bytearr_to_string(bytes)
        gz.Close()
        return retval

    def compress(data):
        """Deflate ``data`` through a .NET ``DeflateStream`` and return a string.

        NOTE(review): the output is presumably raw deflate without the zlib
        wrapper that ``zlib.compress`` produces -- confirm on IronPython.
        """
        bytes = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        # NOTE(review): the third argument is presumably "leaveOpen", so that
        # ``ms`` can still be read after ``gz.Close()`` -- confirm.
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
        gz.Write(bytes, 0, bytes.Length)
        gz.Close()
        ms.Position = 0 # fseek 0
        bytes = ms.ToArray()
        retval = _bytearr_to_string(bytes)
        ms.Close()
        return retval

107 

108 

class FlateDecode(object):
    """Codec for the PDF ``/FlateDecode`` (``/Fl``) stream filter.

    Decoding inflates the data and then, when the decode parameters request
    it, reverses PNG row prediction.
    """

    @staticmethod
    def _paeth(left, up, up_left):
        """Return whichever neighbour is closest to ``left + up - up_left``.

        Ties are resolved in the order left, up, upper-left, as the PNG
        Paeth predictor requires.
        """
        estimate = left + up - up_left
        dist_left = abs(estimate - left)
        dist_up = abs(estimate - up)
        dist_up_left = abs(estimate - up_left)
        if dist_left <= dist_up and dist_left <= dist_up_left:
            return left
        if dist_up <= dist_up_left:
            return up
        return up_left

    @staticmethod
    def decode(data, decodeParms):
        """Inflate ``data`` and undo the predictor named in ``decodeParms``.

        :param data: the compressed stream data.
        :param decodeParms: the stream's decode parameters; anything falsy,
            or any object without a ``get`` method, means "no predictor".
        :return: the decompressed data unchanged when no predictor applies;
            otherwise a text string holding one character per decoded byte
            (kept that way for compatibility with existing callers).
        :raises PdfReadError: for a non-PNG predictor, an unknown PNG filter
            byte, or data that is not a whole number of rows.
        """
        data = decompress(data)
        predictor = 1
        if decodeParms:
            try:
                predictor = decodeParms.get("/Predictor", 1)
            except AttributeError:
                pass  # usually an array with a null object was read

        # predictor 1 == no predictor
        if predictor == 1:
            return data

        # only PNG prediction (10..15) is implemented
        if not 10 <= predictor <= 15:
            raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)

        # Subscript access (not .get) is kept deliberately so a mapping that
        # customises __getitem__ still gets to process the value.  A missing
        # key falls back to 1, the PDF default for /Columns.
        try:
            columns = decodeParms["/Columns"]
        except KeyError:
            columns = 1

        # PNG prediction can vary from row to row; every row starts with a
        # filter-type byte followed by `columns` data bytes.
        rowlength = columns + 1
        if len(data) % rowlength != 0:
            raise PdfReadError(
                "FlateDecode data length %d is not a multiple of the row "
                "length %d" % (len(data), rowlength))

        # The "left" neighbour below is always the immediately preceding
        # byte, i.e. one byte per pixel is assumed; /Colors and
        # /BitsPerComponent are not consulted.
        decoded = []
        prev_rowdata = (0,) * rowlength
        for start in range(0, len(data), rowlength):
            # items are ints for bytes-like data and 1-char strings otherwise
            rowdata = [b if isinstance(b, int) else ord(b)
                       for b in data[start:start + rowlength]]
            filterByte = rowdata[0]
            if filterByte == 0:
                # None: row is stored verbatim
                pass
            elif filterByte == 1:
                # Sub: the first data byte has no left neighbour
                for i in range(2, rowlength):
                    rowdata[i] = (rowdata[i] + rowdata[i - 1]) % 256
            elif filterByte == 2:
                # Up
                for i in range(1, rowlength):
                    rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
            elif filterByte == 3:
                # Average of left and up, rounded down
                for i in range(1, rowlength):
                    left = rowdata[i - 1] if i > 1 else 0
                    rowdata[i] = (rowdata[i] + (left + prev_rowdata[i]) // 2) % 256
            elif filterByte == 4:
                # Paeth; index 0 holds the filter byte, so it must never be
                # used as a left / upper-left neighbour
                for i in range(1, rowlength):
                    left = rowdata[i - 1] if i > 1 else 0
                    up_left = prev_rowdata[i - 1] if i > 1 else 0
                    predicted = FlateDecode._paeth(left, prev_rowdata[i], up_left)
                    rowdata[i] = (rowdata[i] + predicted) % 256
            else:
                # unsupported PNG filter
                raise PdfReadError("Unsupported PNG filter %r" % filterByte)
            prev_rowdata = rowdata
            decoded.append(''.join([chr(x) for x in rowdata[1:]]))
        return ''.join(decoded)

    @staticmethod
    def encode(data):
        """Return ``data`` compressed with the module's ``compress`` helper."""
        return compress(data)

155 

156 

class ASCIIHexDecode(object):
    """Decoder for the PDF ``/ASCIIHexDecode`` (``/AHx``) stream filter."""

    def decode(data, decodeParms=None):
        """Decode hexadecimal text up to the ``>`` end-of-data marker.

        Whitespace between digits is ignored and anything after ``>`` is
        not read.  If the data ends without ``>``, the end of the input is
        treated as end-of-data.  An odd number of digits is completed with
        a trailing ``0``, as the PDF specification requires.

        :param data: the encoded data, as text or as bytes/bytearray.
        :param decodeParms: accepted for interface symmetry; not used.
        :return: the decoded data; bytes when ``data`` was bytes-like,
            otherwise a string with one character per decoded byte.
        :raises ValueError: if a character that is neither whitespace nor a
            hexadecimal digit is encountered.
        """
        as_bytes = isinstance(data, (bytes, bytearray))
        # Work on text so each item is a one-character string regardless of
        # the input type (bytes items are ints on Python 3).
        text = data.decode("latin-1") if as_bytes else data
        decoded = bytearray()
        pair = ""
        for c in text:
            if c == ">":
                break
            if c.isspace():
                continue
            pair += c
            if len(pair) == 2:
                decoded.append(int(pair, 16))
                pair = ""
        if pair:
            # odd digit count: behave as if a 0 followed the last digit
            decoded.append(int(pair + "0", 16))
        if as_bytes:
            return bytes(decoded)
        return "".join([chr(b) for b in decoded])
    decode = staticmethod(decode)

177 

178 

class LZWDecode(object):
    """Decoder for the PDF ``/LZWDecode`` (``/LZW``) stream filter.

    Taken from:
    http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
    """

    class decoder(object):
        """Bit reader plus code table for decoding one LZW stream."""

        def __init__(self, data):
            """Prepare to decode ``data`` (text or bytes-like)."""
            self.STOP = 257
            self.CLEARDICT = 256
            self.data = data
            self.bytepos = 0
            self.bitpos = 0
            # 4096 slots: the largest table addressable with 12-bit codes
            self.dict = [""] * 4096
            for i in range(256):
                self.dict[i] = chr(i)
            self.resetDict()

        def resetDict(self):
            """Forget all learned table entries and return to 9-bit codes."""
            # 0..255 are literals, 256 and 257 are the control codes
            self.dictlen = 258
            self.bitspercode = 9

        def nextCode(self):
            """Return the next code of ``bitspercode`` bits, or -1 at end of data.

            Bits are consumed most-significant first.
            """
            fillbits = self.bitspercode
            value = 0
            while fillbits > 0:
                if self.bytepos >= len(self.data):
                    return -1
                nextbits = self.data[self.bytepos]
                if not isinstance(nextbits, int):
                    # text input yields 1-char strings, bytes yield ints
                    nextbits = ord(nextbits)
                bitsfromhere = 8 - self.bitpos
                if bitsfromhere > fillbits:
                    bitsfromhere = fillbits
                value |= (((nextbits >> (8 - self.bitpos - bitsfromhere)) &
                           (0xff >> (8 - bitsfromhere))) <<
                          (fillbits - bitsfromhere))
                fillbits -= bitsfromhere
                self.bitpos += bitsfromhere
                if self.bitpos >= 8:
                    self.bitpos = 0
                    self.bytepos = self.bytepos + 1
            return value

        def decode(self):
            """Decode the whole stream and return the result.

            algorithm derived from:
            http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
            and the PDFReference

            :return: bytes when the input was bytes-like, otherwise a string
                with one character per decoded byte.
            :raises PdfReadError: if the data ends before the stop code, a
                code refers to a table entry that cannot exist yet, or the
                code table overflows.
            """
            cW = self.CLEARDICT
            pieces = []
            while True:
                pW = cW
                cW = self.nextCode()
                if cW == -1:
                    raise PdfReadError("Missed the stop code in LZWDecode!")
                if cW == self.STOP:
                    break
                elif cW == self.CLEARDICT:
                    self.resetDict()
                elif pW == self.CLEARDICT:
                    # first code after a reset: only literals exist
                    if cW >= self.dictlen:
                        raise PdfReadError("Invalid code %d in LZWDecode data" % cW)
                    pieces.append(self.dict[cW])
                else:
                    if cW < self.dictlen:
                        entry = self.dict[cW]
                        pieces.append(entry)
                        p = self.dict[pW] + entry[0]
                    elif cW == self.dictlen:
                        # code for the entry being defined right now
                        p = self.dict[pW] + self.dict[pW][0]
                        pieces.append(p)
                    else:
                        raise PdfReadError("Invalid code %d in LZWDecode data" % cW)
                    if self.dictlen >= len(self.dict):
                        raise PdfReadError("LZWDecode code table overflow")
                    self.dict[self.dictlen] = p
                    self.dictlen += 1
                    # the code width grows one entry early
                    if (self.dictlen >= (1 << self.bitspercode) - 1 and
                            self.bitspercode < 12):
                        self.bitspercode += 1
            result = "".join(pieces)
            if isinstance(self.data, (bytes, bytearray)) and not isinstance(result, bytes):
                # mirror the input type: bytes in, bytes out
                result = result.encode("latin-1")
            return result

    @staticmethod
    def decode(data, decodeParams=None):
        """Return ``data`` LZW-decoded; ``decodeParams`` is not used."""
        return LZWDecode.decoder(data).decode()

256 

257 

class ASCII85Decode(object):
    """Decoder for the PDF ``/ASCII85Decode`` (``/A85``) stream filter."""

    def decode(data, decodeParms=None):
        """Decode ASCII base-85 data up to the ``~>`` end-of-data marker.

        An optional leading ``<~`` is skipped and ``z`` expands to four
        zero bytes.

        :param data: the encoded data (text, or bytes on Python 3).
        :param decodeParms: accepted for interface symmetry; not used.
        :return: the decoded data (``str`` on Python 2, ``bytes`` on
            Python 3).
        :raises PdfReadError: if ``z`` appears inside a group or a group
            encodes a value above 2**32 - 1; on Python 2 also for a
            character outside ``!``..``u`` or a one-character final group.
        """
        if version_info < (3, 0):
            retval = ""
            group = []
            x = 0
            hitEod = False
            # remove all whitespace from data
            data = [y for y in data if not (y in ' \n\r\t')]
            while not hitEod:
                c = data[x]
                if len(retval) == 0 and c == "<" and data[x+1] == "~":
                    x += 2
                    continue
                elif c == 'z':
                    if len(group) != 0:
                        raise PdfReadError("'z' inside an ASCII85Decode group")
                    retval += '\x00\x00\x00\x00'
                    x += 1
                    continue
                elif c == "~" and data[x+1] == ">":
                    if len(group) != 0:
                        # cannot have a final group of just 1 char
                        if len(group) <= 1:
                            raise PdfReadError("Truncated final ASCII85Decode group")
                        cnt = len(group) - 1
                        group += [85, 85, 85]
                        # doubles as the loop-exit flag and the number of
                        # bytes to keep from the padded final group
                        hitEod = cnt
                    else:
                        break
                else:
                    c = ord(c) - 33
                    if c < 0 or c >= 85:
                        raise PdfReadError("Invalid character in ASCII85Decode data")
                    group += [c]
                if len(group) >= 5:
                    b = group[0] * (85**4) + \
                        group[1] * (85**3) + \
                        group[2] * (85**2) + \
                        group[3] * 85 + \
                        group[4]
                    # 2**32 - 1 itself is valid (four 0xFF bytes)
                    if b > 0xFFFFFFFF:
                        raise PdfReadError("ASCII85Decode group out of range")
                    c4 = chr((b >> 0) % 256)
                    c3 = chr((b >> 8) % 256)
                    c2 = chr((b >> 16) % 256)
                    c1 = chr(b >> 24)
                    retval += (c1 + c2 + c3 + c4)
                    if hitEod:
                        retval = retval[:-4+hitEod]
                    group = []
                x += 1
            return retval
        else:
            if isinstance(data, str):
                data = data.encode('ascii')
            # '~' is not a base-85 digit, so a leading "<~" can only be the
            # optional start marker, never data; skip it as the Python 2
            # branch does.
            data = data.lstrip()
            if data.startswith(b'<~'):
                data = data[2:]
            n = b = 0
            out = bytearray()
            for c in data:
                if ord('!') <= c and c <= ord('u'):
                    n += 1
                    b = b*85+(c-33)
                    if n == 5:
                        if b > 0xFFFFFFFF:
                            raise PdfReadError("ASCII85Decode group out of range")
                        out += struct.pack(b'>L', b)
                        n = b = 0
                elif c == ord('z'):
                    if n != 0:
                        raise PdfReadError("'z' inside an ASCII85Decode group")
                    out += b'\0\0\0\0'
                elif c == ord('~'):
                    if n:
                        # pad the short final group with the highest digit
                        # and keep only the bytes it really encodes
                        for _ in range(5-n):
                            b = b*85+84
                        if b > 0xFFFFFFFF:
                            raise PdfReadError("ASCII85Decode group out of range")
                        out += struct.pack(b'>L', b)[:n-1]
                    break
            return bytes(out)
    decode = staticmethod(decode)

333 

334 

def decodeStreamData(stream):
    """Return ``stream._data`` after applying every filter in ``/Filter``.

    Filters are applied in the order they are listed.  Empty data is
    returned untouched without looking at the filters.

    :param stream: a mapping-like stream object exposing ``get`` and a
        ``_data`` attribute.
    :return: the decoded data.
    :raises NotImplementedError: for a filter this module does not
        implement, or a ``/Crypt`` filter naming anything but ``/Identity``.
    """
    from .generic import NameObject
    filters = stream.get("/Filter", ())
    if len(filters) and not isinstance(filters[0], NameObject):
        # we have a single filter instance
        filters = (filters,)
    data = stream._data
    # If there is not data to decode we should not try to decode the data.
    if not data:
        return data
    for filterType in filters:
        if filterType == "/FlateDecode" or filterType == "/Fl":
            data = FlateDecode.decode(data, stream.get("/DecodeParms"))
        elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
            data = ASCIIHexDecode.decode(data)
        elif filterType == "/LZWDecode" or filterType == "/LZW":
            data = LZWDecode.decode(data, stream.get("/DecodeParms"))
        elif filterType == "/ASCII85Decode" or filterType == "/A85":
            data = ASCII85Decode.decode(data)
        elif filterType == "/Crypt":
            # The key is "/DecodeParms", as used for the other filters above;
            # the former "/DecodeParams" spelling never matched anything, so
            # the check below was never applied.
            decodeParams = stream.get("/DecodeParms", {})
            try:
                cryptName = decodeParams.get("/Name")
            except AttributeError:
                # not a dictionary (e.g. an array or a null object)
                cryptName = None
            # A missing /Name defaults to the Identity crypt filter, which
            # leaves the data unchanged; only other named filters need work
            # this function cannot do.
            if cryptName is not None and cryptName != "/Identity":
                raise NotImplementedError(
                    "/Crypt filter with /Name %s not supported yet" % cryptName)
        else:
            # unsupported filter
            raise NotImplementedError("unsupported filter %s" % filterType)
    return data