Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pygments/filters/__init__.py : 24%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3 pygments.filters
4 ~~~~~~~~~~~~~~~~
6 Module containing filter lookup functions and default
7 filters.
9 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
10 :license: BSD, see LICENSE for details.
11"""
13import re
15from pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
16 string_to_tokentype
17from pygments.filter import Filter
18from pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
19 get_choice_opt, ClassNotFound, OptionError
20from pygments.plugin import find_plugin_filters
def find_filter_class(filtername):
    """Resolve `filtername` to a filter class.

    The builtin ``FILTERS`` table is consulted first.  Otherwise the
    ``(name, class)`` pairs produced by ``find_plugin_filters()`` are
    scanned in order and the first class with a matching name wins.

    Return ``None`` if nothing matches.
    """
    if filtername in FILTERS:
        return FILTERS[filtername]
    plugin_matches = (
        plugin_cls
        for plugin_name, plugin_cls in find_plugin_filters()
        if plugin_name == filtername
    )
    # ``next`` stops at the first hit, just like an early ``return`` would.
    return next(plugin_matches, None)
def get_filter_by_name(filtername, **options):
    """Instantiate the filter registered as `filtername`.

    All keyword `options` are forwarded to the filter class when it is
    instantiated.

    Raise `ClassNotFound` if `find_filter_class` does not return a class
    for that name.
    """
    filter_cls = find_filter_class(filtername)
    if not filter_cls:
        raise ClassNotFound('filter %r not found' % filtername)
    return filter_cls(**options)
def get_all_filters():
    """Yield the name of every known filter.

    The keys of the builtin ``FILTERS`` table come first, followed by the
    names reported by ``find_plugin_filters()``.
    """
    for builtin_name in FILTERS:
        yield builtin_name
    # The plugin lookup is only started once the builtin names are exhausted.
    yield from (plugin_name for plugin_name, _ in find_plugin_filters())
def _replace_special(ttype, value, regex, specialttype,
                     replacefunc=lambda x: x):
    """Split `value` around the matches of `regex` and yield token pairs.

    Text between matches is yielded as ``(ttype, text)``.  Each match is
    yielded as ``(specialttype, replacefunc(matched_text))``.  Empty
    stretches between matches (and an empty tail) are not yielded at all.
    """
    cursor = 0  # end offset of the previous match
    for hit in regex.finditer(value):
        begin, stop = hit.span()
        if begin != cursor:
            yield ttype, value[cursor:begin]
        yield specialttype, replacefunc(hit.group())
        cursor = stop
    if cursor != len(value):
        yield ttype, value[cursor:]
class CodeTagFilter(Filter):
    """Highlight special code tags in comments and docstrings.

    Options accepted:

    `codetags` : list of strings
       A list of strings that are flagged as code tags.  The default is to
       highlight ``XXX``, ``TODO``, ``BUG`` and ``NOTE``.  Empty strings in
       the list are ignored; if no usable tag remains, nothing is
       highlighted.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        tags = get_list_opt(options, 'codetags',
                            ['XXX', 'TODO', 'BUG', 'NOTE'])
        alternatives = '|'.join(re.escape(tag) for tag in tags if tag)
        if alternatives:
            self.tag_re = re.compile(r'\b(%s)\b' % alternatives)
        else:
            # With no tags the pattern would degenerate to ``\b()\b``, which
            # matches the empty string at every word boundary and would chop
            # comments into pieces separated by empty Comment.Special tokens.
            # ``(?!)`` is a lookahead that can never succeed, so nothing is
            # ever flagged.
            self.tag_re = re.compile(r'(?!)')

    def filter(self, lexer, stream):
        """Yield the stream with code tags re-typed as ``Comment.Special``.

        Only ``String.Doc`` tokens and ``Comment`` tokens that are not
        ``Comment.Preproc`` are searched; every other token passes through
        unchanged.
        """
        regex = self.tag_re
        for ttype, value in stream:
            if ttype in String.Doc or \
               ttype in Comment and \
               ttype not in Comment.Preproc:
                yield from _replace_special(ttype, value, regex, Comment.Special)
            else:
                yield ttype, value
95class SymbolFilter(Filter):
96 """Convert mathematical symbols such as \\<longrightarrow> in Isabelle
97 or \\longrightarrow in LaTeX into Unicode characters.
99 This is mostly useful for HTML or console output when you want to
100 approximate the source rendering you'd see in an IDE.
102 Options accepted:
104 `lang` : string
105 The symbol language. Must be one of ``'isabelle'`` or
106 ``'latex'``. The default is ``'isabelle'``.
107 """
109 latex_symbols = {
110 '\\alpha' : '\U000003b1',
111 '\\beta' : '\U000003b2',
112 '\\gamma' : '\U000003b3',
113 '\\delta' : '\U000003b4',
114 '\\varepsilon' : '\U000003b5',
115 '\\zeta' : '\U000003b6',
116 '\\eta' : '\U000003b7',
117 '\\vartheta' : '\U000003b8',
118 '\\iota' : '\U000003b9',
119 '\\kappa' : '\U000003ba',
120 '\\lambda' : '\U000003bb',
121 '\\mu' : '\U000003bc',
122 '\\nu' : '\U000003bd',
123 '\\xi' : '\U000003be',
124 '\\pi' : '\U000003c0',
125 '\\varrho' : '\U000003c1',
126 '\\sigma' : '\U000003c3',
127 '\\tau' : '\U000003c4',
128 '\\upsilon' : '\U000003c5',
129 '\\varphi' : '\U000003c6',
130 '\\chi' : '\U000003c7',
131 '\\psi' : '\U000003c8',
132 '\\omega' : '\U000003c9',
133 '\\Gamma' : '\U00000393',
134 '\\Delta' : '\U00000394',
135 '\\Theta' : '\U00000398',
136 '\\Lambda' : '\U0000039b',
137 '\\Xi' : '\U0000039e',
138 '\\Pi' : '\U000003a0',
139 '\\Sigma' : '\U000003a3',
140 '\\Upsilon' : '\U000003a5',
141 '\\Phi' : '\U000003a6',
142 '\\Psi' : '\U000003a8',
143 '\\Omega' : '\U000003a9',
144 '\\leftarrow' : '\U00002190',
145 '\\longleftarrow' : '\U000027f5',
146 '\\rightarrow' : '\U00002192',
147 '\\longrightarrow' : '\U000027f6',
148 '\\Leftarrow' : '\U000021d0',
149 '\\Longleftarrow' : '\U000027f8',
150 '\\Rightarrow' : '\U000021d2',
151 '\\Longrightarrow' : '\U000027f9',
152 '\\leftrightarrow' : '\U00002194',
153 '\\longleftrightarrow' : '\U000027f7',
154 '\\Leftrightarrow' : '\U000021d4',
155 '\\Longleftrightarrow' : '\U000027fa',
156 '\\mapsto' : '\U000021a6',
157 '\\longmapsto' : '\U000027fc',
158 '\\relbar' : '\U00002500',
159 '\\Relbar' : '\U00002550',
160 '\\hookleftarrow' : '\U000021a9',
161 '\\hookrightarrow' : '\U000021aa',
162 '\\leftharpoondown' : '\U000021bd',
163 '\\rightharpoondown' : '\U000021c1',
164 '\\leftharpoonup' : '\U000021bc',
165 '\\rightharpoonup' : '\U000021c0',
166 '\\rightleftharpoons' : '\U000021cc',
167 '\\leadsto' : '\U0000219d',
168 '\\downharpoonleft' : '\U000021c3',
169 '\\downharpoonright' : '\U000021c2',
170 '\\upharpoonleft' : '\U000021bf',
171 '\\upharpoonright' : '\U000021be',
172 '\\restriction' : '\U000021be',
173 '\\uparrow' : '\U00002191',
174 '\\Uparrow' : '\U000021d1',
175 '\\downarrow' : '\U00002193',
176 '\\Downarrow' : '\U000021d3',
177 '\\updownarrow' : '\U00002195',
178 '\\Updownarrow' : '\U000021d5',
179 '\\langle' : '\U000027e8',
180 '\\rangle' : '\U000027e9',
181 '\\lceil' : '\U00002308',
182 '\\rceil' : '\U00002309',
183 '\\lfloor' : '\U0000230a',
184 '\\rfloor' : '\U0000230b',
185 '\\flqq' : '\U000000ab',
186 '\\frqq' : '\U000000bb',
187 '\\bot' : '\U000022a5',
188 '\\top' : '\U000022a4',
189 '\\wedge' : '\U00002227',
190 '\\bigwedge' : '\U000022c0',
191 '\\vee' : '\U00002228',
192 '\\bigvee' : '\U000022c1',
193 '\\forall' : '\U00002200',
194 '\\exists' : '\U00002203',
195 '\\nexists' : '\U00002204',
196 '\\neg' : '\U000000ac',
197 '\\Box' : '\U000025a1',
198 '\\Diamond' : '\U000025c7',
199 '\\vdash' : '\U000022a2',
200 '\\models' : '\U000022a8',
201 '\\dashv' : '\U000022a3',
202 '\\surd' : '\U0000221a',
203 '\\le' : '\U00002264',
204 '\\ge' : '\U00002265',
205 '\\ll' : '\U0000226a',
206 '\\gg' : '\U0000226b',
207 '\\lesssim' : '\U00002272',
208 '\\gtrsim' : '\U00002273',
209 '\\lessapprox' : '\U00002a85',
210 '\\gtrapprox' : '\U00002a86',
211 '\\in' : '\U00002208',
212 '\\notin' : '\U00002209',
213 '\\subset' : '\U00002282',
214 '\\supset' : '\U00002283',
215 '\\subseteq' : '\U00002286',
216 '\\supseteq' : '\U00002287',
217 '\\sqsubset' : '\U0000228f',
218 '\\sqsupset' : '\U00002290',
219 '\\sqsubseteq' : '\U00002291',
220 '\\sqsupseteq' : '\U00002292',
221 '\\cap' : '\U00002229',
222 '\\bigcap' : '\U000022c2',
223 '\\cup' : '\U0000222a',
224 '\\bigcup' : '\U000022c3',
225 '\\sqcup' : '\U00002294',
226 '\\bigsqcup' : '\U00002a06',
227 '\\sqcap' : '\U00002293',
228 '\\Bigsqcap' : '\U00002a05',
229 '\\setminus' : '\U00002216',
230 '\\propto' : '\U0000221d',
231 '\\uplus' : '\U0000228e',
232 '\\bigplus' : '\U00002a04',
233 '\\sim' : '\U0000223c',
234 '\\doteq' : '\U00002250',
235 '\\simeq' : '\U00002243',
236 '\\approx' : '\U00002248',
237 '\\asymp' : '\U0000224d',
238 '\\cong' : '\U00002245',
239 '\\equiv' : '\U00002261',
240 '\\Join' : '\U000022c8',
241 '\\bowtie' : '\U00002a1d',
242 '\\prec' : '\U0000227a',
243 '\\succ' : '\U0000227b',
244 '\\preceq' : '\U0000227c',
245 '\\succeq' : '\U0000227d',
246 '\\parallel' : '\U00002225',
247 '\\mid' : '\U000000a6',
248 '\\pm' : '\U000000b1',
249 '\\mp' : '\U00002213',
250 '\\times' : '\U000000d7',
251 '\\div' : '\U000000f7',
252 '\\cdot' : '\U000022c5',
253 '\\star' : '\U000022c6',
254 '\\circ' : '\U00002218',
255 '\\dagger' : '\U00002020',
256 '\\ddagger' : '\U00002021',
257 '\\lhd' : '\U000022b2',
258 '\\rhd' : '\U000022b3',
259 '\\unlhd' : '\U000022b4',
260 '\\unrhd' : '\U000022b5',
261 '\\triangleleft' : '\U000025c3',
262 '\\triangleright' : '\U000025b9',
263 '\\triangle' : '\U000025b3',
264 '\\triangleq' : '\U0000225c',
265 '\\oplus' : '\U00002295',
266 '\\bigoplus' : '\U00002a01',
267 '\\otimes' : '\U00002297',
268 '\\bigotimes' : '\U00002a02',
269 '\\odot' : '\U00002299',
270 '\\bigodot' : '\U00002a00',
271 '\\ominus' : '\U00002296',
272 '\\oslash' : '\U00002298',
273 '\\dots' : '\U00002026',
274 '\\cdots' : '\U000022ef',
275 '\\sum' : '\U00002211',
276 '\\prod' : '\U0000220f',
277 '\\coprod' : '\U00002210',
278 '\\infty' : '\U0000221e',
279 '\\int' : '\U0000222b',
280 '\\oint' : '\U0000222e',
281 '\\clubsuit' : '\U00002663',
282 '\\diamondsuit' : '\U00002662',
283 '\\heartsuit' : '\U00002661',
284 '\\spadesuit' : '\U00002660',
285 '\\aleph' : '\U00002135',
286 '\\emptyset' : '\U00002205',
287 '\\nabla' : '\U00002207',
288 '\\partial' : '\U00002202',
289 '\\flat' : '\U0000266d',
290 '\\natural' : '\U0000266e',
291 '\\sharp' : '\U0000266f',
292 '\\angle' : '\U00002220',
293 '\\copyright' : '\U000000a9',
294 '\\textregistered' : '\U000000ae',
295 '\\textonequarter' : '\U000000bc',
296 '\\textonehalf' : '\U000000bd',
297 '\\textthreequarters' : '\U000000be',
298 '\\textordfeminine' : '\U000000aa',
299 '\\textordmasculine' : '\U000000ba',
300 '\\euro' : '\U000020ac',
301 '\\pounds' : '\U000000a3',
302 '\\yen' : '\U000000a5',
303 '\\textcent' : '\U000000a2',
304 '\\textcurrency' : '\U000000a4',
305 '\\textdegree' : '\U000000b0',
306 }
308 isabelle_symbols = {
309 '\\<zero>' : '\U0001d7ec',
310 '\\<one>' : '\U0001d7ed',
311 '\\<two>' : '\U0001d7ee',
312 '\\<three>' : '\U0001d7ef',
313 '\\<four>' : '\U0001d7f0',
314 '\\<five>' : '\U0001d7f1',
315 '\\<six>' : '\U0001d7f2',
316 '\\<seven>' : '\U0001d7f3',
317 '\\<eight>' : '\U0001d7f4',
318 '\\<nine>' : '\U0001d7f5',
319 '\\<A>' : '\U0001d49c',
320 '\\<B>' : '\U0000212c',
321 '\\<C>' : '\U0001d49e',
322 '\\<D>' : '\U0001d49f',
323 '\\<E>' : '\U00002130',
324 '\\<F>' : '\U00002131',
325 '\\<G>' : '\U0001d4a2',
326 '\\<H>' : '\U0000210b',
327 '\\<I>' : '\U00002110',
328 '\\<J>' : '\U0001d4a5',
329 '\\<K>' : '\U0001d4a6',
330 '\\<L>' : '\U00002112',
331 '\\<M>' : '\U00002133',
332 '\\<N>' : '\U0001d4a9',
333 '\\<O>' : '\U0001d4aa',
334 '\\<P>' : '\U0001d4ab',
335 '\\<Q>' : '\U0001d4ac',
336 '\\<R>' : '\U0000211b',
337 '\\<S>' : '\U0001d4ae',
338 '\\<T>' : '\U0001d4af',
339 '\\<U>' : '\U0001d4b0',
340 '\\<V>' : '\U0001d4b1',
341 '\\<W>' : '\U0001d4b2',
342 '\\<X>' : '\U0001d4b3',
343 '\\<Y>' : '\U0001d4b4',
344 '\\<Z>' : '\U0001d4b5',
345 '\\<a>' : '\U0001d5ba',
346 '\\<b>' : '\U0001d5bb',
347 '\\<c>' : '\U0001d5bc',
348 '\\<d>' : '\U0001d5bd',
349 '\\<e>' : '\U0001d5be',
350 '\\<f>' : '\U0001d5bf',
351 '\\<g>' : '\U0001d5c0',
352 '\\<h>' : '\U0001d5c1',
353 '\\<i>' : '\U0001d5c2',
354 '\\<j>' : '\U0001d5c3',
355 '\\<k>' : '\U0001d5c4',
356 '\\<l>' : '\U0001d5c5',
357 '\\<m>' : '\U0001d5c6',
358 '\\<n>' : '\U0001d5c7',
359 '\\<o>' : '\U0001d5c8',
360 '\\<p>' : '\U0001d5c9',
361 '\\<q>' : '\U0001d5ca',
362 '\\<r>' : '\U0001d5cb',
363 '\\<s>' : '\U0001d5cc',
364 '\\<t>' : '\U0001d5cd',
365 '\\<u>' : '\U0001d5ce',
366 '\\<v>' : '\U0001d5cf',
367 '\\<w>' : '\U0001d5d0',
368 '\\<x>' : '\U0001d5d1',
369 '\\<y>' : '\U0001d5d2',
370 '\\<z>' : '\U0001d5d3',
371 '\\<AA>' : '\U0001d504',
372 '\\<BB>' : '\U0001d505',
373 '\\<CC>' : '\U0000212d',
374 '\\<DD>' : '\U0001d507',
375 '\\<EE>' : '\U0001d508',
376 '\\<FF>' : '\U0001d509',
377 '\\<GG>' : '\U0001d50a',
378 '\\<HH>' : '\U0000210c',
379 '\\<II>' : '\U00002111',
380 '\\<JJ>' : '\U0001d50d',
381 '\\<KK>' : '\U0001d50e',
382 '\\<LL>' : '\U0001d50f',
383 '\\<MM>' : '\U0001d510',
384 '\\<NN>' : '\U0001d511',
385 '\\<OO>' : '\U0001d512',
386 '\\<PP>' : '\U0001d513',
387 '\\<QQ>' : '\U0001d514',
388 '\\<RR>' : '\U0000211c',
389 '\\<SS>' : '\U0001d516',
390 '\\<TT>' : '\U0001d517',
391 '\\<UU>' : '\U0001d518',
392 '\\<VV>' : '\U0001d519',
393 '\\<WW>' : '\U0001d51a',
394 '\\<XX>' : '\U0001d51b',
395 '\\<YY>' : '\U0001d51c',
396 '\\<ZZ>' : '\U00002128',
397 '\\<aa>' : '\U0001d51e',
398 '\\<bb>' : '\U0001d51f',
399 '\\<cc>' : '\U0001d520',
400 '\\<dd>' : '\U0001d521',
401 '\\<ee>' : '\U0001d522',
402 '\\<ff>' : '\U0001d523',
403 '\\<gg>' : '\U0001d524',
404 '\\<hh>' : '\U0001d525',
405 '\\<ii>' : '\U0001d526',
406 '\\<jj>' : '\U0001d527',
407 '\\<kk>' : '\U0001d528',
408 '\\<ll>' : '\U0001d529',
409 '\\<mm>' : '\U0001d52a',
410 '\\<nn>' : '\U0001d52b',
411 '\\<oo>' : '\U0001d52c',
412 '\\<pp>' : '\U0001d52d',
413 '\\<qq>' : '\U0001d52e',
414 '\\<rr>' : '\U0001d52f',
415 '\\<ss>' : '\U0001d530',
416 '\\<tt>' : '\U0001d531',
417 '\\<uu>' : '\U0001d532',
418 '\\<vv>' : '\U0001d533',
419 '\\<ww>' : '\U0001d534',
420 '\\<xx>' : '\U0001d535',
421 '\\<yy>' : '\U0001d536',
422 '\\<zz>' : '\U0001d537',
423 '\\<alpha>' : '\U000003b1',
424 '\\<beta>' : '\U000003b2',
425 '\\<gamma>' : '\U000003b3',
426 '\\<delta>' : '\U000003b4',
427 '\\<epsilon>' : '\U000003b5',
428 '\\<zeta>' : '\U000003b6',
429 '\\<eta>' : '\U000003b7',
430 '\\<theta>' : '\U000003b8',
431 '\\<iota>' : '\U000003b9',
432 '\\<kappa>' : '\U000003ba',
433 '\\<lambda>' : '\U000003bb',
434 '\\<mu>' : '\U000003bc',
435 '\\<nu>' : '\U000003bd',
436 '\\<xi>' : '\U000003be',
437 '\\<pi>' : '\U000003c0',
438 '\\<rho>' : '\U000003c1',
439 '\\<sigma>' : '\U000003c3',
440 '\\<tau>' : '\U000003c4',
441 '\\<upsilon>' : '\U000003c5',
442 '\\<phi>' : '\U000003c6',
443 '\\<chi>' : '\U000003c7',
444 '\\<psi>' : '\U000003c8',
445 '\\<omega>' : '\U000003c9',
446 '\\<Gamma>' : '\U00000393',
447 '\\<Delta>' : '\U00000394',
448 '\\<Theta>' : '\U00000398',
449 '\\<Lambda>' : '\U0000039b',
450 '\\<Xi>' : '\U0000039e',
451 '\\<Pi>' : '\U000003a0',
452 '\\<Sigma>' : '\U000003a3',
453 '\\<Upsilon>' : '\U000003a5',
454 '\\<Phi>' : '\U000003a6',
455 '\\<Psi>' : '\U000003a8',
456 '\\<Omega>' : '\U000003a9',
457 '\\<bool>' : '\U0001d539',
458 '\\<complex>' : '\U00002102',
459 '\\<nat>' : '\U00002115',
460 '\\<rat>' : '\U0000211a',
461 '\\<real>' : '\U0000211d',
462 '\\<int>' : '\U00002124',
463 '\\<leftarrow>' : '\U00002190',
464 '\\<longleftarrow>' : '\U000027f5',
465 '\\<rightarrow>' : '\U00002192',
466 '\\<longrightarrow>' : '\U000027f6',
467 '\\<Leftarrow>' : '\U000021d0',
468 '\\<Longleftarrow>' : '\U000027f8',
469 '\\<Rightarrow>' : '\U000021d2',
470 '\\<Longrightarrow>' : '\U000027f9',
471 '\\<leftrightarrow>' : '\U00002194',
472 '\\<longleftrightarrow>' : '\U000027f7',
473 '\\<Leftrightarrow>' : '\U000021d4',
474 '\\<Longleftrightarrow>' : '\U000027fa',
475 '\\<mapsto>' : '\U000021a6',
476 '\\<longmapsto>' : '\U000027fc',
477 '\\<midarrow>' : '\U00002500',
478 '\\<Midarrow>' : '\U00002550',
479 '\\<hookleftarrow>' : '\U000021a9',
480 '\\<hookrightarrow>' : '\U000021aa',
481 '\\<leftharpoondown>' : '\U000021bd',
482 '\\<rightharpoondown>' : '\U000021c1',
483 '\\<leftharpoonup>' : '\U000021bc',
484 '\\<rightharpoonup>' : '\U000021c0',
485 '\\<rightleftharpoons>' : '\U000021cc',
486 '\\<leadsto>' : '\U0000219d',
487 '\\<downharpoonleft>' : '\U000021c3',
488 '\\<downharpoonright>' : '\U000021c2',
489 '\\<upharpoonleft>' : '\U000021bf',
490 '\\<upharpoonright>' : '\U000021be',
491 '\\<restriction>' : '\U000021be',
492 '\\<Colon>' : '\U00002237',
493 '\\<up>' : '\U00002191',
494 '\\<Up>' : '\U000021d1',
495 '\\<down>' : '\U00002193',
496 '\\<Down>' : '\U000021d3',
497 '\\<updown>' : '\U00002195',
498 '\\<Updown>' : '\U000021d5',
499 '\\<langle>' : '\U000027e8',
500 '\\<rangle>' : '\U000027e9',
501 '\\<lceil>' : '\U00002308',
502 '\\<rceil>' : '\U00002309',
503 '\\<lfloor>' : '\U0000230a',
504 '\\<rfloor>' : '\U0000230b',
505 '\\<lparr>' : '\U00002987',
506 '\\<rparr>' : '\U00002988',
507 '\\<lbrakk>' : '\U000027e6',
508 '\\<rbrakk>' : '\U000027e7',
509 '\\<lbrace>' : '\U00002983',
510 '\\<rbrace>' : '\U00002984',
511 '\\<guillemotleft>' : '\U000000ab',
512 '\\<guillemotright>' : '\U000000bb',
513 '\\<bottom>' : '\U000022a5',
514 '\\<top>' : '\U000022a4',
515 '\\<and>' : '\U00002227',
516 '\\<And>' : '\U000022c0',
517 '\\<or>' : '\U00002228',
518 '\\<Or>' : '\U000022c1',
519 '\\<forall>' : '\U00002200',
520 '\\<exists>' : '\U00002203',
521 '\\<nexists>' : '\U00002204',
522 '\\<not>' : '\U000000ac',
523 '\\<box>' : '\U000025a1',
524 '\\<diamond>' : '\U000025c7',
525 '\\<turnstile>' : '\U000022a2',
526 '\\<Turnstile>' : '\U000022a8',
527 '\\<tturnstile>' : '\U000022a9',
528 '\\<TTurnstile>' : '\U000022ab',
529 '\\<stileturn>' : '\U000022a3',
530 '\\<surd>' : '\U0000221a',
531 '\\<le>' : '\U00002264',
532 '\\<ge>' : '\U00002265',
533 '\\<lless>' : '\U0000226a',
534 '\\<ggreater>' : '\U0000226b',
535 '\\<lesssim>' : '\U00002272',
536 '\\<greatersim>' : '\U00002273',
537 '\\<lessapprox>' : '\U00002a85',
538 '\\<greaterapprox>' : '\U00002a86',
539 '\\<in>' : '\U00002208',
540 '\\<notin>' : '\U00002209',
541 '\\<subset>' : '\U00002282',
542 '\\<supset>' : '\U00002283',
543 '\\<subseteq>' : '\U00002286',
544 '\\<supseteq>' : '\U00002287',
545 '\\<sqsubset>' : '\U0000228f',
546 '\\<sqsupset>' : '\U00002290',
547 '\\<sqsubseteq>' : '\U00002291',
548 '\\<sqsupseteq>' : '\U00002292',
549 '\\<inter>' : '\U00002229',
550 '\\<Inter>' : '\U000022c2',
551 '\\<union>' : '\U0000222a',
552 '\\<Union>' : '\U000022c3',
553 '\\<squnion>' : '\U00002294',
554 '\\<Squnion>' : '\U00002a06',
555 '\\<sqinter>' : '\U00002293',
556 '\\<Sqinter>' : '\U00002a05',
557 '\\<setminus>' : '\U00002216',
558 '\\<propto>' : '\U0000221d',
559 '\\<uplus>' : '\U0000228e',
560 '\\<Uplus>' : '\U00002a04',
561 '\\<noteq>' : '\U00002260',
562 '\\<sim>' : '\U0000223c',
563 '\\<doteq>' : '\U00002250',
564 '\\<simeq>' : '\U00002243',
565 '\\<approx>' : '\U00002248',
566 '\\<asymp>' : '\U0000224d',
567 '\\<cong>' : '\U00002245',
568 '\\<smile>' : '\U00002323',
569 '\\<equiv>' : '\U00002261',
570 '\\<frown>' : '\U00002322',
571 '\\<Join>' : '\U000022c8',
572 '\\<bowtie>' : '\U00002a1d',
573 '\\<prec>' : '\U0000227a',
574 '\\<succ>' : '\U0000227b',
575 '\\<preceq>' : '\U0000227c',
576 '\\<succeq>' : '\U0000227d',
577 '\\<parallel>' : '\U00002225',
578 '\\<bar>' : '\U000000a6',
579 '\\<plusminus>' : '\U000000b1',
580 '\\<minusplus>' : '\U00002213',
581 '\\<times>' : '\U000000d7',
582 '\\<div>' : '\U000000f7',
583 '\\<cdot>' : '\U000022c5',
584 '\\<star>' : '\U000022c6',
585 '\\<bullet>' : '\U00002219',
586 '\\<circ>' : '\U00002218',
587 '\\<dagger>' : '\U00002020',
588 '\\<ddagger>' : '\U00002021',
589 '\\<lhd>' : '\U000022b2',
590 '\\<rhd>' : '\U000022b3',
591 '\\<unlhd>' : '\U000022b4',
592 '\\<unrhd>' : '\U000022b5',
593 '\\<triangleleft>' : '\U000025c3',
594 '\\<triangleright>' : '\U000025b9',
595 '\\<triangle>' : '\U000025b3',
596 '\\<triangleq>' : '\U0000225c',
597 '\\<oplus>' : '\U00002295',
598 '\\<Oplus>' : '\U00002a01',
599 '\\<otimes>' : '\U00002297',
600 '\\<Otimes>' : '\U00002a02',
601 '\\<odot>' : '\U00002299',
602 '\\<Odot>' : '\U00002a00',
603 '\\<ominus>' : '\U00002296',
604 '\\<oslash>' : '\U00002298',
605 '\\<dots>' : '\U00002026',
606 '\\<cdots>' : '\U000022ef',
607 '\\<Sum>' : '\U00002211',
608 '\\<Prod>' : '\U0000220f',
609 '\\<Coprod>' : '\U00002210',
610 '\\<infinity>' : '\U0000221e',
611 '\\<integral>' : '\U0000222b',
612 '\\<ointegral>' : '\U0000222e',
613 '\\<clubsuit>' : '\U00002663',
614 '\\<diamondsuit>' : '\U00002662',
615 '\\<heartsuit>' : '\U00002661',
616 '\\<spadesuit>' : '\U00002660',
617 '\\<aleph>' : '\U00002135',
618 '\\<emptyset>' : '\U00002205',
619 '\\<nabla>' : '\U00002207',
620 '\\<partial>' : '\U00002202',
621 '\\<flat>' : '\U0000266d',
622 '\\<natural>' : '\U0000266e',
623 '\\<sharp>' : '\U0000266f',
624 '\\<angle>' : '\U00002220',
625 '\\<copyright>' : '\U000000a9',
626 '\\<registered>' : '\U000000ae',
627 '\\<hyphen>' : '\U000000ad',
628 '\\<inverse>' : '\U000000af',
629 '\\<onequarter>' : '\U000000bc',
630 '\\<onehalf>' : '\U000000bd',
631 '\\<threequarters>' : '\U000000be',
632 '\\<ordfeminine>' : '\U000000aa',
633 '\\<ordmasculine>' : '\U000000ba',
634 '\\<section>' : '\U000000a7',
635 '\\<paragraph>' : '\U000000b6',
636 '\\<exclamdown>' : '\U000000a1',
637 '\\<questiondown>' : '\U000000bf',
638 '\\<euro>' : '\U000020ac',
639 '\\<pounds>' : '\U000000a3',
640 '\\<yen>' : '\U000000a5',
641 '\\<cent>' : '\U000000a2',
642 '\\<currency>' : '\U000000a4',
643 '\\<degree>' : '\U000000b0',
644 '\\<amalg>' : '\U00002a3f',
645 '\\<mho>' : '\U00002127',
646 '\\<lozenge>' : '\U000025ca',
647 '\\<wp>' : '\U00002118',
648 '\\<wrong>' : '\U00002240',
649 '\\<struct>' : '\U000022c4',
650 '\\<acute>' : '\U000000b4',
651 '\\<index>' : '\U00000131',
652 '\\<dieresis>' : '\U000000a8',
653 '\\<cedilla>' : '\U000000b8',
654 '\\<hungarumlaut>' : '\U000002dd',
655 '\\<some>' : '\U000003f5',
656 '\\<newline>' : '\U000023ce',
657 '\\<open>' : '\U00002039',
658 '\\<close>' : '\U0000203a',
659 '\\<here>' : '\U00002302',
660 '\\<^sub>' : '\U000021e9',
661 '\\<^sup>' : '\U000021e7',
662 '\\<^bold>' : '\U00002759',
663 '\\<^bsub>' : '\U000021d8',
664 '\\<^esub>' : '\U000021d9',
665 '\\<^bsup>' : '\U000021d7',
666 '\\<^esup>' : '\U000021d6',
667 }
669 lang_map = {'isabelle' : isabelle_symbols, 'latex' : latex_symbols}
def __init__(self, **options):
    """Pick the symbol table selected by the ``lang`` option.

    ``lang`` must be ``'isabelle'`` or ``'latex'``; it defaults to
    ``'isabelle'``.
    """
    Filter.__init__(self, **options)
    chosen_lang = get_choice_opt(options, 'lang',
                                 ['isabelle', 'latex'], 'isabelle')
    self.symbols = self.lang_map[chosen_lang]
def filter(self, lexer, stream):
    """Replace token values that are known symbol names.

    A token whose whole value is a key of ``self.symbols`` is yielded with
    the mapped character instead; every other token is yielded unchanged.
    Token types are never altered.
    """
    for ttype, value in stream:
        # Fall back to the original text when there is no table entry.
        yield ttype, self.symbols.get(value, value)
class KeywordCaseFilter(Filter):
    """Change the letter case of keyword tokens.

    Keywords can be lowercased, uppercased or capitalized (first letter
    uppercase, the rest lowercase).  This can be handy e.g. when
    highlighting Pascal code that should follow a particular styleguide.

    Options accepted:

    `case` : string
       The casing to convert keywords to. Must be one of ``'lower'``,
       ``'upper'`` or ``'capitalize'``.  The default is ``'lower'``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        method_name = get_choice_opt(
            options, 'case', ['lower', 'upper', 'capitalize'], 'lower')
        # The option value doubles as the name of the ``str`` method to use.
        self.convert = getattr(str, method_name)

    def filter(self, lexer, stream):
        """Yield the stream with every ``Keyword`` token's text converted."""
        for ttype, value in stream:
            yield ttype, (self.convert(value) if ttype in Keyword else value)
class NameHighlightFilter(Filter):
    """Give selected Name (and Name.*) tokens a different token type.

    Example::

        filter = NameHighlightFilter(
            names=['foo', 'bar', 'baz'],
            tokentype=Name.Function,
        )

    With these options the names "foo", "bar" and "baz" are highlighted
    as functions.  `Name.Function` is also what is used when no token type
    is given.

    Options accepted:

    `names` : list of strings
      The names that should receive the different token type.
      There is no default.
    `tokentype` : TokenType or string
      A token type, or a string containing a token type name, that is
      applied to the strings listed in `names`.  The default is
      `Name.Function`.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.names = set(get_list_opt(options, 'names', []))
        requested = options.get('tokentype')
        self.tokentype = (string_to_tokentype(requested) if requested
                          else Name.Function)

    def filter(self, lexer, stream):
        """Yield the stream, re-typing ``Name`` tokens listed in ``names``."""
        for ttype, value in stream:
            selected = ttype in Name and value in self.names
            yield (self.tokentype if selected else ttype), value
class ErrorToken(Exception):
    """Default exception raised by `RaiseOnErrorTokenFilter`."""
class RaiseOnErrorTokenFilter(Filter):
    """Abort with an exception as soon as the lexer emits an error token.

    Options accepted:

    `excclass` : Exception class
      The exception class to raise.
      The default is `pygments.filters.ErrorToken`.

    .. versionadded:: 0.8
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        exc_class = options.get('excclass', ErrorToken)
        self.exception = exc_class
        try:
            # issubclass() itself raises TypeError when handed a non-class,
            # so both kinds of bad value end up in the handler below.
            is_exception_class = issubclass(exc_class, Exception)
            if not is_exception_class:
                raise TypeError
        except TypeError:
            raise OptionError('excclass option is not an exception class')

    def filter(self, lexer, stream):
        """Pass tokens through; raise ``self.exception`` on an ``Error`` token.

        The exception is constructed with the offending token's text.
        """
        for ttype, value in stream:
            if ttype is not Error:
                yield ttype, value
                continue
            raise self.exception(value)
class VisibleWhitespaceFilter(Filter):
    """Convert tabs, newlines and/or spaces to visible characters.

    Options accepted:

    `spaces` : string or bool
      If this is a one-character string, spaces will be replaced by this
      string.  If it is another true value, spaces will be replaced by ``·``
      (unicode MIDDLE DOT).  If it is a false value, spaces will not be
      replaced.  The default is ``False``.
    `tabs` : string or bool
      The same as for `spaces`, but the default replacement character is ``»``
      (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK).  The default value
      is ``False``.  Note: this will not work if the `tabsize` option for the
      lexer is nonzero, as tabs will already have been expanded then.
    `tabsize` : int
      If tabs are to be replaced by this filter (see the `tabs` option), this
      is the total number of characters that a tab should be expanded to.
      The default is ``8``.
    `newlines` : string or bool
      The same as for `spaces`, but the default replacement character is ``¶``
      (unicode PILCROW SIGN).  The default value is ``False``.
    `wstokentype` : bool
      If true, give whitespace the special `Whitespace` token type.  This
      allows styling the visible whitespace differently (e.g. greyed out),
      but it can disrupt background colors.  The default is ``True``.

    .. versionadded:: 0.8
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        default_markers = (('spaces', '·'),
                           ('tabs', '»'),
                           ('newlines', '¶'))
        for attr, glyph in default_markers:
            requested = options.get(attr, False)
            if isinstance(requested, str) and len(requested) == 1:
                marker = requested      # caller supplied the character
            elif requested:
                marker = glyph          # any other true value: use default
            else:
                marker = ''             # empty string means "do not replace"
            setattr(self, attr, marker)
        tabsize = get_int_opt(options, 'tabsize', 8)
        if self.tabs:
            # Pad the marker so the replacement is `tabsize` characters wide.
            self.tabs += ' ' * (tabsize - 1)
        if self.newlines:
            # Keep the real line break after the visible marker.
            self.newlines += '\n'
        self.wstt = get_bool_opt(options, 'wstokentype', True)

    def filter(self, lexer, stream):
        """Yield the stream with the configured whitespace made visible.

        With `wstokentype` enabled every whitespace character becomes its
        own `Whitespace` token; otherwise the replacements are done inside
        the token text and token types are left alone.
        """
        if not self.wstt:
            # simpler processing: plain substitutions, token types untouched
            space_sub, tab_sub, newline_sub = \
                self.spaces, self.tabs, self.newlines
            for ttype, value in stream:
                if space_sub:
                    value = value.replace(' ', space_sub)
                if tab_sub:
                    value = value.replace('\t', tab_sub)
                if newline_sub:
                    value = value.replace('\n', newline_sub)
                yield ttype, value
            return

        # An unset marker maps the character onto itself.
        visible = {
            ' ': self.spaces or ' ',
            '\t': self.tabs or '\t',
            '\n': self.newlines or '\n',
        }
        whitespace_re = re.compile(r'\s')

        def render(wschar):
            # Other whitespace characters matched by ``\s`` are kept as they are.
            return visible.get(wschar, wschar)

        for ttype, value in stream:
            yield from _replace_special(ttype, value, whitespace_re,
                                        Whitespace, render)
class GobbleFilter(Filter):
    """Gobbles source code lines (eats initial characters).

    The first ``n`` characters of every line of code are dropped.  This can
    help when the source fed to the lexer is indented by a fixed amount of
    space that should not show up in the output.

    Options accepted:

    `n` : int
       The number of characters to gobble.

    .. versionadded:: 1.2
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.n = get_int_opt(options, 'n', 0)

    def gobble(self, value, left):
        """Drop up to `left` leading characters from `value`.

        Return ``(remaining_text, still_to_gobble)``.
        """
        size = len(value)
        if left >= size:
            # The whole piece is eaten; carry the remainder forward.
            return '', left - size
        return value[left:], 0

    def filter(self, lexer, stream):
        """Yield the stream with the line prefixes removed.

        Tokens that end up empty are dropped from the stream.
        """
        per_line = self.n
        pending = per_line  # How many characters left to gobble.
        for ttype, value in stream:
            # The first piece continues the current line, so it only owes
            # ``pending`` characters; every later piece starts a new line
            # and owes the full ``per_line``.
            first, *later = value.split('\n')
            first, pending = self.gobble(first, pending)
            pieces = [first]
            for segment in later:
                segment, pending = self.gobble(segment, per_line)
                pieces.append(segment)
            value = '\n'.join(pieces)

            if value:
                yield ttype, value
class TokenMergeFilter(Filter):
    """Merge runs of consecutive tokens that share a token type.

    The texts of adjacent tokens with the same type in a lexer's output
    stream are concatenated into a single token.

    .. versionadded:: 1.2
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        """Yield one token per run of identically typed tokens."""
        pending_type = None
        pending_text = None
        for ttype, value in stream:
            if ttype is pending_type:
                # Same type as the run being collected: extend it.
                pending_text += value
                continue
            if pending_type is not None:
                yield pending_type, pending_text
            pending_type, pending_text = ttype, value
        # Flush the final run, if the stream produced anything at all.
        if pending_type is not None:
            yield pending_type, pending_text
# Registry of the builtin filters: maps the name accepted by
# find_filter_class() / get_filter_by_name() to the filter class.
FILTERS = {
    'codetagify':     CodeTagFilter,
    'keywordcase':    KeywordCaseFilter,
    'highlight':      NameHighlightFilter,
    'raiseonerror':   RaiseOnErrorTokenFilter,
    'whitespace':     VisibleWhitespaceFilter,
    'gobble':         GobbleFilter,
    'tokenmerge':     TokenMergeFilter,
    'symbols':        SymbolFilter,
}