Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# -*- coding: utf-8 -*-
"""
    pygments.lexers
    ~~~~~~~~~~~~~~~

    Pygments lexers.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

11 

12import re 

13import sys 

14import types 

15import fnmatch 

16from os.path import basename 

17 

18from pygments.lexers._mapping import LEXERS 

19from pygments.modeline import get_filetype_from_buffer 

20from pygments.plugin import find_plugin_lexers 

21from pygments.util import ClassNotFound, guess_decode 

22 

# Alternative class names; attribute access on this module resolves each key
# to the class named by its value.
COMPAT = dict(
    Python3Lexer='PythonLexer',
    Python3TracebackLexer='PythonTracebackLexer',
)

# Public API: the lookup helpers, then every key of LEXERS, then every
# COMPAT name.
__all__ = [
    'get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class',
    'guess_lexer', 'load_lexer_from_file',
    *LEXERS,
    *COMPAT,
]

# Lexer classes loaded so far, keyed by the class's ``name`` attribute.
_lexer_cache = {}
# Compiled regular expressions, keyed by the glob they were translated from.
_pattern_cache = {}

33 

34 

def _fn_matches(fn, glob):
    """Match the file name ``fn`` against the shell-style pattern ``glob``.

    Returns the regular-expression match object on success and ``None``
    otherwise.  Each glob is translated and compiled only once; the compiled
    pattern is kept in ``_pattern_cache``.
    """
    try:
        compiled = _pattern_cache[glob]
    except KeyError:
        compiled = re.compile(fnmatch.translate(glob))
        _pattern_cache[glob] = compiled
    return compiled.match(fn)

41 

42 

def _load_lexers(module_name):
    """Import ``module_name`` and register every lexer it exports.

    Each name listed in the module's ``__all__`` is fetched from the module
    and stored in ``_lexer_cache`` under the class's ``name`` attribute.
    """
    module = __import__(module_name, None, None, ['__all__'])
    exported = (getattr(module, attr) for attr in module.__all__)
    for lexer_cls in exported:
        _lexer_cache[lexer_cls.name] = lexer_cls

49 

50 

def get_all_lexers():
    """Yield ``(name, aliases, filenames, mimetypes)`` for all known lexers.

    Entries of the builtin ``LEXERS`` table come first (each table value
    without its leading element), followed by one tuple for every class
    returned by ``find_plugin_lexers()``.
    """
    yield from (entry[1:] for entry in LEXERS.values())
    for plugin_cls in find_plugin_lexers():
        yield (plugin_cls.name, plugin_cls.aliases,
               plugin_cls.filenames, plugin_cls.mimetypes)

59 

60 

def find_lexer_class(name):
    """Look up a lexer class by its ``name``.

    The cache of already loaded classes is consulted first, then the builtin
    ``LEXERS`` table (loading the defining module on a hit), and finally the
    plugin lexers.

    Returns the lexer class, or ``None`` if no lexer has that name.
    """
    try:
        return _lexer_cache[name]
    except KeyError:
        pass
    # builtin lexers: load the module that defines the requested class
    for module_name, lexer_name, _aliases, _, _ in LEXERS.values():
        if name == lexer_name:
            _load_lexers(module_name)
            return _lexer_cache[name]
    # lexers registered through setuptools entrypoints
    for plugin_cls in find_plugin_lexers():
        if plugin_cls.name == name:
            return plugin_cls
    return None

77 

78 

def find_lexer_class_by_name(_alias):
    """Look up a lexer class by alias.

    Like `get_lexer_by_name`, but does not instantiate the class.

    The alias is compared case-insensitively (it is lower-cased before the
    membership test) against the aliases of the builtin lexers first and of
    the plugin lexers second.

    Raises ClassNotFound if ``_alias`` is empty or no lexer declares it.

    .. versionadded:: 2.2
    """
    if not _alias:
        raise ClassNotFound('no lexer for alias %r found' % _alias)
    # The lower-cased alias does not change between iterations, so compute
    # it once instead of once per table entry.
    wanted = _alias.lower()
    # lookup builtin lexers
    for module_name, name, aliases, _, _ in LEXERS.values():
        if wanted in aliases:
            if name not in _lexer_cache:
                _load_lexers(module_name)
            return _lexer_cache[name]
    # continue with lexers from setuptools entrypoints
    for cls in find_plugin_lexers():
        if wanted in cls.aliases:
            return cls
    raise ClassNotFound('no lexer for alias %r found' % _alias)

99 

100 

def get_lexer_by_name(_alias, **options):
    """Get a lexer instance by an alias.

    The class lookup is delegated to `find_lexer_class_by_name`, so both
    functions share one implementation of the alias search; the class found
    is then instantiated with ``options`` as keyword arguments.

    Raises ClassNotFound if ``_alias`` is empty or no lexer declares it.
    """
    return find_lexer_class_by_name(_alias)(**options)

120 

121 

def load_lexer_from_file(filename, lexername="CustomLexer", **options):
    """Load a lexer from a file and return an instance of it.

    This method expects a file located relative to the current working
    directory, which contains a Lexer class. By default, it expects the
    Lexer to be named CustomLexer; you can specify your own class name
    as the second argument to this function. The remaining keyword
    arguments are passed to the class when it is instantiated.

    Users should be very careful with the input, because this method
    is equivalent to running eval on the input file.

    Raises ClassNotFound if there are any problems importing the Lexer:
    the file cannot be read, it does not define ``lexername``, or executing
    the file / instantiating the class fails. The triggering exception is
    attached as the explicit cause of the ClassNotFound.

    .. versionadded:: 2.2
    """
    try:
        # This empty dict will contain the namespace for the exec'd file
        custom_namespace = {}
        with open(filename, 'rb') as f:
            # SECURITY: this runs arbitrary code from ``filename``; only
            # call this function with files you trust.
            exec(f.read(), custom_namespace)
        # Retrieve the class `lexername` from that namespace
        if lexername not in custom_namespace:
            raise ClassNotFound('no valid %s class found in %s' %
                                (lexername, filename))
        lexer_class = custom_namespace[lexername]
        # And finally instantiate it with the options
        return lexer_class(**options)
    except OSError as err:
        raise ClassNotFound('cannot read %s: %s' % (filename, err)) from err
    except ClassNotFound:
        # Already the right type (raised above); pass it through untouched.
        raise
    except Exception as err:
        raise ClassNotFound(
            'error when loading custom lexer: %s' % err) from err

155 

156 

def find_lexer_class_for_filename(_fn, code=None):
    """Get a lexer class for a filename.

    Only the base name of ``_fn`` is matched against the filename patterns
    of the builtin and plugin lexers. If multiple lexers match the filename
    pattern, use ``analyse_text()`` on ``code`` to figure out which one is
    more appropriate; without usable ``code`` the lexers' ``priority`` is
    compared instead. ``code`` may be given as bytes, in which case it is
    decoded with ``guess_decode`` first.

    Returns None if not found.
    """
    fn = basename(_fn)
    matches = []
    for modname, name, _, filenames, _ in LEXERS.values():
        for filename in filenames:
            if _fn_matches(fn, filename):
                if name not in _lexer_cache:
                    _load_lexers(modname)
                matches.append((_lexer_cache[name], filename))
    for cls in find_plugin_lexers():
        for filename in cls.filenames:
            if _fn_matches(fn, filename):
                matches.append((cls, filename))

    if not matches:
        return None

    if isinstance(code, bytes):
        # decode it, since all analyse_text functions expect unicode;
        # guess_decode() returns a (text, encoding) pair, so keep only the
        # text instead of handing the whole tuple to analyse_text()
        code, _ = guess_decode(code)

    def get_rating(info):
        cls, filename = info
        # explicit patterns (no wildcard) get a bonus
        bonus = 0.5 if '*' not in filename else 0
        # The class _always_ defines analyse_text because it's included in
        # the Lexer class. The default implementation returns None which
        # gets turned into 0.0. Run scripts/detect_missing_analyse_text.py
        # to find lexers which need it overridden.
        if code:
            return cls.analyse_text(code) + bonus, cls.__name__
        return cls.priority + bonus, cls.__name__

    # The highest rated match sorts last.
    matches.sort(key=get_rating)
    return matches[-1][0]

198 

199 

def get_lexer_for_filename(_fn, code=None, **options):
    """Return a lexer instance suited to the file name ``_fn``.

    The class is chosen by ``find_lexer_class_for_filename(_fn, code)`` and
    instantiated with ``options`` as keyword arguments.

    Raises ClassNotFound if no lexer class was found for the file name.
    """
    lexer_cls = find_lexer_class_for_filename(_fn, code)
    if lexer_cls:
        return lexer_cls(**options)
    raise ClassNotFound('no lexer for filename %r found' % _fn)

212 

213 

def get_lexer_for_mimetype(_mime, **options):
    """Return a lexer instance that declares the mimetype ``_mime``.

    Builtin lexers are searched before plugin lexers; the first class whose
    mimetypes contain ``_mime`` is instantiated with ``options``.

    Raises ClassNotFound if no lexer declares the mimetype.
    """
    for modname, name, _, _, mimetypes in LEXERS.values():
        if _mime not in mimetypes:
            continue
        # load the defining module on first use of this lexer
        if name not in _lexer_cache:
            _load_lexers(modname)
        return _lexer_cache[name](**options)
    for plugin_cls in find_plugin_lexers():
        if _mime in plugin_cls.mimetypes:
            return plugin_cls(**options)
    raise ClassNotFound('no lexer for mimetype %r found' % _mime)

228 

229 

def _iter_lexerclasses(plugins=True):
    """Yield every lexer class.

    Builtin lexers are produced in sorted order of their ``LEXERS`` key and
    are loaded on demand; plugin lexers follow unless ``plugins`` is false.
    """
    for key in sorted(LEXERS):
        module_name, name = LEXERS[key][:2]
        if name not in _lexer_cache:
            _load_lexers(module_name)
        yield _lexer_cache[name]
    if not plugins:
        return
    yield from find_plugin_lexers()

239 

240 

def guess_lexer_for_filename(_fn, _text, **options):
    """
    Pick a lexer for a file by combining its name and its content.

    All lexers whose primary (``filenames``) or secondary
    (``alias_filenames``) patterns match the base name of ``_fn`` are
    collected.  A single candidate is used directly; otherwise ``_text`` is
    analysed by every candidate and the best result wins.

    usage::

        >>> from pygments.lexers import guess_lexer_for_filename
        >>> guess_lexer_for_filename('hello.html', '<%= @foo %>')
        <pygments.lexers.templates.RhtmlLexer object at 0xb7d2f32c>
        >>> guess_lexer_for_filename('hello.html', '<h1>{{ title|e }}</h1>')
        <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac>
        >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }')
        <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c>

    Raises ClassNotFound if no pattern matches the file name.
    """
    fn = basename(_fn)
    is_primary = {}
    candidates = set()
    for lexer_cls in _iter_lexerclasses():
        # Primary patterns are checked before alias patterns, so the flag of
        # the last matching pattern is the one that is kept.
        pattern_groups = (
            (lexer_cls.filenames, True),
            (lexer_cls.alias_filenames, False),
        )
        for patterns, flag in pattern_groups:
            for pattern in patterns:
                if _fn_matches(fn, pattern):
                    candidates.add(lexer_cls)
                    is_primary[lexer_cls] = flag

    if not candidates:
        raise ClassNotFound('no lexer for filename %r found' % fn)
    if len(candidates) == 1:
        (only_cls,) = candidates
        return only_cls(**options)

    scored = []
    for lexer_cls in candidates:
        score = lexer_cls.analyse_text(_text)
        if score == 1.0:
            # a score of 1.0 ends the search immediately
            return lexer_cls(**options)
        scored.append((score, lexer_cls))

    def sort_key(pair):
        # sort by:
        #  - analyse score
        #  - is primary filename pattern?
        #  - priority
        #  - last resort: class name
        score, lexer_cls = pair
        return (score, is_primary[lexer_cls],
                lexer_cls.priority, lexer_cls.__name__)

    best = sorted(scored, key=sort_key)[-1]
    return best[1](**options)

290 

291 

def guess_lexer(_text, **options):
    """Guess a lexer by strong distinctions in the text (eg, shebang).

    Input that is not a ``str`` is decoded first, using the ``inencoding``
    (or else ``encoding``) option when one is set and ``guess_decode``
    otherwise.  A file type found by ``get_filetype_from_buffer`` takes
    precedence; failing that, the lexer class with the highest
    ``analyse_text`` score is instantiated with ``options``.

    Raises ClassNotFound if no lexer scores above zero.
    """
    if not isinstance(_text, str):
        inencoding = options.get('inencoding', options.get('encoding'))
        if inencoding:
            _text = _text.decode(inencoding)
        else:
            _text, _ = guess_decode(_text)

    # try to get a vim modeline first
    ft = get_filetype_from_buffer(_text)
    if ft is not None:
        try:
            return get_lexer_by_name(ft, **options)
        except ClassNotFound:
            # unknown file type: fall through to the content analysis
            pass

    best_score, best_cls = 0.0, None
    for lexer_cls in _iter_lexerclasses():
        score = lexer_cls.analyse_text(_text)
        if score == 1.0:
            return lexer_cls(**options)
        if score > best_score:
            best_score, best_cls = score, lexer_cls
    if not best_score or best_cls is None:
        raise ClassNotFound('no lexer matching the text found')
    return best_cls(**options)

321 

322 

class _automodule(types.ModuleType):
    """Module type that imports lexer classes on first attribute access."""

    def __getattr__(self, name):
        """Resolve an attribute that is missing from the module.

        A key of ``LEXERS`` triggers loading of the lexer's module; the class
        is then stored on this module object and returned.  A key of
        ``COMPAT`` is resolved through the name it maps to.  Anything else
        raises AttributeError.
        """
        info = LEXERS.get(name)
        if not info:
            if name in COMPAT:
                return getattr(self, COMPAT[name])
            raise AttributeError(name)
        _load_lexers(info[0])
        cls = _lexer_cache[info[1]]
        # store the class on the module object after the first lookup
        setattr(self, name, cls)
        return cls

336 

337 

# Replace this module's entry in ``sys.modules`` with an ``_automodule``
# instance that carries the same namespace, so that lexer classes can be
# imported on attribute access.
oldmod = sys.modules[__name__]
newmod = _automodule(__name__)
vars(newmod).update(vars(oldmod))
sys.modules[__name__] = newmod
# Remove the bootstrap names from the namespace of the replacement module.
del newmod.newmod
del newmod.oldmod
del newmod.sys
del newmod.types