docs for lmcat v0.1.2
View Source on GitHub

lmcat.processors


  1import json
  2from typing import Callable, Sequence
  3from pathlib import Path
  4
  5
  6# type defs
  7# ==================================================
  8
  9ProcessorName = str
 10DeciderName = str
 11
 12ProcessorFunc = Callable[[Path], str]
 13DeciderFunc = Callable[[Path], bool]
 14
 15
 16# global dicts of processors and deciders
 17# ==================================================
 18
 19PROCESSORS: dict[ProcessorName, ProcessorFunc] = dict()
 20
 21DECIDERS: dict[DeciderName, DeciderFunc] = dict()
 22
 23
 24# register functions
 25# ==================================================
 26
 27
 28def register_processor(func: ProcessorFunc) -> ProcessorFunc:
 29	"""Register a function as a path processor"""
 30	PROCESSORS[ProcessorName(func.__name__)] = func
 31	return func
 32
 33
 34def register_decider(func: DeciderFunc) -> DeciderFunc:
 35	"""Register a function as a decider"""
 36	DECIDERS[DeciderName(func.__name__)] = func
 37	return func
 38
 39
 40# default deciders
 41# ==================================================
 42@register_decider
 43def is_over_10kb(path: Path) -> bool:
 44	"""Check if file is over 10KB."""
 45	return path.stat().st_size > 2**1
 46
 47
 48@register_decider
 49def is_documentation(path: Path) -> bool:
 50	"""Check if file is documentation."""
 51	return path.suffix in {".md", ".rst", ".txt"}
 52
 53
 54# default processors
 55# ==================================================
 56
 57
 58@register_processor
 59def remove_comments(path: Path) -> str:
 60	"""Remove single-line comments from code."""
 61	lines = path.read_text().splitlines()
 62	processed = [line for line in lines if not line.strip().startswith("#")]
 63	return "\n".join(processed)
 64
 65
 66@register_processor
 67def compress_whitespace(path: Path) -> str:
 68	"""Compress multiple whitespace characters into single spaces."""
 69	return " ".join(path.read_text().split())
 70
 71
 72@register_processor
 73def to_relative_path(path: Path) -> str:
 74	"""return the path to the file as a string"""
 75	return path.as_posix()
 76
 77
 78@register_processor
 79def ipynb_to_md(path: Path) -> str:
 80	"""Convert an IPython notebook to markdown."""
 81	nb_contents: dict = json.loads(path.read_text(encoding="utf-8"))
 82
 83	output: list[str] = []
 84
 85	for cell in nb_contents["cells"]:
 86		if cell["cell_type"] == "markdown":
 87			output.extend(cell["source"])
 88			output.append("\n\n")
 89		elif cell["cell_type"] == "code":
 90			output.append("```python\n")
 91			output.extend(cell["source"])
 92			output.append("\n```\n\n")
 93
 94	return "".join(output)
 95
 96
 97@register_processor
 98def makefile_recipes(path: Path) -> str:
 99	"""Process a Makefile to show only target descriptions and basic structure.
100
101	Preserves:
102	- Comments above .PHONY targets up to first empty line
103	- The .PHONY line and target line
104	- First line after target if it starts with @echo
105
106	# Parameters:
107	 - `path : Path`
108		Path to the Makefile to process
109
110	# Returns:
111	 - `str`
112		Processed Makefile content
113	"""
114	lines: Sequence[str] = path.read_text().splitlines()
115	output_lines: list[str] = []
116
117	i: int = 0
118	while i < len(lines):
119		line: str = lines[i]
120
121		# Look for .PHONY lines
122		if line.strip().startswith(".PHONY:"):
123			# Store target name for later matching
124			target_name: str = line.split(":")[1].strip()
125
126			# Collect comments above until empty line
127			comment_lines: list[str] = []
128			look_back: int = i - 1
129			while look_back >= 0 and lines[look_back].strip():
130				if lines[look_back].strip().startswith("#"):
131					comment_lines.insert(0, lines[look_back])
132				look_back -= 1
133
134			# Add collected comments
135			output_lines.extend(comment_lines)
136
137			# Add .PHONY line
138			output_lines.append(line)
139
140			# Add target line (should be next)
141			if i + 1 < len(lines) and lines[i + 1].startswith(f"{target_name}:"):
142				output_lines.append(lines[i + 1])
143				i += 1
144
145				# Check for @echo on next line
146				if i + 1 < len(lines) and lines[i + 1].strip().startswith("@echo"):
147					output_lines.append(lines[i + 1])
148
149				output_lines.append("	...")
150				output_lines.append("")
151
152		i += 1
153
154	return "\n".join(output_lines)
155
156
@register_processor
def csv_preview_5_lines(path: Path) -> str:
	"""Preview first few lines of a CSV file (up to 5)

	Reads only the first 1024 characters (the file is opened in text mode as
	UTF-8) and splits them into lines.
	Does not attempt to parse CSV structure.

	A final `... (truncated)` line is added whenever the preview does not
	show the whole file: either more text follows the first 1024 characters,
	or those characters contain more than 5 lines.

	# Parameters:
	- `path : Path`
	    Path to CSV file

	# Returns:
	- `str`
	    First few lines of the file, or an `Error previewing CSV: ...`
	    message if reading fails"""
	try:
		with path.open("r", encoding="utf-8") as f:
			content = f.read(1024)
			# peek one more character: a file of exactly 1024 characters
			# is complete and must not be reported as truncated
			has_more = bool(f.read(1))

		all_lines = content.splitlines()
		lines = all_lines[:5]
		if has_more or len(all_lines) > 5:
			lines.append("... (truncated)")

		return "\n".join(lines)
	except Exception as e:
		# deliberate best-effort: report the failure as text, do not raise
		return f"Error previewing CSV: {str(e)}"

ProcessorName = <class 'str'>
DeciderName = <class 'str'>
ProcessorFunc = typing.Callable[[pathlib.Path], str]
DeciderFunc = typing.Callable[[pathlib.Path], bool]
PROCESSORS: dict[str, typing.Callable[[pathlib.Path], str]] = {'remove_comments': <function remove_comments>, 'compress_whitespace': <function compress_whitespace>, 'to_relative_path': <function to_relative_path>, 'ipynb_to_md': <function ipynb_to_md>, 'makefile_recipes': <function makefile_recipes>, 'csv_preview_5_lines': <function csv_preview_5_lines>}
DECIDERS: dict[str, typing.Callable[[pathlib.Path], bool]] = {'is_over_10kb': <function is_over_10kb>, 'is_documentation': <function is_documentation>}
def register_processor(func: Callable[[pathlib.Path], str]) -> Callable[[pathlib.Path], str]:
def register_processor(func: ProcessorFunc) -> ProcessorFunc:
	"""Store `func` in the global `PROCESSORS` dict under its `__name__`.

	`func` is returned unchanged, so this works as a decorator.
	"""
	key: ProcessorName = ProcessorName(func.__name__)
	PROCESSORS[key] = func
	return func

Register a function as a path processor

def register_decider(func: Callable[[pathlib.Path], bool]) -> Callable[[pathlib.Path], bool]:
def register_decider(func: DeciderFunc) -> DeciderFunc:
	"""Store `func` in the global `DECIDERS` dict under its `__name__`.

	`func` is returned unchanged, so this works as a decorator.
	"""
	key: DeciderName = DeciderName(func.__name__)
	DECIDERS[key] = func
	return func

Register a function as a decider

@register_decider
def is_over_10kb(path: pathlib.Path) -> bool:
@register_decider
def is_over_10kb(path: Path) -> bool:
	"""Check if file is over 10KB.

	# Parameters:
	 - `path : Path`
		File whose size (from `path.stat()`) is inspected

	# Returns:
	 - `bool`
		`True` when the file is strictly larger than 10 * 1024 bytes
	"""
	# the threshold used to be `2**1` (2 bytes), which flagged almost every file
	return path.stat().st_size > 10 * 1024

Check if file is over 10KB.

@register_decider
def is_documentation(path: pathlib.Path) -> bool:
@register_decider
def is_documentation(path: Path) -> bool:
	"""Tell whether `path` has a documentation suffix (`.md`, `.rst` or `.txt`).

	The comparison against `path.suffix` is exact, so it is case-sensitive.
	"""
	doc_suffixes = (".md", ".rst", ".txt")
	return path.suffix in doc_suffixes

Check if file is documentation.

@register_processor
def remove_comments(path: pathlib.Path) -> str:
@register_processor
def remove_comments(path: Path) -> str:
	"""Remove single-line comments from code.

	A line is dropped when its first non-whitespace character is `#`.
	Comments that follow code on the same line are kept.

	# Parameters:
	 - `path : Path`
		File to read, decoded as UTF-8

	# Returns:
	 - `str`
		The remaining lines joined with newlines
	"""
	# explicit encoding: the default depends on the platform locale, and the
	# other processors in this module that pass an encoding read UTF-8
	lines = path.read_text(encoding="utf-8").splitlines()
	processed = [line for line in lines if not line.strip().startswith("#")]
	return "\n".join(processed)

Remove single-line comments from code.

@register_processor
def compress_whitespace(path: pathlib.Path) -> str:
@register_processor
def compress_whitespace(path: Path) -> str:
	"""Compress multiple whitespace characters into single spaces.

	Every run of whitespace (including newlines) becomes one space, and
	leading/trailing whitespace is removed.

	# Parameters:
	 - `path : Path`
		File to read, decoded as UTF-8

	# Returns:
	 - `str`
		The file's whitespace-separated tokens joined by single spaces
	"""
	# explicit encoding: the default depends on the platform locale
	text: str = path.read_text(encoding="utf-8")
	return " ".join(text.split())

Compress multiple whitespace characters into single spaces.

@register_processor
def to_relative_path(path: pathlib.Path) -> str:
@register_processor
def to_relative_path(path: Path) -> str:
	"""Render `path` as a string with forward slashes, via `Path.as_posix`.

	Nothing else is done to the path; it is converted exactly as given.
	"""
	posix_form: str = path.as_posix()
	return posix_form

Return the path to the file as a string, using forward slashes (POSIX style).

@register_processor
def ipynb_to_md(path: pathlib.Path) -> str:
@register_processor
def ipynb_to_md(path: Path) -> str:
	"""Render the cells of a notebook file as one markdown string.

	The file is read as UTF-8 JSON. Markdown cells are emitted as-is followed
	by a blank line; code cells are wrapped in a fenced `python` block.
	Cells of any other type are skipped.

	# Parameters:
	 - `path : Path`
		Path to the notebook file

	# Returns:
	 - `str`
		Concatenated markdown for all markdown and code cells
	"""
	notebook: dict = json.loads(path.read_text(encoding="utf-8"))

	chunks: list[str] = []

	for cell in notebook["cells"]:
		kind = cell["cell_type"]
		if kind == "markdown":
			chunks += [*cell["source"], "\n\n"]
		elif kind == "code":
			chunks += ["```python\n", *cell["source"], "\n```\n\n"]

	return "".join(chunks)

Convert an IPython notebook to markdown.

@register_processor
def makefile_recipes(path: pathlib.Path) -> str:
 98@register_processor
 99def makefile_recipes(path: Path) -> str:
100	"""Process a Makefile to show only target descriptions and basic structure.
101
102	Preserves:
103	- Comments above .PHONY targets up to first empty line
104	- The .PHONY line and target line
105	- First line after target if it starts with @echo
106
107	# Parameters:
108	 - `path : Path`
109		Path to the Makefile to process
110
111	# Returns:
112	 - `str`
113		Processed Makefile content
114	"""
115	lines: Sequence[str] = path.read_text().splitlines()
116	output_lines: list[str] = []
117
118	i: int = 0
119	while i < len(lines):
120		line: str = lines[i]
121
122		# Look for .PHONY lines
123		if line.strip().startswith(".PHONY:"):
124			# Store target name for later matching
125			target_name: str = line.split(":")[1].strip()
126
127			# Collect comments above until empty line
128			comment_lines: list[str] = []
129			look_back: int = i - 1
130			while look_back >= 0 and lines[look_back].strip():
131				if lines[look_back].strip().startswith("#"):
132					comment_lines.insert(0, lines[look_back])
133				look_back -= 1
134
135			# Add collected comments
136			output_lines.extend(comment_lines)
137
138			# Add .PHONY line
139			output_lines.append(line)
140
141			# Add target line (should be next)
142			if i + 1 < len(lines) and lines[i + 1].startswith(f"{target_name}:"):
143				output_lines.append(lines[i + 1])
144				i += 1
145
146				# Check for @echo on next line
147				if i + 1 < len(lines) and lines[i + 1].strip().startswith("@echo"):
148					output_lines.append(lines[i + 1])
149
150				output_lines.append("	...")
151				output_lines.append("")
152
153		i += 1
154
155	return "\n".join(output_lines)

Process a Makefile to show only target descriptions and basic structure.

Preserves:

  • Comments above .PHONY targets up to first empty line
  • The .PHONY line and target line
  • First line after target if it starts with @echo

Parameters:

  • path : Path Path to the Makefile to process

Returns:

  • str Processed Makefile content
@register_processor
def csv_preview_5_lines(path: pathlib.Path) -> str:
@register_processor
def csv_preview_5_lines(path: Path) -> str:
	"""Preview first few lines of a CSV file (up to 5)

	Reads only the first 1024 characters (the file is opened in text mode as
	UTF-8) and splits them into lines.
	Does not attempt to parse CSV structure.

	A final `... (truncated)` line is added whenever the preview does not
	show the whole file: either more text follows the first 1024 characters,
	or those characters contain more than 5 lines.

	# Parameters:
	- `path : Path`
	    Path to CSV file

	# Returns:
	- `str`
	    First few lines of the file, or an `Error previewing CSV: ...`
	    message if reading fails"""
	try:
		with path.open("r", encoding="utf-8") as f:
			content = f.read(1024)
			# peek one more character: a file of exactly 1024 characters
			# is complete and must not be reported as truncated
			has_more = bool(f.read(1))

		all_lines = content.splitlines()
		lines = all_lines[:5]
		if has_more or len(all_lines) > 5:
			lines.append("... (truncated)")

		return "\n".join(lines)
	except Exception as e:
		# deliberate best-effort: report the failure as text, do not raise
		return f"Error previewing CSV: {str(e)}"

Preview first few lines of a CSV file (up to 5)

Reads only the first 1024 characters (the file is opened in text mode) and splits them into lines. Does not attempt to parse CSV structure.

Parameters:

  • path : Path Path to CSV file

Returns:

  • str First few lines of the file