Coverage for lmcat\processors.py: 38%

82 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-29 16:57 -0700

1import json 

2from typing import Callable, Sequence 

3from pathlib import Path 

4 

5 

# Type aliases
# ==================================================

# Registry keys are plain strings.
ProcessorName = str
DeciderName = str

# A processor maps a `Path` to a `str`; a decider maps a `Path` to a `bool`.
ProcessorFunc = Callable[[Path], str]
DeciderFunc = Callable[[Path], bool]


# Module-level registries, filled in by the `register_*` functions
# ==================================================

PROCESSORS: dict[ProcessorName, ProcessorFunc] = {}

DECIDERS: dict[DeciderName, DeciderFunc] = {}

22 

23 

24# register functions 

25# ================================================== 

26 

27 

def register_processor(func: ProcessorFunc) -> ProcessorFunc:
    """Add `func` to the global `PROCESSORS` registry under its `__name__`.

    The function is returned unchanged, so this works as a decorator.
    Registering a second function with the same name replaces the first.

    # Parameters:
     - `func : ProcessorFunc`
        Function to register

    # Returns:
     - `ProcessorFunc`
        The same `func` object that was passed in
    """
    key = ProcessorName(func.__name__)
    PROCESSORS[key] = func
    return func

32 

33 

def register_decider(func: DeciderFunc) -> DeciderFunc:
    """Add `func` to the global `DECIDERS` registry under its `__name__`.

    The function is returned unchanged, so this works as a decorator.
    Registering a second function with the same name replaces the first.

    # Parameters:
     - `func : DeciderFunc`
        Function to register

    # Returns:
     - `DeciderFunc`
        The same `func` object that was passed in
    """
    key = DeciderName(func.__name__)
    DECIDERS[key] = func
    return func

38 

39 

40# default deciders 

41# ================================================== 

@register_decider
def is_over_10kb(path: Path) -> bool:
    """Check if file is over 10KB.

    # Parameters:
     - `path : Path`
        Path to the file to measure; `Path.stat` is called on it

    # Returns:
     - `bool`
        `True` when the file size is strictly greater than 10 * 1024 bytes
    """
    # 10 KB expressed in bytes (1 KB taken as 1024 bytes)
    return path.stat().st_size > 10 * 1024

46 

47 

@register_decider
def is_documentation(path: Path) -> bool:
    """Check if file is documentation, judged only by its extension.

    The comparison ignores case, so `README.MD` and `notes.TXT` both count.

    # Parameters:
     - `path : Path`
        Path whose suffix is inspected (the file itself is not opened)

    # Returns:
     - `bool`
        `True` when the suffix is `.md`, `.rst` or `.txt`
    """
    return path.suffix.lower() in {".md", ".rst", ".txt"}

52 

53 

54# default processors 

55# ================================================== 

56 

57 

@register_processor
def remove_comments(path: Path) -> str:
    """Remove single-line comments from code.

    A line is dropped when its first non-whitespace character is `#`.
    Comments that trail code on the same line are left in place.

    # Parameters:
     - `path : Path`
        Path to the file to read

    # Returns:
     - `str`
        Remaining lines joined with `\\n`
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding (consistent with the other processors in this module).
    lines = path.read_text(encoding="utf-8").splitlines()
    kept = [line for line in lines if not line.lstrip().startswith("#")]
    return "\n".join(kept)

64 

65 

@register_processor
def compress_whitespace(path: Path) -> str:
    """Compress multiple whitespace characters into single spaces.

    Every run of whitespace (newlines and tabs included) becomes one space,
    and leading/trailing whitespace is removed.

    # Parameters:
     - `path : Path`
        Path to the file to read

    # Returns:
     - `str`
        File content as a single line of space-separated tokens
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding (consistent with the other processors in this module).
    text = path.read_text(encoding="utf-8")
    return " ".join(text.split())

70 

71 

@register_processor
def to_relative_path(path: Path) -> str:
    """Return `path` as a string with forward slashes.

    The path is formatted exactly as given via `Path.as_posix`; it is not
    resolved or re-based here.

    # Parameters:
     - `path : Path`
        Path to format

    # Returns:
     - `str`
        POSIX-style string form of `path`
    """
    posix_text: str = path.as_posix()
    return posix_text

76 

77 

78@register_processor 

79def ipynb_to_md(path: Path) -> str: 

80 """Convert an IPython notebook to markdown.""" 

81 nb_contents: dict = json.loads(path.read_text(encoding="utf-8")) 

82 

83 output: list[str] = [] 

84 

85 for cell in nb_contents["cells"]: 

86 if cell["cell_type"] == "markdown": 

87 output.extend(cell["source"]) 

88 output.append("\n\n") 

89 elif cell["cell_type"] == "code": 

90 output.append("```python\n") 

91 output.extend(cell["source"]) 

92 output.append("\n```\n\n") 

93 

94 return "".join(output) 

95 

96 

97@register_processor 

98def makefile_recipes(path: Path) -> str: 

99 """Process a Makefile to show only target descriptions and basic structure. 

100 

101 Preserves: 

102 - Comments above .PHONY targets up to first empty line 

103 - The .PHONY line and target line 

104 - First line after target if it starts with @echo 

105 

106 # Parameters: 

107 - `path : Path` 

108 Path to the Makefile to process 

109 

110 # Returns: 

111 - `str` 

112 Processed Makefile content 

113 """ 

114 lines: Sequence[str] = path.read_text().splitlines() 

115 output_lines: list[str] = [] 

116 

117 i: int = 0 

118 while i < len(lines): 

119 line: str = lines[i] 

120 

121 # Look for .PHONY lines 

122 if line.strip().startswith(".PHONY:"): 

123 # Store target name for later matching 

124 target_name: str = line.split(":")[1].strip() 

125 

126 # Collect comments above until empty line 

127 comment_lines: list[str] = [] 

128 look_back: int = i - 1 

129 while look_back >= 0 and lines[look_back].strip(): 

130 if lines[look_back].strip().startswith("#"): 

131 comment_lines.insert(0, lines[look_back]) 

132 look_back -= 1 

133 

134 # Add collected comments 

135 output_lines.extend(comment_lines) 

136 

137 # Add .PHONY line 

138 output_lines.append(line) 

139 

140 # Add target line (should be next) 

141 if i + 1 < len(lines) and lines[i + 1].startswith(f"{target_name}:"): 

142 output_lines.append(lines[i + 1]) 

143 i += 1 

144 

145 # Check for @echo on next line 

146 if i + 1 < len(lines) and lines[i + 1].strip().startswith("@echo"): 

147 output_lines.append(lines[i + 1]) 

148 

149 output_lines.append(" ...") 

150 output_lines.append("") 

151 

152 i += 1 

153 

154 return "\n".join(output_lines) 

155 

156 

157@register_processor 

158def csv_preview_5_lines(path: Path) -> str: 

159 """Preview first few lines of a CSV file (up to 5) 

160 

161 Reads only first 1024 bytes and splits into lines. 

162 Does not attempt to parse CSV structure. 

163 

164 # Parameters: 

165 - `path : Path` 

166 Path to CSV file 

167 

168 # Returns: 

169 - `str` 

170 First few lines of the file""" 

171 try: 

172 with path.open("r", encoding="utf-8") as f: 

173 content = f.read(1024) 

174 

175 lines = content.splitlines()[:5] 

176 if len(content) == 1024: 

177 lines.append("... (truncated)") 

178 

179 return "\n".join(lines) 

180 except Exception as e: 

181 return f"Error previewing CSV: {str(e)}"