Coverage for src/slide_stream/parser.py: 97%
22 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-24 15:45 +0800
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-24 15:45 +0800
1"""Markdown parsing functionality for Slide Stream."""
3from typing import Any
5import markdown
6from bs4 import BeautifulSoup
def parse_markdown(markdown_text: str) -> list[dict[str, Any]]:
    """Parse markdown text into slide data.

    The markdown is rendered to HTML and every ``<h1>`` element starts a new
    slide. A slide's content is gathered from the elements that follow the
    heading at the same nesting level:

    * each ``<p>`` contributes its text as one content item;
    * the first ``<ul>``/``<ol>`` contributes the text of every ``<li>`` it
      contains (nested items included) and ends the slide;
    * an ``<h1>``, ``<h2>`` or ``<h3>`` ends the slide;
    * any other element is skipped.

    Args:
        markdown_text: Markdown source to convert.

    Returns:
        One ``{"title": str, "content": list[str]}`` dict per ``<h1>``, in
        document order. The list is empty when the input has no ``<h1>``.
    """
    html = markdown.markdown(markdown_text)
    soup = BeautifulSoup(html, "html.parser")
    slides: list[dict[str, Any]] = []

    for header in soup.find_all("h1"):
        content_items: list[str] = []

        for sibling in header.find_next_siblings():
            if sibling.name in ("ul", "ol"):
                # Extend rather than rebind, so paragraphs collected before
                # the list are kept instead of being overwritten.
                content_items.extend(
                    item.get_text() for item in sibling.find_all("li")
                )
                break
            if sibling.name in ("h1", "h2", "h3"):
                # The next heading starts a different slide or sub-section.
                break
            if sibling.name == "p":
                content_items.append(sibling.get_text())

        slides.append({"title": header.get_text(), "content": content_items})

    return slides