Coverage for src/slide_stream/parser.py: 97%

22 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-24 15:45 +0800

1"""Markdown parsing functionality for Slide Stream.""" 

2 

3from typing import Any 

4 

5import markdown 

6from bs4 import BeautifulSoup 

7 

8 

def parse_markdown(markdown_text: str) -> list[dict[str, Any]]:
    """Parse markdown text into slide data.

    The markdown is rendered to HTML and every ``<h1>`` element starts a
    new slide. The slide's content is gathered from the elements that
    follow the header at the same nesting level:

    * each ``<p>`` contributes its text as one content item;
    * the first ``<ul>``/``<ol>`` contributes one item per ``<li>`` it
      contains and ends the scan for that slide;
    * an ``<h1>``, ``<h2>`` or ``<h3>`` ends the scan without adding
      anything;
    * any other element is skipped.

    Args:
        markdown_text: Markdown source for the whole presentation.

    Returns:
        One dict per ``<h1>``, in document order, with the keys
        ``"title"`` (header text) and ``"content"`` (list of strings,
        possibly empty).
    """
    html = markdown.markdown(markdown_text)
    soup = BeautifulSoup(html, "html.parser")
    slides = []

    for header in soup.find_all("h1"):
        content_items = []

        sibling = header.find_next_sibling()
        while sibling:
            if sibling.name in ("ul", "ol"):
                # Extend rather than replace, so paragraphs collected
                # before the list are kept instead of silently dropped.
                content_items.extend(
                    item.get_text() for item in sibling.find_all("li")
                )
                break
            if sibling.name in ("h1", "h2", "h3"):
                # The next header marks the end of this slide's content.
                break
            if sibling.name == "p":
                content_items.append(sibling.get_text())
            sibling = sibling.find_next_sibling()

        slides.append({"title": header.get_text(), "content": content_items})

    return slides