"""File conversion utilities. Converts document files (PDF, PPT, Excel, Word) to Markdown using markitdown. No FastAPI or HTTP dependencies — pure utility functions. """ import logging from pathlib import Path logger = logging.getLogger(__name__) # File extensions that should be converted to markdown CONVERTIBLE_EXTENSIONS = { ".pdf", ".ppt", ".pptx", ".xls", ".xlsx", ".doc", ".docx", } async def convert_file_to_markdown(file_path: Path) -> Path | None: """Convert a file to markdown using markitdown. Args: file_path: Path to the file to convert. Returns: Path to the markdown file if conversion was successful, None otherwise. """ try: from markitdown import MarkItDown md = MarkItDown() result = md.convert(str(file_path)) # Save as .md file with same name md_path = file_path.with_suffix(".md") md_path.write_text(result.text_content, encoding="utf-8") logger.info(f"Converted {file_path.name} to markdown: {md_path.name}") return md_path except Exception as e: logger.error(f"Failed to convert {file_path.name} to markdown: {e}") return None