""" PPTX to PDF converter using LibreOffice for high-quality conversion This module provides the essential function to convert PowerPoint presentations to PDF using LibreOffice headless mode for professional-grade quality. The converted PDF is then processed by the main upload workflow for 4K image generation. """ import os import subprocess import logging import signal import time # Set up logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def cleanup_libreoffice_processes(): """Clean up any hanging LibreOffice processes""" try: subprocess.run(['pkill', '-f', 'soffice'], capture_output=True, timeout=10) time.sleep(1) # Give processes time to terminate except Exception as e: logger.warning(f"Failed to cleanup LibreOffice processes: {e}") def pptx_to_pdf_libreoffice(pptx_path, output_dir): """ Convert PPTX to PDF using LibreOffice for highest quality. This function is the core component of the PPTX processing workflow: PPTX → PDF (this function) → 4K JPG images (handled in uploads.py) Args: pptx_path (str): Path to the PPTX file output_dir (str): Directory to save the PDF Returns: str: Path to the generated PDF file, or None if conversion failed """ try: # Clean up any existing LibreOffice processes cleanup_libreoffice_processes() # Ensure output directory exists os.makedirs(output_dir, exist_ok=True) # Use LibreOffice to convert PPTX to PDF cmd = [ 'libreoffice', '--headless', '--convert-to', 'pdf', '--outdir', output_dir, '--invisible', # Run without any UI '--nodefault', # Don't start with default template pptx_path ] logger.info(f"Converting PPTX to PDF using LibreOffice: {pptx_path}") # Increase timeout to 300 seconds (5 minutes) for large presentations result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) if result.returncode != 0: logger.error(f"LibreOffice conversion failed: {result.stderr}") logger.error(f"LibreOffice stdout: {result.stdout}") cleanup_libreoffice_processes() # Clean up on failure return None # Find the generated PDF file base_name = os.path.splitext(os.path.basename(pptx_path))[0] pdf_path = os.path.join(output_dir, f"{base_name}.pdf") if os.path.exists(pdf_path): logger.info(f"PDF conversion successful: {pdf_path}") cleanup_libreoffice_processes() # Clean up after success return pdf_path else: logger.error(f"PDF file not found after conversion: {pdf_path}") cleanup_libreoffice_processes() # Clean up on failure return None except subprocess.TimeoutExpired: logger.error("LibreOffice conversion timed out (300s)") cleanup_libreoffice_processes() # Clean up on timeout return None except Exception as e: logger.error(f"Error in PPTX to PDF conversion: {e}") import traceback logger.error(f"Traceback: {traceback.format_exc()}") cleanup_libreoffice_processes() # Clean up on error return None if __name__ == "__main__": # Test the converter import sys if len(sys.argv) > 1: test_pptx = sys.argv[1] if os.path.exists(test_pptx): output_dir = "test_output" pdf_result = pptx_to_pdf_libreoffice(test_pptx, output_dir) if pdf_result: print(f"Successfully converted PPTX to PDF: {pdf_result}") else: print("PPTX to PDF conversion failed") else: print(f"File not found: {test_pptx}") else: print("Usage: python pptx_converter.py ")