112 lines
3.9 KiB
Python
112 lines
3.9 KiB
Python
"""
|
|
PPTX to PDF converter using LibreOffice for high-quality conversion
|
|
This module provides the essential function to convert PowerPoint presentations to PDF
|
|
using LibreOffice headless mode for professional-grade quality.
|
|
|
|
The converted PDF is then processed by the main upload workflow for 4K image generation.
|
|
"""
|
|
|
|
import os
|
|
import subprocess
|
|
import logging
|
|
import signal
|
|
import time
|
|
|
|
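# Set up logging: configure the root logger at INFO level when this module is imported, then create the module's own logger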
|
|
logging.basicConfig(level=logging.INFO)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def cleanup_libreoffice_processes():
    """Clean up any hanging LibreOffice processes (best effort).

    Runs ``pkill -f soffice``, which signals every process whose command
    line matches ``soffice``. Problems are logged as warnings and never
    raised, so callers can invoke this unconditionally.

    NOTE(review): the pattern is not restricted to processes started by this
    module, so a conversion running concurrently elsewhere on the host would
    be terminated too -- confirm that conversions are serialized.
    """
    try:
        outcome = subprocess.run(
            ['pkill', '-f', 'soffice'], capture_output=True, timeout=10
        )
    except (OSError, subprocess.SubprocessError) as e:
        # OSError: pkill is missing or not executable.
        # SubprocessError: pkill itself exceeded the 10 second timeout.
        logger.warning(f"Failed to cleanup LibreOffice processes: {e}")
        return

    if outcome.returncode == 0:
        # Status 0 means at least one process was signalled; only then is
        # there anything to wait for.
        time.sleep(1)  # Give processes time to terminate
    elif outcome.returncode != 1:
        # Status 1 just means "nothing matched"; anything else is a pkill
        # error that would otherwise go unnoticed.
        logger.warning(
            "Failed to cleanup LibreOffice processes: "
            f"pkill exited with status {outcome.returncode}"
        )
|
|
|
|
|
|
def pptx_to_pdf_libreoffice(pptx_path, output_dir, *, timeout=300):
    """
    Convert PPTX to PDF using LibreOffice for highest quality.

    This function is the core component of the PPTX processing workflow:
    PPTX → PDF (this function) → 4K JPG images (handled in uploads.py)

    LibreOffice is run headless as a subprocess. Lingering LibreOffice
    processes are cleaned up immediately before the conversion and again
    once it has finished, failed or timed out. Every failure is logged and
    reported as ``None``; no exception escapes this function.

    Args:
        pptx_path (str): Path to the PPTX file
        output_dir (str): Directory to save the PDF (created if missing)
        timeout (int | float): Maximum number of seconds to wait for
            LibreOffice. Defaults to 300 (5 minutes) to allow for large
            presentations.

    Returns:
        str: Path to the generated PDF file, or None if conversion failed
    """
    try:
        # An absolute path never starts with "-", so LibreOffice cannot
        # mistake an oddly named file for a command-line switch.
        source_path = os.path.abspath(pptx_path)
        if not os.path.isfile(source_path):
            logger.error(f"PPTX file not found: {pptx_path}")
            return None

        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)

        # LibreOffice names the output after the input file's stem.
        base_name = os.path.splitext(os.path.basename(source_path))[0]
        pdf_path = os.path.join(output_dir, f"{base_name}.pdf")

        # Remember the timestamp of any PDF left over from an earlier run so
        # that it is not mistaken for the result of this conversion.
        try:
            stale_mtime = os.stat(pdf_path).st_mtime_ns
        except OSError:
            stale_mtime = None

        # Use LibreOffice to convert PPTX to PDF
        cmd = [
            'libreoffice',
            '--headless',
            '--convert-to', 'pdf',
            '--outdir', output_dir,
            '--invisible',  # Run without any UI
            '--nodefault',  # Don't start with default template
            source_path,
        ]

        # Clean up any existing LibreOffice processes
        cleanup_libreoffice_processes()
        try:
            logger.info(f"Converting PPTX to PDF using LibreOffice: {pptx_path}")
            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=timeout
            )
        finally:
            # Runs on success, failure and timeout alike.
            cleanup_libreoffice_processes()

        if result.returncode != 0:
            logger.error(f"LibreOffice conversion failed: {result.stderr}")
            logger.error(f"LibreOffice stdout: {result.stdout}")
            return None

        # Find the generated PDF file
        try:
            new_mtime = os.stat(pdf_path).st_mtime_ns
        except OSError:
            logger.error(f"PDF file not found after conversion: {pdf_path}")
            return None

        if new_mtime == stale_mtime:
            # A zero exit status alone does not prove that a file was
            # written; an untouched pre-existing PDF means nothing was
            # produced by this run.
            logger.error(f"PDF file was not updated by conversion: {pdf_path}")
            logger.error(f"LibreOffice stdout: {result.stdout}")
            return None

        logger.info(f"PDF conversion successful: {pdf_path}")
        return pdf_path

    except subprocess.TimeoutExpired:
        logger.error(f"LibreOffice conversion timed out ({timeout}s)")
        return None
    except Exception as e:
        # Boundary handler: callers rely on None instead of an exception.
        # logger.exception records the traceback along with the message.
        logger.exception(f"Error in PPTX to PDF conversion: {e}")
        return None
|
|
|
|
|
|
if __name__ == "__main__":
    # Test the converter from the command line:
    #     python pptx_converter.py <pptx_file>
    # The PDF is written to ./test_output. The exit status is 0 on success,
    # 2 on wrong usage and 1 on any other failure, so shell callers can
    # detect a failed conversion.
    import sys

    if len(sys.argv) < 2:
        print("Usage: python pptx_converter.py <pptx_file>")
        sys.exit(2)

    test_pptx = sys.argv[1]
    if not os.path.exists(test_pptx):
        print(f"File not found: {test_pptx}")
        sys.exit(1)

    output_dir = "test_output"
    pdf_result = pptx_to_pdf_libreoffice(test_pptx, output_dir)
    if not pdf_result:
        print("PPTX to PDF conversion failed")
        sys.exit(1)

    print(f"Successfully converted PPTX to PDF: {pdf_result}")
|