- Changed ownership of all files to scheianu:scheianu
- Set directories to 755 permissions (rwxr-xr-x)
- Set files to 644 permissions (rw-r--r--)
- Made shell scripts executable (755)
- Allows development without requiring sudo for file modifications
- Improves development workflow and security
107 lines
3.3 KiB
Python
107 lines
3.3 KiB
Python
"""PowerPoint to PDF converter using LibreOffice."""
|
|
import os
|
|
import subprocess
|
|
import time
|
|
import logging
|
|
from typing import Optional
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def cleanup_libreoffice_processes() -> None:
    """Clean up any hanging LibreOffice processes.

    Runs ``pkill -f soffice`` with a 10-second timeout. When pkill reports
    that it matched at least one process, waits one second so the signalled
    processes have time to terminate; when nothing matched, returns
    immediately instead of sleeping for no reason.

    This is best-effort: any failure (for example pkill missing or timing
    out) is logged as a warning and never raised to the caller.
    """
    try:
        result = subprocess.run(['pkill', '-f', 'soffice'], capture_output=True, timeout=10)
        # pkill exits with status 0 only when at least one process matched;
        # any other status means there is nothing to wait for.
        if result.returncode == 0:
            time.sleep(1)  # Give processes time to terminate
    except Exception as e:
        logger.warning(f"Failed to cleanup LibreOffice processes: {e}")
|
|
|
|
|
|
def pptx_to_pdf_libreoffice(pptx_path: str, output_dir: str, timeout: float = 300) -> Optional[str]:
    """Convert PPTX to PDF using LibreOffice for highest quality.

    This function is the core component of the PPTX processing workflow:
    PPTX → PDF (this function) → JPG images (handled in uploads.py)

    LibreOffice processes are cleaned up before the conversion starts and
    once more on every exit path (success, failure, timeout, exception).

    Args:
        pptx_path: Path to the PPTX file
        output_dir: Directory to save the PDF (created if it does not exist)
        timeout: Maximum number of seconds to wait for LibreOffice. The
            default of 300 (5 minutes) leaves room for large presentations.

    Returns:
        Path to the generated PDF file, or None if conversion failed
    """
    try:
        # Clean up any existing LibreOffice processes
        cleanup_libreoffice_processes()

        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)

        # Hand LibreOffice an absolute path: a relative file name that starts
        # with "-" could otherwise be parsed as a command-line option.
        source_path = os.path.abspath(pptx_path)

        # The converted file is expected at <output_dir>/<input base name>.pdf
        base_name = os.path.splitext(os.path.basename(pptx_path))[0]
        pdf_path = os.path.join(output_dir, f"{base_name}.pdf")

        # Success is detected by the existence of pdf_path, so a PDF left over
        # from an earlier run must not survive a conversion that writes nothing.
        # The guard keeps the input itself from ever being removed.
        if os.path.isfile(pdf_path) and os.path.abspath(pdf_path) != source_path:
            try:
                os.remove(pdf_path)
            except OSError as e:
                # Best-effort: carry on and let the conversion try to overwrite it
                logger.warning(f"Could not remove stale PDF {pdf_path}: {e}")

        # Use LibreOffice to convert PPTX to PDF
        cmd = [
            'libreoffice',
            '--headless',
            '--convert-to', 'pdf',
            '--outdir', output_dir,
            '--invisible',
            '--nodefault',
            source_path,
        ]

        logger.info(f"Converting PPTX to PDF using LibreOffice: {pptx_path}")

        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)

        if result.returncode != 0:
            logger.error(f"LibreOffice conversion failed: {result.stderr}")
            logger.error(f"LibreOffice stdout: {result.stdout}")
            return None

        if not os.path.exists(pdf_path):
            logger.error(f"PDF file not found after conversion: {pdf_path}")
            return None

        logger.info(f"PDF conversion successful: {pdf_path}")
        return pdf_path

    except subprocess.TimeoutExpired:
        logger.error(f"LibreOffice conversion timed out ({timeout}s)")
        return None

    except Exception as e:
        # logger.exception keeps the traceback alongside the message
        logger.exception(f"Error in PPTX to PDF conversion: {e}")
        return None

    finally:
        # Single cleanup point for every exit path, including interrupts
        cleanup_libreoffice_processes()
|
|
|
|
|
|
def validate_pptx_file(filepath: str) -> bool:
    """Validate if file is a plausible PowerPoint file.

    The check is shallow: the path must refer to a regular file, its name
    must end in ``.ppt`` or ``.pptx`` (case-insensitive), and the file must
    not be empty. The file contents are not inspected.

    Args:
        filepath: Path to file to validate

    Returns:
        True if all checks pass, False otherwise
    """
    # Must be a regular file; a directory named "*.pptx" is not a presentation
    if not os.path.isfile(filepath):
        return False

    # Check file extension
    if not filepath.lower().endswith(('.ppt', '.pptx')):
        return False

    # Check file size (must be > 0); a file that vanished or cannot be
    # stat'ed between the checks counts as invalid rather than raising
    try:
        return os.path.getsize(filepath) > 0
    except OSError:
        return False
|