Files
digiserver-v2/app/utils/pptx_converter.py
Quality App Developer a4262da7c9 chore: fix file permissions and ownership across project
- Changed ownership of all files to scheianu:scheianu
- Set directories to 755 permissions (rwxr-xr-x)
- Set files to 644 permissions (rw-r--r--)
- Made shell scripts executable (755)
- Allows development without requiring sudo for file modifications
- Improves development workflow and security
2026-01-15 22:39:51 +02:00

107 lines
3.3 KiB
Python

"""PowerPoint to PDF converter using LibreOffice."""
import os
import subprocess
import time
import logging
from typing import Optional
logger = logging.getLogger(__name__)
def cleanup_libreoffice_processes() -> None:
    """Kill any hanging LibreOffice processes (best effort).

    Runs ``pkill -f soffice`` and, only when pkill reports that it actually
    signalled at least one process, pauses for one second so those processes
    have time to exit. Any failure (pkill missing, timeout, ...) is logged as
    a warning and never raised to the caller.
    """
    try:
        result = subprocess.run(['pkill', '-f', 'soffice'], capture_output=True, timeout=10)
    except Exception as e:
        logger.warning(f"Failed to cleanup LibreOffice processes: {e}")
        return

    # pkill exits with 0 only when it matched (and signalled) a process; any
    # other status means nothing was killed, so there is nothing to wait for.
    if result.returncode == 0:
        time.sleep(1)  # Give processes time to terminate
def pptx_to_pdf_libreoffice(
    pptx_path: str,
    output_dir: str,
    timeout: float = 300,
) -> Optional[str]:
    """Convert PPTX to PDF using LibreOffice for highest quality.

    This function is the core component of the PPTX processing workflow:
    PPTX → PDF (this function) → JPG images (handled in uploads.py)

    Stray LibreOffice processes are killed before the conversion starts and
    again once it has finished, whatever the outcome. The function never
    raises for conversion problems; every failure is logged and reported as
    None.

    Args:
        pptx_path: Path to the PPTX file.
        output_dir: Directory to save the PDF; created if it does not exist.
        timeout: Maximum number of seconds to wait for LibreOffice. The
            default of 300 (5 minutes) leaves room for large presentations.

    Returns:
        Path to the generated PDF file, or None if conversion failed.
    """
    # Fail fast: without an input file there is no reason to kill running
    # LibreOffice processes or to launch a conversion at all.
    try:
        source_exists = os.path.isfile(pptx_path)
    except TypeError:
        # e.g. pptx_path is None; treat it like any other unusable input.
        source_exists = False
    if not source_exists:
        logger.error(f"PPTX file not found: {pptx_path}")
        return None

    try:
        # Clean up any existing LibreOffice processes
        cleanup_libreoffice_processes()

        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)

        # Use LibreOffice to convert PPTX to PDF
        cmd = [
            'libreoffice',
            '--headless',
            '--convert-to', 'pdf',
            '--outdir', output_dir,
            '--invisible',
            '--nodefault',
            pptx_path,
        ]

        logger.info(f"Converting PPTX to PDF using LibreOffice: {pptx_path}")
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)

        if result.returncode != 0:
            logger.error(f"LibreOffice conversion failed: {result.stderr}")
            logger.error(f"LibreOffice stdout: {result.stdout}")
            return None

        # LibreOffice names the output after the input file, with a .pdf
        # extension, inside output_dir.
        # NOTE(review): a PDF left in output_dir by an earlier run with the
        # same base name would also satisfy the check below - confirm callers
        # use a fresh directory or unique file names.
        base_name = os.path.splitext(os.path.basename(pptx_path))[0]
        pdf_path = os.path.join(output_dir, f"{base_name}.pdf")

        # A zero exit status alone is not trusted; the file must exist.
        if not os.path.exists(pdf_path):
            logger.error(f"PDF file not found after conversion: {pdf_path}")
            return None

        logger.info(f"PDF conversion successful: {pdf_path}")
        return pdf_path

    except subprocess.TimeoutExpired:
        logger.error(f"LibreOffice conversion timed out ({timeout}s)")
        return None
    except Exception as e:
        # logger.exception keeps the traceback for unexpected failures.
        logger.exception(f"Error in PPTX to PDF conversion: {e}")
        return None
    finally:
        # Single cleanup point for every exit path out of the conversion.
        cleanup_libreoffice_processes()
def validate_pptx_file(filepath: str) -> bool:
    """Check that a path points to a non-empty PowerPoint file.

    This is a cheap sanity check on the path only; the file's contents are
    not inspected.

    Args:
        filepath: Path to file to validate

    Returns:
        True if filepath is an existing, non-empty regular file whose name
        ends in .ppt or .pptx (case-insensitive), False otherwise
    """
    # isfile (rather than exists) rejects directories that merely carry a
    # PowerPoint-looking name.
    if not os.path.isfile(filepath):
        return False

    # Check file extension
    if not filepath.lower().endswith(('.ppt', '.pptx')):
        return False

    # Check file size (must be > 0). The file can vanish between the checks
    # above and this call; report that as invalid instead of raising.
    try:
        return os.path.getsize(filepath) > 0
    except OSError:
        return False