import io
import os
import subprocess
import time
from typing import List, Optional

from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import FileResponse, RedirectResponse
from PIL import Image  # Pillow library for image processing
from pydantic import BaseModel

# Service identity, shared by the app metadata, /health and /version.
SERVICE_NAME = "libreoffice-converter"
SERVICE_VERSION = "1.0.0"

# Upper bound for one external conversion. Extended to 10 minutes to support
# complex Markdown conversion. The 504 error message quotes this value.
CONVERSION_TIMEOUT_SECONDS = 600

# Extensions converted in-process with Pillow.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff', '.webp')

# Every extension accepted by /convert. Order matters: it is echoed verbatim
# in the "Unsupported file format" error message.
ALLOWED_EXTENSIONS = (
    '.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx',
    '.md', '.txt', '.rtf', '.odt', '.ods', '.odp',
) + IMAGE_EXTENSIONS


# Response models
class ConvertResponse(BaseModel):
    """Description of a conversion result (not used by the endpoints below)."""

    pdf_path: str
    converted: bool
    original: Optional[str] = None
    file_size: Optional[int] = None
    error: Optional[str] = None


class HealthResponse(BaseModel):
    """Payload returned by the /health endpoint."""

    status: str
    service: str
    version: str
    uptime: float  # seconds elapsed since this module was imported


# FastAPI Application
app = FastAPI(
    title="LibreOffice Document Conversion Service",
    description="Convert Word/PPT/Excel/PDF to PDF and support mixed content document processing",
    version=SERVICE_VERSION,
    docs_url="/docs",
    redoc_url="/redoc"
)

start_time = time.time()


def _safe_filename(raw_name: Optional[str]) -> str:
    """Return only the final path component of a client-supplied filename.

    The name comes from the upload request and is therefore untrusted: any
    directory part (with either ``/`` or ``\\`` separators) is dropped so the
    file can only ever be written inside the uploads directory. A missing
    name yields an empty string, which the caller rejects as an unsupported
    format.
    """
    if not raw_name:
        return ""
    return os.path.basename(raw_name.replace("\\", "/"))


def _image_to_pdf(image_path: str, pdf_path: str) -> None:
    """Render the image at ``image_path`` into a single-page PDF at ``pdf_path``."""
    with Image.open(image_path) as img:
        if img.mode in ('RGBA', 'LA', 'P'):
            # Flatten transparency onto a white background.
            background = Image.new('RGB', img.size, (255, 255, 255))
            if img.mode == 'P':
                img = img.convert('RGBA')
            # At this point the mode is RGBA or LA, so the last band is alpha.
            background.paste(img, mask=img.split()[-1])
            img = background
        elif img.mode != 'RGB':
            img = img.convert('RGB')
        img.save(pdf_path, 'PDF', resolution=100.0, save_all=False)


def _run_converter(cmd: List[str]) -> subprocess.CompletedProcess:
    """Run an external converter command, capturing its text output."""
    return subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        timeout=CONVERSION_TIMEOUT_SECONDS,
    )


def _combined_output(result: subprocess.CompletedProcess) -> str:
    """Join captured stdout and stderr into one string for error reporting."""
    combined = result.stdout if result.stdout else ""
    if result.stderr:
        combined += "\n" + result.stderr
    return combined


@app.get("/", include_in_schema=False)
async def root():
    """Redirect to documentation page"""
    return RedirectResponse(url="/docs")


@app.get("/health", response_model=HealthResponse)
async def health():
    """Health check interface"""
    return HealthResponse(
        status="healthy",
        service=SERVICE_NAME,
        version=SERVICE_VERSION,
        uptime=time.time() - start_time
    )


@app.post("/convert")
async def convert(file: UploadFile = File(...)):
    """
    Document conversion interface

    Saves the upload into the uploads directory and converts it to PDF:
    PDFs are returned as-is, images are converted with Pillow, Markdown is
    rendered by the ``/app/md_to_pdf.js`` Node.js script, and every other
    accepted format is passed to ``soffice --headless --convert-to pdf``.

    Returns:
        PDF file stream

    Raises:
        HTTPException: 400 for a missing or unsupported file extension,
            504 when the external converter exceeds the timeout, and 500 for
            a failed conversion, a missing output file, or any other error.
    """
    try:
        # File format validation (on the sanitized name only)
        safe_name = _safe_filename(file.filename)
        file_ext = os.path.splitext(safe_name)[1].lower()
        if file_ext not in ALLOWED_EXTENSIONS:
            raise HTTPException(
                status_code=400,
                detail=(
                    f"Unsupported file format: {file_ext}. "
                    f"Supported formats: {', '.join(ALLOWED_EXTENSIONS)}"
                )
            )

        # Check uploads directory existence
        upload_dir = "/app/uploads" if os.path.exists("/app/uploads") else "./uploads"
        os.makedirs(upload_dir, exist_ok=True)

        # Save uploaded file; read first so a failed read leaves no empty file
        filepath = os.path.join(upload_dir, safe_name)
        content = await file.read()
        with open(filepath, "wb") as buffer:
            buffer.write(content)

        # For PDF files, return directly without conversion
        if file_ext == '.pdf':
            return FileResponse(filepath, filename=safe_name, media_type='application/pdf')

        expected_pdf = os.path.splitext(filepath)[0] + '.pdf'
        download_name = f"{os.path.splitext(safe_name)[0]}.pdf"

        if file_ext in IMAGE_EXTENSIONS:
            # For image files, use Pillow to convert to PDF. Run it in the
            # thread pool so the event loop keeps serving other requests.
            await run_in_threadpool(_image_to_pdf, filepath, expected_pdf)

            # Verify PDF generation completed
            if not os.path.exists(expected_pdf):
                raise HTTPException(
                    status_code=500,
                    detail="Image to PDF conversion succeeded but output file not found"
                )
            return FileResponse(expected_pdf, filename=download_name, media_type='application/pdf')

        if file_ext == '.md':
            # Use Node.js script to render Markdown to PDF
            tool_label = "Node.js script"
            cmd = ['node', '/app/md_to_pdf.js', filepath, expected_pdf]
        else:
            # Conversion using LibreOffice
            tool_label = "LibreOffice"
            cmd = [
                'soffice', '--headless', '--convert-to', 'pdf',
                '--outdir', upload_dir, filepath
            ]

        # The subprocess may run for minutes; keep it off the event loop.
        result = await run_in_threadpool(_run_converter, cmd)

        # Display converter output for debugging
        combined_output = _combined_output(result)
        print(f"{tool_label} output: {combined_output}")

        if result.returncode != 0:
            print(f"Subprocess failed with return code: {result.returncode}")
            print(f"Subprocess output: {combined_output}")
            raise HTTPException(
                status_code=500,
                detail=f"Conversion failed: {combined_output}"
            )

        # Verify output file
        if not os.path.exists(expected_pdf):
            raise HTTPException(
                status_code=500,
                detail="Conversion succeeded but output file not found"
            )

        return FileResponse(expected_pdf, filename=download_name, media_type='application/pdf')

    except HTTPException:
        raise
    except subprocess.TimeoutExpired:
        raise HTTPException(
            status_code=504,
            detail=f"Conversion timeout ({CONVERSION_TIMEOUT_SECONDS} seconds)"
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/version")
async def version():
    """Version information"""
    return {
        "service": SERVICE_NAME,
        "version": SERVICE_VERSION,
        "framework": "FastAPI",
        "libreoffice": "7.x"
    }