import fitz  # PyMuPDF
import sys
import os
import json


def convert_pdf_to_images(pdf_path: str, output_dir: str,
                          zoom: float = 2.0, quality: int = 85) -> dict:
    """Render every page of a PDF to a JPEG file in ``output_dir``.

    Pages are written as ``page-1.jpg``, ``page-2.jpg``, ... (1-based).

    Args:
        pdf_path: Path of the PDF document to open.
        output_dir: Directory that receives the images; created if missing.
        zoom: Scale factor applied on both axes. 2.0 means 200% scaling
            (approx 144 DPI if the original is 72 DPI).
        quality: JPEG quality passed to PyMuPDF as ``jpg_quality``.

    Returns:
        On success: ``{"success": True, "images": [...], "totalPages": n}``
        where each image entry has ``path``, ``pageIndex`` (1-based) and
        ``size`` (bytes written). On any failure:
        ``{"success": False, "error": "<message>"}``. This function never
        raises for ordinary errors; they are reported through the dict.
    """
    try:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_dir, exist_ok=True)

        images = []
        # Matrix for scaling (DPI control).
        mat = fitz.Matrix(zoom, zoom)

        # The context manager closes the document even if a page fails to
        # render or an image cannot be written.
        with fitz.open(pdf_path) as doc:
            for page_number, page in enumerate(doc, start=1):
                pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
                output_path = os.path.join(output_dir, f"page-{page_number}.jpg")

                # In newer PyMuPDF, save() doesn't take quality, so encode
                # with tobytes() and write the bytes ourselves.
                img_bytes = pix.tobytes("jpg", jpg_quality=quality)
                with open(output_path, "wb") as f:
                    f.write(img_bytes)

                images.append({
                    "path": output_path,
                    "pageIndex": page_number,
                    # Equals the on-disk size of the file just written.
                    "size": len(img_bytes),
                })

        return {
            "success": True,
            "images": images,
            "totalPages": len(images),
        }
    except Exception as e:
        # Boundary handler: callers consume the JSON-style result, so every
        # failure is reported in the dict instead of propagating.
        return {
            "success": False,
            "error": str(e),
        }


def main(argv=None) -> int:
    """Command-line entry point; prints a JSON result to stdout.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        Process exit code: 1 for a usage or argument-parsing error,
        otherwise 0 (conversion failures are reported in the JSON output).
    """
    argv = sys.argv if argv is None else argv

    if len(argv) < 3:
        print(json.dumps({
            "success": False,
            "error": "Usage: python pdf_to_images.py <pdf_path> <output_dir> [zoom] [quality]",
        }))
        return 1

    pdf_path = argv[1]
    output_dir = argv[2]
    try:
        zoom = float(argv[3]) if len(argv) > 3 else 2.0
        quality = int(argv[4]) if len(argv) > 4 else 85
    except ValueError as e:
        # Keep the stdout contract: always JSON, never a bare traceback.
        print(json.dumps({
            "success": False,
            "error": f"Invalid zoom/quality argument: {e}",
        }))
        return 1

    result = convert_pdf_to_images(pdf_path, output_dir, zoom, quality)
    print(json.dumps(result))
    return 0


if __name__ == "__main__":
    sys.exit(main())