| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- import fitz # PyMuPDF
- import sys
- import os
- import json
def convert_pdf_to_images(pdf_path, output_dir, zoom=2.0, quality=85):
    """Render every page of a PDF to a JPEG file inside ``output_dir``.

    Pages are rendered in RGB and written as ``page-<n>.jpg`` where ``n`` is
    the 1-based page number.

    Args:
        pdf_path: Path of the PDF document to open with PyMuPDF.
        output_dir: Directory that receives the images; created (including
            parents) when it does not exist yet.
        zoom: Scale factor applied to both axes. 2.0 means 200% scaling
            (approx. 144 DPI if the original is 72 DPI).
        quality: JPEG quality forwarded to ``Pixmap.tobytes``.

    Returns:
        On success::

            {"success": True,
             "images": [{"path": str, "pageIndex": int, "size": int}, ...],
             "totalPages": int}

        On any ``Exception``::

            {"success": False, "error": str}

        Errors are reported through the returned dict instead of being
        raised. Image files written before a failure are left on disk.
    """
    try:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_dir, exist_ok=True)

        # Scaling matrix (DPI control); built before the document is opened
        # so an invalid zoom fails without touching the PDF.
        mat = fitz.Matrix(zoom, zoom)

        images = []
        # The context manager closes the document even when rendering or
        # writing a page raises; the original only closed it on success.
        with fitz.open(pdf_path) as doc:
            for page_number, page in enumerate(doc, start=1):
                pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)

                output_path = os.path.join(output_dir, f"page-{page_number}.jpg")
                # Encode in memory so the JPEG quality can be set explicitly,
                # then write the bytes out ourselves.
                img_bytes = pix.tobytes("jpg", jpg_quality=quality)
                with open(output_path, "wb") as f:
                    f.write(img_bytes)

                images.append({
                    "path": output_path,
                    "pageIndex": page_number,
                    # Same value os.path.getsize() would report for the file
                    # just written, without an extra stat call.
                    "size": len(img_bytes),
                })

        return {
            "success": True,
            "images": images,
            "totalPages": len(images),
        }
    except Exception as e:
        # Boundary for the JSON-speaking CLI: surface the failure as data.
        return {
            "success": False,
            "error": str(e),
        }
def main(argv):
    """Run the PDF-to-images conversion from command-line arguments.

    Exactly one JSON object is printed to stdout in every case.

    Args:
        argv: Argument vector shaped like ``sys.argv``:
            ``[prog, pdf_path, output_dir, [zoom], [quality]]``.
            ``zoom`` defaults to 2.0 and ``quality`` to 85 when omitted.

    Returns:
        Process exit status: 1 when the arguments are missing or ``zoom`` /
        ``quality`` are not numeric, 0 otherwise. A failed conversion still
        returns 0; the failure is carried by ``"success": false`` in the
        printed JSON.
    """
    if len(argv) < 3:
        print(json.dumps({"success": False, "error": "Usage: python pdf_to_images.py <pdf_path> <output_dir> [zoom] [quality]"}))
        return 1

    pdf_path = argv[1]
    output_dir = argv[2]
    try:
        zoom = float(argv[3]) if len(argv) > 3 else 2.0
        quality = int(argv[4]) if len(argv) > 4 else 85
    except ValueError as exc:
        # Report malformed numbers as JSON, like the usage error above,
        # instead of crashing with a traceback and empty stdout.
        print(json.dumps({"success": False, "error": f"Invalid zoom or quality argument: {exc}"}))
        return 1

    result = convert_pdf_to_images(pdf_path, output_dir, zoom, quality)
    print(json.dumps(result))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|