// md_to_pdf.js
  const fs = require('fs');
  const { execSync } = require('child_process');
  const path = require('path');
  const puppeteer = require('puppeteer');

  // CLI contract: node md_to_pdf.js <input_md_path> <output_pdf_path>
  // The first two argv slots (node binary, script path) are skipped.
  const [, , inputPath, outputPath] = process.argv;

  // Startup diagnostics, printed before validation so that a bad invocation
  // still shows which arguments were actually received.
  console.log('=== MD to PDF Converter Starting ===');
  const startupInfo = [
    ['Node.js version:', process.version],
    ['Working directory:', process.cwd()],
    ['Input path:', inputPath],
    ['Output path:', outputPath],
  ];
  for (const [label, value] of startupInfo) {
    console.log(label, value);
  }

  // Both paths are mandatory; print the usage line and exit non-zero otherwise.
  const hasBothPaths = Boolean(inputPath) && Boolean(outputPath);
  if (!hasBothPaths) {
    console.error('Usage: node md_to_pdf.js <input_md_path> <output_pdf_path>');
    process.exit(1);
  }

  console.log(`Processing Markdown: ${inputPath}`);
  // ---------------------------------------------------------------------------
  // Markdown -> HTML helpers
  // ---------------------------------------------------------------------------

  // Tokens that stand in for math source while marked processes the document.
  // The suffix terminates the numeric id so that token 1 is never a prefix of
  // token 10 and a digit following the math in the text cannot extend the id.
  const MATH_PLACEHOLDER_PREFIX = 'MATHBLOCK_PLACEHOLDER_';
  const MATH_PLACEHOLDER_SUFFIX = '_END';

  // PDF options shared by the primary and the fallback rendering paths.
  const PDF_OPTIONS = {
    format: 'A4',
    printBackground: true,
    scale: 0.75, // Scale down to fit more content
    margin: { top: '10mm', right: '10mm', bottom: '10mm', left: '10mm' },
  };

  /**
   * Resolves after `ms` milliseconds.
   * @param {number} ms
   * @returns {Promise<void>}
   */
  function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Escapes the characters that would otherwise be parsed as markup when plain
   * text is inserted into HTML.
   * @param {string} text
   * @returns {string}
   */
  function escapeHtml(text) {
    return text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  }

  /**
   * Replaces every `$$...$$` and single-line `$...$` span with a placeholder
   * token so the Markdown parser cannot mangle the TeX source.
   * @param {string} markdown Raw Markdown text.
   * @returns {{ text: string, blocks: string[] }} The Markdown with tokens in
   *   place of math, and the original math spans indexed by token id.
   */
  function protectMath(markdown) {
    const blocks = [];
    const stash = (source) => {
      const token = `${MATH_PLACEHOLDER_PREFIX}${blocks.length}${MATH_PLACEHOLDER_SUFFIX}`;
      blocks.push(source);
      return token;
    };
    // Display math goes first so "$$" is not consumed as two inline delimiters.
    const text = markdown
      .replace(/\$\$([\s\S]*?)\$\$/g, (match) => stash(match))
      .replace(/\$([^\$\n]+?)\$/g, (match) => stash(match));
    return { text, blocks };
  }

  /**
   * Puts the protected math spans back into the rendered HTML.
   *
   * A replacer function is used on purpose: with a string replacement,
   * `String.prototype.replace` interprets `$$`, `$&`, `$'` etc. inside the
   * replacement, which turned `$$...$$` back into `$...$`. The math source is
   * HTML-escaped because it is plain text being inserted into markup.
   * @param {string} html HTML produced from the placeholder-bearing Markdown.
   * @param {string[]} blocks Math spans as returned by `protectMath`.
   * @returns {string}
   */
  function restoreMath(html, blocks) {
    const tokenPattern = new RegExp(
      `${MATH_PLACEHOLDER_PREFIX}(\\d+)${MATH_PLACEHOLDER_SUFFIX}`,
      'g'
    );
    return html.replace(tokenPattern, (token, id) => {
      const index = Number(id);
      return index < blocks.length ? escapeHtml(blocks[index]) : token;
    });
  }

  /**
   * Rewrites fenced `mermaid` code blocks (as emitted by marked) into
   * `<div class="mermaid">` containers holding the un-escaped diagram source.
   * @param {string} html
   * @returns {string}
   */
  function convertMermaidBlocks(html) {
    return html.replace(
      /<pre><code class="language-mermaid">([\s\S]*?)<\/code><\/pre>/g,
      (match, content) => {
        // "&amp;" is decoded last so already-decoded text is not decoded twice.
        const source = content
          .replace(/&quot;/g, '"')
          .replace(/&#39;/g, "'")
          .replace(/&gt;/g, '>')
          .replace(/&lt;/g, '<')
          .replace(/&amp;/g, '&');
        return `<div class="mermaid">${source}</div>`;
      }
    );
  }

  /**
   * Wraps rendered Markdown in the full HTML document that is printed to PDF.
   * The MathJax configuration and loader appear exactly once, and the Mermaid
   * fallback stub is flagged with `isStub` so callers can tell it apart from
   * the real library.
   * @param {string} bodyHtml
   * @returns {string}
   */
  function buildHtmlDocument(bodyHtml) {
    return `
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>Document</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/github-markdown-css/5.2.0/github-markdown-light.min.css">
<!-- Mermaid -->
<script src="https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js"></script>
<script>
// Minimal stub to prevent errors when mermaid is referenced but not available
if (typeof window.mermaid === 'undefined') {
window.mermaid = {
isStub: true,
initialize: function() {},
init: function() {},
render: function() {}
};
}
</script>
<!-- MathJax configuration and library -->
<script>
window.MathJax = {
tex: {
inlineMath: [['$', '$'], ['\\\\(', '\\\\)']],
displayMath: [['$$', '$$'], ['\\\\[', '\\\\]']],
processEscapes: false
},
startup: {
pageReady: () => {
return MathJax.startup.defaultPageReady().then(() => {
const div = document.createElement('div');
div.id = 'mathjax-finished';
div.style.display = 'none';
document.body.appendChild(div);
});
}
}
};
</script>
<script id="MathJax-script" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"></script>
<style>
body {
box-sizing: border-box;
margin: 0 auto;
padding: 20px;
}
.mermaid {
display: flex;
justify-content: center;
margin: 20px 0;
}
table {
width: 100% !important;
display: table !important;
}
</style>
</head>
<body class="markdown-body">
${bodyHtml}
<script>
// Initialize mermaid if it's available
if (typeof mermaid !== 'undefined') {
mermaid.initialize({ startOnLoad: true, theme: 'default', securityLevel: 'loose' });
} else {
console.log('Mermaid library not loaded, skipping initialization');
}
</script>
</body>
</html>`;
  }

  /**
   * Builds the data: URL used to load the generated document into a page.
   * @param {string} html
   * @returns {string}
   */
  function toDataUrl(html) {
    return `data:text/html;charset=UTF-8,${encodeURIComponent(html)}`;
  }

  // ---------------------------------------------------------------------------
  // Puppeteer helpers
  // ---------------------------------------------------------------------------

  /**
   * Answers every http(s)/ftp request with an empty 200 response and lets all
   * other schemes (data:, about:, ...) through, to avoid network timeouts.
   *
   * NOTE(review): this also blanks the CDN-hosted stylesheet, Mermaid and
   * MathJax referenced by the template, so on this page only the Mermaid stub
   * and the MathJax config object exist and no math/diagram rendering happens.
   * Kept as-is because the blocking looks deliberate - confirm whether the
   * libraries should instead be bundled locally.
   * @param {object} page Puppeteer page.
   * @returns {Promise<void>}
   */
  async function blockExternalRequests(page) {
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const isExternal = /^(?:https?|ftp):/i.test(req.url());
      const handled = isExternal
        ? req.respond({ status: 200, contentType: 'text/plain', body: '' })
        : req.continue();
      // Best-effort: a request that was already handled or aborted is ignored.
      handled.catch(() => {});
    });
  }

  /**
   * Forwards page crashes, uncaught page exceptions and browser console output
   * to the Node console.
   * @param {object} page Puppeteer page.
   */
  function attachPageLogging(page) {
    page.on('error', (error) => {
      console.error('Page error:', error);
    });
    page.on('pageerror', (error) => {
      console.error('Page error event:', error);
    });
    page.on('console', (msg) => {
      console.log('Browser console:', msg.text());
    });
  }

  /**
   * Gives images a short, fixed grace period (500 ms) to load. It deliberately
   * does not wait for completion; failures are logged and ignored.
   * @param {object} page Puppeteer page.
   * @returns {Promise<void>}
   */
  async function waitForImages(page) {
    try {
      console.log('Checking for images to load...');
      await page.evaluate(async () => {
        const images = Array.from(document.querySelectorAll('img'));
        console.log(`Found ${images.length} images on the page`);
        if (images.length > 0) {
          await new Promise((resolve) => {
            setTimeout(() => {
              console.log(`Continuing after attempting to load ${images.length} images`);
              resolve();
            }, 500);
          });
        }
      });
    } catch (e) {
      console.warn('Error checking images:', e.message);
    }
  }

  /**
   * Asks MathJax to typeset the page when math is present.
   *
   * Math presence is taken from the number of protected math spans, falling
   * back to a scan of the body text with scripts/styles removed. The whole
   * document cannot be scanned because the inline MathJax config always
   * contains "$".
   * @param {object} page Puppeteer page.
   * @param {boolean} expectMath True when the source contained protected math.
   * @returns {Promise<boolean>} True only if a typeset call actually ran.
   */
  async function typesetMath(page, expectMath) {
    try {
      let hasMathContent = expectMath;
      if (!hasMathContent) {
        hasMathContent = await page.evaluate(() => {
          if (!document.body) {
            return false;
          }
          const body = document.body.cloneNode(true);
          body.querySelectorAll('script, style').forEach((node) => node.remove());
          const text = body.textContent;
          return text.includes('$') || text.includes('\\(') || text.includes('\\[') ||
            text.includes('\\begin{') ||
            document.querySelectorAll('mjx-container, [class*="math"]').length > 0;
        });
      }
      console.log(`Math content found: ${hasMathContent}`);
      if (!hasMathContent) {
        console.log('No math content found, skipping MathJax wait');
        return false;
      }

      console.log('Math content detected, waiting for MathJax...');
      // Short fixed delay for MathJax start-up instead of waiting on a selector.
      await sleep(1000);

      const didTypeset = await page.evaluate(async () => {
        const mj = window.MathJax;
        if (!mj) {
          return false;
        }
        if (mj.Hub && typeof mj.Hub.Queue === 'function') {
          // MathJax v2: the queue is callback based, so wait on a queued callback.
          await new Promise((resolve) => mj.Hub.Queue(['Typeset', mj.Hub], resolve));
          return true;
        }
        if (typeof mj.typesetPromise === 'function') {
          await mj.typesetPromise();
          return true;
        }
        // Only the inline config object exists; the library never loaded.
        return false;
      });

      if (didTypeset) {
        console.log('MathJax typesetting completed');
      } else {
        console.log('MathJax not found after content check');
      }
      return didTypeset;
    } catch (e) {
      console.warn('Error checking MathJax:', e.message);
      return false;
    }
  }

  /**
   * Renders Mermaid diagrams when the real library is present and waits up to
   * five seconds for every `.mermaid` container to receive an `<svg>`.
   * @param {object} page Puppeteer page.
   * @returns {Promise<boolean>} True only if rendering was actually attempted.
   */
  async function renderMermaid(page) {
    try {
      const diagramCount = await page.evaluate(
        () => document.querySelectorAll('.mermaid').length
      );
      console.log(`Mermaid diagrams found: ${diagramCount > 0}`);
      if (diagramCount === 0) {
        console.log('No Mermaid diagrams found, skipping wait');
        return false;
      }
      console.log(`Processing ${diagramCount} Mermaid diagrams...`);

      // The template installs a stub when the library is missing; the stub
      // never renders anything, so polling for output would only waste time.
      const libraryLoaded = await page.evaluate(
        () => typeof mermaid !== 'undefined' && !mermaid.isStub
      );
      if (!libraryLoaded) {
        console.log('Mermaid library not found in document, skipping processing');
        return false;
      }

      console.log('Mermaid library found, attempting to initialize...');
      await page.evaluate(async () => {
        if (typeof mermaid.init === 'function') {
          try {
            // Awaited because newer Mermaid versions return a promise here.
            await mermaid.init(undefined, '.mermaid');
          } catch (e) {
            console.log('Mermaid init error:', e.message);
          }
        } else {
          console.log('Mermaid library not fully loaded, skipping initialization');
        }
        // Poll (max 5 s) until every container holds a rendered <svg>.
        const deadline = Date.now() + 5000;
        while (Date.now() < deadline) {
          const pending = Array.from(document.querySelectorAll('.mermaid'))
            .filter((element) => !element.querySelector('svg'));
          if (pending.length === 0) {
            break;
          }
          await new Promise((resolve) => setTimeout(resolve, 100));
        }
      });
      console.log('Mermaid diagrams processed');
      return true;
    } catch (e) {
      console.warn('Error processing Mermaid:', e.message);
      return false;
    }
  }

  /**
   * Performs one full rendering attempt on the prepared page: load the
   * document, run the image/MathJax/Mermaid steps, then print to PDF.
   * @param {object} page Puppeteer page with interception/logging configured.
   * @param {string} html Complete HTML document.
   * @param {string} pdfPath Destination PDF path.
   * @param {boolean} expectMath True when the source contained protected math.
   * @returns {Promise<void>}
   * @throws Whatever navigation or `page.pdf` rejects with.
   */
  async function renderPdf(page, html, pdfPath, expectMath) {
    console.log('About to navigate to data URL...');
    // Wait for the DOM only; external resources are not waited for.
    await page.goto(toDataUrl(html), {
      waitUntil: 'domcontentloaded',
      timeout: 30000, // Reduced timeout for faster failure
    });
    console.log('Page loaded successfully');

    await waitForImages(page);

    console.log('Checking for MathJax...');
    const mathDone = await typesetMath(page, expectMath);
    if (!mathDone) {
      console.log('Waiting 1 second before generating PDF...');
      await sleep(1000);
    }

    console.log('Checking for Mermaid diagrams...');
    const mermaidDone = await renderMermaid(page);
    if (!mermaidDone) {
      console.log('Waiting 1 second before generating PDF...');
      await sleep(1000);
    }

    console.log('Waiting for basic page content to be loaded...');
    try {
      await page.waitForFunction(() => document.readyState !== 'loading', { timeout: 10000 });
      console.log('Page DOM loaded, readyState is not loading');
    } catch (e) {
      console.warn('DOM did not finish loading, continuing...', e.message);
    }

    // Final settle time for any in-flight asynchronous rendering.
    console.log('Waiting 2 seconds before generating PDF...');
    await sleep(2000);

    console.log('Generating PDF file...');
    await page.pdf({ ...PDF_OPTIONS, path: pdfPath, timeout: 120000 });
    console.log('PDF generated successfully');
  }

  /**
   * Last-resort rendering on a fresh page: load, wait two seconds, print.
   * This page has no request interception installed. The page is always
   * closed, including when navigation fails.
   * @param {object} browser Puppeteer browser.
   * @param {string} html Complete HTML document.
   * @param {string} pdfPath Destination PDF path.
   * @returns {Promise<void>}
   * @throws The navigation or PDF error when the fallback also fails.
   */
  async function renderPdfBasic(browser, html, pdfPath) {
    console.log('Creating a new page for basic method...');
    const basicPage = await browser.newPage();
    try {
      await basicPage.setViewport({ width: 1200, height: 800 });
      basicPage.setDefaultNavigationTimeout(60000);
      await basicPage.goto(toDataUrl(html), {
        waitUntil: 'domcontentloaded',
        timeout: 120000, // Increased timeout for containerized environment
      });
      console.log('Waiting 2 seconds in basic method...');
      await sleep(2000);
      console.log('Generating PDF with basic method...');
      await basicPage.pdf({ ...PDF_OPTIONS, path: pdfPath, timeout: 300000 });
      console.log(`PDF generated using basic method at ${pdfPath}`);
    } catch (basicError) {
      console.error('Basic PDF generation also failed:', basicError.message);
      console.error('Basic error stack:', basicError.stack);
      throw basicError;
    } finally {
      await basicPage.close().catch((closeError) => {
        console.warn('Failed to close basic page:', closeError.message);
      });
    }
  }

  /**
   * Tells whether an error means the browser connection is gone, in which case
   * retrying on the same browser is pointless.
   * @param {unknown} error
   * @returns {boolean}
   */
  function isFatalBrowserError(error) {
    const message = String((error && error.message) || '');
    const fatalMarkers = [
      'Protocol error',
      'Target closed',
      'Browser closed',
      'Connection closed',
      'Navigation failed',
      'net::ERR_CONNECTION_CLOSED',
    ];
    return fatalMarkers.some((marker) => message.includes(marker));
  }

  /**
   * Prepares a page and renders the PDF with up to three attempts, backing off
   * 10 s x attempt between tries and falling back to `renderPdfBasic` after the
   * final failure.
   * @param {object} browser Puppeteer browser.
   * @param {string} html Complete HTML document.
   * @param {string} pdfPath Destination PDF path.
   * @param {boolean} expectMath True when the source contained protected math.
   * @returns {Promise<void>}
   * @throws On a fatal browser error, or when the fallback also fails.
   */
  async function generatePdf(browser, html, pdfPath, expectMath) {
    const page = await browser.newPage();
    console.log('Page created successfully');

    await page.setViewport({ width: 1200, height: 800 });
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');
    console.log('Viewport and user agent set');

    // Lower default timeouts (30 s) to avoid long hangs.
    page.setDefaultNavigationTimeout(30000);
    page.setDefaultTimeout(30000);
    console.log('Timeouts configured');

    await blockExternalRequests(page);
    console.log('Request interception configured to block all external resources');

    attachPageLogging(page);
    console.log('Error listeners attached');

    const maxAttempts = 3;
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      console.log(`Attempt ${attempt} of ${maxAttempts} for PDF generation...`);
      console.log(`HTML template length: ${html.length} characters`);
      try {
        await renderPdf(page, html, pdfPath, expectMath);
        console.log(`PDF successfully generated at ${pdfPath}`);
        return;
      } catch (error) {
        console.error(`Attempt ${attempt} failed:`, error && error.message);
        console.error(`Error stack:`, error && error.stack);

        if (isFatalBrowserError(error)) {
          console.error('Fatal browser error occurred, aborting retries');
          throw error;
        }
        if (attempt >= maxAttempts) {
          console.log('All attempts failed, trying most basic PDF generation...');
          await renderPdfBasic(browser, html, pdfPath);
          return;
        }
        // Progressive back-off to give the system time to recover.
        const delay = 10000 * attempt;
        console.log(`Waiting ${delay}ms before retry...`);
        await sleep(delay);
      }
    }
  }

  // ---------------------------------------------------------------------------
  // Entry point
  // ---------------------------------------------------------------------------

  /**
   * Reads the Markdown file at `inputPath`, converts it to HTML with marked
   * (math protected, Mermaid fences rewritten), and prints it to `outputPath`
   * with headless Chromium. Any failure is logged and exits with status 1.
   */
  (async () => {
    try {
      console.log('Reading input file...');
      const markdown = fs.readFileSync(inputPath, 'utf8');
      console.log(`File read successfully, length: ${markdown.length} characters`);

      // 1. Protect math so the Markdown parser leaves TeX untouched.
      const { text: protectedMarkdown, blocks: mathBlocks } = protectMath(markdown);
      console.log(`Protected ${mathBlocks.length} math blocks`);

      // 2. Convert to HTML with the locally installed 'marked' library
      //    (no npx/network dependency at runtime).
      const marked = require('marked');
      console.log('Parsing markdown content...');
      const parsedHtml = marked.parse(protectedMarkdown);
      console.log('Markdown parsed successfully');

      // 3-5. Restore math, convert Mermaid fences, wrap in the page template.
      const template = buildHtmlDocument(
        convertMermaidBlocks(restoreMath(parsedHtml, mathBlocks))
      );
      console.log('Template prepared, starting PDF generation...');

      // 6. Generate the PDF with Puppeteer.
      console.log('Starting Puppeteer browser launch...');
      const browser = await puppeteer.launch({
        executablePath: '/usr/bin/chromium-browser', // Alpine location
        args: [
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-dev-shm-usage',
          '--disable-background-timer-throttling',
          '--disable-renderer-backgrounding',
          '--disable-backgrounding-occluded-windows',
          '--memory-pressure-off',
          '--js-flags=--max-old-space-size=4096', // raise the V8 heap limit
          '--enable-features=NetworkService',
          '--disable-features=VizDisplayCompositor',
          '--disable-gpu',
          '--disable-web-security',
        ],
        headless: 'new',
        timeout: 120000, // Increased timeout for containerized environment
      });
      console.log('Browser launched successfully');

      try {
        await generatePdf(browser, template, outputPath, mathBlocks.length > 0);
      } finally {
        // Always release the browser, also when rendering failed.
        console.log('Closing browser...');
        try {
          await browser.close();
          console.log('Browser closed');
        } catch (closeError) {
          console.warn('Failed to close browser:', closeError.message);
        }
      }

      console.log('=== MD to PDF Conversion Completed Successfully ===');
    } catch (err) {
      console.error('Error during conversion:', err);
      console.error('Error stack:', err && err.stack);
      process.exit(1);
    }
  })();