Prathamesh Sarjerao Vaidya committed on
Commit
ffb9af5
·
1 Parent(s): 12f17d1

Update main.yml & check.yml for better preprocessing of Markdown files

Browse files
Files changed (2) hide show
  1. .github/workflows/check.yml +141 -37
  2. .github/workflows/main.yml +141 -37
.github/workflows/check.yml CHANGED
@@ -222,7 +222,7 @@ jobs:
222
  }
223
  EOF
224
 
225
- # Create preprocessing script
226
  - name: Create preprocessing script
227
  run: |
228
  cat > preprocess_markdown.py << 'EOF'
@@ -232,7 +232,7 @@ jobs:
232
  import sys
233
  import subprocess
234
  from pathlib import Path
235
-
236
  def process_mermaid_diagrams(content, file_dir):
237
  """Convert mermaid diagrams to images"""
238
  mermaid_pattern = r'```mermaid\n(.*?)\n```'
@@ -246,39 +246,60 @@ jobs:
246
  png_file = f"{file_dir}/mermaid_{diagram_hash}.png"
247
 
248
  # Write mermaid code to file
249
- with open(mermaid_file, 'w') as f:
250
- f.write(mermaid_code)
 
 
 
 
251
 
252
  try:
253
  # Convert to SVG first
254
- subprocess.run([
255
  'mmdc', '-i', mermaid_file, '-o', svg_file,
256
  '--theme', 'default', '--backgroundColor', 'white'
257
- ], check=True, capture_output=True)
258
 
259
  # Convert SVG to PNG for better PDF compatibility
260
  subprocess.run([
261
  'rsvg-convert', '-f', 'png', '-o', png_file,
262
  '--width', '1200', '--height', '800', svg_file
263
- ], check=True, capture_output=True)
264
 
265
  # Clean up intermediate files
266
- os.remove(mermaid_file)
267
- if os.path.exists(svg_file):
268
- os.remove(svg_file)
 
 
 
 
 
 
269
 
270
  # Return markdown image syntax
271
  return f'\n<div class="mermaid-container">\n\n![Architecture Diagram]({os.path.basename(png_file)})\n\n</div>\n'
272
 
273
  except subprocess.CalledProcessError as e:
274
  print(f"Error converting mermaid diagram: {e}")
 
 
 
 
 
 
275
  return f'\n```\n{mermaid_code}\n```\n'
276
  except Exception as e:
277
  print(f"Unexpected error with mermaid: {e}")
 
 
 
 
 
278
  return f'\n```\n{mermaid_code}\n```\n'
279
 
280
  return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
281
-
282
  def fix_image_paths(content, file_dir):
283
  """Fix image paths and add proper sizing"""
284
  # Pattern to match markdown images
@@ -309,32 +330,53 @@ jobs:
309
  )
310
 
311
  return content
312
-
313
  def main():
314
  if len(sys.argv) != 2:
315
  print("Usage: python preprocess_markdown.py <markdown_file>")
316
  sys.exit(1)
317
 
318
  md_file = sys.argv[1]
319
- file_dir = os.path.dirname(os.path.abspath(md_file))
320
-
321
- with open(md_file, 'r', encoding='utf-8') as f:
322
- content = f.read()
323
 
324
- # Process mermaid diagrams
325
- content = process_mermaid_diagrams(content, file_dir)
326
-
327
- # Fix image paths and sizing
328
- content = fix_image_paths(content, file_dir)
329
-
330
- # Write processed content
331
- processed_file = md_file.replace('.md', '_processed.md')
332
- with open(processed_file, 'w', encoding='utf-8') as f:
333
- f.write(content)
334
 
335
- print(f"Processed file saved as: {processed_file}")
336
- return processed_file
337
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  if __name__ == "__main__":
339
  main()
340
  EOF
@@ -351,16 +393,69 @@ jobs:
351
  pdf_path="$dir/$filename.pdf"
352
 
353
  echo "Processing $file..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
 
355
  # Preprocess the markdown file
356
  cd "$dir"
357
- processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
 
359
  if [ ! -f "$processed_file" ]; then
360
- echo "Preprocessing failed for $file, using original"
361
  processed_file="$(basename "$file")"
362
  fi
363
 
 
 
 
 
 
 
 
 
364
  echo "Converting $processed_file to $pdf_path"
365
 
366
  # Method 1: Try XeLaTeX with enhanced settings
@@ -383,7 +478,7 @@ jobs:
383
  --highlight-style=pygments \
384
  --wrap=auto \
385
  --dpi=300 \
386
- --verbose 2>/dev/null || {
387
 
388
  echo "XeLaTeX failed, trying HTML->PDF conversion..."
389
 
@@ -396,10 +491,11 @@ jobs:
396
  --toc \
397
  --number-sections \
398
  --highlight-style=pygments \
399
- -o "$dir/$filename.html"
400
 
401
  if [ -f "$dir/$filename.html" ]; then
402
- weasyprint "$dir/$filename.html" "$pdf_path" --presentational-hints || {
 
403
  echo "WeasyPrint failed, trying wkhtmltopdf..."
404
 
405
  # Method 3: wkhtmltopdf as final fallback
@@ -412,7 +508,7 @@ jobs:
412
  --encoding UTF-8 \
413
  --no-outline \
414
  --enable-local-file-access \
415
- "$dir/$filename.html" "$pdf_path" || {
416
  echo "All conversion methods failed for $file"
417
  continue
418
  }
@@ -420,12 +516,16 @@ jobs:
420
 
421
  # Clean up HTML file
422
  rm -f "$dir/$filename.html"
 
 
 
423
  fi
424
  }
425
 
426
- # Clean up processed file
427
  if [ "$processed_file" != "$(basename "$file")" ]; then
428
  rm -f "$processed_file"
 
429
  fi
430
 
431
  # Clean up generated mermaid images
@@ -433,11 +533,15 @@ jobs:
433
 
434
  if [ -f "$pdf_path" ]; then
435
  echo "✅ Successfully converted $file to $pdf_path"
 
436
  else
437
  echo "❌ Failed to convert $file"
438
  fi
 
 
 
439
  done
440
-
441
  # Upload PDF artifacts
442
  - name: Upload PDF artifacts
443
  uses: actions/upload-artifact@v4
 
222
  }
223
  EOF
224
 
225
+ # Fixed preprocessing script
226
  - name: Create preprocessing script
227
  run: |
228
  cat > preprocess_markdown.py << 'EOF'
 
232
  import sys
233
  import subprocess
234
  from pathlib import Path
235
+
236
  def process_mermaid_diagrams(content, file_dir):
237
  """Convert mermaid diagrams to images"""
238
  mermaid_pattern = r'```mermaid\n(.*?)\n```'
 
246
  png_file = f"{file_dir}/mermaid_{diagram_hash}.png"
247
 
248
  # Write mermaid code to file
249
+ try:
250
+ with open(mermaid_file, 'w', encoding='utf-8') as f:
251
+ f.write(mermaid_code)
252
+ except Exception as e:
253
+ print(f"Error writing mermaid file: {e}")
254
+ return f'\n```\n{mermaid_code}\n```\n'
255
 
256
  try:
257
  # Convert to SVG first
258
+ result = subprocess.run([
259
  'mmdc', '-i', mermaid_file, '-o', svg_file,
260
  '--theme', 'default', '--backgroundColor', 'white'
261
+ ], check=True, capture_output=True, text=True)
262
 
263
  # Convert SVG to PNG for better PDF compatibility
264
  subprocess.run([
265
  'rsvg-convert', '-f', 'png', '-o', png_file,
266
  '--width', '1200', '--height', '800', svg_file
267
+ ], check=True, capture_output=True, text=True)
268
 
269
  # Clean up intermediate files
270
+ try:
271
+ os.remove(mermaid_file)
272
+ except:
273
+ pass
274
+ try:
275
+ if os.path.exists(svg_file):
276
+ os.remove(svg_file)
277
+ except:
278
+ pass
279
 
280
  # Return markdown image syntax
281
  return f'\n<div class="mermaid-container">\n\n![Architecture Diagram]({os.path.basename(png_file)})\n\n</div>\n'
282
 
283
  except subprocess.CalledProcessError as e:
284
  print(f"Error converting mermaid diagram: {e}")
285
+ print(f"Command output: {e.stderr if e.stderr else 'No stderr'}")
286
+ # Clean up files on error
287
+ try:
288
+ os.remove(mermaid_file)
289
+ except:
290
+ pass
291
  return f'\n```\n{mermaid_code}\n```\n'
292
  except Exception as e:
293
  print(f"Unexpected error with mermaid: {e}")
294
+ # Clean up files on error
295
+ try:
296
+ os.remove(mermaid_file)
297
+ except:
298
+ pass
299
  return f'\n```\n{mermaid_code}\n```\n'
300
 
301
  return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
302
+
303
  def fix_image_paths(content, file_dir):
304
  """Fix image paths and add proper sizing"""
305
  # Pattern to match markdown images
 
330
  )
331
 
332
  return content
333
+
334
  def main():
335
  if len(sys.argv) != 2:
336
  print("Usage: python preprocess_markdown.py <markdown_file>")
337
  sys.exit(1)
338
 
339
  md_file = sys.argv[1]
 
 
 
 
340
 
341
+ # Check if file exists
342
+ if not os.path.exists(md_file):
343
+ print(f"Error: File {md_file} does not exist")
344
+ sys.exit(1)
 
 
 
 
 
 
345
 
346
+ try:
347
+ file_dir = os.path.dirname(os.path.abspath(md_file))
348
+
349
+ with open(md_file, 'r', encoding='utf-8') as f:
350
+ content = f.read()
351
+
352
+ print(f"Processing file: {md_file}")
353
+ print(f"File directory: {file_dir}")
354
+ print(f"Content length: {len(content)} characters")
355
+
356
+ # Process mermaid diagrams
357
+ original_content_length = len(content)
358
+ content = process_mermaid_diagrams(content, file_dir)
359
+ print(f"Mermaid processing complete. Content length: {len(content)}")
360
+
361
+ # Fix image paths and sizing
362
+ content = fix_image_paths(content, file_dir)
363
+ print(f"Image path fixing complete. Content length: {len(content)}")
364
+
365
+ # Write processed content
366
+ processed_file = md_file.replace('.md', '_processed.md')
367
+ with open(processed_file, 'w', encoding='utf-8') as f:
368
+ f.write(content)
369
+
370
+ print(f"Processed file saved as: {processed_file}")
371
+ print(processed_file) # This is what the shell script captures
372
+ return processed_file
373
+
374
+ except Exception as e:
375
+ print(f"Error processing {md_file}: {e}")
376
+ import traceback
377
+ traceback.print_exc()
378
+ sys.exit(1)
379
+
380
  if __name__ == "__main__":
381
  main()
382
  EOF
 
393
  pdf_path="$dir/$filename.pdf"
394
 
395
  echo "Processing $file..."
396
+ echo "Directory: $dir"
397
+ echo "Filename: $filename"
398
+ echo "PDF path: $pdf_path"
399
+
400
+ # Check if file exists and is readable
401
+ if [ ! -f "$file" ]; then
402
+ echo "ERROR: File $file does not exist"
403
+ continue
404
+ fi
405
+
406
+ if [ ! -r "$file" ]; then
407
+ echo "ERROR: File $file is not readable"
408
+ continue
409
+ fi
410
+
411
+ # Show file info for debugging
412
+ echo "File size: $(wc -c < "$file") bytes"
413
+ echo "File permissions: $(ls -la "$file")"
414
 
415
  # Preprocess the markdown file
416
  cd "$dir"
417
+ echo "Changed to directory: $(pwd)"
418
+ echo "Running preprocessing script..."
419
+
420
+ # Debug: Check if preprocessing script exists and is executable
421
+ if [ ! -f "$GITHUB_WORKSPACE/preprocess_markdown.py" ]; then
422
+ echo "ERROR: Preprocessing script not found at $GITHUB_WORKSPACE/preprocess_markdown.py"
423
+ processed_file="$(basename "$file")"
424
+ elif [ ! -x "$GITHUB_WORKSPACE/preprocess_markdown.py" ]; then
425
+ echo "WARNING: Preprocessing script is not executable, trying anyway..."
426
+ processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")" 2>&1)
427
+ if [ $? -ne 0 ]; then
428
+ echo "Preprocessing failed with output: $processed_file"
429
+ processed_file="$(basename "$file")"
430
+ else
431
+ echo "Preprocessing succeeded: $processed_file"
432
+ fi
433
+ else
434
+ processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")" 2>&1)
435
+ exit_code=$?
436
+ echo "Preprocessing exit code: $exit_code"
437
+ echo "Preprocessing output: $processed_file"
438
+
439
+ if [ $exit_code -ne 0 ]; then
440
+ echo "Preprocessing failed, using original file"
441
+ processed_file="$(basename "$file")"
442
+ fi
443
+ fi
444
 
445
+ # Verify processed file exists
446
  if [ ! -f "$processed_file" ]; then
447
+ echo "Processed file $processed_file does not exist, using original"
448
  processed_file="$(basename "$file")"
449
  fi
450
 
451
+ echo "Using file for conversion: $processed_file"
452
+
453
+ # Check if pandoc is available
454
+ if ! command -v pandoc &> /dev/null; then
455
+ echo "ERROR: pandoc is not installed or not in PATH"
456
+ continue
457
+ fi
458
+
459
  echo "Converting $processed_file to $pdf_path"
460
 
461
  # Method 1: Try XeLaTeX with enhanced settings
 
478
  --highlight-style=pygments \
479
  --wrap=auto \
480
  --dpi=300 \
481
+ --verbose 2>&1 || {
482
 
483
  echo "XeLaTeX failed, trying HTML->PDF conversion..."
484
 
 
491
  --toc \
492
  --number-sections \
493
  --highlight-style=pygments \
494
+ -o "$dir/$filename.html" 2>&1
495
 
496
  if [ -f "$dir/$filename.html" ]; then
497
+ echo "HTML file created, attempting WeasyPrint conversion..."
498
+ weasyprint "$dir/$filename.html" "$pdf_path" --presentational-hints 2>&1 || {
499
  echo "WeasyPrint failed, trying wkhtmltopdf..."
500
 
501
  # Method 3: wkhtmltopdf as final fallback
 
508
  --encoding UTF-8 \
509
  --no-outline \
510
  --enable-local-file-access \
511
+ "$dir/$filename.html" "$pdf_path" 2>&1 || {
512
  echo "All conversion methods failed for $file"
513
  continue
514
  }
 
516
 
517
  # Clean up HTML file
518
  rm -f "$dir/$filename.html"
519
+ else
520
+ echo "Failed to create HTML file for $file"
521
+ continue
522
  fi
523
  }
524
 
525
+ # Clean up processed file if it's different from original
526
  if [ "$processed_file" != "$(basename "$file")" ]; then
527
  rm -f "$processed_file"
528
+ echo "Cleaned up processed file: $processed_file"
529
  fi
530
 
531
  # Clean up generated mermaid images
 
533
 
534
  if [ -f "$pdf_path" ]; then
535
  echo "✅ Successfully converted $file to $pdf_path"
536
+ echo "PDF file size: $(wc -c < "$pdf_path") bytes"
537
  else
538
  echo "❌ Failed to convert $file"
539
  fi
540
+
541
+ # Return to original directory
542
+ cd "$GITHUB_WORKSPACE"
543
  done
544
+
545
  # Upload PDF artifacts
546
  - name: Upload PDF artifacts
547
  uses: actions/upload-artifact@v4
.github/workflows/main.yml CHANGED
@@ -210,7 +210,7 @@ jobs:
210
  }
211
  EOF
212
 
213
- # Create preprocessing script
214
  - name: Create preprocessing script
215
  run: |
216
  cat > preprocess_markdown.py << 'EOF'
@@ -220,7 +220,7 @@ jobs:
220
  import sys
221
  import subprocess
222
  from pathlib import Path
223
-
224
  def process_mermaid_diagrams(content, file_dir):
225
  """Convert mermaid diagrams to images"""
226
  mermaid_pattern = r'```mermaid\n(.*?)\n```'
@@ -234,39 +234,60 @@ jobs:
234
  png_file = f"{file_dir}/mermaid_{diagram_hash}.png"
235
 
236
  # Write mermaid code to file
237
- with open(mermaid_file, 'w') as f:
238
- f.write(mermaid_code)
 
 
 
 
239
 
240
  try:
241
  # Convert to SVG first
242
- subprocess.run([
243
  'mmdc', '-i', mermaid_file, '-o', svg_file,
244
  '--theme', 'default', '--backgroundColor', 'white'
245
- ], check=True, capture_output=True)
246
 
247
  # Convert SVG to PNG for better PDF compatibility
248
  subprocess.run([
249
  'rsvg-convert', '-f', 'png', '-o', png_file,
250
  '--width', '1200', '--height', '800', svg_file
251
- ], check=True, capture_output=True)
252
 
253
  # Clean up intermediate files
254
- os.remove(mermaid_file)
255
- if os.path.exists(svg_file):
256
- os.remove(svg_file)
 
 
 
 
 
 
257
 
258
  # Return markdown image syntax
259
  return f'\n<div class="mermaid-container">\n\n![Architecture Diagram]({os.path.basename(png_file)})\n\n</div>\n'
260
 
261
  except subprocess.CalledProcessError as e:
262
  print(f"Error converting mermaid diagram: {e}")
 
 
 
 
 
 
263
  return f'\n```\n{mermaid_code}\n```\n'
264
  except Exception as e:
265
  print(f"Unexpected error with mermaid: {e}")
 
 
 
 
 
266
  return f'\n```\n{mermaid_code}\n```\n'
267
 
268
  return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
269
-
270
  def fix_image_paths(content, file_dir):
271
  """Fix image paths and add proper sizing"""
272
  # Pattern to match markdown images
@@ -297,32 +318,53 @@ jobs:
297
  )
298
 
299
  return content
300
-
301
  def main():
302
  if len(sys.argv) != 2:
303
  print("Usage: python preprocess_markdown.py <markdown_file>")
304
  sys.exit(1)
305
 
306
  md_file = sys.argv[1]
307
- file_dir = os.path.dirname(os.path.abspath(md_file))
308
-
309
- with open(md_file, 'r', encoding='utf-8') as f:
310
- content = f.read()
311
 
312
- # Process mermaid diagrams
313
- content = process_mermaid_diagrams(content, file_dir)
314
-
315
- # Fix image paths and sizing
316
- content = fix_image_paths(content, file_dir)
317
-
318
- # Write processed content
319
- processed_file = md_file.replace('.md', '_processed.md')
320
- with open(processed_file, 'w', encoding='utf-8') as f:
321
- f.write(content)
322
 
323
- print(f"Processed file saved as: {processed_file}")
324
- return processed_file
325
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  if __name__ == "__main__":
327
  main()
328
  EOF
@@ -339,16 +381,69 @@ jobs:
339
  pdf_path="$dir/$filename.pdf"
340
 
341
  echo "Processing $file..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
 
343
  # Preprocess the markdown file
344
  cd "$dir"
345
- processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
 
 
347
  if [ ! -f "$processed_file" ]; then
348
- echo "Preprocessing failed for $file, using original"
349
  processed_file="$(basename "$file")"
350
  fi
351
 
 
 
 
 
 
 
 
 
352
  echo "Converting $processed_file to $pdf_path"
353
 
354
  # Method 1: Try XeLaTeX with enhanced settings
@@ -371,7 +466,7 @@ jobs:
371
  --highlight-style=pygments \
372
  --wrap=auto \
373
  --dpi=300 \
374
- --verbose 2>/dev/null || {
375
 
376
  echo "XeLaTeX failed, trying HTML->PDF conversion..."
377
 
@@ -384,10 +479,11 @@ jobs:
384
  --toc \
385
  --number-sections \
386
  --highlight-style=pygments \
387
- -o "$dir/$filename.html"
388
 
389
  if [ -f "$dir/$filename.html" ]; then
390
- weasyprint "$dir/$filename.html" "$pdf_path" --presentational-hints || {
 
391
  echo "WeasyPrint failed, trying wkhtmltopdf..."
392
 
393
  # Method 3: wkhtmltopdf as final fallback
@@ -400,7 +496,7 @@ jobs:
400
  --encoding UTF-8 \
401
  --no-outline \
402
  --enable-local-file-access \
403
- "$dir/$filename.html" "$pdf_path" || {
404
  echo "All conversion methods failed for $file"
405
  continue
406
  }
@@ -408,12 +504,16 @@ jobs:
408
 
409
  # Clean up HTML file
410
  rm -f "$dir/$filename.html"
 
 
 
411
  fi
412
  }
413
 
414
- # Clean up processed file
415
  if [ "$processed_file" != "$(basename "$file")" ]; then
416
  rm -f "$processed_file"
 
417
  fi
418
 
419
  # Clean up generated mermaid images
@@ -421,11 +521,15 @@ jobs:
421
 
422
  if [ -f "$pdf_path" ]; then
423
  echo "✅ Successfully converted $file to $pdf_path"
 
424
  else
425
  echo "❌ Failed to convert $file"
426
  fi
 
 
 
427
  done
428
-
429
  # Upload PDF artifacts
430
  - name: Upload PDF artifacts
431
  uses: actions/upload-artifact@v4
 
210
  }
211
  EOF
212
 
213
+ # Fixed preprocessing script
214
  - name: Create preprocessing script
215
  run: |
216
  cat > preprocess_markdown.py << 'EOF'
 
220
  import sys
221
  import subprocess
222
  from pathlib import Path
223
+
224
  def process_mermaid_diagrams(content, file_dir):
225
  """Convert mermaid diagrams to images"""
226
  mermaid_pattern = r'```mermaid\n(.*?)\n```'
 
234
  png_file = f"{file_dir}/mermaid_{diagram_hash}.png"
235
 
236
  # Write mermaid code to file
237
+ try:
238
+ with open(mermaid_file, 'w', encoding='utf-8') as f:
239
+ f.write(mermaid_code)
240
+ except Exception as e:
241
+ print(f"Error writing mermaid file: {e}")
242
+ return f'\n```\n{mermaid_code}\n```\n'
243
 
244
  try:
245
  # Convert to SVG first
246
+ result = subprocess.run([
247
  'mmdc', '-i', mermaid_file, '-o', svg_file,
248
  '--theme', 'default', '--backgroundColor', 'white'
249
+ ], check=True, capture_output=True, text=True)
250
 
251
  # Convert SVG to PNG for better PDF compatibility
252
  subprocess.run([
253
  'rsvg-convert', '-f', 'png', '-o', png_file,
254
  '--width', '1200', '--height', '800', svg_file
255
+ ], check=True, capture_output=True, text=True)
256
 
257
  # Clean up intermediate files
258
+ try:
259
+ os.remove(mermaid_file)
260
+ except:
261
+ pass
262
+ try:
263
+ if os.path.exists(svg_file):
264
+ os.remove(svg_file)
265
+ except:
266
+ pass
267
 
268
  # Return markdown image syntax
269
  return f'\n<div class="mermaid-container">\n\n![Architecture Diagram]({os.path.basename(png_file)})\n\n</div>\n'
270
 
271
  except subprocess.CalledProcessError as e:
272
  print(f"Error converting mermaid diagram: {e}")
273
+ print(f"Command output: {e.stderr if e.stderr else 'No stderr'}")
274
+ # Clean up files on error
275
+ try:
276
+ os.remove(mermaid_file)
277
+ except:
278
+ pass
279
  return f'\n```\n{mermaid_code}\n```\n'
280
  except Exception as e:
281
  print(f"Unexpected error with mermaid: {e}")
282
+ # Clean up files on error
283
+ try:
284
+ os.remove(mermaid_file)
285
+ except:
286
+ pass
287
  return f'\n```\n{mermaid_code}\n```\n'
288
 
289
  return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
290
+
291
  def fix_image_paths(content, file_dir):
292
  """Fix image paths and add proper sizing"""
293
  # Pattern to match markdown images
 
318
  )
319
 
320
  return content
321
+
322
  def main():
323
  if len(sys.argv) != 2:
324
  print("Usage: python preprocess_markdown.py <markdown_file>")
325
  sys.exit(1)
326
 
327
  md_file = sys.argv[1]
 
 
 
 
328
 
329
+ # Check if file exists
330
+ if not os.path.exists(md_file):
331
+ print(f"Error: File {md_file} does not exist")
332
+ sys.exit(1)
 
 
 
 
 
 
333
 
334
+ try:
335
+ file_dir = os.path.dirname(os.path.abspath(md_file))
336
+
337
+ with open(md_file, 'r', encoding='utf-8') as f:
338
+ content = f.read()
339
+
340
+ print(f"Processing file: {md_file}")
341
+ print(f"File directory: {file_dir}")
342
+ print(f"Content length: {len(content)} characters")
343
+
344
+ # Process mermaid diagrams
345
+ original_content_length = len(content)
346
+ content = process_mermaid_diagrams(content, file_dir)
347
+ print(f"Mermaid processing complete. Content length: {len(content)}")
348
+
349
+ # Fix image paths and sizing
350
+ content = fix_image_paths(content, file_dir)
351
+ print(f"Image path fixing complete. Content length: {len(content)}")
352
+
353
+ # Write processed content
354
+ processed_file = md_file.replace('.md', '_processed.md')
355
+ with open(processed_file, 'w', encoding='utf-8') as f:
356
+ f.write(content)
357
+
358
+ print(f"Processed file saved as: {processed_file}")
359
+ print(processed_file) # This is what the shell script captures
360
+ return processed_file
361
+
362
+ except Exception as e:
363
+ print(f"Error processing {md_file}: {e}")
364
+ import traceback
365
+ traceback.print_exc()
366
+ sys.exit(1)
367
+
368
  if __name__ == "__main__":
369
  main()
370
  EOF
 
381
  pdf_path="$dir/$filename.pdf"
382
 
383
  echo "Processing $file..."
384
+ echo "Directory: $dir"
385
+ echo "Filename: $filename"
386
+ echo "PDF path: $pdf_path"
387
+
388
+ # Check if file exists and is readable
389
+ if [ ! -f "$file" ]; then
390
+ echo "ERROR: File $file does not exist"
391
+ continue
392
+ fi
393
+
394
+ if [ ! -r "$file" ]; then
395
+ echo "ERROR: File $file is not readable"
396
+ continue
397
+ fi
398
+
399
+ # Show file info for debugging
400
+ echo "File size: $(wc -c < "$file") bytes"
401
+ echo "File permissions: $(ls -la "$file")"
402
 
403
  # Preprocess the markdown file
404
  cd "$dir"
405
+ echo "Changed to directory: $(pwd)"
406
+ echo "Running preprocessing script..."
407
+
408
+ # Debug: Check if preprocessing script exists and is executable
409
+ if [ ! -f "$GITHUB_WORKSPACE/preprocess_markdown.py" ]; then
410
+ echo "ERROR: Preprocessing script not found at $GITHUB_WORKSPACE/preprocess_markdown.py"
411
+ processed_file="$(basename "$file")"
412
+ elif [ ! -x "$GITHUB_WORKSPACE/preprocess_markdown.py" ]; then
413
+ echo "WARNING: Preprocessing script is not executable, trying anyway..."
414
+ processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")" 2>&1)
415
+ if [ $? -ne 0 ]; then
416
+ echo "Preprocessing failed with output: $processed_file"
417
+ processed_file="$(basename "$file")"
418
+ else
419
+ echo "Preprocessing succeeded: $processed_file"
420
+ fi
421
+ else
422
+ processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")" 2>&1)
423
+ exit_code=$?
424
+ echo "Preprocessing exit code: $exit_code"
425
+ echo "Preprocessing output: $processed_file"
426
+
427
+ if [ $exit_code -ne 0 ]; then
428
+ echo "Preprocessing failed, using original file"
429
+ processed_file="$(basename "$file")"
430
+ fi
431
+ fi
432
 
433
+ # Verify processed file exists
434
  if [ ! -f "$processed_file" ]; then
435
+ echo "Processed file $processed_file does not exist, using original"
436
  processed_file="$(basename "$file")"
437
  fi
438
 
439
+ echo "Using file for conversion: $processed_file"
440
+
441
+ # Check if pandoc is available
442
+ if ! command -v pandoc &> /dev/null; then
443
+ echo "ERROR: pandoc is not installed or not in PATH"
444
+ continue
445
+ fi
446
+
447
  echo "Converting $processed_file to $pdf_path"
448
 
449
  # Method 1: Try XeLaTeX with enhanced settings
 
466
  --highlight-style=pygments \
467
  --wrap=auto \
468
  --dpi=300 \
469
+ --verbose 2>&1 || {
470
 
471
  echo "XeLaTeX failed, trying HTML->PDF conversion..."
472
 
 
479
  --toc \
480
  --number-sections \
481
  --highlight-style=pygments \
482
+ -o "$dir/$filename.html" 2>&1
483
 
484
  if [ -f "$dir/$filename.html" ]; then
485
+ echo "HTML file created, attempting WeasyPrint conversion..."
486
+ weasyprint "$dir/$filename.html" "$pdf_path" --presentational-hints 2>&1 || {
487
  echo "WeasyPrint failed, trying wkhtmltopdf..."
488
 
489
  # Method 3: wkhtmltopdf as final fallback
 
496
  --encoding UTF-8 \
497
  --no-outline \
498
  --enable-local-file-access \
499
+ "$dir/$filename.html" "$pdf_path" 2>&1 || {
500
  echo "All conversion methods failed for $file"
501
  continue
502
  }
 
504
 
505
  # Clean up HTML file
506
  rm -f "$dir/$filename.html"
507
+ else
508
+ echo "Failed to create HTML file for $file"
509
+ continue
510
  fi
511
  }
512
 
513
+ # Clean up processed file if it's different from original
514
  if [ "$processed_file" != "$(basename "$file")" ]; then
515
  rm -f "$processed_file"
516
+ echo "Cleaned up processed file: $processed_file"
517
  fi
518
 
519
  # Clean up generated mermaid images
 
521
 
522
  if [ -f "$pdf_path" ]; then
523
  echo "✅ Successfully converted $file to $pdf_path"
524
+ echo "PDF file size: $(wc -c < "$pdf_path") bytes"
525
  else
526
  echo "❌ Failed to convert $file"
527
  fi
528
+
529
+ # Return to original directory
530
+ cd "$GITHUB_WORKSPACE"
531
  done
532
+
533
  # Upload PDF artifacts
534
  - name: Upload PDF artifacts
535
  uses: actions/upload-artifact@v4