add:修改使用reportlab完成md转pdf
This commit is contained in:
@@ -38,6 +38,28 @@ except Exception:
|
||||
HTML = None
|
||||
CSS = None
|
||||
|
||||
try:
|
||||
from xhtml2pdf import pisa as _pisa # type: ignore
|
||||
_HAS_XHTML2PDF: bool = True
|
||||
except Exception:
|
||||
_pisa = None # type: ignore
|
||||
_HAS_XHTML2PDF: bool = False
|
||||
|
||||
# reportlab 用于生成支持中文的 PDF
|
||||
try:
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||
from reportlab.lib.units import mm
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
||||
from reportlab.lib import colors
|
||||
from reportlab.pdfbase import pdfmetrics
|
||||
from reportlab.pdfbase.ttfonts import TTFont
|
||||
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT
|
||||
_HAS_REPORTLAB: bool = True
|
||||
except Exception:
|
||||
A4 = None
|
||||
_HAS_REPORTLAB: bool = False
|
||||
|
||||
_mdit: Any = None
|
||||
_tasklists_plugin: Any = None
|
||||
_deflist_plugin: Any = None
|
||||
@@ -688,6 +710,214 @@ def _stylesheets_for(css_name: Optional[str], css_text: Optional[str]):
|
||||
sheets.append(CSS(filename=str(css_path)))
|
||||
return sheets
|
||||
|
||||
def _reportlab_register_cjk_font() -> str:
    """Register the first usable CJK TrueType font and return its reportlab name.

    Returns 'ChineseFont' when one of the known system fonts could be
    registered (or already was, by an earlier call), otherwise 'Helvetica'.
    """
    font_name = 'ChineseFont'
    # Parsing a large .ttc file on every call is wasteful; reuse an earlier
    # registration when there is one.
    if font_name in pdfmetrics.getRegisteredFontNames():
        return font_name

    candidates = (
        r"C:\Windows\Fonts\msyh.ttc",    # Microsoft YaHei
        r"C:\Windows\Fonts\simhei.ttf",  # SimHei
        r"C:\Windows\Fonts\simsun.ttc",  # SimSun
        "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",  # Linux
        "/System/Library/Fonts/PingFang.ttc",  # macOS
    )
    for candidate in candidates:
        try:
            if not Path(candidate).exists():
                continue
            pdfmetrics.registerFont(TTFont(font_name, candidate, subfontIndex=0))
        except Exception:
            # A font that exists but cannot be loaded must not stop us from
            # trying the remaining candidates.
            continue
        return font_name
    return 'Helvetica'


def _reportlab_inline_markup(text: str) -> str:
    """Escape *text* for reportlab's Paragraph parser and apply inline Markdown.

    XML special characters are escaped first so that stray '<' or '&' in the
    Markdown source cannot break Paragraph parsing; afterwards **bold**,
    *italic* and `code` spans are turned into Paragraph markup.
    """
    from xml.sax.saxutils import escape

    text = escape(text)
    text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
    text = re.sub(r'\*(.+?)\*', r'<i>\1</i>', text)
    text = re.sub(r'`(.+?)`', r'<font face="Courier">\1</font>', text)
    return text


def _reportlab_code_markup(code_lines) -> str:
    """Turn raw code-block lines into Paragraph markup that keeps the layout.

    Each line is XML-escaped, its leading indentation is converted to
    non-breaking spaces (Paragraph collapses ordinary whitespace), and lines
    are joined with explicit <br/> breaks.
    """
    from xml.sax.saxutils import escape

    rendered = []
    for raw in code_lines:
        expanded = raw.expandtabs(4)
        body = expanded.lstrip(' ')
        indent = len(expanded) - len(body)
        rendered.append('\u00a0' * indent + escape(body))
    return '<br/>'.join(rendered)


def _render_pdf_with_reportlab(md: str) -> bytes:
    """Render Markdown text to PDF bytes using reportlab.

    Only a small Markdown subset is understood: ATX headings, '-'/'*' bullet
    items, numbered items, fenced code blocks, blank lines and paragraphs with
    **bold**, *italic* and `code` spans. Everything else is emitted as plain
    paragraph text. Raw '<', '>' and '&' in the input are escaped, so they are
    shown literally instead of being interpreted as Paragraph markup.

    Args:
        md: Markdown source text.

    Returns:
        The generated PDF document as bytes.

    Raises:
        Exception: whatever reportlab raises while building the document.
    """
    print(f"[DEBUG] _render_pdf_with_reportlab 被调用, md 长度: {len(md)}")
    bio = io.BytesIO()

    # A4 document with 20 mm margins on every side.
    doc = SimpleDocTemplate(
        bio,
        pagesize=A4,
        rightMargin=20*mm,
        leftMargin=20*mm,
        topMargin=20*mm,
        bottomMargin=20*mm,
    )

    # Flowables that make up the document, in order.
    story = []
    styles = getSampleStyleSheet()

    # Font used for all non-code text; falls back to Helvetica when no CJK
    # font can be registered.
    chinese_font = _reportlab_register_cjk_font()

    title_style = ParagraphStyle(
        'ChineseTitle',
        parent=styles['Heading1'],
        fontName=chinese_font,
        fontSize=18,
        textColor=colors.black,
        spaceAfter=12,
        spaceBefore=12,
    )

    heading2_style = ParagraphStyle(
        'ChineseHeading2',
        parent=styles['Heading2'],
        fontName=chinese_font,
        fontSize=14,
        textColor=colors.black,
        spaceAfter=10,
        spaceBefore=10,
    )

    normal_style = ParagraphStyle(
        'ChineseNormal',
        parent=styles['Normal'],
        fontName=chinese_font,
        fontSize=10,
        textColor=colors.black,
        spaceAfter=8,
        wordWrap='CJK',  # enable line breaking inside CJK text
    )

    code_style = ParagraphStyle(
        'ChineseCode',
        parent=styles['Code'],
        fontName='Courier',
        fontSize=9,
        textColor=colors.black,
        backColor=colors.lightgrey,
        leftIndent=10,
    )

    def flush_code_block() -> None:
        # Emit the collected code lines as one shaded paragraph.
        story.append(Paragraph(_reportlab_code_markup(code_lines), code_style))
        story.append(Spacer(1, 6*mm))
        code_lines.clear()

    in_code_block = False
    code_lines = []

    # Normalise Windows line endings so code lines do not carry a stray '\r'.
    for line in md.replace('\r\n', '\n').split('\n'):
        stripped = line.strip()

        # Fenced code blocks: a ``` line toggles the state.
        if stripped.startswith('```'):
            if in_code_block:
                flush_code_block()
            in_code_block = not in_code_block
            continue

        if in_code_block:
            code_lines.append(line)
            continue

        # Blank line -> small vertical gap.
        if not stripped:
            story.append(Spacer(1, 3*mm))
            continue

        # ATX headings: level 1 uses the title style, deeper levels share the
        # second-level style.
        heading = re.match(r'^(#{1,6})\s+(.*)$', line)
        if heading:
            style = title_style if len(heading.group(1)) == 1 else heading2_style
            story.append(Paragraph(_reportlab_inline_markup(heading.group(2).strip()), style))
            continue

        # Bullet items.
        if stripped.startswith('- ') or stripped.startswith('* '):
            story.append(Paragraph(f'• {_reportlab_inline_markup(stripped[2:])}', normal_style))
            continue

        # Numbered items: keep the number so the list stays ordered.
        numbered = re.match(r'^(\d+\.)\s+(.*)$', stripped)
        if numbered:
            item = f'{numbered.group(1)} {_reportlab_inline_markup(numbered.group(2))}'
            story.append(Paragraph(item, normal_style))
            continue

        # Ordinary paragraph text.
        story.append(Paragraph(_reportlab_inline_markup(stripped), normal_style))

    # An unterminated fence must not silently drop its content.
    if in_code_block and code_lines:
        flush_code_block()

    doc.build(story)
    return bio.getvalue()
|
||||
|
||||
def _render_pdf_with_xhtml2pdf(md: str, html: str, css_name: Optional[str], css_text: Optional[str]) -> bytes:
    """Render Markdown to PDF bytes using xhtml2pdf.

    The Markdown is converted with ``_render_markdown_html`` and wrapped in a
    self-contained HTML document with a fixed built-in stylesheet.

    Args:
        md: Markdown source text.
        html: Accepted for interface compatibility; not used by this renderer.
        css_name: Accepted for interface compatibility; not used.
        css_text: Accepted for interface compatibility; not used.

    Returns:
        The generated PDF document as bytes.

    Raises:
        RuntimeError: if xhtml2pdf is not installed, or if it reports errors
            while converting the document.
    """
    # The guarded import leaves _pisa as None when xhtml2pdf is missing; fail
    # with a clear message instead of an AttributeError on None.
    if _pisa is None:
        raise RuntimeError("xhtml2pdf is not available")

    # Use the plain Markdown -> HTML conversion rather than normalize_html.
    simple_html = _render_markdown_html(md)

    # Build a complete, well-formed HTML document around the converted body.
    full_html = f'''<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
@page {{
margin: 20mm;
}}
body {{
font-family: "Microsoft YaHei", "SimSun", Arial, sans-serif;
font-size: 12pt;
line-height: 1.6;
}}
h1, h2, h3, h4, h5, h6 {{
color: #333;
margin-top: 1em;
margin-bottom: 0.5em;
}}
h1 {{ font-size: 24pt; font-weight: bold; }}
h2 {{ font-size: 20pt; font-weight: bold; }}
h3 {{ font-size: 16pt; font-weight: bold; }}
p {{ margin-bottom: 1em; }}
ul, ol {{ margin-left: 2em; }}
table {{
border-collapse: collapse;
width: 100%;
margin: 1em 0;
}}
th, td {{
border: 1px solid #ddd;
padding: 8px;
}}
th {{
background-color: #f2f2f2;
}}
a {{ color: #1d4ed8; text-decoration: underline; }}
</style>
</head>
<body>
{simple_html}
</body>
</html>'''

    # Collect the PDF output in memory.
    bio = io.BytesIO()

    status = _pisa.CreatePDF(
        full_html,
        dest=bio,
        encoding='utf-8'
    )

    # CreatePDF reports problems through the returned status object rather
    # than by raising; do not hand back a broken document as if it were fine.
    if getattr(status, 'err', 0):
        raise RuntimeError(f"xhtml2pdf failed to render PDF ({status.err} error(s))")

    return bio.getvalue()
|
||||
|
||||
def md_to_pdf_bytes_with_renderer(md: str, renderer: str = "weasyprint", css_name: Optional[str] = None, css_text: Optional[str] = None, toc: bool = False, header_text: Optional[str] = None, footer_text: Optional[str] = None, logo_url: Optional[str] = None, copyright_text: Optional[str] = None, filename_text: Optional[str] = None, cover_src: Optional[str] = None, product_name: Optional[str] = None, document_name: Optional[str] = None, product_version: Optional[str] = None, document_version: Optional[str] = None) -> bytes:
|
||||
html = normalize_html(md, options={
|
||||
"toc": "1" if toc else "",
|
||||
@@ -702,8 +932,38 @@ def md_to_pdf_bytes_with_renderer(md: str, renderer: str = "weasyprint", css_nam
|
||||
"product_version": product_version,
|
||||
"document_version": document_version,
|
||||
})
|
||||
|
||||
# ========== PDF 渲染优先级 ==========
|
||||
# 1. reportlab (首选) - 纯 Python,支持中文,跨平台兼容
|
||||
# 2. WeasyPrint - 需要 GTK 系统库,Windows 上安装复杂
|
||||
# =====================================
|
||||
|
||||
print(f"[DEBUG] 开始 PDF 转换, _HAS_REPORTLAB={_HAS_REPORTLAB}, HTML is None={HTML is None}")
|
||||
|
||||
# 首选:reportlab(纯 Python,支持中文,无需外部依赖)
|
||||
if _HAS_REPORTLAB:
|
||||
try:
|
||||
print(f"[DEBUG] 尝试使用 reportlab...")
|
||||
return _render_pdf_with_reportlab(md)
|
||||
except Exception as e:
|
||||
# reportlab 失败,记录错误并继续尝试下一个方案
|
||||
import traceback
|
||||
error_detail = traceback.format_exc()
|
||||
print(f"[DEBUG] reportlab 失败: {str(e)}")
|
||||
print(f"[DEBUG] 错误详情:\n{error_detail}")
|
||||
|
||||
# 备选:WeasyPrint(需要系统库支持)
|
||||
if HTML is not None:
|
||||
stylesheets = _stylesheets_for(css_name, css_text)
|
||||
pdf_bytes = HTML(string=html).write_pdf(stylesheets=stylesheets or None)
|
||||
return pdf_bytes
|
||||
raise RuntimeError("WeasyPrint is not available")
|
||||
try:
|
||||
print(f"[DEBUG] 尝试使用 WeasyPrint...")
|
||||
stylesheets = _stylesheets_for(css_name, css_text)
|
||||
pdf_bytes = HTML(string=html).write_pdf(stylesheets=stylesheets or None)
|
||||
return pdf_bytes
|
||||
except Exception as e:
|
||||
# WeasyPrint 失败,记录错误
|
||||
import traceback
|
||||
error_detail = traceback.format_exc()
|
||||
print(f"[DEBUG] WeasyPrint 失败: {str(e)}")
|
||||
print(f"[DEBUG] 错误详情:\n{error_detail}")
|
||||
|
||||
raise RuntimeError("PDF 转换失败。reportlab 已安装但转换失败,请检查 markdown 格式")
|
||||
Reference in New Issue
Block a user