add:修改使用reportlab完成md转pdf

This commit is contained in:
2026-01-15 23:45:46 +08:00
parent cecc8c65be
commit 0cc1a9484e
7 changed files with 436 additions and 299 deletions

View File

@@ -2734,11 +2734,17 @@ async def api_pdf_convert(
# Return PDF file
if download:
from fastapi.responses import StreamingResponse
import urllib.parse
# 处理中文文件名 - 使用 URL 编码确保只包含 ASCII 字符
# 先将中文文件名进行百分比编码
safe_filename = urllib.parse.quote(output_filename, safe='')
return StreamingResponse(
io.BytesIO(pdf_bytes),
media_type="application/pdf",
headers={
"Content-Disposition": f"attachment; filename=\"{output_filename}\""
"Content-Disposition": f"attachment; filename={safe_filename}"
}
)
else:

View File

@@ -38,6 +38,28 @@ except Exception:
HTML = None
CSS = None
try:
from xhtml2pdf import pisa as _pisa # type: ignore
_HAS_XHTML2PDF: bool = True
except Exception:
_pisa = None # type: ignore
_HAS_XHTML2PDF: bool = False
# reportlab 用于生成支持中文的 PDF
try:
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import mm
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
from reportlab.lib import colors
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT
_HAS_REPORTLAB: bool = True
except Exception:
A4 = None
_HAS_REPORTLAB: bool = False
_mdit: Any = None
_tasklists_plugin: Any = None
_deflist_plugin: Any = None
@@ -688,6 +710,214 @@ def _stylesheets_for(css_name: Optional[str], css_text: Optional[str]):
sheets.append(CSS(filename=str(css_path)))
return sheets
# Name under which the CJK-capable TrueType face is registered with reportlab.
_REPORTLAB_CJK_FONT_NAME = 'ChineseFont'

# Font files probed, in order, for a face that can render Chinese text.
_REPORTLAB_CJK_FONT_CANDIDATES = (
    r"C:\Windows\Fonts\msyh.ttc",    # Windows: Microsoft YaHei
    r"C:\Windows\Fonts\simhei.ttf",  # Windows: SimHei
    r"C:\Windows\Fonts\simsun.ttc",  # Windows: SimSun
    "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",  # Linux
    "/System/Library/Fonts/PingFang.ttc",  # macOS
)

_MD_HEADING_RE = re.compile(r'(#{1,6})[ \t]+(.*)')
_MD_ORDERED_ITEM_RE = re.compile(r'(\d+)\.\s+(.*)')
_MD_CODE_SPAN_RE = re.compile(r'(`[^`]+`)')
_MD_BOLD_RE = re.compile(r'\*\*(?=\S)(.+?)(?<=\S)\*\*')
_MD_ITALIC_RE = re.compile(r'(?<!\*)\*(?=\S)(.+?)(?<=\S)\*(?!\*)')


def _reportlab_escape(text: str) -> str:
    """Escape ``&``, ``<`` and ``>`` so *text* is safe inside Paragraph markup."""
    from html import escape

    return escape(text, quote=False)


def _reportlab_cjk_font() -> str:
    """Return a font name usable for Chinese text, registering it if needed.

    Falls back to ``'Helvetica'`` when none of the candidate files can be
    loaded; the PDF is still produced in that case.
    """
    # Registration is process-wide, so skip re-parsing the font file on
    # every conversion once it has succeeded.
    if _REPORTLAB_CJK_FONT_NAME in pdfmetrics.getRegisteredFontNames():
        return _REPORTLAB_CJK_FONT_NAME
    for candidate in _REPORTLAB_CJK_FONT_CANDIDATES:
        try:
            if not Path(candidate).exists():
                continue
            pdfmetrics.registerFont(
                TTFont(_REPORTLAB_CJK_FONT_NAME, candidate, subfontIndex=0)
            )
        except Exception:
            # Best effort: an unreadable or unsupported font file must not
            # abort the conversion, so try the next candidate.
            continue
        return _REPORTLAB_CJK_FONT_NAME
    return 'Helvetica'


def _md_inline_to_markup(text: str) -> str:
    """Convert inline markdown (bold, italic, code spans) to Paragraph markup.

    All literal text is XML-escaped. Code spans are split out first so that
    ``*`` inside backticks is never treated as emphasis.
    """
    pieces = []
    for index, chunk in enumerate(_MD_CODE_SPAN_RE.split(text)):
        if index % 2:
            # Odd indices are the captured `code` spans (backticks included).
            pieces.append(
                f'<font face="Courier">{_reportlab_escape(chunk[1:-1])}</font>'
            )
            continue
        chunk = _reportlab_escape(chunk)
        chunk = _MD_BOLD_RE.sub(r'<b>\1</b>', chunk)
        chunk = _MD_ITALIC_RE.sub(r'<i>\1</i>', chunk)
        pieces.append(chunk)
    return ''.join(pieces)


def _reportlab_paragraph(text: str, style, prefix: str = ''):
    """Build a Paragraph from one line of markdown text.

    If the generated markup is rejected by reportlab's parser (for example
    because of mis-nested emphasis), the line is rendered as plain escaped
    text instead of failing the whole document.
    """
    try:
        return Paragraph(prefix + _md_inline_to_markup(text), style)
    except ValueError:
        return Paragraph(prefix + _reportlab_escape(text), style)


def _reportlab_code_flowables(code_lines, style) -> list:
    """Return the flowables for one fenced code block."""
    rendered = []
    for raw in code_lines:
        expanded = raw.expandtabs(4)
        body = expanded.lstrip(' ')
        # Paragraph collapses ordinary leading spaces; non-breaking spaces
        # keep the indentation of the code visible.
        indent = '\u00a0' * (len(expanded) - len(body))
        rendered.append(indent + _reportlab_escape(body))
    # Paragraph also collapses newlines, so line breaks must be explicit.
    return [Paragraph('<br/>'.join(rendered), style), Spacer(1, 6*mm)]


def _render_pdf_with_reportlab(md: str) -> bytes:
    """Render markdown to PDF bytes with reportlab (pure Python).

    Only a small markdown subset is understood: ATX headings, unordered and
    ordered list items, fenced code blocks, blank lines, and paragraphs with
    bold / italic / inline-code spans. Every other construct is emitted as a
    plain paragraph.

    Args:
        md: Markdown source text.

    Returns:
        The generated PDF document as bytes.
    """
    print(f"[DEBUG] _render_pdf_with_reportlab 被调用, md 长度: {len(md)}")
    bio = io.BytesIO()
    doc = SimpleDocTemplate(
        bio,
        pagesize=A4,
        rightMargin=20*mm,
        leftMargin=20*mm,
        topMargin=20*mm,
        bottomMargin=20*mm,
    )
    styles = getSampleStyleSheet()
    chinese_font = _reportlab_cjk_font()

    title_style = ParagraphStyle(
        'ChineseTitle',
        parent=styles['Heading1'],
        fontName=chinese_font,
        fontSize=18,
        textColor=colors.black,
        spaceAfter=12,
        spaceBefore=12,
    )
    heading2_style = ParagraphStyle(
        'ChineseHeading2',
        parent=styles['Heading2'],
        fontName=chinese_font,
        fontSize=14,
        textColor=colors.black,
        spaceAfter=10,
        spaceBefore=10,
    )
    normal_style = ParagraphStyle(
        'ChineseNormal',
        parent=styles['Normal'],
        fontName=chinese_font,
        fontSize=10,
        textColor=colors.black,
        spaceAfter=8,
        wordWrap='CJK',  # allow line breaks between CJK characters
    )
    code_style = ParagraphStyle(
        'ChineseCode',
        parent=styles['Code'],
        fontName='Courier',
        fontSize=9,
        textColor=colors.black,
        backColor=colors.lightgrey,
        leftIndent=10,
    )

    story = []
    code_lines = []
    in_code_block = False
    # `or ['']` keeps the story non-empty for empty input (one blank spacer).
    for line in (md.splitlines() or ['']):
        stripped = line.strip()

        # Fenced code blocks: a fence line toggles the state.
        if stripped.startswith('```'):
            if in_code_block:
                story.extend(_reportlab_code_flowables(code_lines, code_style))
                code_lines = []
            in_code_block = not in_code_block
            continue
        if in_code_block:
            code_lines.append(line)
            continue

        if not stripped:
            story.append(Spacer(1, 3*mm))
            continue

        heading = _MD_HEADING_RE.match(line)
        if heading:
            # Level 1 uses the title style; deeper levels share one style.
            style = title_style if len(heading.group(1)) == 1 else heading2_style
            story.append(_reportlab_paragraph(heading.group(2).strip(), style))
            continue

        if stripped.startswith(('- ', '* ')):
            story.append(
                _reportlab_paragraph(stripped[2:], normal_style, prefix='\u2022 ')
            )
            continue

        ordered = _MD_ORDERED_ITEM_RE.match(stripped)
        if ordered:
            # Keep the item number; dropping it loses document content.
            story.append(
                _reportlab_paragraph(
                    ordered.group(2), normal_style, prefix=f'{ordered.group(1)}. '
                )
            )
            continue

        story.append(_reportlab_paragraph(stripped, normal_style))

    # An unterminated fence would otherwise silently drop the buffered code.
    if in_code_block:
        story.extend(_reportlab_code_flowables(code_lines, code_style))

    doc.build(story)
    return bio.getvalue()
def _render_pdf_with_xhtml2pdf(md: str, html: str, css_name: Optional[str], css_text: Optional[str]) -> bytes:
    """Render markdown to PDF bytes with xhtml2pdf (pure Python).

    The markdown is converted with ``_render_markdown_html`` and wrapped in a
    minimal HTML document carrying a fixed built-in stylesheet.

    Args:
        md: Markdown source text.
        html: Accepted for signature compatibility; not used by this renderer.
        css_name: Accepted for signature compatibility; not used.
        css_text: Accepted for signature compatibility; not used.

    Returns:
        The generated PDF document as bytes.

    Raises:
        RuntimeError: If xhtml2pdf is not installed, or if it reports errors
            while building the document.
    """
    # The module-level import guard leaves `_pisa` as None when xhtml2pdf is
    # missing; fail with a clear message instead of an AttributeError.
    if _pisa is None:
        raise RuntimeError("xhtml2pdf is not available")

    # Use the plain markdown-to-HTML conversion rather than normalize_html.
    simple_html = _render_markdown_html(md)
    full_html = f'''<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
@page {{
margin: 20mm;
}}
body {{
font-family: "Microsoft YaHei", "SimSun", Arial, sans-serif;
font-size: 12pt;
line-height: 1.6;
}}
h1, h2, h3, h4, h5, h6 {{
color: #333;
margin-top: 1em;
margin-bottom: 0.5em;
}}
h1 {{ font-size: 24pt; font-weight: bold; }}
h2 {{ font-size: 20pt; font-weight: bold; }}
h3 {{ font-size: 16pt; font-weight: bold; }}
p {{ margin-bottom: 1em; }}
ul, ol {{ margin-left: 2em; }}
table {{
border-collapse: collapse;
width: 100%;
margin: 1em 0;
}}
th, td {{
border: 1px solid #ddd;
padding: 8px;
}}
th {{
background-color: #f2f2f2;
}}
a {{ color: #1d4ed8; text-decoration: underline; }}
</style>
</head>
<body>
{simple_html}
</body>
</html>'''

    bio = io.BytesIO()
    status = _pisa.CreatePDF(
        full_html,
        dest=bio,
        encoding='utf-8'
    )
    # CreatePDF reports failures through the returned status object rather
    # than by raising; without this check a broken or empty PDF would be
    # returned as if the conversion had succeeded.
    error_count = getattr(status, 'err', 0)
    if error_count:
        raise RuntimeError(f"xhtml2pdf reported {error_count} error(s) while rendering the PDF")
    return bio.getvalue()
def md_to_pdf_bytes_with_renderer(md: str, renderer: str = "weasyprint", css_name: Optional[str] = None, css_text: Optional[str] = None, toc: bool = False, header_text: Optional[str] = None, footer_text: Optional[str] = None, logo_url: Optional[str] = None, copyright_text: Optional[str] = None, filename_text: Optional[str] = None, cover_src: Optional[str] = None, product_name: Optional[str] = None, document_name: Optional[str] = None, product_version: Optional[str] = None, document_version: Optional[str] = None) -> bytes:
html = normalize_html(md, options={
"toc": "1" if toc else "",
@@ -702,8 +932,38 @@ def md_to_pdf_bytes_with_renderer(md: str, renderer: str = "weasyprint", css_nam
"product_version": product_version,
"document_version": document_version,
})
# ========== PDF 渲染优先级 ==========
# 1. reportlab (首选) - 纯 Python支持中文跨平台兼容
# 2. WeasyPrint - 需要 GTK 系统库Windows 上安装复杂
# =====================================
print(f"[DEBUG] 开始 PDF 转换, _HAS_REPORTLAB={_HAS_REPORTLAB}, HTML is None={HTML is None}")
# 首选reportlab纯 Python支持中文无需外部依赖
if _HAS_REPORTLAB:
try:
print(f"[DEBUG] 尝试使用 reportlab...")
return _render_pdf_with_reportlab(md)
except Exception as e:
# reportlab 失败,记录错误并继续尝试下一个方案
import traceback
error_detail = traceback.format_exc()
print(f"[DEBUG] reportlab 失败: {str(e)}")
print(f"[DEBUG] 错误详情:\n{error_detail}")
# 备选WeasyPrint需要系统库支持
if HTML is not None:
stylesheets = _stylesheets_for(css_name, css_text)
pdf_bytes = HTML(string=html).write_pdf(stylesheets=stylesheets or None)
return pdf_bytes
raise RuntimeError("WeasyPrint is not available")
try:
print(f"[DEBUG] 尝试使用 WeasyPrint...")
stylesheets = _stylesheets_for(css_name, css_text)
pdf_bytes = HTML(string=html).write_pdf(stylesheets=stylesheets or None)
return pdf_bytes
except Exception as e:
# WeasyPrint 失败,记录错误
import traceback
error_detail = traceback.format_exc()
print(f"[DEBUG] WeasyPrint 失败: {str(e)}")
print(f"[DEBUG] 错误详情:\n{error_detail}")
raise RuntimeError("PDF 转换失败。reportlab 已安装但转换失败,请检查 markdown 格式")