add:增加标题、列表、代码块、表格、引用等样式的处理
This commit is contained in:
@@ -49,14 +49,20 @@ except Exception:
|
|||||||
try:
|
try:
|
||||||
from reportlab.lib.pagesizes import A4
|
from reportlab.lib.pagesizes import A4
|
||||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||||
from reportlab.lib.units import mm
|
from reportlab.lib.units import mm, cm
|
||||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, HRFlowable
|
||||||
from reportlab.lib import colors
|
from reportlab.lib import colors
|
||||||
from reportlab.pdfbase import pdfmetrics
|
from reportlab.pdfbase import pdfmetrics
|
||||||
from reportlab.pdfbase.ttfonts import TTFont
|
from reportlab.pdfbase.ttfonts import TTFont
|
||||||
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT
|
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT
|
||||||
|
from reportlab.platypus import KeepInFrame
|
||||||
|
from reportlab.pdfgen import canvas
|
||||||
|
from reportlab.lib.colors import HexColor
|
||||||
_HAS_REPORTLAB: bool = True
|
_HAS_REPORTLAB: bool = True
|
||||||
except Exception:
|
except Exception as e:
|
||||||
|
import traceback
|
||||||
|
print(f"[ERROR] reportlab import failed: {e}")
|
||||||
|
traceback.print_exc()
|
||||||
A4 = None
|
A4 = None
|
||||||
_HAS_REPORTLAB: bool = False
|
_HAS_REPORTLAB: bool = False
|
||||||
|
|
||||||
@@ -713,6 +719,7 @@ def _stylesheets_for(css_name: Optional[str], css_text: Optional[str]):
|
|||||||
def _render_pdf_with_reportlab(md: str) -> bytes:
|
def _render_pdf_with_reportlab(md: str) -> bytes:
|
||||||
"""
|
"""
|
||||||
使用 reportlab 生成支持中文的 PDF(纯 Python,无外部依赖)
|
使用 reportlab 生成支持中文的 PDF(纯 Python,无外部依赖)
|
||||||
|
完整支持 markdown 格式:标题、列表、代码块、表格、引用等
|
||||||
"""
|
"""
|
||||||
print(f"[DEBUG] _render_pdf_with_reportlab 被调用, md 长度: {len(md)}")
|
print(f"[DEBUG] _render_pdf_with_reportlab 被调用, md 长度: {len(md)}")
|
||||||
bio = io.BytesIO()
|
bio = io.BytesIO()
|
||||||
@@ -767,6 +774,7 @@ def _render_pdf_with_reportlab(md: str) -> bytes:
|
|||||||
textColor=colors.black,
|
textColor=colors.black,
|
||||||
spaceAfter=12,
|
spaceAfter=12,
|
||||||
spaceBefore=12,
|
spaceBefore=12,
|
||||||
|
leading=22,
|
||||||
)
|
)
|
||||||
|
|
||||||
heading2_style = ParagraphStyle(
|
heading2_style = ParagraphStyle(
|
||||||
@@ -777,6 +785,18 @@ def _render_pdf_with_reportlab(md: str) -> bytes:
|
|||||||
textColor=colors.black,
|
textColor=colors.black,
|
||||||
spaceAfter=10,
|
spaceAfter=10,
|
||||||
spaceBefore=10,
|
spaceBefore=10,
|
||||||
|
leading=18,
|
||||||
|
)
|
||||||
|
|
||||||
|
heading3_style = ParagraphStyle(
|
||||||
|
'ChineseHeading3',
|
||||||
|
parent=styles['Heading3'],
|
||||||
|
fontName=chinese_font,
|
||||||
|
fontSize=12,
|
||||||
|
textColor=colors.black,
|
||||||
|
spaceAfter=8,
|
||||||
|
spaceBefore=8,
|
||||||
|
leading=16,
|
||||||
)
|
)
|
||||||
|
|
||||||
normal_style = ParagraphStyle(
|
normal_style = ParagraphStyle(
|
||||||
@@ -787,69 +807,328 @@ def _render_pdf_with_reportlab(md: str) -> bytes:
|
|||||||
textColor=colors.black,
|
textColor=colors.black,
|
||||||
spaceAfter=8,
|
spaceAfter=8,
|
||||||
wordWrap='CJK', # 中文换行支持
|
wordWrap='CJK', # 中文换行支持
|
||||||
|
leading=14,
|
||||||
)
|
)
|
||||||
|
|
||||||
code_style = ParagraphStyle(
|
blockquote_style = ParagraphStyle(
|
||||||
'ChineseCode',
|
'ChineseBlockquote',
|
||||||
parent=styles['Code'],
|
parent=normal_style,
|
||||||
|
fontName=chinese_font,
|
||||||
|
leftIndent=10*mm,
|
||||||
|
textColor=colors.Color(0.4, 0.4, 0.4),
|
||||||
|
spaceAfter=8,
|
||||||
|
backColor=colors.Color(0.95, 0.95, 0.95),
|
||||||
|
)
|
||||||
|
|
||||||
|
code_block_style = ParagraphStyle(
|
||||||
|
'ChineseCodeBlock',
|
||||||
|
parent=normal_style,
|
||||||
fontName='Courier',
|
fontName='Courier',
|
||||||
fontSize=9,
|
fontSize=8,
|
||||||
textColor=colors.black,
|
textColor=colors.black,
|
||||||
backColor=colors.lightgrey,
|
backColor=colors.Color(0.98, 0.98, 0.98),
|
||||||
leftIndent=10,
|
leftIndent=5*mm,
|
||||||
|
rightIndent=5*mm,
|
||||||
|
spaceAfter=10,
|
||||||
|
spaceBefore=10,
|
||||||
|
leading=12,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 解析 markdown
|
# 解析 markdown
|
||||||
lines = md.split('\n')
|
lines = md.split('\n')
|
||||||
|
i = 0
|
||||||
in_code_block = False
|
in_code_block = False
|
||||||
|
code_lang = ''
|
||||||
code_lines = []
|
code_lines = []
|
||||||
|
|
||||||
for line in lines:
|
def process_inline_markdown(text: str) -> str:
    """Convert inline Markdown in *text* to reportlab paragraph markup.

    Passes run in this order: inline code (`code`), bold (**text**),
    italic (*text*), links ([label](url)).

    Args:
        text: One logical line/paragraph of Markdown source.

    Returns:
        The text with ``<`` and ``>`` escaped and the recognised inline
        constructs replaced by ``<font>``, ``<b>``, ``<i>`` and ``<a>`` tags.
    """
    # Generated markup is parked behind placeholder keys so that a later pass
    # never re-interprets characters inside tags produced by an earlier pass.
    placeholders = {}

    def save_placeholder(content):
        key = f"__PLACEHOLDER_{len(placeholders)}__"
        placeholders[key] = content
        return key

    # Escape angle brackets in the user's text so they are not parsed as
    # markup. '&' is deliberately left alone so entity references written in
    # the Markdown source keep working.
    text = text.replace('<', '&lt;').replace('>', '&gt;')

    # Inline code goes first so that '*' inside backticks is not treated as
    # emphasis by the following passes.
    def replace_code(match):
        code_text = match.group(1)
        html = f'<font face="Courier" color="#d63384">{code_text}</font>'
        return save_placeholder(html)
    text = re.sub(r'`([^`]+)`', replace_code, text)

    # Bold must be handled before italic: '**x**' would otherwise be consumed
    # by the single-asterisk pattern.
    def replace_bold(match):
        return save_placeholder(f'<b>{match.group(1)}</b>')
    text = re.sub(r'\*\*([^*]+)\*\*', replace_bold, text)

    def replace_italic(match):
        return save_placeholder(f'<i>{match.group(1)}</i>')
    text = re.sub(r'\*([^*]+)\*', replace_italic, text)

    # Links: [label](url) becomes a blue, underlined <a href> tag.
    def replace_link(match):
        link_text = match.group(1)
        # A raw double quote would terminate the href attribute early.
        url = match.group(2).replace('"', '&quot;')
        html = f'<a href="{url}" color="blue"><u>{link_text}</u></a>'
        return save_placeholder(html)
    text = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', replace_link, text)

    # Restore newest-first: a placeholder's content can only contain keys that
    # were created before it (e.g. bold wrapping inline code), so walking
    # backwards guarantees every nested key is expanded as well.
    for key in reversed(list(placeholders)):
        text = text.replace(key, placeholders[key])

    return text
|
||||||
|
|
||||||
|
def parse_table(table_lines: list) -> None:
    """Render a Markdown pipe table and append it to ``story``.

    Args:
        table_lines: Raw table lines. Index 0 is the header row, index 1 is
            treated as the separator row (``---``, ``:---:``, ``---:``) and
            everything from index 2 on is a data row.

    Appends a ``Table`` followed by a ``Spacer`` to the enclosing ``story``;
    returns nothing. An empty ``table_lines`` is a no-op.
    """
    if not table_lines:
        return

    def split_row(row: str) -> list:
        """Split one row into stripped cells; the outer pipes are optional."""
        row = row.strip()
        if row.startswith('|'):
            row = row[1:]
        if row.endswith('|'):
            row = row[:-1]
        return [cell.strip() for cell in row.split('|')]

    header_cells = split_row(table_lines[0])
    num_cols = len(header_cells)

    # Column alignment comes from the colons in the separator row.
    alignments = []
    if len(table_lines) > 1:
        for part in split_row(table_lines[1]):
            if part.startswith(':') and part.endswith(':'):
                alignments.append('CENTER')
            elif part.endswith(':'):
                alignments.append('RIGHT')
            else:
                alignments.append('LEFT')
    # One alignment per header column: default to LEFT, ignore extras.
    alignments = (alignments + ['LEFT'] * num_cols)[:num_cols]

    # Cells are Paragraph flowables, so alignment and text colour have to be
    # carried by the paragraph style; the TableStyle ALIGN/TEXTCOLOR commands
    # below do not change how Paragraph content is drawn.
    para_alignment = {'LEFT': TA_LEFT, 'CENTER': TA_CENTER, 'RIGHT': TA_RIGHT}
    header_styles = [
        ParagraphStyle(
            f'ChineseTableHeader{idx}',
            parent=normal_style,
            alignment=para_alignment[align],
            textColor=colors.white,
        )
        for idx, align in enumerate(alignments)
    ]
    cell_styles = [
        ParagraphStyle(
            f'ChineseTableCell{idx}',
            parent=normal_style,
            alignment=para_alignment[align],
        )
        for idx, align in enumerate(alignments)
    ]

    # Header row, with inline Markdown converted to paragraph markup.
    row_data = [[
        Paragraph(process_inline_markdown(cell), style)
        for cell, style in zip(header_cells, header_styles)
    ]]

    # Data rows (the separator row at index 1 is skipped).
    for line in table_lines[2:]:
        if '|' not in line:
            continue
        cells = split_row(line)
        # Pad short rows with empty cells and drop cells beyond the header
        # width so every row has exactly num_cols entries.
        cells = (cells + [''] * num_cols)[:num_cols]
        row_data.append([
            Paragraph(process_inline_markdown(cell), style)
            for cell, style in zip(cells, cell_styles)
        ])

    # Equal-width columns across the A4 page width minus 40 mm.
    # NOTE(review): 40 mm presumably matches the document's left + right
    # margins, which are set outside this block — confirm.
    max_content_width = (A4[0] - 40*mm) / num_cols
    col_widths = [max_content_width] * num_cols

    table_style = TableStyle([
        # Header row
        ('BACKGROUND', (0, 0), (-1, 0), colors.Color(0.4, 0.6, 0.9)),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
        ('FONTNAME', (0, 0), (-1, 0), chinese_font),
        ('FONTSIZE', (0, 0), (-1, 0), 10),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 8),
        ('TOPPADDING', (0, 0), (-1, 0), 8),
        ('LEFTPADDING', (0, 0), (-1, -1), 6),
        ('RIGHTPADDING', (0, 0), (-1, -1), 6),
        # Header borders
        ('LINEABOVE', (0, 0), (-1, 0), 1, colors.black),
        ('LINEBELOW', (0, 0), (-1, 0), 1, colors.black),
        ('LINEBEFORE', (0, 0), (0, -1), 0.5, colors.grey),
        ('LINEAFTER', (-1, 0), (-1, -1), 0.5, colors.grey),
        # Data rows
        ('BACKGROUND', (0, 1), (-1, -1), colors.white),
        ('FONTNAME', (0, 1), (-1, -1), chinese_font),
        ('FONTSIZE', (0, 1), (-1, -1), 9),
        ('TOPPADDING', (0, 1), (-1, -1), 6),
        ('BOTTOMPADDING', (0, 1), (-1, -1), 6),
        # Zebra striping (alternating row backgrounds)
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.Color(0.95, 0.95, 0.98)]),
        # Grid lines
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        # Vertical alignment
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
    ])

    # Per-column horizontal alignment at the table level.
    for col_idx, align in enumerate(alignments):
        table_style.add('ALIGN', (col_idx, 0), (col_idx, -1), align)

    t = Table(row_data, colWidths=col_widths)
    t.setStyle(table_style)
    story.append(t)
    story.append(Spacer(1, 8*mm))
|
||||||
|
|
||||||
|
while i < len(lines):
|
||||||
|
line = lines[i]
|
||||||
|
|
||||||
# 代码块处理
|
# 代码块处理
|
||||||
if line.strip().startswith('```'):
|
if line.strip().startswith('```'):
|
||||||
if in_code_block:
|
if in_code_block:
|
||||||
# 代码块结束
|
# 代码块结束
|
||||||
code_text = '\n'.join(code_lines)
|
code_text = '\n'.join(code_lines)
|
||||||
story.append(Paragraph(code_text.replace('<', '<').replace('>', '>'), code_style))
|
# 使用 pre 标签保留格式
|
||||||
story.append(Spacer(1, 6*mm))
|
escaped_code = code_text.replace('<', '<').replace('>', '>')
|
||||||
|
story.append(Paragraph(f'<font face="Courier" size="8">{escaped_code}</font>', code_block_style))
|
||||||
|
story.append(Spacer(1, 3*mm))
|
||||||
code_lines = []
|
code_lines = []
|
||||||
in_code_block = False
|
in_code_block = False
|
||||||
else:
|
else:
|
||||||
in_code_block = True
|
in_code_block = True
|
||||||
|
code_lang = line.strip()[3:] # 获取语言标识
|
||||||
|
i += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if in_code_block:
|
if in_code_block:
|
||||||
code_lines.append(line)
|
code_lines.append(line)
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 表格处理
|
||||||
|
if '|' in line and i + 1 < len(lines) and '|' in lines[i + 1]:
|
||||||
|
# 检查是否是分隔行
|
||||||
|
next_line = lines[i + 1].strip()
|
||||||
|
if re.match(r'^\|?\s*:?-+:?\s*(\|:?-+:?\s*)*\|?$', next_line):
|
||||||
|
table_lines = [line, next_line] # 包含表头和分隔行
|
||||||
|
i += 2
|
||||||
|
# 收集所有表格数据行
|
||||||
|
while i < len(lines) and '|' in lines[i] and not lines[i].strip().startswith('```'):
|
||||||
|
table_lines.append(lines[i])
|
||||||
|
i += 1
|
||||||
|
parse_table(table_lines)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 标题处理
|
# 标题处理
|
||||||
if line.startswith('# '):
|
if line.startswith('#### '):
|
||||||
text = line[2:].strip()
|
text = process_inline_markdown(line[5:].strip())
|
||||||
story.append(Paragraph(text, title_style))
|
h4_style = ParagraphStyle(
|
||||||
elif line.startswith('## '):
|
'ChineseHeading4',
|
||||||
text = line[3:].strip()
|
parent=heading3_style,
|
||||||
story.append(Paragraph(text, heading2_style))
|
fontSize=11,
|
||||||
|
)
|
||||||
|
story.append(Paragraph(text, h4_style))
|
||||||
elif line.startswith('### '):
|
elif line.startswith('### '):
|
||||||
text = line[4:].strip()
|
text = process_inline_markdown(line[4:].strip())
|
||||||
|
story.append(Paragraph(text, heading3_style))
|
||||||
|
elif line.startswith('## '):
|
||||||
|
text = process_inline_markdown(line[3:].strip())
|
||||||
story.append(Paragraph(text, heading2_style))
|
story.append(Paragraph(text, heading2_style))
|
||||||
# 列表处理
|
elif line.startswith('# '):
|
||||||
|
text = process_inline_markdown(line[2:].strip())
|
||||||
|
story.append(Paragraph(text, title_style))
|
||||||
|
|
||||||
|
# 引用块处理
|
||||||
|
elif line.strip().startswith('>'):
|
||||||
|
quote_text = line.strip()[1:].strip()
|
||||||
|
processed = process_inline_markdown(quote_text)
|
||||||
|
story.append(Paragraph(processed, blockquote_style))
|
||||||
|
|
||||||
|
# 无序列表处理(包括任务列表)
|
||||||
elif line.strip().startswith('- ') or line.strip().startswith('* '):
|
elif line.strip().startswith('- ') or line.strip().startswith('* '):
|
||||||
text = line.strip()[2:]
|
content = line.strip()[2:].strip()
|
||||||
|
|
||||||
|
# 检查是否是任务列表 [ ] 或 [x]
|
||||||
|
task_checked = None
|
||||||
|
if content.startswith('[ ]'):
|
||||||
|
# 未完成的任务
|
||||||
|
task_text = content[2:].strip()
|
||||||
|
task_checked = False
|
||||||
|
elif content.startswith('[x]') or content.startswith('[X]'):
|
||||||
|
# 已完成的任务
|
||||||
|
task_text = content[2:].strip()
|
||||||
|
task_checked = True
|
||||||
|
else:
|
||||||
|
# 普通列表项
|
||||||
|
task_text = content
|
||||||
|
task_checked = None
|
||||||
|
|
||||||
|
text = process_inline_markdown(task_text)
|
||||||
|
|
||||||
|
if task_checked is True:
|
||||||
|
# 使用复选框符号表示已完成
|
||||||
|
story.append(Paragraph(f'☑ {text}', normal_style))
|
||||||
|
elif task_checked is False:
|
||||||
|
# 使用复选框符号表示未完成
|
||||||
|
story.append(Paragraph(f'☐ {text}', normal_style))
|
||||||
|
else:
|
||||||
|
# 普通列表项
|
||||||
story.append(Paragraph(f'• {text}', normal_style))
|
story.append(Paragraph(f'• {text}', normal_style))
|
||||||
elif re.match(r'^\d+\.\s', line.strip()):
|
|
||||||
text = re.sub(r'^\d+\.\s', '', line.strip())
|
# 有序列表处理
|
||||||
story.append(Paragraph(text, normal_style))
|
elif re.match(r'^\s*\d+\.\s', line.strip()):
|
||||||
|
match = re.match(r'^\s*(\d+)\.\s(.*)$', line.strip())
|
||||||
|
if match:
|
||||||
|
num = match.group(1)
|
||||||
|
text = process_inline_markdown(match.group(2))
|
||||||
|
story.append(Paragraph(f'{num}. {text}', normal_style))
|
||||||
|
|
||||||
|
# 分隔线
|
||||||
|
elif line.strip() in ['---', '***', '___']:
|
||||||
|
# 使用 HRFlowable 绘制水平分割线
|
||||||
|
story.append(Spacer(1, 3*mm))
|
||||||
|
story.append(HRFlowable(
|
||||||
|
width="100%",
|
||||||
|
thickness=0.5,
|
||||||
|
lineCap='round',
|
||||||
|
color=colors.grey,
|
||||||
|
spaceBefore=1*mm,
|
||||||
|
spaceAfter=3*mm,
|
||||||
|
))
|
||||||
|
|
||||||
# 空行
|
# 空行
|
||||||
elif not line.strip():
|
elif not line.strip():
|
||||||
story.append(Spacer(1, 3*mm))
|
story.append(Spacer(1, 2*mm))
|
||||||
# 普通段落
|
|
||||||
|
# 普通段落(可能跨多行)
|
||||||
elif line.strip():
|
elif line.strip():
|
||||||
# 处理粗体和斜体
|
# 收集连续的非空行作为段落
|
||||||
text = line.strip()
|
paragraph_lines = [line.strip()]
|
||||||
text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
|
i += 1
|
||||||
text = re.sub(r'\*(.+?)\*', r'<i>\1</i>', text)
|
while i < len(lines):
|
||||||
text = re.sub(r'`(.+?)`', r'<font face="Courier">\1</font>', text)
|
next_line = lines[i].strip()
|
||||||
story.append(Paragraph(text, normal_style))
|
# 遇到空行、标题、列表等特殊行时停止
|
||||||
|
if (not next_line or
|
||||||
|
next_line.startswith('#') or
|
||||||
|
next_line.startswith('>') or
|
||||||
|
next_line.startswith('-') or
|
||||||
|
next_line.startswith('*') or
|
||||||
|
next_line.startswith('```') or
|
||||||
|
re.match(r'^\d+\.\s', next_line) or
|
||||||
|
(next_line.startswith('---') or next_line.startswith('***')) or
|
||||||
|
('|' in next_line and i + 1 < len(lines) and '|' in lines[i + 1])):
|
||||||
|
break
|
||||||
|
paragraph_lines.append(next_line)
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
paragraph_text = ' '.join(paragraph_lines)
|
||||||
|
processed = process_inline_markdown(paragraph_text)
|
||||||
|
story.append(Paragraph(processed, normal_style))
|
||||||
|
i -= 1 # 回退一行,因为外层会 i += 1
|
||||||
|
|
||||||
|
i += 1
|
||||||
|
|
||||||
# 生成 PDF
|
# 生成 PDF
|
||||||
doc.build(story)
|
doc.build(story)
|
||||||
|
|||||||
Submodule docling/docling deleted from ad97e52851
Reference in New Issue
Block a user