add:增加标题、列表、代码块、表格、引用等样式的处理
This commit is contained in:
@@ -49,14 +49,20 @@ except Exception:
|
||||
try:
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||
from reportlab.lib.units import mm
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
||||
from reportlab.lib.units import mm, cm
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, HRFlowable
|
||||
from reportlab.lib import colors
|
||||
from reportlab.pdfbase import pdfmetrics
|
||||
from reportlab.pdfbase.ttfonts import TTFont
|
||||
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT
|
||||
from reportlab.platypus import KeepInFrame
|
||||
from reportlab.pdfgen import canvas
|
||||
from reportlab.lib.colors import HexColor
|
||||
_HAS_REPORTLAB: bool = True
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
import traceback
|
||||
print(f"[ERROR] reportlab import failed: {e}")
|
||||
traceback.print_exc()
|
||||
A4 = None
|
||||
_HAS_REPORTLAB: bool = False
|
||||
|
||||
@@ -713,6 +719,7 @@ def _stylesheets_for(css_name: Optional[str], css_text: Optional[str]):
|
||||
def _render_pdf_with_reportlab(md: str) -> bytes:
|
||||
"""
|
||||
使用 reportlab 生成支持中文的 PDF(纯 Python,无外部依赖)
|
||||
完整支持 markdown 格式:标题、列表、代码块、表格、引用等
|
||||
"""
|
||||
print(f"[DEBUG] _render_pdf_with_reportlab 被调用, md 长度: {len(md)}")
|
||||
bio = io.BytesIO()
|
||||
@@ -767,6 +774,7 @@ def _render_pdf_with_reportlab(md: str) -> bytes:
|
||||
textColor=colors.black,
|
||||
spaceAfter=12,
|
||||
spaceBefore=12,
|
||||
leading=22,
|
||||
)
|
||||
|
||||
heading2_style = ParagraphStyle(
|
||||
@@ -777,6 +785,18 @@ def _render_pdf_with_reportlab(md: str) -> bytes:
|
||||
textColor=colors.black,
|
||||
spaceAfter=10,
|
||||
spaceBefore=10,
|
||||
leading=18,
|
||||
)
|
||||
|
||||
heading3_style = ParagraphStyle(
|
||||
'ChineseHeading3',
|
||||
parent=styles['Heading3'],
|
||||
fontName=chinese_font,
|
||||
fontSize=12,
|
||||
textColor=colors.black,
|
||||
spaceAfter=8,
|
||||
spaceBefore=8,
|
||||
leading=16,
|
||||
)
|
||||
|
||||
normal_style = ParagraphStyle(
|
||||
@@ -787,69 +807,328 @@ def _render_pdf_with_reportlab(md: str) -> bytes:
|
||||
textColor=colors.black,
|
||||
spaceAfter=8,
|
||||
wordWrap='CJK', # 中文换行支持
|
||||
leading=14,
|
||||
)
|
||||
|
||||
code_style = ParagraphStyle(
|
||||
'ChineseCode',
|
||||
parent=styles['Code'],
|
||||
blockquote_style = ParagraphStyle(
|
||||
'ChineseBlockquote',
|
||||
parent=normal_style,
|
||||
fontName=chinese_font,
|
||||
leftIndent=10*mm,
|
||||
textColor=colors.Color(0.4, 0.4, 0.4),
|
||||
spaceAfter=8,
|
||||
backColor=colors.Color(0.95, 0.95, 0.95),
|
||||
)
|
||||
|
||||
code_block_style = ParagraphStyle(
|
||||
'ChineseCodeBlock',
|
||||
parent=normal_style,
|
||||
fontName='Courier',
|
||||
fontSize=9,
|
||||
fontSize=8,
|
||||
textColor=colors.black,
|
||||
backColor=colors.lightgrey,
|
||||
leftIndent=10,
|
||||
backColor=colors.Color(0.98, 0.98, 0.98),
|
||||
leftIndent=5*mm,
|
||||
rightIndent=5*mm,
|
||||
spaceAfter=10,
|
||||
spaceBefore=10,
|
||||
leading=12,
|
||||
)
|
||||
|
||||
# 解析 markdown
|
||||
lines = md.split('\n')
|
||||
i = 0
|
||||
in_code_block = False
|
||||
code_lang = ''
|
||||
code_lines = []
|
||||
|
||||
for line in lines:
|
||||
def process_inline_markdown(text: str) -> str:
    """Convert inline Markdown in *text* to paragraph mini-HTML markup.

    The constructs are handled in this order: inline code (`code`),
    bold (**text**), italic (*text*) and links ([text](url)).

    Args:
        text: Raw Markdown for a single line or an already-joined paragraph.

    Returns:
        The text with ``&``, ``<`` and ``>`` escaped and the inline
        constructs replaced by ``<font>``, ``<b>``, ``<i>`` and ``<a>`` tags.
    """
    # Local import so the helper does not depend on a file-level import.
    from html import escape as escape_markup

    # Every generated tag is parked behind a placeholder so that the later
    # regex passes cannot re-interpret characters inside markup we produced.
    placeholders = {}

    def save_placeholder(content):
        key = f"__PLACEHOLDER_{len(placeholders)}__"
        placeholders[key] = content
        return key

    # Escape the user's text first (including "&") so that literal angle
    # brackets and ampersands cannot be mistaken for markup later on.
    text = escape_markup(text, quote=False)

    # Inline code goes first so that "*" inside code is not treated as emphasis.
    # The code content was already escaped above and is used as-is.
    def replace_code(match):
        markup = f'<font face="Courier" color="#d63384">{match.group(1)}</font>'
        return save_placeholder(markup)

    text = re.sub(r'`([^`]+)`', replace_code, text)

    # Bold must be handled before italic because "**" also contains "*".
    def replace_bold(match):
        return save_placeholder(f'<b>{match.group(1)}</b>')

    text = re.sub(r'\*\*([^*]+)\*\*', replace_bold, text)

    # Italic.
    def replace_italic(match):
        return save_placeholder(f'<i>{match.group(1)}</i>')

    text = re.sub(r'\*([^*]+)\*', replace_italic, text)

    # Links: [text](url) rendered as a blue, underlined anchor.
    def replace_link(match):
        link_text = match.group(1)
        # A raw double quote would terminate the href attribute early.
        url = match.group(2).replace('"', '%22')
        markup = f'<a href="{url}" color="blue"><u>{link_text}</u></a>'
        return save_placeholder(markup)

    text = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', replace_link, text)

    # Restore newest-first: a later placeholder may wrap an earlier one
    # (e.g. bold around inline code), never the other way round, so reverse
    # creation order resolves every nesting level in a single pass.
    for key in reversed(list(placeholders)):
        text = text.replace(key, placeholders[key])

    return text
|
||||
|
||||
def parse_table(table_lines: list) -> None:
    """Render a Markdown table as a reportlab ``Table`` and append it to ``story``.

    Args:
        table_lines: Raw table lines: the header row, the separator row
            (e.g. ``|:---|:---:|---:|``), then zero or more data rows.

    Returns:
        None. A ``Table`` followed by a ``Spacer`` is appended to ``story``
        from the enclosing scope; nothing is appended when ``table_lines``
        is empty or the header row yields no columns.
    """
    if not table_lines:
        return

    def split_row(row: str) -> list:
        """Split one table row into stripped cells; outer pipes are optional."""
        row = row.strip()
        if row.startswith('|'):
            row = row[1:]
        if row.endswith('|'):
            row = row[:-1]
        return [cell.strip() for cell in row.split('|')] if row else []

    def to_paragraphs(cells: list) -> list:
        """Apply inline Markdown to each cell and wrap it in a Paragraph."""
        return [Paragraph(process_inline_markdown(cell), normal_style) for cell in cells]

    # Header row determines the number of columns for the whole table.
    header_cells = split_row(table_lines[0])
    num_cols = len(header_cells)
    if num_cols == 0:
        # A table without columns cannot be laid out; skip it.
        return

    # Column alignment comes from the colons in the separator row.
    alignments = []
    if len(table_lines) > 1:
        for part in split_row(table_lines[1]):
            if part.startswith(':') and part.endswith(':'):
                alignments.append('CENTER')
            elif part.endswith(':'):
                alignments.append('RIGHT')
            else:
                alignments.append('LEFT')

    # Data rows (the separator row at index 1 is skipped). Each row is padded
    # or truncated to the header's width so it matches the fixed colWidths.
    row_data = [to_paragraphs(header_cells)]
    for row in table_lines[2:]:
        if '|' not in row:
            continue
        cells = split_row(row)
        cells = (cells + [''] * num_cols)[:num_cols]
        row_data.append(to_paragraphs(cells))

    # Equal-width columns across the page width minus 40 mm of margins.
    col_widths = [(A4[0] - 40*mm) / num_cols] * num_cols

    table_style = TableStyle([
        # Header row
        ('BACKGROUND', (0, 0), (-1, 0), colors.Color(0.4, 0.6, 0.9)),  # blue background
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
        ('FONTNAME', (0, 0), (-1, 0), chinese_font),
        ('FONTSIZE', (0, 0), (-1, 0), 10),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 8),
        ('TOPPADDING', (0, 0), (-1, 0), 8),
        ('LEFTPADDING', (0, 0), (-1, -1), 6),
        ('RIGHTPADDING', (0, 0), (-1, -1), 6),
        # Header borders
        ('LINEABOVE', (0, 0), (-1, 0), 1, colors.black),
        ('LINEBELOW', (0, 0), (-1, 0), 1, colors.black),
        ('LINEBEFORE', (0, 0), (0, -1), 0.5, colors.grey),
        ('LINEAFTER', (-1, 0), (-1, -1), 0.5, colors.grey),
        # Data rows
        ('BACKGROUND', (0, 1), (-1, -1), colors.white),
        ('FONTNAME', (0, 1), (-1, -1), chinese_font),
        ('FONTSIZE', (0, 1), (-1, -1), 9),
        ('TOPPADDING', (0, 1), (-1, -1), 6),
        ('BOTTOMPADDING', (0, 1), (-1, -1), 6),
        # Zebra striping (alternating row backgrounds)
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.Color(0.95, 0.95, 0.98)]),
        # Grid lines
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        # Vertical alignment
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
    ])

    # Per-column horizontal alignment; limited to existing columns so a
    # longer separator row cannot reference a column that is not there.
    # NOTE(review): the cells are Paragraph flowables, whose text alignment
    # presumably comes from their ParagraphStyle - confirm that this ALIGN
    # command has the intended visible effect.
    for col_idx, align in enumerate(alignments[:num_cols]):
        table_style.add('ALIGN', (col_idx, 0), (col_idx, -1), align)

    # Build the table and add it, followed by some vertical space.
    t = Table(row_data, colWidths=col_widths)
    t.setStyle(table_style)
    story.append(t)
    story.append(Spacer(1, 8*mm))
|
||||
|
||||
while i < len(lines):
|
||||
line = lines[i]
|
||||
|
||||
# 代码块处理
|
||||
if line.strip().startswith('```'):
|
||||
if in_code_block:
|
||||
# 代码块结束
|
||||
code_text = '\n'.join(code_lines)
|
||||
story.append(Paragraph(code_text.replace('<', '<').replace('>', '>'), code_style))
|
||||
story.append(Spacer(1, 6*mm))
|
||||
# 使用 pre 标签保留格式
|
||||
escaped_code = code_text.replace('<', '<').replace('>', '>')
|
||||
story.append(Paragraph(f'<font face="Courier" size="8">{escaped_code}</font>', code_block_style))
|
||||
story.append(Spacer(1, 3*mm))
|
||||
code_lines = []
|
||||
in_code_block = False
|
||||
else:
|
||||
in_code_block = True
|
||||
code_lang = line.strip()[3:] # 获取语言标识
|
||||
i += 1
|
||||
continue
|
||||
|
||||
if in_code_block:
|
||||
code_lines.append(line)
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# 表格处理
|
||||
if '|' in line and i + 1 < len(lines) and '|' in lines[i + 1]:
|
||||
# 检查是否是分隔行
|
||||
next_line = lines[i + 1].strip()
|
||||
if re.match(r'^\|?\s*:?-+:?\s*(\|:?-+:?\s*)*\|?$', next_line):
|
||||
table_lines = [line, next_line] # 包含表头和分隔行
|
||||
i += 2
|
||||
# 收集所有表格数据行
|
||||
while i < len(lines) and '|' in lines[i] and not lines[i].strip().startswith('```'):
|
||||
table_lines.append(lines[i])
|
||||
i += 1
|
||||
parse_table(table_lines)
|
||||
continue
|
||||
|
||||
# 标题处理
|
||||
if line.startswith('# '):
|
||||
text = line[2:].strip()
|
||||
story.append(Paragraph(text, title_style))
|
||||
elif line.startswith('## '):
|
||||
text = line[3:].strip()
|
||||
story.append(Paragraph(text, heading2_style))
|
||||
if line.startswith('#### '):
|
||||
text = process_inline_markdown(line[5:].strip())
|
||||
h4_style = ParagraphStyle(
|
||||
'ChineseHeading4',
|
||||
parent=heading3_style,
|
||||
fontSize=11,
|
||||
)
|
||||
story.append(Paragraph(text, h4_style))
|
||||
elif line.startswith('### '):
|
||||
text = line[4:].strip()
|
||||
text = process_inline_markdown(line[4:].strip())
|
||||
story.append(Paragraph(text, heading3_style))
|
||||
elif line.startswith('## '):
|
||||
text = process_inline_markdown(line[3:].strip())
|
||||
story.append(Paragraph(text, heading2_style))
|
||||
# 列表处理
|
||||
elif line.startswith('# '):
|
||||
text = process_inline_markdown(line[2:].strip())
|
||||
story.append(Paragraph(text, title_style))
|
||||
|
||||
# 引用块处理
|
||||
elif line.strip().startswith('>'):
|
||||
quote_text = line.strip()[1:].strip()
|
||||
processed = process_inline_markdown(quote_text)
|
||||
story.append(Paragraph(processed, blockquote_style))
|
||||
|
||||
# 无序列表处理(包括任务列表)
|
||||
elif line.strip().startswith('- ') or line.strip().startswith('* '):
|
||||
text = line.strip()[2:]
|
||||
story.append(Paragraph(f'• {text}', normal_style))
|
||||
elif re.match(r'^\d+\.\s', line.strip()):
|
||||
text = re.sub(r'^\d+\.\s', '', line.strip())
|
||||
story.append(Paragraph(text, normal_style))
|
||||
content = line.strip()[2:].strip()
|
||||
|
||||
# 检查是否是任务列表 [ ] 或 [x]
|
||||
task_checked = None
|
||||
if content.startswith('[ ]'):
|
||||
# 未完成的任务
|
||||
task_text = content[2:].strip()
|
||||
task_checked = False
|
||||
elif content.startswith('[x]') or content.startswith('[X]'):
|
||||
# 已完成的任务
|
||||
task_text = content[2:].strip()
|
||||
task_checked = True
|
||||
else:
|
||||
# 普通列表项
|
||||
task_text = content
|
||||
task_checked = None
|
||||
|
||||
text = process_inline_markdown(task_text)
|
||||
|
||||
if task_checked is True:
|
||||
# 使用复选框符号表示已完成
|
||||
story.append(Paragraph(f'☑ {text}', normal_style))
|
||||
elif task_checked is False:
|
||||
# 使用复选框符号表示未完成
|
||||
story.append(Paragraph(f'☐ {text}', normal_style))
|
||||
else:
|
||||
# 普通列表项
|
||||
story.append(Paragraph(f'• {text}', normal_style))
|
||||
|
||||
# 有序列表处理
|
||||
elif re.match(r'^\s*\d+\.\s', line.strip()):
|
||||
match = re.match(r'^\s*(\d+)\.\s(.*)$', line.strip())
|
||||
if match:
|
||||
num = match.group(1)
|
||||
text = process_inline_markdown(match.group(2))
|
||||
story.append(Paragraph(f'{num}. {text}', normal_style))
|
||||
|
||||
# 分隔线
|
||||
elif line.strip() in ['---', '***', '___']:
|
||||
# 使用 HRFlowable 绘制水平分割线
|
||||
story.append(Spacer(1, 3*mm))
|
||||
story.append(HRFlowable(
|
||||
width="100%",
|
||||
thickness=0.5,
|
||||
lineCap='round',
|
||||
color=colors.grey,
|
||||
spaceBefore=1*mm,
|
||||
spaceAfter=3*mm,
|
||||
))
|
||||
|
||||
# 空行
|
||||
elif not line.strip():
|
||||
story.append(Spacer(1, 3*mm))
|
||||
# 普通段落
|
||||
story.append(Spacer(1, 2*mm))
|
||||
|
||||
# 普通段落(可能跨多行)
|
||||
elif line.strip():
|
||||
# 处理粗体和斜体
|
||||
text = line.strip()
|
||||
text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
|
||||
text = re.sub(r'\*(.+?)\*', r'<i>\1</i>', text)
|
||||
text = re.sub(r'`(.+?)`', r'<font face="Courier">\1</font>', text)
|
||||
story.append(Paragraph(text, normal_style))
|
||||
# 收集连续的非空行作为段落
|
||||
paragraph_lines = [line.strip()]
|
||||
i += 1
|
||||
while i < len(lines):
|
||||
next_line = lines[i].strip()
|
||||
# 遇到空行、标题、列表等特殊行时停止
|
||||
if (not next_line or
|
||||
next_line.startswith('#') or
|
||||
next_line.startswith('>') or
|
||||
next_line.startswith('-') or
|
||||
next_line.startswith('*') or
|
||||
next_line.startswith('```') or
|
||||
re.match(r'^\d+\.\s', next_line) or
|
||||
(next_line.startswith('---') or next_line.startswith('***')) or
|
||||
('|' in next_line and i + 1 < len(lines) and '|' in lines[i + 1])):
|
||||
break
|
||||
paragraph_lines.append(next_line)
|
||||
i += 1
|
||||
|
||||
paragraph_text = ' '.join(paragraph_lines)
|
||||
processed = process_inline_markdown(paragraph_text)
|
||||
story.append(Paragraph(processed, normal_style))
|
||||
i -= 1 # 回退一行,因为外层会 i += 1
|
||||
|
||||
i += 1
|
||||
|
||||
# 生成 PDF
|
||||
doc.build(story)
|
||||
|
||||
Reference in New Issue
Block a user