Files
FunMD_Convert/docling/app/tests/test_md_to_docx.py

54 lines
1.6 KiB
Python
Raw Normal View History

2026-01-07 17:18:26 +08:00
import io
import os
import base64
from pathlib import Path
from zipfile import ZipFile
from app.services.docling_adapter import md_to_docx_bytes
def _make_png(tmpdir: Path) -> Path:
    """Write a tiny PNG fixture into *tmpdir* and return its path.

    The image bytes are embedded as base64 so the test needs no binary
    fixture file on disk. The file is always named ``tiny.png``.

    Args:
        tmpdir: Existing directory in which the image is created.

    Returns:
        Path of the written ``tiny.png`` file.
    """
    # Minimal 1x1 PNG
    data = base64.b64decode(
        b"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII="
    )
    p = tmpdir / "tiny.png"
    p.write_bytes(data)
    return p
def test_md_to_docx_renders_blocks_and_media(tmp_path: Path):
    """Check that md_to_docx_bytes yields a DOCX archive with text and media.

    An HTML fragment containing a heading, a paragraph, a code block, an
    image (pointing at a PNG written into ``tmp_path``) and a table is passed
    to ``md_to_docx_bytes``. The result is opened as a ZIP archive and the
    test asserts that ``word/document.xml`` contains the expected text tokens
    and that at least one entry exists under ``word/media/``.
    """
    png = _make_png(tmp_path)
    # Only the <img> line needs interpolation; the others are plain literals.
    html = (
        "<h1>标题</h1>"
        "<p>内容</p>"
        "<pre><code>print(\"hello\")\n</code></pre>"
        f"<img src='{png.as_posix()}'>"
        "<table><thead><tr><th>A</th><th>B</th></tr></thead>"
        "<tbody><tr><td>1</td><td>2</td></tr></tbody></table>"
    )
    docx = md_to_docx_bytes(
        html,
        toc=True,
        header_text="Left|Right",
        footer_text="Footer",
        filename_text="FileName",
        product_name="Product",
        document_name="DocName",
        product_version="1.0",
        document_version="2.0",
    )
    assert isinstance(docx, (bytes, bytearray)) and len(docx) > 0

    # Use a context manager so the archive handle is always closed, even
    # when one of the assertions below fails.
    with ZipFile(io.BytesIO(docx)) as zf:
        names = set(zf.namelist())
        assert any(n.startswith("word/") for n in names), "no word/ entries in archive"
        # Document XML should contain core texts
        doc_xml = zf.read("word/document.xml").decode("utf-8")

    # NOTE(review): the single-character tokens ("A", "B", "1", "2") may also
    # match unrelated XML markup, so they are weak evidence that the table
    # was rendered - consider a stricter check.
    for tok in ["标题", "内容", "print(\"hello\")", "A", "B", "1", "2"]:
        assert tok in doc_xml, f"missing token in document.xml: {tok!r}"

    # Media should be present for the image
    assert any(n.startswith("word/media/") for n in names), "no embedded media found"