FunMD_Convert/docling/app/tests/test_md_to_docx.py

import io
import os
import base64
from pathlib import Path
from zipfile import ZipFile

from app.services.docling_adapter import md_to_docx_bytes


def _make_png(tmpdir: Path) -> Path:
    # Minimal 1x1 PNG
    data = base64.b64decode(
        b"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII="
    )
    p = tmpdir / "tiny.png"
    p.write_bytes(data)
    return p


def test_md_to_docx_renders_blocks_and_media(tmp_path: Path):
    """Check that ``md_to_docx_bytes`` packs text and image into a DOCX archive.

    The input mixes a heading, a paragraph, a code block, an image pointing
    at a PNG written under ``tmp_path``, and a two-column table. The result
    must be a non-empty bytes object that opens as a ZIP archive with
    ``word/`` entries, whose ``word/document.xml`` contains every expected
    text token, and which holds at least one ``word/media/`` entry.
    """
    png = _make_png(tmp_path)
    # Only the <img> fragment interpolates a value; the others are plain literals.
    html = (
        "<h1>标题</h1>"
        "<p>内容</p>"
        "<pre><code>print(\"hello\")\n</code></pre>"
        f"<img src='{png.as_posix()}'>"
        "<table><thead><tr><th>A</th><th>B</th></tr></thead>"
        "<tbody><tr><td>1</td><td>2</td></tr></tbody></table>"
    )

    docx = md_to_docx_bytes(
        html,
        toc=True,
        header_text="Left|Right",
        footer_text="Footer",
        filename_text="FileName",
        product_name="Product",
        document_name="DocName",
        product_version="1.0",
        document_version="2.0",
    )

    assert isinstance(docx, (bytes, bytearray)) and len(docx) > 0
    # Open the archive as a context manager so it is closed even when an
    # assertion or a member read fails part-way through.
    with ZipFile(io.BytesIO(docx)) as zf:
        names = set(zf.namelist())
        assert any(n.startswith("word/") for n in names)
        # Document XML should contain core texts
        doc_xml = zf.read("word/document.xml").decode("utf-8")
    for tok in ["标题", "内容", "print(\"hello\")", "A", "B", "1", "2"]:
        assert tok in doc_xml, f"missing {tok!r} in word/document.xml"
    # Media should be present for the image
    assert any(n.startswith("word/media/") for n in names)
Import project files 2026-01-07 17:18:26 +08:00			`import io`
			`import os`
			`import base64`
			`from pathlib import Path`
			`from zipfile import ZipFile`

			`from app.services.docling_adapter import md_to_docx_bytes`


			`def _make_png(tmpdir: Path) -> Path:`
			`# Minimal 1x1 PNG`
			`data = base64.b64decode(`
			`b"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII="`
			`)`
			`p = tmpdir / "tiny.png"`
			`p.write_bytes(data)`
			`return p`


			`def test_md_to_docx_renders_blocks_and_media(tmp_path: Path):`
			`png = _make_png(tmp_path)`
			`html = (`
			`f"<h1>标题</h1>"`
			`f"<p>内容</p>"`
			`f"<pre><code>print(\"hello\")\n</code></pre>"`
			`f"<img src='{png.as_posix()}'>"`
			`f"<table><thead><tr><th>A</th><th>B</th></tr></thead>"`
			`f"<tbody><tr><td>1</td><td>2</td></tr></tbody></table>"`
			`)`

			`docx = md_to_docx_bytes(`
			`html,`
			`toc=True,`
			`header_text="Left\|Right",`
			`footer_text="Footer",`
			`filename_text="FileName",`
			`product_name="Product",`
			`document_name="DocName",`
			`product_version="1.0",`
			`document_version="2.0",`
			`)`

			`assert isinstance(docx, (bytes, bytearray)) and len(docx) > 0`
			`zf = ZipFile(io.BytesIO(docx))`
			`names = set(zf.namelist())`
			`assert any(n.startswith("word/") for n in names)`
			`# Document XML should contain core texts`
			`doc_xml = zf.read("word/document.xml").decode("utf-8")`
			`for tok in ["标题", "内容", "print(\"hello\")", "A", "B", "1", "2"]:`
			`assert tok in doc_xml`
			`# Media should be present for the image`
			`assert any(n.startswith("word/media/") for n in names)`