Files
FunMD_Convert/docling/app/tests/test_word2markdown_inline_images.py

52 lines
1.3 KiB
Python
Raw Permalink Normal View History

2026-01-07 17:18:26 +08:00
import unittest
from pathlib import Path
import base64
import tempfile
import sys
# Make the 'app' package importable when this file is run on its own: the
# third ancestor directory of this file (parents[2]) is put at the front of
# sys.path unless it is already listed there.
# NOTE(review): parents[2] is presumably the directory that contains 'app' —
# confirm against the repository layout.
try:
    root = Path(__file__).resolve().parents[2]
    p = str(root)
    if sys.path.count(p) == 0:
        sys.path.insert(0, p)
except Exception:
    # Best effort only: on any failure sys.path is left as it was, and the
    # 'app' import below decides whether the module can be loaded.
    pass
from docx import Document
from app.services.word2markdown import convert_any
def _tiny_png_bytes() -> bytes:
return base64.b64decode(
b"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII="
)
class InlineImagesTest(unittest.TestCase):
    """Checks that a picture in a .docx keeps its place between the texts."""

    def test_paragraph_image_order(self):
        """Convert text / picture / text and verify the Markdown order.

        A .docx with a paragraph, a picture and a second paragraph is built
        in a temporary directory and passed to ``convert_any``. The returned
        Markdown must contain the first text, then an inline ``data:`` image,
        then the second text. The temporary directory is removed when the
        test ends, whether it passes or fails.
        """
        # TemporaryDirectory + addCleanup removes the fixture files again;
        # a bare mkdtemp() would leave one directory behind per test run.
        tmp_dir = tempfile.TemporaryDirectory(prefix="w2m_inline_test_")
        self.addCleanup(tmp_dir.cleanup)
        tmp = Path(tmp_dir.name)

        img = tmp / "tiny.png"
        img.write_bytes(_tiny_png_bytes())

        docx = tmp / "sample.docx"
        doc = Document()
        doc.add_paragraph("前文A")
        doc.add_picture(str(img))  # the picture sits in a paragraph of its own
        doc.add_paragraph("后文B")
        doc.save(str(docx))

        enc, md = convert_any(docx)
        self.assertEqual(enc, "utf-8")

        a_pos = md.find("前文A")
        img_pos = md.find("![Image](data:")
        b_pos = md.find("后文B")

        # str.find returns -1 for a missing substring, so check presence of
        # each piece separately to get a precise failure message.
        self.assertNotEqual(a_pos, -1, "leading text not found in Markdown")
        self.assertNotEqual(img_pos, -1, "inline image not found in Markdown")
        self.assertNotEqual(b_pos, -1, "trailing text not found in Markdown")

        # Expected order: A -> picture -> B.
        self.assertLess(a_pos, img_pos, "image appears before the leading text")
        self.assertLess(img_pos, b_pos, "image appears after the trailing text")
# Run the tests with unittest's command-line runner when this file is
# executed directly as a script.
if __name__ == "__main__":
    unittest.main()