Import project files
This commit is contained in:
51
docling/app/tests/test_word2markdown_inline_images.py
Normal file
51
docling/app/tests/test_word2markdown_inline_images.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
import base64
|
||||
import tempfile
|
||||
import sys
|
||||
|
||||
# Make the 'app' package importable when this module is run on its own:
# the directory at parents[2] of this file's resolved path is placed at the
# front of sys.path unless it is already listed there.
try:
    _this_file = Path(__file__).resolve()
    root = _this_file.parents[2]
    p = str(root)
    if p not in sys.path:
        sys.path.insert(0, p)
except Exception:
    # Best effort only: if the location cannot be derived, leave sys.path
    # untouched and let the imports that follow use whatever is configured.
    pass
|
||||
|
||||
from docx import Document
|
||||
|
||||
from app.services.word2markdown import convert_any
|
||||
|
||||
|
||||
def _tiny_png_bytes() -> bytes:
|
||||
return base64.b64decode(
|
||||
b"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII="
|
||||
)
|
||||
|
||||
|
||||
class InlineImagesTest(unittest.TestCase):
    """Checks that a converted Word document keeps images in reading order."""

    def test_paragraph_image_order(self):
        """A picture placed between two paragraphs must stay between them.

        Builds a .docx with text, a picture, and more text, converts it with
        ``convert_any`` and verifies that the reported encoding is UTF-8 and
        that the Markdown lists the three items in the same order.
        """
        # Imported locally because it is only needed for fixture cleanup.
        import shutil

        tmp = Path(tempfile.mkdtemp(prefix="w2m_inline_test_"))
        # mkdtemp never deletes what it creates; remove the scratch directory
        # when the test ends, whether it passed or failed.
        self.addCleanup(shutil.rmtree, str(tmp), ignore_errors=True)

        img = tmp / "tiny.png"
        img.write_bytes(_tiny_png_bytes())

        docx_path = tmp / "sample.docx"
        doc = Document()
        doc.add_paragraph("前文A")
        doc.add_picture(str(img))  # the picture gets a paragraph of its own
        doc.add_paragraph("后文B")
        doc.save(str(docx_path))

        enc, md = convert_any(docx_path)
        self.assertEqual(enc, "utf-8")

        a_pos = md.find("前文A")
        # NOTE(review): the original search string was truncated; "![" is the
        # Markdown image opener and is assumed to be what convert_any emits
        # for pictures -- confirm against its actual output.
        img_pos = md.find("![")
        b_pos = md.find("后文B")

        # All three pieces must be present in the output ...
        self.assertNotEqual(a_pos, -1, "leading paragraph text not found in Markdown")
        self.assertNotEqual(img_pos, -1, "image marker not found in Markdown")
        self.assertNotEqual(b_pos, -1, "trailing paragraph text not found in Markdown")
        # ... and in the order A -> image -> B.
        self.assertLess(a_pos, img_pos, "image appears before the leading paragraph")
        self.assertLess(img_pos, b_pos, "image appears after the trailing paragraph")
|
||||
|
||||
|
||||
# Allow running this file directly as a script; unittest discovers and runs
# the test cases defined in this module.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user