# Files
# FunMD_Convert/docling/tests/test_api_prd.py
# 2026-01-07 17:18:26 +08:00
#
# 132 lines
# 4.6 KiB
# Python

import os
import sys
import tempfile
from pathlib import Path
from fastapi.testclient import TestClient
import types
# Put the "docling" directory that sits two levels above this file at the
# front of the import search path (presumably so the ``app`` package imported
# later in this file resolves -- confirm against the repository layout).
root = Path(__file__).resolve().parents[2].joinpath("docling")
sys.path[:0] = [str(root)]
# Stub for ``docling.document_converter``. It is registered in ``sys.modules``
# so that later imports of that module name receive this lightweight stand-in.
dc = types.ModuleType('docling.document_converter')


class _StubDocument:
    """Document stand-in whose exporters all return fixed, empty payloads."""

    def export_to_markdown(self, image_mode=None):
        """Return empty Markdown; ``image_mode`` is accepted and ignored."""
        return ""

    def export_to_html(self):
        """Return empty HTML."""
        return ""

    def export_to_json(self):
        """Return an empty JSON object as text."""
        return "{}"

    def export_to_doctags(self):
        """Return the fixed placeholder string ``"{}"``."""
        return "{}"


class _StubResult:
    """Conversion-result stand-in exposing a ``document`` attribute."""

    def __init__(self):
        self.document = _StubDocument()


class _DC:
    """Stub exposed as ``DocumentConverter``; accepts and ignores all arguments."""

    def __init__(self, *a, **k):
        pass

    def convert(self, src):
        """Return a stub result regardless of ``src``.

        The result/document classes live at module level rather than being
        re-declared on every call.
        """
        return _StubResult()


class _PF:
    """Stub exposed as ``PdfFormatOption``; accepts and ignores all arguments."""

    def __init__(self, *a, **k):
        pass


dc.DocumentConverter = _DC
dc.PdfFormatOption = _PF
sys.modules['docling.document_converter'] = dc
# Stub for ``docling.datamodel.base_models``, registered in ``sys.modules`` so
# later imports of that module name receive it.
bm = types.ModuleType('docling.datamodel.base_models')


class _IF:
    """Stub exposed as ``InputFormat``; only the ``PDF`` member is provided."""

    PDF = 'pdf'


bm.InputFormat = _IF
sys.modules['docling.datamodel.base_models'] = bm
# Stub for ``docling.pipeline.standard_pdf_pipeline``, registered in
# ``sys.modules`` so later imports of that module name receive it.
pl = types.ModuleType('docling.pipeline.standard_pdf_pipeline')


class _SP:
    """Stub exposed as ``StandardPdfPipeline``; accepts and ignores all arguments."""

    def __init__(self, *a, **k):
        pass


pl.StandardPdfPipeline = _SP
sys.modules['docling.pipeline.standard_pdf_pipeline'] = pl
# Stub for ``docling.datamodel.pipeline_options``, registered in
# ``sys.modules`` so later imports of that module name receive it.
po = types.ModuleType('docling.datamodel.pipeline_options')


class _PPO:
    """Stub exposed as ``PdfPipelineOptions``; accepts and ignores all arguments."""

    def __init__(self, *a, **k):
        pass


po.PdfPipelineOptions = _PPO
sys.modules['docling.datamodel.pipeline_options'] = po
# Stub for ``docling_core.types.doc``, registered in ``sys.modules`` so later
# imports of that module name receive it.
ct = types.ModuleType('docling_core.types.doc')


class _IRM:
    """Stub exposed as ``ImageRefMode``; only ``PLACEHOLDER`` is provided."""

    PLACEHOLDER = 'placeholder'


ct.ImageRefMode = _IRM
sys.modules['docling_core.types.doc'] = ct
# Stub for ``app.services.docling_adapter``, registered in ``sys.modules`` so
# later imports of that module name receive these canned implementations.
da = types.ModuleType('app.services.docling_adapter')


def _convert_source(src, export):
    """Return an empty body tagged ``text/markdown``, whatever the inputs."""
    return ("", "text/markdown")


def _md2docx(md, **k):
    """Return empty bytes in place of a rendered DOCX."""
    return b""


def _md2pdf(md, *a, **k):
    """Return empty bytes in place of a rendered PDF."""
    return b""


def _infer(source_url, upload_name):
    """Return the fixed base name ``"document"``."""
    return "document"


def _san(name):
    """Return ``name`` unchanged, or ``"document"`` when it is empty/None."""
    return name or "document"


def _load():
    """Return an empty link map."""
    return {}


def _save(m):
    """Discard the link map; nothing is persisted."""
    return None


da.convert_source = _convert_source
da.md_to_docx_bytes = _md2docx
da.md_to_pdf_bytes_with_renderer = _md2pdf
da.infer_basename = _infer
da.sanitize_filename = _san
da.load_linkmap = _load
da.save_linkmap = _save
sys.modules['app.services.docling_adapter'] = da
import app.server as server
class DummyMinio:
    """In-memory stand-in for a MinIO client.

    Uploads are recorded in ``objs`` instead of being sent anywhere, and the
    presign methods return a fixed-host URL built from bucket and object name.
    """

    def __init__(self):
        # One (bucket_name, object_name, length, content_type) tuple per upload.
        self.objs = []

    def put_object(self, bucket_name, object_name, data, length,
                   content_type="application/octet-stream"):
        """Record the upload's metadata; the ``data`` payload is not kept.

        ``content_type`` defaults to the same value as the real client so
        callers that omit it keep working against this stub.
        """
        self.objs.append((bucket_name, object_name, length, content_type))

    @staticmethod
    def _url(bucket, obj):
        """Build the fake object URL shared by both presign methods."""
        return f"http://127.0.0.1:9000/{bucket}/{obj}"

    def get_presigned_url(self, method, bucket, obj, expires=None):
        """Return the fake URL; ``method`` and ``expires`` are ignored."""
        return self._url(bucket, obj)

    def presigned_get_object(self, bucket, obj, expires=None):
        """Return the fake URL; ``expires`` is ignored."""
        return self._url(bucket, obj)
# Small PNG byte string used as fake image data by the fakes in this file,
# written out one PNG chunk per line.
PNG = (
    b"\x89PNG\r\n\x1a\n"  # file signature
    b"\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde"  # IHDR chunk
    b"\x00\x00\x00\nIDATx\x9cc\xf8\x0f\x00\x01\x01\x01\x00\x18\xdd\xdc\xa4"  # IDAT chunk
    b"\x00\x00\x00\x00IEND\xaeB`\x82"  # IEND chunk
)
def setup_module(module=None):
    """Replace the server's external collaborators with in-process fakes.

    Patches three attributes on ``server``:

    * ``_minio_current`` -- returns a fresh ``DummyMinio`` plus three fixed
      strings (presumably bucket, endpoint and prefix; confirm against
      ``app.server``).
    * ``_converter_v2.convert`` -- writes one PNG into a new temp directory
      and returns ``("utf-8", <markdown with an image placeholder>, <dir>)``.
    * ``_extract_pdf_images`` -- returns two fake PNG images.

    ``module`` is accepted for pytest's module-level hook signature and is
    not used.
    """
    # Imported locally so the cleanup needs no new file-level imports.
    import atexit
    import shutil

    server._minio_current = lambda: (DummyMinio(), "doctest", "http://127.0.0.1:9000", "assets")

    def fake_convert(src, export="markdown", engine=None):
        d = Path(tempfile.mkdtemp(prefix="artifacts_"))
        # mkdtemp never cleans up after itself; remove the directory at
        # interpreter exit so repeated runs do not accumulate temp dirs.
        # ignore_errors covers the case where it has already been removed.
        atexit.register(shutil.rmtree, str(d), ignore_errors=True)
        (d / "img.png").write_bytes(PNG)
        return ("utf-8", "A\n<!-- image -->\nB", str(d))

    server._converter_v2.convert = fake_convert
    server._extract_pdf_images = lambda pdf_path: [("png", PNG), ("png", PNG)]
import unittest
class TestApiConvert(unittest.TestCase):
    """Tests for ``POST /api/convert`` run against the patched server module."""

    @classmethod
    def setUpClass(cls):
        # Install the fakes once for the whole class.
        setup_module()

    def _post_convert(self, upload_name, data):
        """Upload a stub PDF to ``/api/convert`` and return the decoded JSON.

        Also checks that the patched MinIO accessor is active, using a
        unittest assertion so the check is not stripped under ``python -O``.
        """
        app = server.app
        mc = server._minio_current()
        self.assertEqual(mc[1], 'doctest')
        client = TestClient(app)
        files = {"file": (upload_name, b"%PDF-1.4\n")}
        response = client.post("/api/convert", files=files, data=data)
        return response.json()

    def test_api_convert_save_true_returns_md_url(self):
        """With ``save=true`` the name and MinIO URL both end in ``.md``."""
        j = self._post_convert(
            "管理端使用说明 (1).pdf",
            {"export": "markdown", "save": "true", "filename": "管理端使用说明 (1)"},
        )
        self.assertEqual(j["code"], 0, str(j))
        self.assertTrue(j["data"]["name"].lower().endswith(".md"))
        self.assertTrue(j["data"]["minio_url"].lower().endswith(".md"))

    def test_api_convert_save_false_returns_content_and_md_name(self):
        """With ``save=false`` the content is inline and has an image link."""
        j = self._post_convert(
            "文档.pdf",
            {"export": "markdown", "save": "false", "filename": "文档"},
        )
        self.assertEqual(j["code"], 0, str(j))
        self.assertTrue(j["data"]["name"].lower().endswith(".md"))
        self.assertIn("![image](", j["data"]["content"])