import os
import sys
import tempfile
import types
from pathlib import Path

from fastapi.testclient import TestClient

# Put the "docling" directory found under parents[2] of this file's resolved
# path at the front of sys.path.
root = Path(__file__).resolve().parents[2] / "docling"
sys.path.insert(0, str(root))


# ---------------------------------------------------------------------------
# Stub modules.
#
# Each section below builds an empty module object, attaches minimal stand-in
# classes/functions to it and registers it in ``sys.modules`` under the dotted
# name, so a later ``import`` of that name resolves to the stub.  All of this
# has to happen before ``app.server`` is imported at the bottom.
# ---------------------------------------------------------------------------

# --- docling.document_converter --------------------------------------------
class _DC:
    """Stub ``DocumentConverter``: ignores constructor arguments."""

    def __init__(self, *args, **kwargs):
        pass

    def convert(self, src):
        """Return a result object whose ``document`` exports empty content."""

        class R:
            class D:
                def export_to_markdown(self, image_mode=None):
                    return ""

                def export_to_html(self):
                    return ""

                def export_to_json(self):
                    return "{}"

                def export_to_doctags(self):
                    return "{}"

            # Class-level attribute, shared by every instance of R.
            document = D()

        return R()


class _PF:
    """Stub ``PdfFormatOption``: ignores constructor arguments."""

    def __init__(self, *args, **kwargs):
        pass


dc = types.ModuleType('docling.document_converter')
dc.DocumentConverter = _DC
dc.PdfFormatOption = _PF
sys.modules['docling.document_converter'] = dc


# --- docling.datamodel.base_models -----------------------------------------
class _IF:
    """Stub ``InputFormat`` exposing only the ``PDF`` member."""

    PDF = 'pdf'


bm = types.ModuleType('docling.datamodel.base_models')
bm.InputFormat = _IF
sys.modules['docling.datamodel.base_models'] = bm


# --- docling.pipeline.standard_pdf_pipeline --------------------------------
class _SP:
    """Stub ``StandardPdfPipeline``: ignores constructor arguments."""

    def __init__(self, *args, **kwargs):
        pass


pl = types.ModuleType('docling.pipeline.standard_pdf_pipeline')
pl.StandardPdfPipeline = _SP
sys.modules['docling.pipeline.standard_pdf_pipeline'] = pl


# --- docling.datamodel.pipeline_options ------------------------------------
class _PPO:
    """Stub ``PdfPipelineOptions``: ignores constructor arguments."""

    def __init__(self, *args, **kwargs):
        pass


po = types.ModuleType('docling.datamodel.pipeline_options')
po.PdfPipelineOptions = _PPO
sys.modules['docling.datamodel.pipeline_options'] = po


# --- docling_core.types.doc ------------------------------------------------
class _IRM:
    """Stub ``ImageRefMode`` exposing only the ``PLACEHOLDER`` member."""

    PLACEHOLDER = 'placeholder'


ct = types.ModuleType('docling_core.types.doc')
ct.ImageRefMode = _IRM
sys.modules['docling_core.types.doc'] = ct


# --- app.services.docling_adapter ------------------------------------------
def _convert_source(src, export):
    """Stub conversion: empty body with a markdown content type."""
    return ("", "text/markdown")


def _md2docx(md, **k):
    """Stub DOCX renderer: always returns empty bytes."""
    return b""


def _md2pdf(md, *a, **k):
    """Stub PDF renderer: always returns empty bytes."""
    return b""


def _infer(source_url, upload_name):
    """Stub basename inference: always ``"document"``."""
    return "document"


def _san(name):
    """Stub filename sanitizer: pass through, falling back to ``"document"``."""
    return name or "document"


def _load():
    """Stub link-map loader: always an empty dict."""
    return {}


def _save(m):
    """Stub link-map saver: does nothing."""
    return None


da = types.ModuleType('app.services.docling_adapter')
da.convert_source = _convert_source
da.md_to_docx_bytes = _md2docx
da.md_to_pdf_bytes_with_renderer = _md2pdf
da.infer_basename = _infer
da.sanitize_filename = _san
da.load_linkmap = _load
da.save_linkmap = _save
sys.modules['app.services.docling_adapter'] = da

# Imported last so that the stub modules registered above are already in
# sys.modules when the server module is loaded.
import app.server as server
class DummyMinio:
    """In-memory stand-in for the MinIO client handed to the server.

    Uploads are recorded in ``objs`` instead of being sent anywhere, and both
    presign methods return a fixed local URL built from bucket and object name.
    """

    def __init__(self):
        # One (bucket_name, object_name, length, content_type) tuple per upload.
        self.objs = []

    def put_object(self, bucket_name, object_name, data, length, content_type):
        """Record the upload's metadata; the payload ``data`` is not kept."""
        self.objs.append((bucket_name, object_name, length, content_type))

    def get_presigned_url(self, method, bucket, obj, expires=None):
        """Return a local URL for ``bucket``/``obj``; other arguments are ignored."""
        return f"http://127.0.0.1:9000/{bucket}/{obj}"

    def presigned_get_object(self, bucket, obj, expires=None):
        """Return a local URL for ``bucket``/``obj``; ``expires`` is ignored."""
        return f"http://127.0.0.1:9000/{bucket}/{obj}"


# PNG file bytes used as the fake extracted image / conversion artifact.
PNG = (
    b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01"
    b"\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\nIDATx\x9cc\xf8\x0f\x00"
    b"\x01\x01\x01\x00\x18\xdd\xdc\xa4\x00\x00\x00\x00IEND\xaeB`\x82"
)

import shutil
import unittest
from unittest import mock

# Patchers started by setup_module(); stopped again by teardown_module().
_ACTIVE_PATCHES = []
# Temporary artifact directories created by the fake converter.
_ARTIFACT_DIRS = []


def setup_module(module=None):
    """Replace the server's MinIO, converter and image-extraction hooks.

    Safe to call more than once: while patches are active, later calls are
    no-ops.  Both ``setUpClass`` below and a test runner's module-level hook
    may call it, and a second round of patching would make the patchers
    remember the fakes as the "original" values.
    """
    if _ACTIVE_PATCHES:
        return

    def fake_convert(src, export="markdown", engine=None):
        # Produce a throw-away artifact directory holding a single image and
        # remember it so teardown_module() can delete it.
        d = Path(tempfile.mkdtemp(prefix="artifacts_"))
        _ARTIFACT_DIRS.append(d)
        (d / "img.png").write_bytes(PNG)
        return ("utf-8", "A\n\nB", str(d))

    replacements = (
        (
            server,
            "_minio_current",
            lambda: (DummyMinio(), "doctest", "http://127.0.0.1:9000", "assets"),
        ),
        (server._converter_v2, "convert", fake_convert),
        (
            server,
            "_extract_pdf_images",
            lambda pdf_path: [("png", PNG), ("png", PNG)],
        ),
    )
    for target, attr, stand_in in replacements:
        # create=True keeps the old "just assign it" behaviour for attributes
        # the target does not define yet.
        patcher = mock.patch.object(target, attr, new=stand_in, create=True)
        patcher.start()
        _ACTIVE_PATCHES.append(patcher)


def teardown_module(module=None):
    """Undo setup_module(): restore patched attributes, delete temp dirs.

    Idempotent; calling it with nothing to undo does nothing.
    """
    while _ACTIVE_PATCHES:
        _ACTIVE_PATCHES.pop().stop()
    while _ARTIFACT_DIRS:
        # The directory may already be gone; that is not an error here.
        shutil.rmtree(_ARTIFACT_DIRS.pop(), ignore_errors=True)


# unittest looks for this spelling of the module-level teardown hook.
tearDownModule = teardown_module


class TestApiConvert(unittest.TestCase):
    """Tests for ``POST /api/convert`` with the fakes from setup_module()."""

    @classmethod
    def setUpClass(cls):
        # Runners that do not call setup_module() themselves still get the fakes.
        setup_module()

    def _post_convert(self, upload_name, data):
        """POST a minimal PDF upload and return the ``data`` part of the reply.

        Fails the test if the fake MinIO configuration is not active or if the
        response's ``code`` field is not 0.
        """
        self.assertEqual(server._minio_current()[1], "doctest")
        client = TestClient(server.app)
        files = {"file": (upload_name, b"%PDF-1.4\n")}
        response = client.post("/api/convert", files=files, data=data)
        payload = response.json()
        self.assertEqual(payload["code"], 0, str(payload))
        return payload["data"]

    def test_api_convert_save_true_returns_md_url(self):
        result = self._post_convert(
            "管理端使用说明 (1).pdf",
            {"export": "markdown", "save": "true", "filename": "管理端使用说明 (1)"},
        )
        self.assertTrue(result["name"].lower().endswith(".md"))
        self.assertTrue(result["minio_url"].lower().endswith(".md"))

    def test_api_convert_save_false_returns_content_and_md_name(self):
        result = self._post_convert(
            "文档.pdf",
            {"export": "markdown", "save": "false", "filename": "文档"},
        )
        self.assertTrue(result["name"].lower().endswith(".md"))
        self.assertIn("![image](", result["content"])