Import project files
This commit is contained in:
131
docling/tests/test_api_prd.py
Normal file
131
docling/tests/test_api_prd.py
Normal file
@@ -0,0 +1,131 @@
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from fastapi.testclient import TestClient
|
||||
import types
|
||||
|
||||
# Directory two levels above this file's folder, joined with "docling".
# It is pushed to the front of sys.path so it wins over other entries.
# NOTE(review): presumably this is where the ``app`` package imported later
# in this file lives -- confirm against the repository layout.
root = Path(__file__).resolve().parents[2] / "docling"
sys.path.insert(0, str(root))
|
||||
# Stub module registered in sys.modules under 'docling.document_converter'.
# NOTE(review): presumably this lets ``app.server`` (imported further down in
# this file) load without the real docling package -- confirm.
dc = types.ModuleType('docling.document_converter')


class _DC:
    """Fake ``DocumentConverter``.

    Accepts and ignores any constructor arguments; ``convert`` always yields
    a result whose document exports fixed, empty payloads.
    """

    class _Document:
        """Fake document whose exporters return constant strings."""

        def export_to_markdown(self, image_mode=None):
            return ""

        def export_to_html(self):
            return ""

        def export_to_json(self):
            return "{}"

        def export_to_doctags(self):
            return "{}"

    class _Result:
        """Fake conversion result; carries the document as ``.document``."""

    def __init__(self, *a, **k):
        pass

    def convert(self, src):
        """Return a fake result with a fresh fake document; ``src`` is ignored."""
        result = self._Result()
        result.document = self._Document()
        return result


class _PF:
    """Fake ``PdfFormatOption`` that accepts and ignores any arguments."""

    def __init__(self, *a, **k):
        pass


dc.DocumentConverter = _DC
dc.PdfFormatOption = _PF
sys.modules['docling.document_converter'] = dc
|
||||
# Stub module registered in sys.modules under 'docling.datamodel.base_models'.
bm = types.ModuleType('docling.datamodel.base_models')


class _IF:
    """Fake ``InputFormat`` exposing only the ``PDF`` member."""

    PDF = 'pdf'


bm.InputFormat = _IF
sys.modules['docling.datamodel.base_models'] = bm
|
||||
# Stub module registered in sys.modules under
# 'docling.pipeline.standard_pdf_pipeline'.
pl = types.ModuleType('docling.pipeline.standard_pdf_pipeline')


class _SP:
    """Fake ``StandardPdfPipeline`` that accepts and ignores any arguments."""

    def __init__(self, *a, **k):
        pass


pl.StandardPdfPipeline = _SP
sys.modules['docling.pipeline.standard_pdf_pipeline'] = pl
|
||||
# Stub module registered in sys.modules under
# 'docling.datamodel.pipeline_options'.
po = types.ModuleType('docling.datamodel.pipeline_options')


class _PPO:
    """Fake ``PdfPipelineOptions`` that accepts and ignores any arguments."""

    def __init__(self, *a, **k):
        pass


po.PdfPipelineOptions = _PPO
sys.modules['docling.datamodel.pipeline_options'] = po
|
||||
# Stub module registered in sys.modules under 'docling_core.types.doc'.
ct = types.ModuleType('docling_core.types.doc')


class _IRM:
    """Fake ``ImageRefMode`` exposing only the ``PLACEHOLDER`` member."""

    PLACEHOLDER = 'placeholder'


ct.ImageRefMode = _IRM
sys.modules['docling_core.types.doc'] = ct
|
||||
# Stub module registered in sys.modules under 'app.services.docling_adapter'.
# Every function below ignores its arguments and returns a fixed value.
da = types.ModuleType('app.services.docling_adapter')


def _convert_source(src, export):
    """Return an empty body paired with the ``text/markdown`` content type."""
    return ("", "text/markdown")


def _md2docx(md, **k):
    """Return empty bytes in place of a rendered DOCX."""
    return b""


def _md2pdf(md, *a, **k):
    """Return empty bytes in place of a rendered PDF."""
    return b""


def _infer(source_url, upload_name):
    """Return the fixed basename ``"document"``."""
    return "document"


def _san(name):
    """Return ``name`` unchanged, or ``"document"`` when it is empty/None."""
    return name or "document"


def _load():
    """Return an empty link map."""
    return {}


def _save(m):
    """Discard the link map; nothing is persisted."""
    return None


da.convert_source = _convert_source
da.md_to_docx_bytes = _md2docx
da.md_to_pdf_bytes_with_renderer = _md2pdf
da.infer_basename = _infer
da.sanitize_filename = _san
da.load_linkmap = _load
da.save_linkmap = _save
sys.modules['app.services.docling_adapter'] = da
|
||||
import app.server as server
|
||||
|
||||
class DummyMinio:
    """In-memory stand-in for a MinIO client.

    Uploads are recorded in ``self.objs`` as
    ``(bucket_name, object_name, length, content_type)`` tuples; the payload
    itself is not stored. Both presign methods return a fixed
    ``http://127.0.0.1:9000/<bucket>/<obj>`` URL.
    """

    def __init__(self):
        self.objs = []

    def put_object(self, bucket_name, object_name, data, length,
                   content_type="application/octet-stream", **kwargs):
        """Record an upload.

        ``content_type`` defaults to the same value as the real client, and
        extra keyword arguments (e.g. ``metadata``) are accepted and ignored
        so that any call valid against the real client also works here.
        """
        self.objs.append((bucket_name, object_name, length, content_type))

    def get_presigned_url(self, method, bucket, obj, expires=None, **kwargs):
        """Return the fixed local URL; ``method``/``expires`` are ignored."""
        return f"http://127.0.0.1:9000/{bucket}/{obj}"

    def presigned_get_object(self, bucket, obj, expires=None, **kwargs):
        """Return the fixed local URL; ``expires`` is ignored."""
        return f"http://127.0.0.1:9000/{bucket}/{obj}"
|
||||
|
||||
# Image fixture bytes: PNG signature followed by IHDR, IDAT and IEND chunks.
# The IHDR chunk declares a 1x1 image, bit depth 8, colour type 2 (RGB).
# Used as the fake image payload by the patches installed in setup_module.
PNG = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\nIDATx\x9cc\xf8\x0f\x00\x01\x01\x01\x00\x18\xdd\xdc\xa4\x00\x00\x00\x00IEND\xaeB`\x82")
|
||||
|
||||
def setup_module(module=None):
    """Patch ``server`` with in-process fakes.

    Replaces three attributes on the imported ``server`` module:

    * ``_minio_current`` -- returns a fresh ``DummyMinio`` plus the fixed
      tuple members ``"doctest"``, ``"http://127.0.0.1:9000"``, ``"assets"``.
    * ``_converter_v2.convert`` -- writes one PNG into a new temp directory
      and returns ``("utf-8", <markdown with an image placeholder>, <dir>)``.
    * ``_extract_pdf_images`` -- returns two ``("png", PNG)`` entries.

    ``module`` is unused; it defaults to ``None`` so the function can also be
    called directly (as ``setUpClass`` in this file does).
    """
    # Local imports: only needed for temp-directory cleanup below.
    import atexit
    import shutil

    server._minio_current = lambda: (DummyMinio(), "doctest", "http://127.0.0.1:9000", "assets")

    def fake_convert(src, export="markdown", engine=None):
        d = Path(tempfile.mkdtemp(prefix="artifacts_"))
        # mkdtemp never cleans up after itself; remove the directory at
        # interpreter exit. ignore_errors covers the case where the code
        # under test has already deleted it.
        atexit.register(shutil.rmtree, d, ignore_errors=True)
        (d / "img.png").write_bytes(PNG)
        return ("utf-8", "A\n<!-- image -->\nB", str(d))

    server._converter_v2.convert = fake_convert
    server._extract_pdf_images = lambda pdf_path: [("png", PNG), ("png", PNG)]
|
||||
|
||||
import unittest
|
||||
|
||||
class TestApiConvert(unittest.TestCase):
    """Exercises ``POST /api/convert`` on ``server.app`` with the fakes
    installed by ``setup_module``."""

    @classmethod
    def setUpClass(cls):
        # Install the fakes even when the module-level hook was not run by
        # the test runner.
        setup_module()

    def test_api_convert_save_true_returns_md_url(self):
        """With ``save=true`` the response carries a ``.md`` name and a
        ``.md`` MinIO URL."""
        app = server.app
        mc = server._minio_current()
        # unittest assertion rather than a bare assert: consistent with the
        # checks below and not stripped when Python runs with -O.
        self.assertEqual(mc[1], 'doctest')
        c = TestClient(app)
        files = {"file": ("管理端使用说明 (1).pdf", b"%PDF-1.4\n")}
        data = {"export": "markdown", "save": "true", "filename": "管理端使用说明 (1)"}
        r = c.post("/api/convert", files=files, data=data)
        j = r.json()
        self.assertEqual(j["code"], 0, str(j))
        self.assertTrue(j["data"]["name"].lower().endswith(".md"))
        self.assertTrue(j["data"]["minio_url"].lower().endswith(".md"))
|
||||
|
||||
def test_api_convert_save_false_returns_content_and_md_name(self):
|
||||
app = server.app
|
||||
mc = server._minio_current()
|
||||
assert mc[1] == 'doctest'
|
||||
c = TestClient(app)
|
||||
files = {"file": ("文档.pdf", b"%PDF-1.4\n")}
|
||||
data = {"export": "markdown", "save": "false", "filename": "文档"}
|
||||
r = c.post("/api/convert", files=files, data=data)
|
||||
j = r.json()
|
||||
self.assertEqual(j["code"], 0, str(j))
|
||||
self.assertTrue(j["data"]["name"].lower().endswith(".md"))
|
||||
self.assertIn("
|
||||
Reference in New Issue
Block a user