Files
FunMD_Convert/docling/app/tests/run_edge_cases_debug.py
2026-01-07 17:18:26 +08:00

98 lines
3.4 KiB
Python

import io
import zipfile
from pathlib import Path
from fastapi.testclient import TestClient
import sys
from pathlib import Path as _Path
# parents[2] is the grandparent of the directory that contains this file.
base = _Path(__file__).resolve().parents[2]
# Both `base` and `base/docling` go to the front of sys.path *before* the
# `app.server` import below, so the order of these statements matters.
# NOTE(review): presumably this lets the script run directly without the
# package being installed - confirm which of the two entries is needed.
sys.path.insert(0, str(base))
sys.path.insert(0, str(base / "docling"))
import app.server as server
class FakeMinio:
    """In-memory stand-in for a MinIO client.

    Uploaded payloads are kept in ``self.objs`` keyed by
    ``(bucket_name, object_name)``. The presigned-URL methods return a fixed
    fake URL built from the bucket and object name; nothing touches the
    network.

    ``content_type`` and ``expires`` are optional, mirroring the real MinIO
    client where those arguments have defaults, so code that relies on the
    defaults does not fail against the fake with a ``TypeError``.
    """

    def __init__(self) -> None:
        # (bucket_name, object_name) -> bytes captured by put_object
        self.objs = {}

    def put_object(
        self,
        bucket_name: str,
        object_name: str,
        data: io.BytesIO,
        length: int,
        content_type: str = "application/octet-stream",
    ) -> None:
        """Read ``length`` bytes from ``data`` and store them under the key.

        ``content_type`` is accepted for signature compatibility and ignored.
        """
        self.objs[(bucket_name, object_name)] = data.read(length)

    def get_presigned_url(
        self, method: str, bucket: str, obj: str, expires: object = None
    ) -> str:
        """Return the fake presigned URL; ``method``/``expires`` are ignored."""
        return self._fake_url(bucket, obj)

    def presigned_get_object(
        self, bucket: str, obj: str, expires: object = None
    ) -> str:
        """Return the fake presigned URL; ``expires`` is ignored."""
        return self._fake_url(bucket, obj)

    @staticmethod
    def _fake_url(bucket: str, obj: str) -> str:
        """Build the single URL shape shared by both presign methods."""
        return f"http://minio.test/presigned/{bucket}/{obj}"
def setup():
    """Load test MinIO settings into the server and install a fake client.

    Updates ``server.RUNTIME_CONFIG["minio"]`` in place, then replaces
    ``server._minio_current`` with a callable that returns one shared
    ``FakeMinio`` instance plus three fixed strings (the same values as the
    ``bucket``, ``public`` and ``prefix`` settings written here).
    """
    minio_settings = {
        "endpoint": "127.0.0.1:9000",
        "public": "http://127.0.0.1:9000",
        "access": "ak",
        "secret": "sk",
        "bucket": "test",
        "secure": "false",
        "prefix": "assets",
        "store_final": "true",
        "public_read": "true",
    }
    server.RUNTIME_CONFIG["minio"].update(minio_settings)

    # One fake client is created per setup() call and captured by the provider.
    stub_client = FakeMinio()
    server._minio_current = lambda: (  # type: ignore
        stub_client,
        "test",
        "http://127.0.0.1:9000",
        "assets",
    )
def _report(label, resp):
    """Print ``label``, the HTTP status and the decoded body of ``resp``.

    Falls back to the raw response text when the body is not valid JSON, so
    a non-JSON error body is shown instead of aborting the debug run.
    Returns whatever was printed as the body.
    """
    try:
        body = resp.json()
    except ValueError:
        body = resp.text
    print(f"{label}:", resp.status_code, body)
    return body


def _stage_archive(client, label, filename, payload):
    """POST ``payload`` to ``/api/archive/stage`` and return the staged id.

    Returns ``None`` after printing the stage response when the reply does
    not contain ``data.id``, so the caller can skip the dependent steps
    rather than die on an opaque ``KeyError``/``TypeError``.
    """
    resp = client.post("/api/archive/stage", files={"file": (filename, payload)})
    try:
        return resp.json()["data"]["id"]
    except (ValueError, KeyError, TypeError):
        _report(f"{label}-stage-failed", resp)
        return None


def run():
    """Drive a handful of edge cases through the server and print each reply.

    Scenarios, in order:

    1. ``/api/archive/process`` with an id that was never staged.
    2. Staging a 3-byte payload named ``pkg.rar`` and processing it twice
       with the same id.
    3. ``/api/upload-list`` with a list containing a blank line, a
       ``#``-prefixed line, an HTTP URL and two local file paths.
    4. Staging and processing a zip holding ``a.md`` and ``sub/a.md``, both
       of which reference the same ``img.png``.

    Fixture files are written under ``<system temp dir>/run_edge_cases_debug``
    and are left in place after the run.
    """
    import tempfile  # local import: only this entry point needs it

    setup()
    client = TestClient(server.app)

    # 1. unknown staged id
    _report("invalid-id", client.post("/api/archive/process", data={"id": "missing"}))

    # gettempdir() instead of a literal "/tmp" so the script also runs where
    # /tmp does not exist.
    work_dir = Path(tempfile.gettempdir()) / "run_edge_cases_debug"
    work_dir.mkdir(parents=True, exist_ok=True)

    # 2. payload that only carries a .rar name, processed twice
    rar_path = work_dir / "pkg.rar"
    rar_path.write_bytes(b"RAR")
    rar_id = _stage_archive(client, "rar", "pkg.rar", rar_path.read_bytes())
    if rar_id is not None:
        _report("rar-process", client.post("/api/archive/process", data={"id": rar_id}))
        _report("rar-reprocess", client.post("/api/archive/process", data={"id": rar_id}))

    # 3. upload-list with blank / comment / URL / local-path entries
    list_root = work_dir / "listcase2"
    list_root.mkdir(parents=True, exist_ok=True)
    (list_root / "img.png").write_bytes(b"PNG")
    (list_root / "a.md").write_text("![](img.png)", "utf-8")
    (list_root / "b.txt").write_text("![](img.png)", "utf-8")
    entries = [
        "",
        "# comment",
        "http://example.com/x.md",
        str(list_root / "a.md"),
        str(list_root / "b.txt"),
    ]
    list_payload = "\n".join(entries).encode("utf-8")
    _report(
        "upload-list",
        client.post(
            "/api/upload-list",
            files={"list_file": ("list.txt", list_payload)},
            data={"prefix": "assets", "versionId": "1005"},
        ),
    )

    # 4. zip with two same-named markdown files in different folders
    zip_path = work_dir / "dup.zip"
    src_root = work_dir / "src"  # named src_root to avoid reusing module-level `base`
    sub_dir = src_root / "sub"
    sub_dir.mkdir(parents=True, exist_ok=True)
    (src_root / "a.md").write_text("![](img.png)", "utf-8")
    (src_root / "img.png").write_bytes(b"PNG")
    (sub_dir / "a.md").write_text("![](../img.png)", "utf-8")
    with zipfile.ZipFile(str(zip_path), "w") as zf:
        zf.write(str(src_root / "a.md"), arcname="a.md")
        zf.write(str(src_root / "img.png"), arcname="img.png")
        zf.write(str(sub_dir / "a.md"), arcname="sub/a.md")
    zip_id = _stage_archive(client, "archive-dup", "dup.zip", zip_path.read_bytes())
    if zip_id is not None:
        _report(
            "archive-dup",
            client.post(
                "/api/archive/process",
                data={"id": zip_id, "prefix": "assets", "versionId": "1006"},
            ),
        )
# Run the edge-case scenarios only when this file is executed as a script.
if __name__ == "__main__":
    run()