Import project files
This commit is contained in:
185
docling/app/tests/test_batch_upload_endpoints.py
Normal file
185
docling/app/tests/test_batch_upload_endpoints.py
Normal file
@@ -0,0 +1,185 @@
|
||||
import io
|
||||
import os
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
import app.server as server
|
||||
|
||||
|
||||
class FakeMinio:
    """In-memory stand-in for the MinIO client used by the upload endpoints.

    Uploaded payloads are kept in ``objs`` keyed by ``(bucket, object_name)``
    so nothing touches the network. Arguments that the real MinIO client
    treats as optional (``content_type``, ``expires``) are optional here as
    well, so a caller that relies on those defaults does not fail only under
    test.
    """

    def __init__(self):
        # (bucket_name, object_name) -> raw bytes captured by put_object.
        self.objs = {}

    def put_object(
        self,
        bucket_name: str,
        object_name: str,
        data: io.BytesIO,
        length: int,
        content_type: str = "application/octet-stream",
    ):
        """Record ``length`` bytes read from ``data`` under the given key.

        ``content_type`` is accepted for signature compatibility and ignored.
        """
        self.objs[(bucket_name, object_name)] = data.read(length)

    @staticmethod
    def _presigned(bucket: str, obj: str) -> str:
        # Single source of truth for the fake URL layout shared by both
        # presign entry points.
        return f"http://minio.test/presigned/{bucket}/{obj}"

    def get_presigned_url(self, method: str, bucket: str, obj: str, expires: object = None):
        """Return a deterministic fake URL; ``method`` and ``expires`` are ignored."""
        return self._presigned(bucket, obj)

    def presigned_get_object(self, bucket: str, obj: str, expires: object = None):
        """Return a deterministic fake download URL; ``expires`` is ignored."""
        return self._presigned(bucket, obj)
|
||||
|
||||
|
||||
def setup_module(module=None):
    """Point the server at an in-memory MinIO fake before any test runs.

    Overwrites the ``minio`` section of ``server.RUNTIME_CONFIG`` with test
    values and replaces the server's MinIO accessors so that they hand back a
    single shared ``FakeMinio`` instance.

    Args:
        module: Unused; accepted because pytest passes the test module to
            ``setup_module`` hooks.
    """
    server.RUNTIME_CONFIG["minio"].update({
        "endpoint": "127.0.0.1:9000",
        "public": "http://127.0.0.1:9000",
        "access": "ak",
        "secret": "sk",
        "bucket": "test",
        "secure": "false",
        "prefix": "assets",
        "store_final": "true",
        "public_read": "true",
    })

    fake = FakeMinio()

    def _cur_cfg(_cfg):
        # NOTE(review): the tuple looks like (client, bucket, public base URL,
        # prefix), mirroring the config values above -- confirm against
        # server.minio_current.
        return fake, "test", "http://127.0.0.1:9000", "assets"

    server.minio_current = _cur_cfg  # type: ignore
    # Assigning a module attribute cannot fail, so no try/except is needed;
    # the zero-argument variant is installed unconditionally.
    server._minio_current = lambda: _cur_cfg(None)  # type: ignore
|
||||
|
||||
|
||||
def test_archive_stage_and_process(tmp_path: Path):
    """Stage a zip with one markdown file and one image, then process it.

    Checks that staging returns an id, and that processing that id reports
    success, at least one counted item, and an ``import`` section.
    """
    client = TestClient(server.app)

    # Build the source tree: an empty markdown file next to an images folder.
    docs_root = tmp_path / "docs"
    images_root = docs_root / "images"
    images_root.mkdir(parents=True, exist_ok=True)
    (images_root / "p.png").write_bytes(b"PNG")
    (docs_root / "a.md").write_text("", "utf-8")

    # Pack both files with archive-relative names.
    archive_path = tmp_path / "pkg.zip"
    with zipfile.ZipFile(str(archive_path), "w") as bundle:
        bundle.write(str(docs_root / "a.md"), arcname="a.md")
        bundle.write(str(images_root / "p.png"), arcname="images/p.png")

    with open(archive_path, "rb") as handle:
        upload = {"file": ("pkg.zip", handle.read())}

    staged = client.post("/api/archive/stage", files=upload)
    assert staged.status_code == 200
    staged_body = staged.json()
    assert staged_body["code"] == 0
    assert staged_body["data"]["id"]
    stage_id = staged_body["data"]["id"]

    processed = client.post(
        "/api/archive/process",
        data={"id": stage_id, "prefix": "assets", "versionId": "1001"},
    )
    assert processed.status_code == 200
    processed_body = processed.json()
    assert processed_body["code"] == 0
    assert processed_body["data"]["count"] >= 1
    assert "import" in processed_body["data"]
|
||||
|
||||
|
||||
def test_upload_list(tmp_path: Path):
    """Upload a list file naming one local markdown path and check the reply.

    The endpoint must answer 200 with ``code == 0``, count at least one item,
    and include an ``import`` section.
    """
    client = TestClient(server.app)

    case_dir = tmp_path / "listcase"
    case_dir.mkdir(parents=True, exist_ok=True)
    (case_dir / "img.png").write_bytes(b"PNG")
    markdown_path = case_dir / "b.md"
    markdown_path.write_text("", "utf-8")

    # The list file holds a single entry: the absolute path of the markdown file.
    listing_bytes = str(markdown_path).encode("utf-8")

    response = client.post(
        "/api/upload-list",
        files={"list_file": ("list.txt", listing_bytes)},
        data={"prefix": "assets", "versionId": "1002"},
    )
    assert response.status_code == 200
    body = response.json()
    assert body["code"] == 0
    assert body["data"]["count"] >= 1
    assert "import" in body["data"]
|
||||
|
||||
|
||||
def test_archive_process_html_conversion(tmp_path: Path):
    """Process an archive of HTML pages and expect markdown results.

    The archive holds ``index.html``, ``pages/a.html`` and a shared image.
    After processing, the reported files must include ``index.md`` and
    ``a.md``, each with a ``minio_url`` and an ``object_name`` under
    ``assets/rewritten/``, and the import tree must list FILE nodes named
    ``index`` and ``a``.
    """
    client = TestClient(server.app)

    site_root = tmp_path / "web"
    static_dir = site_root / "static"
    static_dir.mkdir(parents=True, exist_ok=True)
    (static_dir / "pic.png").write_bytes(b"PNG")

    (site_root / "index.html").write_text(
        "<html><body><h1>T</h1><img src='static/pic.png'/></body></html>", "utf-8"
    )
    pages_dir = site_root / "pages"
    pages_dir.mkdir(parents=True, exist_ok=True)
    (pages_dir / "a.html").write_text("<img src='../static/pic.png'>", "utf-8")

    # Zip every regular file using its path relative to the site root.
    archive_path = tmp_path / "web.zip"
    with zipfile.ZipFile(str(archive_path), "w") as bundle:
        for entry in site_root.rglob("*"):
            if not entry.is_file():
                continue
            bundle.write(str(entry), arcname=entry.relative_to(site_root).as_posix())

    with open(archive_path, "rb") as handle:
        upload = {"file": ("web.zip", handle.read())}

    staged = client.post("/api/archive/stage", files=upload)
    assert staged.status_code == 200
    stage_id = staged.json()["data"]["id"]

    processed = client.post(
        "/api/archive/process",
        data={"id": stage_id, "prefix": "assets", "versionId": "1003"},
    )
    assert processed.status_code == 200
    body = processed.json()
    assert body["code"] == 0

    converted = {"index.md", "a.md"}
    reported = body["data"]["files"]
    reported_names = {Path(str(item.get("source") or "")).name for item in reported}
    assert "index.md" in reported_names
    assert "a.md" in reported_names
    for item in reported:
        if Path(str(item.get("source") or "")).name not in converted:
            continue
        assert item.get("minio_url")
        assert str(item.get("object_name") or "").startswith("assets/rewritten/")

    def iter_file_names(children):
        # Depth-first walk: yield FILE names, descend into FOLDER children,
        # and ignore any other node type.
        for node in children:
            kind = node.get("type")
            if kind == "FILE":
                yield node.get("name")
            elif kind == "FOLDER":
                yield from iter_file_names(node.get("children", []))

    tree_names = list(iter_file_names(body["data"]["import"]["tree"]))
    assert "index" in tree_names
    assert "a" in tree_names
|
||||
|
||||
|
||||
def test_archive_process_html_abs_uppercase(tmp_path: Path):
    """Process HTML files with upper-case extensions and root-absolute image links.

    ``INDEX.HTML`` and ``pages/A.HTM`` both reference ``/static/p.png``. The
    processed file list must report them as ``INDEX.md`` and ``A.md``.
    """
    client = TestClient(server.app)

    site_root = tmp_path / "web2"
    static_dir = site_root / "static"
    static_dir.mkdir(parents=True, exist_ok=True)
    (static_dir / "p.png").write_bytes(b"PNG")

    (site_root / "INDEX.HTML").write_text("<img src='/static/p.png'>", "utf-8")
    pages_dir = site_root / "pages"
    pages_dir.mkdir(parents=True, exist_ok=True)
    (pages_dir / "A.HTM").write_text("<img src='/static/p.png'>", "utf-8")

    # Zip every regular file using its path relative to the site root.
    archive_path = tmp_path / "web2.zip"
    with zipfile.ZipFile(str(archive_path), "w") as bundle:
        for entry in site_root.rglob("*"):
            if not entry.is_file():
                continue
            bundle.write(str(entry), arcname=entry.relative_to(site_root).as_posix())

    with open(archive_path, "rb") as handle:
        upload = {"file": ("web2.zip", handle.read())}

    staged = client.post("/api/archive/stage", files=upload)
    assert staged.status_code == 200
    stage_id = staged.json()["data"]["id"]

    processed = client.post(
        "/api/archive/process",
        data={"id": stage_id, "prefix": "assets", "versionId": "1004"},
    )
    assert processed.status_code == 200
    body = processed.json()
    assert body["code"] == 0

    reported_names = {
        Path(str(item.get("source") or "")).name for item in body["data"]["files"]
    }
    assert "INDEX.md" in reported_names
    assert "A.md" in reported_names
|
||||
Reference in New Issue
Block a user