from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request, Query
from fastapi.responses import Response, HTMLResponse, JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pathlib import Path
import tempfile
import os
import asyncio
from typing import Optional, List, Dict, Tuple
from datetime import timedelta
import mimetypes
from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError
from urllib.parse import urlsplit, urlunsplit, quote, unquote
import logging
import traceback
import time
import re
import io
import shutil
import uuid
import subprocess
import sys
import json

try:
    from minio import Minio  # type: ignore
    import urllib3  # type: ignore
except Exception:
    Minio = None
    urllib3 = None  # type: ignore

from pydantic import BaseModel

class ConvertResponse(BaseModel):
    minio_url: Optional[str]
    minio_presigned_url: Optional[str]
    name: str
    media_type: str


class MinioPresignResponse(BaseModel):
    bucket: str
    object: str
    minio_url: Optional[str]
    minio_presigned_url: Optional[str]
    expires: int

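# Illustrative payloads for the two response models above (all values are hypothetical):
#   ConvertResponse      -> {"minio_url": "http://minio:9000/docs/converted/report.md",
#                            "minio_presigned_url": "http://minio:9000/docs/...&X-Amz-Signature=...",
#                            "name": "report.md", "media_type": "text/markdown"}
#   MinioPresignResponse -> {"bucket": "docs", "object": "converted/report.md",
#                            "minio_url": "...", "minio_presigned_url": "...", "expires": 3600}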
try:
    import fitz  # type: ignore
except Exception:
    fitz = None  # type: ignore

from app.services.docling_adapter import (
    convert_source,
    md_to_docx_bytes,
    md_to_pdf_bytes_with_renderer,
    infer_basename,
    sanitize_filename,
    load_linkmap,
    save_linkmap,
)
from app.services.unified_converter import FormatConverter
from app.services.minio_utils import minio_current, join_prefix, presigned_read
from app.services.pdf_converter import (
    word_to_pdf_bytes,
    markdown_to_pdf_bytes,
    markdown_file_to_pdf_bytes,
    read_file_content,
)

"""
|
|
|
|
|
|
@api Server Application
|
|
|
|
|
|
@description FastAPI server providing document conversion endpoints and MinIO integration
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
app = FastAPI()
|
|
|
|
|
|
app.add_middleware(
|
|
|
|
|
|
CORSMiddleware,
|
|
|
|
|
|
allow_origins=["*"],
|
|
|
|
|
|
allow_methods=["*"],
|
|
|
|
|
|
allow_headers=["*"],
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
try:
    _ui_dir = Path(__file__).resolve().parents[2] / "frontend" / "dist"
    if _ui_dir.exists():
        app.mount("/ui", StaticFiles(directory=str(_ui_dir), html=True), name="ui")
        try:
            assets_dir = _ui_dir / "assets"
            if assets_dir.exists():
                app.mount("/assets", StaticFiles(directory=str(assets_dir)), name="assets")
        except Exception:
            pass
        try:
            svg_path = _ui_dir / "vite.svg"
            if svg_path.exists():
                @app.get("/vite.svg")
                def _vite_svg():
                    return FileResponse(str(svg_path), media_type="image/svg+xml")
        except Exception:
            pass
except Exception:
    pass

@app.get("/health")
|
|
|
|
|
|
def health():
|
|
|
|
|
|
"""
|
|
|
|
|
|
@function health
|
|
|
|
|
|
@description Health check endpoint
|
|
|
|
|
|
@return {"status": "ok"}
|
|
|
|
|
|
"""
|
|
|
|
|
|
return {"status": "ok"}
|
|
|
|
|
|
|
|
|
|
|
|
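# Quick smoke test (host/port are deployment-specific):
#   curl http://localhost:8000/health
#   -> {"status": "ok"}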
@app.post("/convert")
|
|
|
|
|
|
async def convert(
|
|
|
|
|
|
file: Optional[UploadFile] = File(None),
|
|
|
|
|
|
source_url: Optional[str] = Form(None),
|
|
|
|
|
|
export: str = Form("markdown"),
|
|
|
|
|
|
save: Optional[bool] = Form(False),
|
|
|
|
|
|
filename: Optional[str] = Form(None),
|
|
|
|
|
|
):
|
|
|
|
|
|
"""
|
|
|
|
|
|
@function convert
|
|
|
|
|
|
@description Convert various document formats to Markdown/HTML/JSON
|
|
|
|
|
|
@param file Uploaded file (optional)
|
|
|
|
|
|
@param source_url URL of the source document (optional)
|
|
|
|
|
|
@param export Target export format (default: markdown)
|
|
|
|
|
|
@param save Whether to save to MinIO (default: False)
|
|
|
|
|
|
@param filename Custom filename for the output
|
|
|
|
|
|
@return JSON response with conversion result or MinIO URL
|
|
|
|
|
|
"""
|
|
|
|
|
|
if (file is None and not source_url) or (file is not None and source_url):
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="provide exactly one of file or source_url")
|
|
|
|
|
|
export = _normalize_export(export)
|
|
|
|
|
|
if source_url:
|
|
|
|
|
|
enc, content, artifacts_dir = await asyncio.to_thread(_converter_v2.convert, source_url, export=export)
|
|
|
|
|
|
base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(source_url, None))
|
|
|
|
|
|
out_ext = _export_ext(export)
|
|
|
|
|
|
ct = _media_type(export)
|
|
|
|
|
|
if export.lower() == "markdown":
|
|
|
|
|
|
try:
|
|
|
|
|
|
client_rw, bucket_rw, public_rw, prefix_rw = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client_rw is not None and bucket_rw and public_rw:
|
|
|
|
|
|
base_dir = Path(artifacts_dir) if artifacts_dir else Path(tempfile.mkdtemp(prefix="md_assets_"))
|
|
|
|
|
|
new_text, _ms = _rewrite_md_assets_to_minio(
|
|
|
|
|
|
content,
|
|
|
|
|
|
base_dir,
|
|
|
|
|
|
client_rw,
|
|
|
|
|
|
bucket_rw,
|
|
|
|
|
|
public_rw,
|
|
|
|
|
|
prefix_rw,
|
|
|
|
|
|
search_root=(Path(artifacts_dir) if artifacts_dir else None),
|
|
|
|
|
|
)
|
|
|
|
|
|
content = new_text
|
|
|
|
|
|
try:
|
|
|
|
|
|
if artifacts_dir:
|
|
|
|
|
|
_bulk_upload_assets(Path(artifacts_dir), client_rw, bucket_rw, public_rw, prefix_rw)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client is None or not bucket or not public_base:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="MinIO is not configured for save")
|
|
|
|
|
|
rc_store_final = str(RUNTIME_CONFIG.get("minio", {}).get("store_final") or "true").lower() in {"1","true","yes","on"}
|
|
|
|
|
|
if not rc_store_final:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="Saving to MinIO is disabled by configuration")
|
|
|
|
|
|
out_name = f"{base}{out_ext}"
|
|
|
|
|
|
obj = join_prefix(prefix, f"converted/{out_name}")
|
|
|
|
|
|
raw = content.encode(enc or "utf-8")
|
|
|
|
|
|
bio = io.BytesIO(raw)
|
|
|
|
|
|
client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type=ct) # type: ignore
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import quote as _quote
|
|
|
|
|
|
minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
minio_url = f"{public_base}/{bucket}/{obj}"
|
|
|
|
|
|
exp = int(timedelta(hours=12).total_seconds())
|
|
|
|
|
|
minio_presigned_url = presigned_read(client, bucket, obj, exp)
|
|
|
|
|
|
resp = JSONResponse({
|
|
|
|
|
|
"minio_url": minio_url,
|
|
|
|
|
|
"minio_presigned_url": minio_presigned_url,
|
|
|
|
|
|
"name": out_name,
|
|
|
|
|
|
"export": export,
|
|
|
|
|
|
"media_type": ct
|
|
|
|
|
|
})
|
|
|
|
|
|
try:
|
|
|
|
|
|
if artifacts_dir:
|
|
|
|
|
|
shutil.rmtree(artifacts_dir, ignore_errors=True)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return resp
|
|
|
|
|
|
assert file is not None
|
|
|
|
|
|
suffix = ""
|
|
|
|
|
|
if file.filename and "." in file.filename:
|
|
|
|
|
|
suffix = "." + file.filename.rsplit(".", 1)[-1]
|
|
|
|
|
|
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
|
|
|
|
|
|
tmp.write(await file.read())
|
|
|
|
|
|
tmp_path = tmp.name
|
|
|
|
|
|
try:
|
|
|
|
|
|
enc, content, artifacts_dir = await asyncio.to_thread(_converter_v2.convert, tmp_path, export=export)
|
|
|
|
|
|
base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(None, file.filename))
|
|
|
|
|
|
out_ext = _export_ext(export)
|
|
|
|
|
|
ct = _media_type(export)
|
|
|
|
|
|
if export.lower() == "markdown":
|
|
|
|
|
|
try:
|
|
|
|
|
|
client_rw, bucket_rw, public_rw, prefix_rw = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client_rw is not None and bucket_rw and public_rw:
|
|
|
|
|
|
base_dir = Path(artifacts_dir) if artifacts_dir else Path(tempfile.mkdtemp(prefix="md_assets_"))
|
|
|
|
|
|
new_text, _ms = _rewrite_md_assets_to_minio(
|
|
|
|
|
|
content,
|
|
|
|
|
|
base_dir,
|
|
|
|
|
|
client_rw,
|
|
|
|
|
|
bucket_rw,
|
|
|
|
|
|
public_rw,
|
|
|
|
|
|
prefix_rw,
|
|
|
|
|
|
search_root=(Path(artifacts_dir) if artifacts_dir else None),
|
|
|
|
|
|
)
|
|
|
|
|
|
content = new_text
|
|
|
|
|
|
try:
|
|
|
|
|
|
if artifacts_dir:
|
|
|
|
|
|
_bulk_upload_assets(Path(artifacts_dir), client_rw, bucket_rw, public_rw, prefix_rw)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client is None or not bucket or not public_base:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="MinIO is not configured for save")
|
|
|
|
|
|
rc_store_final = str(RUNTIME_CONFIG.get("minio", {}).get("store_final") or "true").lower() in {"1","true","yes","on"}
|
|
|
|
|
|
if not rc_store_final:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="Saving to MinIO is disabled by configuration")
|
|
|
|
|
|
out_name = f"{base}{out_ext}"
|
|
|
|
|
|
obj = join_prefix(prefix, f"converted/{out_name}")
|
|
|
|
|
|
raw = content.encode(enc or "utf-8")
|
|
|
|
|
|
bio = io.BytesIO(raw)
|
|
|
|
|
|
client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type=ct) # type: ignore
|
|
|
|
|
|
minio_url = f"{public_base}/{bucket}/{obj}"
|
|
|
|
|
|
exp = int(timedelta(hours=12).total_seconds())
|
|
|
|
|
|
minio_presigned_url = presigned_read(client, bucket, obj, exp)
|
|
|
|
|
|
resp = JSONResponse({
|
|
|
|
|
|
"minio_url": minio_url,
|
|
|
|
|
|
"minio_presigned_url": minio_presigned_url,
|
|
|
|
|
|
"name": out_name,
|
|
|
|
|
|
"export": export,
|
|
|
|
|
|
"media_type": ct
|
|
|
|
|
|
})
|
|
|
|
|
|
try:
|
|
|
|
|
|
if artifacts_dir:
|
|
|
|
|
|
shutil.rmtree(artifacts_dir, ignore_errors=True)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return resp
|
|
|
|
|
|
finally:
|
|
|
|
|
|
try:
|
|
|
|
|
|
os.remove(tmp_path)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
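# Example invocations (illustrative host; form fields match the signature above):
#   curl -F "file=@report.docx" -F "export=markdown" http://localhost:8000/convert
#   curl -F "source_url=https://example.com/spec.pdf" -F "export=html" http://localhost:8000/convert
# Both return JSON carrying "minio_url", "minio_presigned_url", "name", "export" and "media_type".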
profiles_dir = Path(__file__).parent / "configs"
profiles_dir.mkdir(parents=True, exist_ok=True)

@app.get("/")
|
|
|
|
|
|
def index():
|
|
|
|
|
|
return JSONResponse({"ok": True, "service": "docling-api", "version": "v2"})
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/@vite/client")
|
|
|
|
|
|
def vite_client_stub():
|
|
|
|
|
|
return JSONResponse({"ok": True})
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/refresh.js")
|
|
|
|
|
|
def refresh_js_stub():
|
|
|
|
|
|
return Response(content="window.initClient=function(){},window.addRefresh=function(){};", media_type="application/javascript")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
RUNTIME_CONFIG: Dict[str, Dict[str, Optional[str]]] = {
    "minio": {
        "endpoint": None,
        "public": None,
        "access": None,
        "secret": None,
        "bucket": None,
        "secure": None,
        "prefix": None,
        "store_final": "true",
        "public_read": "true",
    },
    "db": {
        "webhook_url": None,
        "token": None,
    },
}

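# A populated runtime config might look like this (hypothetical values):
#   RUNTIME_CONFIG["minio"] = {"endpoint": "minio.internal:9000", "public": "http://minio.internal:9000",
#                              "access": "minioadmin", "secret": "***", "bucket": "docs",
#                              "secure": "false", "prefix": "docling", "store_final": "true", "public_read": "true"}
# All values are kept as strings; boolean-like fields are parsed with the {"1", "true", "yes", "on"} idiom used below.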
def _normalize_export(export: str) -> str:
    e = (export or "").strip().lower()
    allowed = {"markdown", "html", "json", "doctags"}
    if e not in allowed:
        raise HTTPException(status_code=422, detail="unsupported export")
    return e

def _normalize_engine(engine: Optional[str]) -> Optional[str]:
    if engine is None:
        return None
    e = (engine or "").strip().lower()
    allowed = {"docling", "word2markdown", "pandoc", "custom"}
    if e not in allowed:
        raise HTTPException(status_code=422, detail="unsupported engine")
    return e

def _fix_garbled_name(name: str) -> str:
    try:
        s = name
        t = s.strip()
        # If pure ASCII, no fix needed
        if all(ord(c) < 128 for c in t):
            return name
        # Try to reconstruct original bytes assuming CP437 (zip default when the UTF-8 flag is not set)
        try:
            raw = s.encode("cp437", errors="strict")
        except UnicodeEncodeError:
            # Not CP437 mojibake, keep original
            return name
        encs = [
            "gb18030",
            "gbk",
            "cp936",
            "utf-8",
            "big5",
            "cp950",
            "shift_jis",
            "cp932",
            "cp949",
            "euc-kr",
            "euc-jp",
        ]
        for e in encs:
            try:
                fixed = raw.decode(e)
                if fixed:
                    return fixed
            except Exception:
                continue
    except Exception:
        pass
    return name

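# Round-trip sketch of the repair above (illustrative): a zip built on a Chinese-locale system
# stores GBK filename bytes, and Python's zipfile decodes them as CP437 when the UTF-8 flag is
# unset, yielding mojibake. Re-encoding as CP437 recovers the original bytes:
#   mojibake = "中文.txt".encode("gbk").decode("cp437")
#   _fix_garbled_name(mojibake)  # -> "中文.txt" (gb18030 is tried first)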
def _safe_target(base: Path, name: str) -> Optional[Path]:
    try:
        n = name.replace("\\", "/").lstrip("/")
        parts = [p for p in n.split("/") if p and p not in {".", ".."}]
        tgt = base / "/".join(parts)
        rp = tgt.resolve()
        rb = base.resolve()
        try:
            rp.relative_to(rb)
        except Exception:
            return None
        return rp
    except Exception:
        return None

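# Traversal components are dropped rather than rejected (illustrative):
#   _safe_target(Path("/tmp/out"), "../../etc/passwd")  # -> Path("/tmp/out/etc/passwd")
# None is returned only when the resolved path (e.g. through a symlink) escapes the base directory.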
def _zip_extract_safely(zf: object, dest: Path) -> None:
    try:
        for zi in zf.infolist():  # type: ignore
            try:
                name = str(getattr(zi, "filename", ""))
                flag = int(getattr(zi, "flag_bits", 0))
                use = name
                if (flag & 0x800) == 0:
                    use = _fix_garbled_name(name)
                target = _safe_target(dest, use)
                if target is None:
                    continue
                if hasattr(zi, "is_dir") and zi.is_dir():  # type: ignore
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                target.parent.mkdir(parents=True, exist_ok=True)
                with zf.open(zi, "r") as src:  # type: ignore
                    data = src.read()
                with open(target, "wb") as out:
                    out.write(data)
            except Exception:
                continue
    except Exception:
        pass

def _tar_extract_safely(tf: object, dest: Path) -> None:
    try:
        for m in tf.getmembers():  # type: ignore
            try:
                name = str(getattr(m, "name", ""))
                use = _fix_garbled_name(name)
                target = _safe_target(dest, use)
                if target is None:
                    continue
                if getattr(m, "isdir", lambda: False)():
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                target.parent.mkdir(parents=True, exist_ok=True)
                f = tf.extractfile(m)  # type: ignore
                if f is None:
                    continue
                data = f.read()
                with open(target, "wb") as out:
                    out.write(data)
            except Exception:
                continue
    except Exception:
        pass

def _minio_head_bucket(client: object, bucket: str) -> bool:
    try:
        if hasattr(client, "bucket_exists"):
            try:
                return bool(client.bucket_exists(bucket))  # type: ignore
            except Exception:
                pass
        try:
            region = client._get_region(bucket)  # type: ignore
        except Exception:
            region = "us-east-1"
        client._url_open(method="HEAD", region=region, bucket_name=bucket)  # type: ignore
        return True
    except Exception:
        try:
            names = [getattr(b, "name", None) for b in client.list_buckets()]  # type: ignore
            return bucket in set(n for n in names if n)
        except Exception:
            return False

def _minio_create_bucket(client: object, bucket: str) -> bool:
    # Prefer SDK methods, fall back to a low-level call
    try:
        if hasattr(client, "bucket_exists"):
            try:
                if client.bucket_exists(bucket):  # type: ignore
                    return True
            except Exception:
                pass
        if hasattr(client, "make_bucket"):
            try:
                client.make_bucket(bucket)  # type: ignore
                return True
            except Exception:
                try:
                    region = client._get_region(bucket)  # type: ignore
                except Exception:
                    region = "us-east-1"
                try:
                    client.make_bucket(bucket, location=region)  # type: ignore
                    return True
                except Exception:
                    pass
        try:
            try:
                region = client._get_region(bucket)  # type: ignore
            except Exception:
                region = "us-east-1"
            client._url_open(method="PUT", region=region, bucket_name=bucket)  # type: ignore
            return True
        except Exception as ce:
            if "BucketAlreadyOwnedByYou" in str(ce) or "BucketAlreadyExists" in str(ce):
                return True
            raise
    except Exception as e:
        raise e

def _minio_client(endpoint: str, access: str, secret: str, secure: bool):
    if urllib3 is not None:
        try:
            http = urllib3.PoolManager(timeout=urllib3.Timeout(connect=3.0, read=20.0))
            return Minio(endpoint=endpoint, access_key=access, secret_key=secret, secure=secure, http_client=http)  # type: ignore
        except Exception:
            return Minio(endpoint=endpoint, access_key=access, secret_key=secret, secure=secure)  # type: ignore
    return Minio(endpoint=endpoint, access_key=access, secret_key=secret, secure=secure)  # type: ignore

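# Usage sketch (hypothetical credentials):
#   client = _minio_client("minio.internal:9000", "minioadmin", "secret", secure=False)
#   if not _minio_head_bucket(client, "docs"):
#       _minio_create_bucket(client, "docs")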
def _minio_time_hint(endpoint: str, secure: bool) -> Optional[str]:
    try:
        scheme = "https" if secure else "http"
        r = urlopen(f"{scheme}://{endpoint}", timeout=3)
        srv_date = r.headers.get("Date")
        if not srv_date:
            return None
        from email.utils import parsedate_to_datetime
        from datetime import datetime, timezone
        dt = parsedate_to_datetime(srv_date)
        now = datetime.now(timezone.utc)
        diff = abs((now - dt).total_seconds())
        return f"Server time differs from local time by about {int(diff)} seconds"
    except Exception:
        return None

def _db_notify(payload: Dict[str, object]):
    try:
        import requests  # type: ignore
    except Exception:
        return
    url = (RUNTIME_CONFIG.get("db", {}).get("webhook_url") or "").strip()
    if not url:
        return
    token = (RUNTIME_CONFIG.get("db", {}).get("token") or "")
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    try:
        requests.post(url, json=payload, headers=headers, timeout=5)
    except Exception:
        pass

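# Example notification (the payload shape is defined by the receiving webhook; illustrative only):
#   _db_notify({"event": "converted", "name": "report.md",
#               "minio_url": "http://minio:9000/docs/converted/report.md"})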
@app.post("/config/minio")
|
|
|
|
|
|
async def set_minio_config(
|
|
|
|
|
|
endpoint: str = Form(...),
|
|
|
|
|
|
public: Optional[str] = Form(None),
|
|
|
|
|
|
access: str = Form(...),
|
|
|
|
|
|
secret: str = Form(...),
|
|
|
|
|
|
bucket: str = Form(...),
|
|
|
|
|
|
secure: Optional[str] = Form("false"),
|
|
|
|
|
|
prefix: Optional[str] = Form(None),
|
|
|
|
|
|
store_final: Optional[str] = Form("true"),
|
|
|
|
|
|
public_read: Optional[str] = Form("true"),
|
|
|
|
|
|
):
|
|
|
|
|
|
ep_raw = (endpoint or "").strip()
|
|
|
|
|
|
ep_host = ep_raw
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import urlsplit
|
|
|
|
|
|
u = urlsplit(ep_raw)
|
|
|
|
|
|
if u.scheme:
|
|
|
|
|
|
ep_host = (u.netloc or ep_raw).split("/")[0]
|
|
|
|
|
|
else:
|
|
|
|
|
|
ep_host = ep_raw.split("/")[0]
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
ep_host = ep_raw.split("/")[0]
|
|
|
|
|
|
# reject console port or console paths for endpoint
|
|
|
|
|
|
try:
|
|
|
|
|
|
if (":9001" in ep_host) or ("/browser" in ep_raw) or ("/minio" in ep_raw):
|
|
|
|
|
|
return {"ok": False, "error": "请使用 MinIO API 端口 9000(而非 9001 控制台)"}
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
pub_val = public
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import urlsplit
|
|
|
|
|
|
pu = urlsplit((public or "").strip())
|
|
|
|
|
|
if (pu.netloc.endswith(":9001") or "/browser" in (public or "") or "/minio" in (public or "")):
|
|
|
|
|
|
pub_val = None
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
if public and (":9001" in public or "/browser" in public or "/minio" in public):
|
|
|
|
|
|
pub_val = None
|
|
|
|
|
|
# ensure public has scheme
|
|
|
|
|
|
try:
|
|
|
|
|
|
if pub_val:
|
|
|
|
|
|
from urllib.parse import urlsplit
|
|
|
|
|
|
pu = urlsplit(pub_val.strip())
|
|
|
|
|
|
scheme = pu.scheme or ("https" if str(secure or "false").lower() in {"1","true","yes","on"} else "http")
|
|
|
|
|
|
host = pu.netloc or pu.path.split("/")[0]
|
|
|
|
|
|
pub_val = f"{scheme}://{host}"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
try:
|
|
|
|
|
|
if pub_val:
|
|
|
|
|
|
host = pub_val.strip().split("/")[0]
|
|
|
|
|
|
scheme = "https" if str(secure or "false").lower() in {"1","true","yes","on"} else "http"
|
|
|
|
|
|
pub_val = f"{scheme}://{host}"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
RUNTIME_CONFIG["minio"].update({
|
|
|
|
|
|
"endpoint": ep_host,
|
|
|
|
|
|
"public": pub_val,
|
|
|
|
|
|
"access": access,
|
|
|
|
|
|
"secret": secret,
|
|
|
|
|
|
"bucket": bucket,
|
|
|
|
|
|
"secure": secure,
|
|
|
|
|
|
"prefix": prefix,
|
|
|
|
|
|
"store_final": store_final,
|
|
|
|
|
|
"public_read": public_read,
|
|
|
|
|
|
})
|
|
|
|
|
|
client, bkt, pub, _ = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client is None or not bkt or not pub:
|
|
|
|
|
|
return {"ok": False, "error": "MinIO config invalid"}
|
|
|
|
|
|
try:
|
|
|
|
|
|
pr = str(public_read or "true").lower() in {"1","true","yes","on"}
|
|
|
|
|
|
if pr:
|
|
|
|
|
|
policy = {
|
|
|
|
|
|
"Version": "2012-10-17",
|
|
|
|
|
|
"Statement": [
|
|
|
|
|
|
{"Effect": "Allow", "Principal": "*", "Action": ["s3:GetBucketLocation", "s3:ListBucket"], "Resource": [f"arn:aws:s3:::{bkt}"]},
|
|
|
|
|
|
{"Effect": "Allow", "Principal": "*", "Action": ["s3:GetObject"], "Resource": [f"arn:aws:s3:::{bkt}/*"]},
|
|
|
|
|
|
],
|
|
|
|
|
|
}
|
|
|
|
|
|
import json as _json
|
|
|
|
|
|
client.set_bucket_policy(bucket_name=bkt, policy=_json.dumps(policy)) # type: ignore
|
|
|
|
|
|
else:
|
|
|
|
|
|
try:
|
|
|
|
|
|
client.delete_bucket_policy(bkt) # type: ignore
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return {"ok": True}
|
|
|
|
|
|
|
|
|
|
|
|
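# Example (illustrative values):
#   curl -X POST http://localhost:8000/config/minio \
#     -F endpoint=minio.internal:9000 -F access=minioadmin -F secret=secret \
#     -F bucket=docs -F secure=false -F public_read=true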
@app.post("/config/minio/test")
|
|
|
|
|
|
async def test_minio_config(
|
|
|
|
|
|
endpoint: str = Form(...),
|
|
|
|
|
|
public: Optional[str] = Form(None),
|
|
|
|
|
|
access: str = Form(...),
|
|
|
|
|
|
secret: str = Form(...),
|
|
|
|
|
|
bucket: str = Form(...),
|
|
|
|
|
|
secure: Optional[str] = Form("false"),
|
|
|
|
|
|
create_if_missing: Optional[str] = Form("true"),
|
|
|
|
|
|
public_read: Optional[str] = Form("false"),
|
|
|
|
|
|
):
|
|
|
|
|
|
if Minio is None:
|
|
|
|
|
|
return {"ok": False, "connected": False, "bucket_exists": False, "error": "minio client not available"}
|
|
|
|
|
|
try:
|
|
|
|
|
|
sec = str(secure or "false").lower() in {"1","true","yes","on"}
|
|
|
|
|
|
ep_raw = (endpoint or "").strip()
|
|
|
|
|
|
ep_host = ep_raw
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import urlsplit
|
|
|
|
|
|
u = urlsplit(ep_raw)
|
|
|
|
|
|
if u.scheme:
|
|
|
|
|
|
ep_host = (u.netloc or ep_raw).split("/")[0]
|
|
|
|
|
|
else:
|
|
|
|
|
|
ep_host = ep_raw.split("/")[0]
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
ep_host = ep_raw.split("/")[0]
|
|
|
|
|
|
if ":9001" in ep_host or "/browser" in ep_raw or "/minio" in ep_raw:
|
|
|
|
|
|
return {"ok": False, "connected": False, "bucket_exists": False, "error": "请使用 MinIO API 端口 9000(而非 9001 控制台)"}
|
|
|
|
|
|
client = _minio_client(endpoint=ep_host, access=access, secret=secret, secure=sec)
|
|
|
|
|
|
# handshake fallback
|
|
|
|
|
|
try:
|
|
|
|
|
|
try:
|
|
|
|
|
|
client.list_buckets() # type: ignore
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
if sec and ("SSL" in str(e) or "HTTPSConnectionPool" in str(e) or "SSLError" in str(e)):
|
|
|
|
|
|
client = _minio_client(endpoint=ep_host, access=access, secret=secret, secure=False)
|
|
|
|
|
|
sec = False
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
exists = False
|
|
|
|
|
|
created = False
|
|
|
|
|
|
exists = _minio_head_bucket(client, bucket)
|
|
|
|
|
|
if not exists and str(create_if_missing or "true").lower() in {"1","true","yes","on"}:
|
|
|
|
|
|
if _minio_create_bucket(client, bucket):
|
|
|
|
|
|
exists = True
|
|
|
|
|
|
created = True
|
|
|
|
|
|
# 始终根据 public_read 应用/移除策略(即使桶已存在)
|
|
|
|
|
|
try:
|
|
|
|
|
|
import json as _json
|
|
|
|
|
|
if str(public_read or "false").lower() in {"1","true","yes","on"}:
|
|
|
|
|
|
policy = {
|
|
|
|
|
|
"Version": "2012-10-17",
|
|
|
|
|
|
"Statement": [
|
|
|
|
|
|
{"Effect": "Allow", "Principal": "*", "Action": ["s3:GetBucketLocation", "s3:ListBucket"], "Resource": [f"arn:aws:s3:::{bucket}"]},
|
|
|
|
|
|
{"Effect": "Allow", "Principal": "*", "Action": ["s3:GetObject"], "Resource": [f"arn:aws:s3:::{bucket}/*"]},
|
|
|
|
|
|
],
|
|
|
|
|
|
}
|
|
|
|
|
|
client.set_bucket_policy(bucket_name=bucket, policy=_json.dumps(policy)) # type: ignore
|
|
|
|
|
|
else:
|
|
|
|
|
|
try:
|
|
|
|
|
|
client.delete_bucket_policy(bucket) # type: ignore
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return {"ok": True, "connected": True, "bucket_exists": exists, "created": created, "hint": ("使用 HTTPS 访问 9000 端口可能失败,请确认启用 HTTPS 与证书配置匹配" if sec and (public or "").startswith("http://") else None)}
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
hint = None
|
|
|
|
|
|
if "RequestTimeTooSkewed" in str(e):
|
|
|
|
|
|
hint = _minio_time_hint(ep_host, sec)
|
|
|
|
|
|
return {"ok": False, "connected": False, "bucket_exists": False, "error": str(e), "hint": hint}
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/config/profile/list")
|
|
|
|
|
|
async def list_profiles():
|
|
|
|
|
|
names: List[str] = []
|
|
|
|
|
|
try:
|
|
|
|
|
|
for p in profiles_dir.rglob("*.json"):
|
|
|
|
|
|
try:
|
|
|
|
|
|
names.append(p.stem)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
continue
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return {"ok": True, "profiles": sorted(set(names))}
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/config/profile/activate")
|
|
|
|
|
|
async def activate_profile(name: str = Form(...)):
|
|
|
|
|
|
target = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
for p in profiles_dir.rglob("*.json"):
|
|
|
|
|
|
if p.stem.lower() == (name or "").strip().lower():
|
|
|
|
|
|
target = p
|
|
|
|
|
|
break
|
|
|
|
|
|
if target is None:
|
|
|
|
|
|
raise HTTPException(status_code=404, detail="profile not found")
|
|
|
|
|
|
active_path = profiles_dir / "active.json"
|
|
|
|
|
|
data = json.loads(target.read_text("utf-8"))
|
|
|
|
|
|
# 应用并覆盖到运行时配置
|
|
|
|
|
|
try:
|
|
|
|
|
|
minio_cfg = data.get("minio", {})
|
|
|
|
|
|
if isinstance(minio_cfg, dict) and minio_cfg:
|
|
|
|
|
|
sanitized = dict(minio_cfg)
|
|
|
|
|
|
try:
|
|
|
|
|
|
ep = str(sanitized.get("endpoint") or "").strip()
|
|
|
|
|
|
if ep and ":9001" in ep:
|
|
|
|
|
|
h = ep.split("/")[0]
|
|
|
|
|
|
if ":" in h:
|
|
|
|
|
|
parts = h.split(":")
|
|
|
|
|
|
sanitized["endpoint"] = f"{parts[0]}:9000"
|
|
|
|
|
|
else:
|
|
|
|
|
|
sanitized["endpoint"] = h
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
try:
|
|
|
|
|
|
pub = str(sanitized.get("public") or "").strip()
|
|
|
|
|
|
if pub and (":9001" in pub or "/browser" in pub or "/minio" in pub):
|
|
|
|
|
|
host = pub.split("/")[0]
|
|
|
|
|
|
sec = str(sanitized.get("secure") or RUNTIME_CONFIG.get("minio", {}).get("secure") or "false").lower() in {"1","true","yes","on"}
|
|
|
|
|
|
scheme = "https" if sec else "http"
|
|
|
|
|
|
if ":" in host:
|
|
|
|
|
|
base_host = host.split(":")[0]
|
|
|
|
|
|
sanitized["public"] = f"{scheme}://{base_host}:9000"
|
|
|
|
|
|
else:
|
|
|
|
|
|
sanitized["public"] = f"{scheme}://{host}:9000"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
RUNTIME_CONFIG["minio"].update(sanitized)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
try:
|
|
|
|
|
|
db_cfg = data.get("db", {})
|
|
|
|
|
|
if isinstance(db_cfg, dict) and db_cfg:
|
|
|
|
|
|
RUNTIME_CONFIG["db"].update(db_cfg)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
# 写入 active.json 以便后续观察者检测到变更
|
|
|
|
|
|
active_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), "utf-8")
|
|
|
|
|
|
return {"ok": True, "active": target.stem}
|
|
|
|
|
|
except HTTPException:
|
|
|
|
|
|
raise
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/system/time/check")
|
|
|
|
|
|
def system_time_check(
|
|
|
|
|
|
endpoint: Optional[str] = Query(None),
|
|
|
|
|
|
public: Optional[str] = Query(None),
|
|
|
|
|
|
secure: Optional[str] = Query(None),
|
|
|
|
|
|
):
|
|
|
|
|
|
try:
|
|
|
|
|
|
rc = RUNTIME_CONFIG.get("minio", {})
|
|
|
|
|
|
ep_raw = (endpoint or rc.get("endpoint") or "").strip()
|
|
|
|
|
|
pub_raw = (public or rc.get("public") or "").strip()
|
|
|
|
|
|
sec_flag = secure if secure is not None else (rc.get("secure") or "false")
|
|
|
|
|
|
sec = str(sec_flag or "false").lower() in {"1","true","yes","on"}
|
|
|
|
|
|
scheme = "https" if sec else "http"
|
|
|
|
|
|
# 解析 host(优先 public,其次 endpoint)
|
|
|
|
|
|
def _host(s: str) -> str:
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import urlsplit
|
|
|
|
|
|
u = urlsplit(s)
|
|
|
|
|
|
return (u.netloc or s).split("/")[0] if u.scheme else s.split("/")[0]
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
return s.split("/")[0]
|
|
|
|
|
|
base_host = _host(pub_raw or ep_raw)
|
|
|
|
|
|
if not base_host:
|
|
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
now = datetime.now(timezone.utc)
|
|
|
|
|
|
return {"ok": True, "server_time": None, "local_time": now.isoformat(), "diff_sec": None, "hint": "未配置 MinIO 端点"}
|
|
|
|
|
|
# 构造候选检测 URL(尽量使用 MinIO 健康端点以获取标准 Date 头)
|
|
|
|
|
|
base = f"{scheme}://{base_host}"
|
|
|
|
|
|
candidates = [
|
|
|
|
|
|
base,
|
|
|
|
|
|
base + "/minio/health/live",
|
|
|
|
|
|
base + "/minio/health/ready",
|
|
|
|
|
|
base + "/minio/health/version",
|
|
|
|
|
|
]
|
|
|
|
|
|
srv_date = None
|
|
|
|
|
|
for url in candidates:
|
|
|
|
|
|
try:
|
|
|
|
|
|
req = Request(url, method="HEAD")
|
|
|
|
|
|
r = urlopen(req, timeout=3)
|
|
|
|
|
|
d = r.headers.get("Date") or r.headers.get("date")
|
|
|
|
|
|
if d:
|
|
|
|
|
|
srv_date = d
|
|
|
|
|
|
break
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
try:
|
|
|
|
|
|
r = urlopen(url, timeout=3)
|
|
|
|
|
|
d = r.headers.get("Date") or r.headers.get("date")
|
|
|
|
|
|
if d:
|
|
|
|
|
|
srv_date = d
|
|
|
|
|
|
break
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
# 如果按当前 scheme 获取失败,尝试切换 scheme 再试一次
|
|
|
|
|
|
if not srv_date:
|
|
|
|
|
|
alt_scheme = "http" if scheme == "https" else "https"
|
|
|
|
|
|
alt_base = f"{alt_scheme}://{base_host}"
|
|
|
|
|
|
alt_candidates = [
|
|
|
|
|
|
alt_base,
|
|
|
|
|
|
alt_base + "/minio/health/live",
|
|
|
|
|
|
alt_base + "/minio/health/ready",
|
|
|
|
|
|
alt_base + "/minio/health/version",
|
|
|
|
|
|
]
|
|
|
|
|
|
for url in alt_candidates:
|
|
|
|
|
|
try:
|
|
|
|
|
|
req = Request(url, method="HEAD")
|
|
|
|
|
|
r = urlopen(req, timeout=3)
|
|
|
|
|
|
d = r.headers.get("Date") or r.headers.get("date")
|
|
|
|
|
|
if d:
|
|
|
|
|
|
srv_date = d
|
|
|
|
|
|
break
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
try:
|
|
|
|
|
|
r = urlopen(url, timeout=3)
|
|
|
|
|
|
d = r.headers.get("Date") or r.headers.get("date")
|
|
|
|
|
|
if d:
|
|
|
|
|
|
srv_date = d
|
|
|
|
|
|
break
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
now = datetime.now(timezone.utc)
|
|
|
|
|
|
diff = None
|
|
|
|
|
|
if srv_date:
|
|
|
|
|
|
from email.utils import parsedate_to_datetime
|
|
|
|
|
|
try:
|
|
|
|
|
|
dt = parsedate_to_datetime(srv_date)
|
|
|
|
|
|
diff = int(abs((now - dt).total_seconds()))
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
diff = None
|
|
|
|
|
|
hint = _minio_time_hint(base_host, sec)
|
|
|
|
|
|
return {"ok": True, "server_time": srv_date, "local_time": now.isoformat(), "diff_sec": diff, "hint": hint}
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
return {"ok": False, "error": str(e)}
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/system/time/sync")
|
|
|
|
|
|
async def system_time_sync(method: Optional[str] = Form("auto"), ntp_server: Optional[str] = Form(None)):
|
|
|
|
|
|
cmds = []
|
|
|
|
|
|
servers = [s for s in [ntp_server, "time.apple.com", "pool.ntp.org"] if s]
|
|
|
|
|
|
for srv in servers:
|
|
|
|
|
|
if (method or "auto") in {"auto", "sntp"}:
|
|
|
|
|
|
cmds.append(["sntp", "-sS", srv])
|
|
|
|
|
|
if (method or "auto") in {"auto", "ntpdate"}:
|
|
|
|
|
|
cmds.append(["ntpdate", "-u", srv])
|
|
|
|
|
|
outputs = []
|
|
|
|
|
|
success = False
|
|
|
|
|
|
for cmd in cmds:
|
|
|
|
|
|
try:
|
|
|
|
|
|
p = subprocess.run(cmd, capture_output=True, text=True, timeout=8)
|
|
|
|
|
|
outputs.append({"cmd": " ".join(cmd), "code": p.returncode, "out": p.stdout, "err": p.stderr})
|
|
|
|
|
|
if p.returncode == 0:
|
|
|
|
|
|
success = True
|
|
|
|
|
|
break
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
outputs.append({"cmd": " ".join(cmd), "code": -1, "out": "", "err": str(e)})
|
|
|
|
|
|
if not success and sys.platform == "darwin":
|
|
|
|
|
|
elev_cmds = []
|
|
|
|
|
|
for srv in servers:
|
|
|
|
|
|
elev_cmds.append(["osascript", "-e", f'do shell script "sntp -sS {srv}" with administrator privileges'])
|
|
|
|
|
|
elev_cmds.append(["osascript", "-e", f'do shell script "ntpdate -u {srv}" with administrator privileges'])
|
|
|
|
|
|
elev_cmds.append(["osascript", "-e", f'do shell script "/usr/sbin/systemsetup -setnetworktimeserver {srv}" with administrator privileges'])
|
|
|
|
|
|
elev_cmds.append(["osascript", "-e", 'do shell script "/usr/sbin/systemsetup -setusingnetworktime on" with administrator privileges'])
|
|
|
|
|
|
for cmd in elev_cmds:
|
|
|
|
|
|
try:
|
|
|
|
|
|
p = subprocess.run(cmd, capture_output=True, text=True, timeout=12)
|
|
|
|
|
|
outputs.append({"cmd": " ".join(cmd), "code": p.returncode, "out": p.stdout, "err": p.stderr})
|
|
|
|
|
|
if p.returncode == 0:
|
|
|
|
|
|
success = True
|
|
|
|
|
|
break
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
outputs.append({"cmd": " ".join(cmd), "code": -1, "out": "", "err": str(e)})
|
|
|
|
|
|
chk = system_time_check()
|
|
|
|
|
|
return {"ok": success, "result": outputs, "check": chk}
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/api/system/time/check")
|
|
|
|
|
|
def system_time_check_api(
|
|
|
|
|
|
endpoint: Optional[str] = Query(None),
|
|
|
|
|
|
public: Optional[str] = Query(None),
|
|
|
|
|
|
secure: Optional[str] = Query(None),
|
|
|
|
|
|
):
|
|
|
|
|
|
return system_time_check(endpoint=endpoint, public=public, secure=secure)
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/api/system/time/sync")
|
|
|
|
|
|
async def system_time_sync_api(method: Optional[str] = Form("auto"), ntp_server: Optional[str] = Form(None)):
|
|
|
|
|
|
return await system_time_sync(method=method, ntp_server=ntp_server)
|
|
|
|
|
|
|
|
|
|
|
|
async def _auto_time_calibration():
    try:
        await asyncio.sleep(1.0)
        # Pass explicit None so the Query(...) defaults are not used as values
        chk = system_time_check(endpoint=None, public=None, secure=None)
        try:
            diff = int((chk or {}).get("diff_sec") or 0)
        except Exception:
            diff = 0
        if diff and diff > 120:
            try:
                await system_time_sync(method="auto", ntp_server=None)
            except Exception:
                pass
    except Exception:
        pass

@app.get("/config/minio/buckets")
|
|
|
|
|
|
def list_minio_buckets(
|
|
|
|
|
|
endpoint: str,
|
|
|
|
|
|
access: str,
|
|
|
|
|
|
secret: str,
|
|
|
|
|
|
secure: Optional[str] = "false",
|
|
|
|
|
|
):
|
|
|
|
|
|
if Minio is None:
|
|
|
|
|
|
return {"ok": False, "error": "minio client not available", "buckets": []}
|
|
|
|
|
|
try:
|
|
|
|
|
|
sec = str(secure or "false").lower() in {"1","true","yes","on"}
|
|
|
|
|
|
client = _minio_client(endpoint=endpoint, access=access, secret=secret, secure=sec)
|
|
|
|
|
|
names = [b.name for b in client.list_buckets()]
|
|
|
|
|
|
return {"ok": True, "buckets": names}
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
return {"ok": False, "error": str(e), "buckets": []}
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/config/minio/create-bucket")
|
|
|
|
|
|
async def create_minio_bucket(
|
|
|
|
|
|
endpoint: str = Form(...),
|
|
|
|
|
|
access: str = Form(...),
|
|
|
|
|
|
secret: str = Form(...),
|
|
|
|
|
|
bucket: str = Form(...),
|
|
|
|
|
|
secure: Optional[str] = Form("false"),
|
|
|
|
|
|
public_read: Optional[str] = Form("false"),
|
|
|
|
|
|
):
|
|
|
|
|
|
if Minio is None:
|
|
|
|
|
|
return {"ok": False, "error": "minio client not available"}
|
|
|
|
|
|
try:
|
|
|
|
|
|
sec = str(secure or "false").lower() in {"1","true","yes","on"}
|
|
|
|
|
|
ep_raw = (endpoint or "").strip()
|
|
|
|
|
|
ep_host = ep_raw
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import urlsplit
|
|
|
|
|
|
u = urlsplit(ep_raw)
|
|
|
|
|
|
if u.scheme:
|
|
|
|
|
|
ep_host = (u.netloc or ep_raw).split("/")[0]
|
|
|
|
|
|
else:
|
|
|
|
|
|
ep_host = ep_raw.split("/")[0]
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
ep_host = ep_raw.split("/")[0]
|
|
|
|
|
|
if ":9001" in ep_host or "/browser" in ep_raw or "/minio" in ep_raw:
|
|
|
|
|
|
return {"ok": False, "error": "请使用 MinIO API 端口 9000(而非 9001 控制台)"}
|
|
|
|
|
|
client = _minio_client(endpoint=ep_host, access=access, secret=secret, secure=sec)
|
|
|
|
|
|
try:
|
|
|
|
|
|
try:
|
|
|
|
|
|
client.list_buckets() # type: ignore
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
if sec and ("SSL" in str(e) or "HTTPSConnectionPool" in str(e) or "SSLError" in str(e)):
|
|
|
|
|
|
client = _minio_client(endpoint=ep_host, access=access, secret=secret, secure=False)
|
|
|
|
|
|
sec = False
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
_minio_create_bucket(client, bucket)
|
|
|
|
|
|
try:
|
|
|
|
|
|
pr = str(public_read or "false").lower() in {"1","true","yes","on"}
|
|
|
|
|
|
if pr:
|
|
|
|
|
|
policy = {
|
|
|
|
|
|
"Version": "2012-10-17",
|
|
|
|
|
|
"Statement": [
|
|
|
|
|
|
{"Effect": "Allow", "Principal": {"AWS": ["*"]}, "Action": ["s3:GetBucketLocation", "s3:ListBucket"], "Resource": [f"arn:aws:s3:::{bucket}"]},
|
|
|
|
|
|
{"Effect": "Allow", "Principal": {"AWS": ["*"]}, "Action": ["s3:GetObject"], "Resource": [f"arn:aws:s3:::{bucket}/*"]},
|
|
|
|
|
|
],
|
|
|
|
|
|
}
|
|
|
|
|
|
import json as _json
|
|
|
|
|
|
client.set_bucket_policy(bucket, _json.dumps(policy)) # type: ignore
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return {"ok": True, "bucket_exists": True}
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
hint = None
|
|
|
|
|
|
if "RequestTimeTooSkewed" in str(e):
|
|
|
|
|
|
hint = _minio_time_hint(ep_host, sec)
|
|
|
|
|
|
return {"ok": False, "error": str(e), "hint": hint}
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/minio/presign", response_model=MinioPresignResponse)
|
|
|
|
|
|
async def minio_presign(
|
|
|
|
|
|
url: Optional[str] = Form(None),
|
|
|
|
|
|
object_name: Optional[str] = Form(None),
|
|
|
|
|
|
bucket: Optional[str] = Form(None),
|
|
|
|
|
|
expires: Optional[int] = Form(3600),
|
|
|
|
|
|
):
|
|
|
|
|
|
client, cfg_bucket, public_base, _ = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client is None:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="MinIO 未配置")
|
|
|
|
|
|
obj = (object_name or "").strip()
|
|
|
|
|
|
bkt = (bucket or cfg_bucket or "").strip()
|
|
|
|
|
|
if (not obj) and url:
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import urlsplit, unquote
|
|
|
|
|
|
u = urlsplit((url or "").strip())
|
|
|
|
|
|
path = u.path or ""
|
|
|
|
|
|
parts = [p for p in path.split("/") if p]
|
|
|
|
|
|
if parts:
|
|
|
|
|
|
if not bkt:
|
|
|
|
|
|
bkt = parts[0]
|
|
|
|
|
|
obj = "/".join(parts[1:])
|
|
|
|
|
|
obj = unquote(obj)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
obj = obj
|
|
|
|
|
|
if not bkt or not obj:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="bucket 与 object_name/URL 不能为空")
|
|
|
|
|
|
exp = int(expires or 3600)
|
|
|
|
|
|
ps = presigned_read(client, bkt, obj, exp) if client is not None else None
|
|
|
|
|
|
pub_url = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import quote as _quote
|
|
|
|
|
|
if public_base:
|
|
|
|
|
|
pub_url = f"{public_base}/{bkt}/{_quote(obj, safe='/')}"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pub_url = None
|
|
|
|
|
|
return MinioPresignResponse(
|
|
|
|
|
|
bucket=bkt,
|
|
|
|
|
|
object=obj,
|
|
|
|
|
|
minio_url=pub_url,
|
|
|
|
|
|
minio_presigned_url=ps,
|
|
|
|
|
|
expires=exp,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
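# Example: presign an object either by bucket/object_name or by pasting a public URL (illustrative host):
#   curl -X POST http://localhost:8000/minio/presign -F bucket=docs -F object_name=converted/report.md
#   curl -X POST http://localhost:8000/minio/presign -F "url=http://minio:9000/docs/converted/report.md"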
@app.get("/minio/object")
|
|
|
|
|
|
def minio_object(bucket: Optional[str] = None, object: str = ""):
|
|
|
|
|
|
client, cfg_bucket, public_base, _ = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client is None:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="MinIO 未配置")
|
|
|
|
|
|
bkt = (bucket or cfg_bucket or "").strip()
|
|
|
|
|
|
obj_in = (object or "").strip()
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import unquote as _unquote
|
|
|
|
|
|
obj = _unquote(obj_in)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
obj = obj_in
|
|
|
|
|
|
if not bkt or not obj:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="bucket 与 object 不能为空")
|
|
|
|
|
|
ct = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
try:
|
|
|
|
|
|
st = client.stat_object(bucket_name=bkt, object_name=obj) # type: ignore
|
|
|
|
|
|
except TypeError:
|
|
|
|
|
|
st = client.stat_object(bkt, obj) # type: ignore
|
|
|
|
|
|
ct = getattr(st, "content_type", None)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
ct = None
|
|
|
|
|
|
data = b""
|
|
|
|
|
|
try:
|
|
|
|
|
|
try:
|
|
|
|
|
|
resp = client.get_object(bucket_name=bkt, object_name=obj) # type: ignore
|
|
|
|
|
|
except TypeError:
|
|
|
|
|
|
resp = client.get_object(bkt, obj) # type: ignore
|
|
|
|
|
|
try:
|
|
|
|
|
|
data = resp.read() # type: ignore
|
|
|
|
|
|
finally:
|
|
|
|
|
|
try:
|
|
|
|
|
|
resp.close() # type: ignore
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
raise HTTPException(status_code=403, detail=str(e))
|
|
|
|
|
|
media = ct or detect_mime(obj, data)
|
|
|
|
|
|
headers = {"Content-Disposition": f"inline; filename*=UTF-8''" + quote(Path(obj).name)}
|
|
|
|
|
|
return Response(content=data, media_type=media, headers=headers)
|
|
|
|
|
|
|
|
|
|
|
|
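# Example: stream an object through the API, useful when the bucket is not public (illustrative host):
#   curl "http://localhost:8000/minio/object?bucket=docs&object=converted/report.md" -o report.md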
@app.post("/config/db")
|
|
|
|
|
|
async def set_db_config(webhook_url: Optional[str] = Form(None), token: Optional[str] = Form(None)):
|
|
|
|
|
|
RUNTIME_CONFIG["db"].update({"webhook_url": webhook_url, "token": token})
|
|
|
|
|
|
return {"ok": True}
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/config")
|
|
|
|
|
|
def get_config_snapshot():
|
|
|
|
|
|
safe = {
|
|
|
|
|
|
"minio": {
|
|
|
|
|
|
k: ("***" if k == "secret" and v else v)
|
|
|
|
|
|
for k, v in RUNTIME_CONFIG.get("minio", {}).items()
|
|
|
|
|
|
},
|
|
|
|
|
|
"db": RUNTIME_CONFIG.get("db", {}),
|
|
|
|
|
|
}
|
|
|
|
|
|
return safe
|
|
|
|
|
|
|
|
|
|
|
|
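# Example snapshot (the secret is masked before it leaves the process; other values are hypothetical):
#   {"minio": {"endpoint": "minio.internal:9000", "secret": "***", ...},
#    "db": {"webhook_url": null, "token": null}}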
@app.get("/config/profiles")
|
|
|
|
|
|
def list_profiles():
|
|
|
|
|
|
names = []
|
|
|
|
|
|
try:
|
|
|
|
|
|
for p in profiles_dir.glob("*.json"):
|
|
|
|
|
|
names.append(p.stem)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
names = []
|
|
|
|
|
|
return {"ok": True, "profiles": sorted(names)}
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/config/save_profile")
|
|
|
|
|
|
async def save_profile(name: str = Form(...)):
|
|
|
|
|
|
if not name.strip():
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="name required")
|
|
|
|
|
|
data = {
|
|
|
|
|
|
"minio": RUNTIME_CONFIG.get("minio", {}),
|
|
|
|
|
|
"db": RUNTIME_CONFIG.get("db", {}),
|
|
|
|
|
|
}
|
|
|
|
|
|
import json as _json
|
|
|
|
|
|
path = profiles_dir / f"{sanitize_filename(name)}.json"
|
|
|
|
|
|
try:
|
|
|
|
|
|
path.write_text(_json.dumps(data, ensure_ascii=False, indent=2), "utf-8")
|
|
|
|
|
|
return {"ok": True, "name": path.stem}
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/config/load_profile")
|
|
|
|
|
|
def load_profile(name: str):
|
|
|
|
|
|
import json as _json
|
|
|
|
|
|
path = profiles_dir / f"{sanitize_filename(name)}.json"
|
|
|
|
|
|
if not path.exists():
|
|
|
|
|
|
raise HTTPException(status_code=404, detail="profile not found")
|
|
|
|
|
|
try:
|
|
|
|
|
|
data = _json.loads(path.read_text("utf-8"))
|
|
|
|
|
|
m = data.get("minio", {})
|
|
|
|
|
|
d = data.get("db", {})
|
|
|
|
|
|
RUNTIME_CONFIG["minio"].update(m)
|
|
|
|
|
|
RUNTIME_CONFIG["db"].update(d)
|
|
|
|
|
|
client, bkt, pub, _ = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client is None or not bkt or not pub:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="MinIO config invalid")
|
|
|
|
|
|
return {"ok": True, "config": data}
|
|
|
|
|
|
except HTTPException:
|
|
|
|
|
|
raise
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
# Auto-load DB config from app/configs without restart or page refresh
# ──────────────────────────────────────────────────────────────────────────────

def _choose_default_config_file() -> Optional[Path]:
    try:
        candidates: List[Path] = []
        for p in profiles_dir.rglob("*.json"):
            candidates.append(p)
        if not candidates:
            return None
        by_name = {x.stem.lower(): x for x in candidates}
        for prefer in ("active", "default", "test"):
            if prefer in by_name:
                return by_name[prefer]
        return sorted(candidates, key=lambda x: x.stat().st_mtime, reverse=True)[0]
    except Exception:
        return None

def _apply_configs_from_file(path: Path) -> None:
    try:
        data = json.loads(path.read_text("utf-8"))
        db_cfg = data.get("db", {})
        if isinstance(db_cfg, dict) and db_cfg:
            RUNTIME_CONFIG["db"].update(db_cfg)
        minio_cfg = data.get("minio", {})
        if isinstance(minio_cfg, dict) and minio_cfg:
            sanitized = dict(minio_cfg)
            try:
                ep = str(sanitized.get("endpoint") or "").strip()
                if ep and ":9001" in ep:
                    h = ep.split("/")[0]
                    if ":" in h:
                        parts = h.split(":")
                        sanitized["endpoint"] = f"{parts[0]}:9000"
                    else:
                        sanitized["endpoint"] = h
            except Exception:
                pass
            try:
                pub = str(sanitized.get("public") or "").strip()
                if pub and (":9001" in pub or "/browser" in pub or "/minio" in pub):
                    host = pub.split("/")[0]
                    sec = str(sanitized.get("secure") or RUNTIME_CONFIG.get("minio", {}).get("secure") or "false").lower() in {"1", "true", "yes", "on"}
                    scheme = "https" if sec else "http"
                    if ":" in host:
                        base_host = host.split(":")[0]
                        sanitized["public"] = f"{scheme}://{base_host}:9000"
                    else:
                        sanitized["public"] = f"{scheme}://{host}:9000"
            except Exception:
                pass
            # Only fill keys that are still unset, so explicit runtime config wins
            for k, v in sanitized.items():
                try:
                    cur = RUNTIME_CONFIG["minio"].get(k)
                    if cur in (None, ""):
                        RUNTIME_CONFIG["minio"][k] = v
                except Exception:
                    RUNTIME_CONFIG["minio"][k] = v
    except Exception:
        pass

async def _watch_db_config_changes(interval_sec: float = 3.0) -> None:
    last_path: Optional[Path] = _choose_default_config_file()
    last_mtime: float = (last_path.stat().st_mtime if last_path and last_path.exists() else 0.0)
    # Apply once at startup
    if last_path:
        _apply_configs_from_file(last_path)
    while True:
        try:
            cur = _choose_default_config_file()
            if cur and cur.exists():
                mt = cur.stat().st_mtime
                if cur != last_path or mt > last_mtime:
                    _apply_configs_from_file(cur)
                    last_path = cur
                    last_mtime = mt
        except Exception:
            pass
        await asyncio.sleep(interval_sec)

@app.on_event("startup")
|
|
|
|
|
|
async def _startup_autoload_configs():
|
|
|
|
|
|
try:
|
|
|
|
|
|
asyncio.create_task(_watch_db_config_changes(interval_sec=3.0))
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
try:
|
|
|
|
|
|
asyncio.create_task(_auto_time_calibration())
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
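# Example invocation of the endpoint below (illustrative; exactly one markdown source may be given):
#   curl -X POST http://localhost:8000/md/convert \
#     -F "markdown_text=# Title" -F target=pdf -F save=false -o out.pdf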
@app.post("/md/convert", response_model=ConvertResponse)
|
|
|
|
|
|
async def md_convert(
|
|
|
|
|
|
md_file: Optional[UploadFile] = File(None),
|
|
|
|
|
|
markdown_text: Optional[str] = Form(None),
|
|
|
|
|
|
markdown_url: Optional[str] = Form(None),
|
|
|
|
|
|
target: str = Form("docx"),
|
|
|
|
|
|
save: Optional[bool] = Form(False),
|
|
|
|
|
|
filename: Optional[str] = Form(None),
|
|
|
|
|
|
css_name: Optional[str] = Form(None),
|
|
|
|
|
|
css_text: Optional[str] = Form(None),
|
|
|
|
|
|
toc: Optional[bool] = Form(True),
|
|
|
|
|
|
header_text: Optional[str] = Form(None),
|
|
|
|
|
|
footer_text: Optional[str] = Form(None),
|
|
|
|
|
|
logo_url: Optional[str] = Form(None),
|
|
|
|
|
|
logo_file: Optional[UploadFile] = File(None),
|
|
|
|
|
|
cover_url: Optional[str] = Form(None),
|
|
|
|
|
|
cover_file: Optional[UploadFile] = File(None),
|
|
|
|
|
|
product_name: Optional[str] = Form(None),
|
|
|
|
|
|
document_name: Optional[str] = Form(None),
|
|
|
|
|
|
product_version: Optional[str] = Form(None),
|
|
|
|
|
|
document_version: Optional[str] = Form(None),
|
|
|
|
|
|
copyright_text: Optional[str] = Form(None),
|
|
|
|
|
|
):
|
|
|
|
|
|
"""
|
|
|
|
|
|
@function md_convert
|
|
|
|
|
|
@description Advanced Markdown conversion endpoint supporting custom styling, logos, and metadata
|
|
|
|
|
|
@param md_file Uploaded Markdown file (optional)
|
|
|
|
|
|
@param markdown_text Raw Markdown text (optional)
|
|
|
|
|
|
@param markdown_url URL to Markdown file (optional)
|
|
|
|
|
|
@param target Output format (docx/pdf)
|
|
|
|
|
|
@param save Save to MinIO
|
|
|
|
|
|
@param filename Output filename
|
|
|
|
|
|
@param css_name Predefined CSS profile name
|
|
|
|
|
|
@param css_text Custom CSS content
|
|
|
|
|
|
@param toc Include Table of Contents
|
|
|
|
|
|
@param header_text Custom header text
|
|
|
|
|
|
@param footer_text Custom footer text
|
|
|
|
|
|
@param logo_url URL for logo image
|
|
|
|
|
|
@param logo_file Uploaded logo file
|
|
|
|
|
|
@param cover_url URL for cover image
|
|
|
|
|
|
@param cover_file Uploaded cover file
|
|
|
|
|
|
@param product_name Product name for cover
|
|
|
|
|
|
@param document_name Document name for cover
|
|
|
|
|
|
@param product_version Product version for cover
|
|
|
|
|
|
@param document_version Document version for cover
|
|
|
|
|
|
@param copyright_text Copyright text
|
|
|
|
|
|
@return File download or JSON response
|
|
|
|
|
|
"""
|
|
|
|
|
|
logging.info(f"md_convert start target={target} save={save} filename={filename}")
|
|
|
|
|
|
provided = 0
|
|
|
|
|
|
if md_file is not None:
|
|
|
|
|
|
provided += 1
|
|
|
|
|
|
if markdown_text:
|
|
|
|
|
|
provided += 1
|
|
|
|
|
|
if markdown_url:
|
|
|
|
|
|
provided += 1
|
|
|
|
|
|
if provided != 1:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="provide exactly one of md_file, markdown_text, markdown_url")
|
|
|
|
|
|
if target.lower() not in {"docx", "pdf"}:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="target must be docx or pdf")
|
|
|
|
|
|
mappings: List[Dict[str, str]] = []
|
|
|
|
|
|
base_dir = Path(".").resolve()
|
|
|
|
|
|
if md_file is not None:
|
|
|
|
|
|
content = (await md_file.read()).decode("utf-8", errors="ignore")
|
|
|
|
|
|
base_dir = Path(md_file.filename or ".").resolve().parent if md_file and md_file.filename else Path(".")
|
|
|
|
|
|
base = sanitize_filename(filename) if filename else sanitize_filename(os.path.splitext(md_file.filename or "document")[0])
|
|
|
|
|
|
elif markdown_url:
|
|
|
|
|
|
src = markdown_url.strip()
|
|
|
|
|
|
try:
|
|
|
|
|
|
if src.lower().startswith("http"):
|
|
|
|
|
|
already_escaped = "%" in src
|
|
|
|
|
|
safe = src if already_escaped else _safe_http_url(src)
|
|
|
|
|
|
try:
|
|
|
|
|
|
with urlopen(safe, timeout=10) as r:
|
|
|
|
|
|
raw = r.read()
|
|
|
|
|
|
try:
|
|
|
|
|
|
logging.info(f"md_convert fetched markdown_url len={len(raw)} url={safe}")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
except UnicodeEncodeError:
|
|
|
|
|
|
alt = quote(src, safe=':/?&=%#')
|
|
|
|
|
|
with urlopen(_safe_http_url(alt), timeout=10) as r:
|
|
|
|
|
|
raw = r.read()
|
|
|
|
|
|
try:
|
|
|
|
|
|
logging.info(f"md_convert fetched markdown_url(len={len(raw)}) with alt url")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
except HTTPError as err:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail={"error": "fetch_failed", "status": err.code, "url": getattr(err, 'url', src)})
|
|
|
|
|
|
except URLError as err:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail={"error": "fetch_failed", "status": None, "url": src, "reason": str(getattr(err, 'reason', err))})
|
|
|
|
|
|
try:
|
|
|
|
|
|
content = raw.decode("utf-8")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
content = raw.decode("latin-1", errors="ignore")
|
|
|
|
|
|
else:
|
|
|
|
|
|
with open(src, "r", encoding="utf-8", errors="ignore") as f:
|
|
|
|
|
|
content = f.read()
|
|
|
|
|
|
base_dir = Path(src).resolve().parent
|
|
|
|
|
|
except HTTPException:
|
|
|
|
|
|
raise
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail={"error": "fetch_failed", "url": src, "message": str(e)})
|
|
|
|
|
|
base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(src, None))
|
|
|
|
|
|
else:
|
|
|
|
|
|
content = markdown_text or ""
|
|
|
|
|
|
base = sanitize_filename(filename) if filename else "document"
|
|
|
|
|
|
    # Rewrite local assets to MinIO URLs if configured
    client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
    if client is not None and bucket and public_base and base_dir:
        try:
            content, mappings = _rewrite_md_assets_to_minio(content, base_dir, client, bucket, public_base, prefix)
        except Exception:
            pass

    # Prepare common assets (logo, cover) for both DOCX and PDF
    logo_src = None
    try:
        client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
        if logo_file is not None and getattr(logo_file, "filename", None):
            lb = await logo_file.read()
            mime = detect_image_mime(logo_file.filename, lb)
            safe_logo = sanitize_filename(os.path.splitext(logo_file.filename or "logo")[0])
            extl = "." + (logo_file.filename.rsplit(".", 1)[-1].lower() if "." in (logo_file.filename or "") else "png")
            obj_logo = join_prefix(prefix, f"uploads/logo/{int(time.time())}-{safe_logo}{extl}")
            bio = io.BytesIO(lb)
            if client is not None and bucket and public_base:
                client.put_object(bucket_name=bucket, object_name=obj_logo, data=bio, length=len(lb), content_type=mime)  # type: ignore
                try:
                    from urllib.parse import quote as _quote
                    enc = _quote(obj_logo, safe="/")
                    exp = int(timedelta(hours=12).total_seconds())
                    ps = presigned_read(client, bucket, obj_logo, exp) if client is not None else None
                    logo_src = ps or f"{public_base}/{bucket}/{enc}"
                except Exception:
                    logo_src = f"{public_base}/{bucket}/{obj_logo}"
            try:
                if not save:
                    import base64 as _b64
                    logo_src = f"data:{mime};base64," + _b64.b64encode(lb).decode("ascii")
            except Exception:
                pass
        elif logo_url:
            u = logo_url.strip()
            if u.lower().startswith("http://") or u.lower().startswith("https://"):
                logo_src = u
            else:
                # Absolute and relative local paths are handled identically.
                p = Path(u)
                try:
                    lb = p.read_bytes()
                    mime = detect_image_mime(p.name, lb)
                    obj_logo = join_prefix(prefix, f"uploads/logo/{int(time.time())}-{sanitize_filename(p.stem)}{p.suffix or '.png'}")
                    bio = io.BytesIO(lb)
                    if client is not None and bucket and public_base:
                        client.put_object(bucket_name=bucket, object_name=obj_logo, data=bio, length=len(lb), content_type=mime)  # type: ignore
                        try:
                            from urllib.parse import quote as _quote
                            enc = _quote(obj_logo, safe="/")
                            exp = int(timedelta(hours=12).total_seconds())
                            ps = presigned_read(client, bucket, obj_logo, exp) if client is not None else None
                            logo_src = ps or f"{public_base}/{bucket}/{enc}"
                        except Exception:
                            logo_src = f"{public_base}/{bucket}/{obj_logo}"
                    try:
                        if not save:
                            import base64 as _b64
                            logo_src = f"data:{mime};base64," + _b64.b64encode(lb).decode("ascii")
                    except Exception:
                        pass
                except Exception:
                    logo_src = p.resolve().as_uri()
    except Exception:
        logo_src = None

    cover_src = None
    try:
        limit = 2 * 1024 * 1024
        if cover_file is not None and getattr(cover_file, "filename", None):
            cb = await cover_file.read()
            if len(cb) > limit:
                raise HTTPException(status_code=400, detail="cover image exceeds 2MB limit")
            client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
            mime = detect_image_mime(cover_file.filename, cb)
            safe_cov = sanitize_filename(os.path.splitext(cover_file.filename or "cover")[0])
            extc = "." + (cover_file.filename.rsplit(".", 1)[-1].lower() if "." in (cover_file.filename or "") else "png")
            obj_cov = join_prefix(prefix, f"uploads/cover/{int(time.time())}-{safe_cov}{extc}")
            bio = io.BytesIO(cb)
            if client is not None and bucket and public_base:
                client.put_object(bucket_name=bucket, object_name=obj_cov, data=bio, length=len(cb), content_type=mime)  # type: ignore
                try:
                    from urllib.parse import quote as _quote
                    enc = _quote(obj_cov, safe="/")
                    exp = int(timedelta(hours=12).total_seconds())
                    ps = presigned_read(client, bucket, obj_cov, exp) if client is not None else None
                    cover_src = ps or f"{public_base}/{bucket}/{enc}"
                except Exception:
                    cover_src = f"{public_base}/{bucket}/{obj_cov}"
            try:
                if not save:
                    import base64 as _b64
                    cover_src = f"data:{mime};base64," + _b64.b64encode(cb).decode("ascii")
            except Exception:
                pass
        elif cover_url:
            cu = cover_url.strip()
            if cu.lower().startswith("http://") or cu.lower().startswith("https://"):
                cover_src = cu
            else:
                p = Path(cu)
                rb = p.read_bytes()
                if len(rb) > limit:
                    raise HTTPException(status_code=400, detail="cover image exceeds 2MB limit")
                client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
                mime = detect_image_mime(cu, rb)
                obj_cov = join_prefix(prefix, f"uploads/cover/{int(time.time())}-{sanitize_filename(p.stem)}{p.suffix or '.png'}")
                bio = io.BytesIO(rb)
                if client is not None and bucket and public_base:
                    client.put_object(bucket_name=bucket, object_name=obj_cov, data=bio, length=len(rb), content_type=mime)  # type: ignore
                    try:
                        from urllib.parse import quote as _quote
                        enc = _quote(obj_cov, safe="/")
                        exp = int(timedelta(hours=12).total_seconds())
                        ps = presigned_read(client, bucket, obj_cov, exp) if client is not None else None
                        cover_src = ps or f"{public_base}/{bucket}/{enc}"
                    except Exception:
                        cover_src = f"{public_base}/{bucket}/{obj_cov}"
                try:
                    if not save:
                        import base64 as _b64
                        cover_src = f"data:{mime};base64," + _b64.b64encode(rb).decode("ascii")
                except Exception:
                    pass
    except HTTPException:
        raise
    except Exception:
        cover_src = None

    logging.info(f"md_convert assets prepared logo_src={bool(logo_src)} cover_src={bool(cover_src)} css_name={css_name} css_text_len={(len(css_text) if css_text else 0)}")
if target.lower() == "docx":
|
|
|
|
|
|
data = md_to_docx_bytes(
|
|
|
|
|
|
content,
|
|
|
|
|
|
toc=bool(toc),
|
|
|
|
|
|
header_text=header_text,
|
|
|
|
|
|
footer_text=footer_text,
|
|
|
|
|
|
logo_url=logo_src or logo_url,
|
|
|
|
|
|
copyright_text=copyright_text,
|
|
|
|
|
|
filename_text=base,
|
|
|
|
|
|
cover_src=cover_src,
|
|
|
|
|
|
product_name=product_name,
|
|
|
|
|
|
document_name=document_name,
|
|
|
|
|
|
product_version=product_version,
|
|
|
|
|
|
document_version=document_version,
|
|
|
|
|
|
)
|
|
|
|
|
|
media = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
|
|
|
|
|
ext = ".docx"
|
|
|
|
|
|
# Upload final docx to MinIO
|
|
|
|
|
|
client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
minio_url = None
|
|
|
|
|
|
minio_presigned_url = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
rc_store_final = str(RUNTIME_CONFIG.get("minio", {}).get("store_final") or "true").lower() in {"1","true","yes","on"}
|
|
|
|
|
|
if client is not None and bucket and public_base and rc_store_final:
|
|
|
|
|
|
out_name = f"{base}{ext}"
|
|
|
|
|
|
obj = f"{(prefix or '').strip('/')}/converted/{out_name}".lstrip("/")
|
|
|
|
|
|
bio = io.BytesIO(data)
|
|
|
|
|
|
ct = media or "application/octet-stream"
|
|
|
|
|
|
client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(data), content_type=ct) # type: ignore
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import quote as _quote
|
|
|
|
|
|
minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
minio_url = f"{public_base}/{bucket}/{obj}"
|
|
|
|
|
|
try:
|
|
|
|
|
|
exp = int(timedelta(hours=12).total_seconds())
|
|
|
|
|
|
minio_presigned_url = presigned_read(client, bucket, obj, exp) if client is not None else None
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
minio_presigned_url = None
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
minio_url = None
|
|
|
|
|
|
logging.info(f"md_convert done docx name={base}{ext} size={len(data)}")
|
|
|
|
|
|
_db_notify({
|
|
|
|
|
|
"type": "md_convert",
|
|
|
|
|
|
"base": base,
|
|
|
|
|
|
"target": target.lower(),
|
|
|
|
|
|
"local_url": None,
|
|
|
|
|
|
"minio_url": minio_url,
|
|
|
|
|
|
"minio_presigned_url": minio_presigned_url,
|
|
|
|
|
|
"mappings": mappings,
|
|
|
|
|
|
"time": int(time.time())
|
|
|
|
|
|
})
|
|
|
|
|
|
return ConvertResponse(
|
|
|
|
|
|
minio_url=minio_url,
|
|
|
|
|
|
minio_presigned_url=minio_presigned_url,
|
|
|
|
|
|
name=f"{base}{ext}",
|
|
|
|
|
|
media_type=media,
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
use_css_name = css_name if css_name else ("default" if not css_text else None)
|
|
|
|
|
|
data = md_to_pdf_bytes_with_renderer(
|
|
|
|
|
|
content,
|
|
|
|
|
|
"weasyprint",
|
|
|
|
|
|
css_name=use_css_name,
|
|
|
|
|
|
css_text=css_text,
|
|
|
|
|
|
toc=bool(toc),
|
|
|
|
|
|
header_text=header_text,
|
|
|
|
|
|
footer_text=footer_text,
|
|
|
|
|
|
logo_url=logo_src or logo_url,
|
|
|
|
|
|
copyright_text=copyright_text,
|
|
|
|
|
|
filename_text=base,
|
|
|
|
|
|
cover_src=cover_src,
|
|
|
|
|
|
product_name=product_name,
|
|
|
|
|
|
document_name=document_name,
|
|
|
|
|
|
product_version=product_version,
|
|
|
|
|
|
document_version=document_version,
|
|
|
|
|
|
)
|
|
|
|
|
|
media = "application/pdf"
|
|
|
|
|
|
ext = ".pdf"
|
|
|
|
|
|
minio_url = None
|
|
|
|
|
|
minio_presigned_url = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
rc_store_final = str(RUNTIME_CONFIG.get("minio", {}).get("store_final") or "true").lower() in {"1","true","yes","on"}
|
|
|
|
|
|
if client is not None and bucket and public_base and rc_store_final:
|
|
|
|
|
|
out_name = f"{base}{ext}"
|
|
|
|
|
|
obj = f"{(prefix or '').strip('/')}/converted/{out_name}".lstrip("/")
|
|
|
|
|
|
bio = io.BytesIO(data)
|
|
|
|
|
|
ct = media or "application/octet-stream"
|
|
|
|
|
|
try:
|
|
|
|
|
|
if ct.startswith("text/") and "charset" not in ct.lower():
|
|
|
|
|
|
ct = ct + "; charset=utf-8"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(data), content_type=ct) # type: ignore
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import quote as _quote
|
|
|
|
|
|
minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
minio_url = f"{public_base}/{bucket}/{obj}"
|
|
|
|
|
|
try:
|
|
|
|
|
|
exp = int(timedelta(hours=12).total_seconds())
|
|
|
|
|
|
minio_presigned_url = presigned_read(client, bucket, obj, exp) if client is not None else None
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
minio_presigned_url = None
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
minio_url = None
|
|
|
|
|
|
logging.info(f"md_convert done pdf name={base}{ext} size={len(data)}")
|
|
|
|
|
|
_db_notify({
|
|
|
|
|
|
"type": "md_convert",
|
|
|
|
|
|
"base": base,
|
|
|
|
|
|
"target": target.lower(),
|
|
|
|
|
|
"local_url": None,
|
|
|
|
|
|
"minio_url": minio_url,
|
|
|
|
|
|
"minio_presigned_url": minio_presigned_url,
|
|
|
|
|
|
"mappings": mappings,
|
|
|
|
|
|
"time": int(time.time())
|
|
|
|
|
|
})
|
|
|
|
|
|
return ConvertResponse(
|
|
|
|
|
|
minio_url=minio_url,
|
|
|
|
|
|
minio_presigned_url=minio_presigned_url,
|
|
|
|
|
|
name=f"{base}{ext}",
|
|
|
|
|
|
media_type=media,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/config/linkmap")
|
|
|
|
|
|
def get_linkmap():
|
|
|
|
|
|
return load_linkmap()
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/config/linkmap")
|
|
|
|
|
|
async def set_linkmap(mapping: dict):
|
|
|
|
|
|
try:
|
|
|
|
|
|
save_linkmap(mapping)
|
|
|
|
|
|
return {"ok": True}
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail=str(e))
|
|
|
|
|
|
def detect_image_mime(filename: Optional[str], data: bytes) -> str:
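    """Best-effort image MIME detection from the file extension, magic bytes,
    and (for SVG) a sniff of the leading text; falls back to image/png."""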
    ext = (os.path.splitext(filename or "")[1] or "").lower()
    if ext in {".png"}:
        return "image/png"
    if ext in {".jpg", ".jpeg"}:
        return "image/jpeg"
    if ext in {".svg"}:
        return "image/svg+xml"
    if ext in {".webp"}:
        return "image/webp"
    if data.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if data.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP":
        return "image/webp"
    try:
        head = data[:512].decode("utf-8", errors="ignore")
        if "<svg" in head:
            return "image/svg+xml"
    except Exception:
        pass
    guessed, _ = mimetypes.guess_type(filename or "")
    if guessed:
        return guessed
    return "image/png"

def detect_mime(filename: Optional[str], data: bytes) -> str:
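    """General MIME detection: defer to detect_image_mime for known image
    extensions or signatures, else mimetypes.guess_type, else octet-stream."""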
    ext = (os.path.splitext(filename or "")[1] or "").lower()
    if ext in {".png", ".jpg", ".jpeg", ".svg", ".webp"}:
        return detect_image_mime(filename, data)
    sig_png = data.startswith(b"\x89PNG\r\n\x1a\n")
    sig_jpg = data.startswith(b"\xff\xd8\xff")
    sig_webp = len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP"
    if sig_png or sig_jpg or sig_webp:
        return detect_image_mime(filename, data)
    guessed, _ = mimetypes.guess_type(filename or "")
    if guessed:
        return guessed
    return "application/octet-stream"


@app.post("/proxy/download")
async def proxy_download(url: str = Form(...)):
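    """Fetch a remote URL or read a local file and return it as an attachment
    download with a best-effort Content-Disposition filename."""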
    u = (url or "").strip()
    if not u:
        raise HTTPException(status_code=400, detail="url required")
    try:
        data: bytes
        ct: str
        name: str
        if u.lower().startswith("http://") or u.lower().startswith("https://"):
            already_escaped = "%" in u
            safe = u if already_escaped else _safe_http_url(u)
            with urlopen(safe, timeout=15) as r:
                data = r.read()
                ct = r.headers.get("Content-Type") or detect_mime(None, data)
            from urllib.parse import urlparse, unquote
            import os as _os
            parsed = urlparse(u)
            path = unquote(parsed.path or "")
            last = (_os.path.basename(path) or "download").split("?")[0]
            if "." in last:
                name = last
            else:
                import mimetypes as _m
                ext = _m.guess_extension((ct or "").split(";")[0].strip()) or ".md"
                name = last + ext
        else:
            p = Path(u)
            if not p.exists() or not p.is_file():
                raise HTTPException(status_code=404, detail="local path not found")
            data = p.read_bytes()
            ct = detect_mime(p.name, data)
            name = p.name
        disp = f"attachment; filename=\"{name}\"; filename*=UTF-8''" + quote(name)
        headers = {"Content-Disposition": disp}
        return Response(content=data, media_type=ct, headers=headers)
    except HTTPError as err:
        raise HTTPException(status_code=err.code, detail=f"download failed: {err}")
    except URLError as err:
        raise HTTPException(status_code=400, detail=f"download failed: {err}")
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

def _minio_from_env() -> Tuple[Optional[object], Optional[str], Optional[str], str]:
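    """Build a MinIO client from MINIO_* environment variables. Returns
    (client, bucket, public_base, prefix), or (None, None, None, "") when
    the minio SDK or any required variable is missing."""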
    endpoint = os.environ.get("MINIO_ENDPOINT")
    access = os.environ.get("MINIO_ACCESS_KEY")
    secret = os.environ.get("MINIO_SECRET_KEY")
    bucket = os.environ.get("MINIO_BUCKET")
    secure = str(os.environ.get("MINIO_SECURE", "false")).lower() in {"1", "true", "yes", "on"}
    public_base = os.environ.get("MINIO_PUBLIC_ENDPOINT") or (f"https://{endpoint}" if secure else f"http://{endpoint}" if endpoint else None)
    if Minio is None or not endpoint or not access or not secret or not bucket or not public_base:
        return None, None, None, ""
    client = Minio(endpoint, access_key=access, secret_key=secret, secure=secure)
    try:
        _minio_create_bucket(client, bucket)
    except Exception:
        pass
    return client, bucket, public_base, os.environ.get("MINIO_PREFIX", "")


def _export_ext(export: str) -> str:
    e = (export or "").lower()
    if e == "markdown":
        return ".md"
    if e == "html":
        return ".html"
    if e in {"json", "doctags"}:
        return ".json"
    return ".txt"


def _media_type(export: str) -> str:
    e = (export or "").lower()
    if e == "markdown":
        return "text/markdown; charset=utf-8"
    if e == "html":
        return "text/html; charset=utf-8"
    if e in {"json", "doctags"}:
        return "application/json"
    return "text/plain; charset=utf-8"

def _rewrite_md_assets_to_minio(text: str, base_dir: Path, client: object, bucket: str, public_base: str, prefix: str, search_root: Optional[Path] = None) -> Tuple[str, List[Dict[str, str]]]:
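    """Rewrite local asset references in markdown text (markdown images and
    links, HTML img/a/video/audio/source attributes, and data: URIs) to
    public MinIO URLs, uploading each asset as it is found. Returns the
    rewritten text and a list of {from, to, ok, type} mapping records."""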
    mappings: List[Dict[str, str]] = []

    def _abs_key(p: Path) -> str:
        k = p.resolve().as_posix().lstrip("/")
        return k.replace(":", "")

    def _upload_data_uri(uri: str) -> Optional[str]:
        try:
            import base64, hashlib
            head, _, b64 = uri.partition(",")
            if not b64:
                return None
            b = base64.b64decode(b64, validate=False)
            mime = ""
            try:
                low = head.lower()
                pos = low.find("data:")
                if pos != -1:
                    rest = head[pos + 5:]
                    semi = rest.find(";")
                    mime = rest[:semi] if semi != -1 else rest
            except Exception:
                mime = ""
            if not mime:
                mime = detect_image_mime(None, b)
            ext = ".png"
            if mime.lower() in {"image/jpeg", "image/jpg"}:
                ext = ".jpg"
            elif mime.lower() == "image/webp":
                ext = ".webp"
            elif mime.lower() == "image/svg+xml":
                ext = ".svg"
            elif mime.lower() == "image/gif":
                ext = ".gif"
            h = hashlib.sha256(b).hexdigest()[:16]
            obj = join_prefix(prefix, f"embed/{h}{ext}")
            bio = io.BytesIO(b)
            client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(b), content_type=mime or detect_image_mime(None, b))  # type: ignore
            try:
                from urllib.parse import quote as _quote
                return f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
            except Exception:
                return f"{public_base}/{bucket}/{obj}"
        except Exception:
            return None

    def _upload(path: Path) -> Optional[str]:
        try:
            data = path.read_bytes()
            mime = detect_mime(path.name, data)
            obj = join_prefix(prefix, f"abs/{_abs_key(path)}")
            bio = io.BytesIO(data)
            size = len(data)
            client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=size, content_type=mime)  # type: ignore
            try:
                from urllib.parse import quote as _quote
                return f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
            except Exception:
                return f"{public_base}/{bucket}/{obj}"
        except Exception:
            return None

    def _resolve_path(pure: str) -> Optional[Path]:
        q = pure.replace("\\", "/")
        if q.startswith("/"):
            try:
                rel = q.lstrip("/")
                base = (search_root or base_dir)
                p0 = (base / rel).resolve()
            except Exception:
                p0 = (search_root or base_dir) / q.lstrip("/")
            if p0.exists():
                return p0
        try:
            p = (base_dir / q).resolve()
        except Exception:
            p = (base_dir / q)
        if p.exists():
            return p
        try:
            name = Path(q).name
            search = (search_root or base_dir)
            for hit in search.rglob(name):
                if hit.exists():
                    return hit
        except Exception:
            pass
        return None

    def _split_target(s: str) -> Tuple[str, str]:
        # Split a markdown link target into the path part and any trailing
        # text: either an angle-bracketed "<path> title" form or a quoted
        # 'path "title"' form.
        pure, tail = s, ""
        if s.startswith("<"):
            gt = s.find(">")
            if gt != -1:
                pure = s[1:gt].strip()
                tail = s[gt + 1:]
        else:
            dq = s.find('"')
            sq = s.find("'")
            qpos = -1
            if dq != -1 and sq != -1:
                qpos = dq if dq < sq else sq
            elif dq != -1:
                qpos = dq
            elif sq != -1:
                qpos = sq
            if qpos != -1:
                pure = s[:qpos].rstrip()
                tail = s[qpos:]
        return pure, tail

    def _replace_md(m: re.Match) -> str:
        full = m.group(0)
        urlpart = m.group(1).strip()
        if urlpart.startswith("data:"):
            new = _upload_data_uri(urlpart)
            if new:
                mappings.append({"from": "data_uri", "to": new, "ok": True, "type": "md_image_data"})
                return full.replace(urlpart, new)
            mappings.append({"from": "data_uri", "to": None, "ok": False, "type": "md_image_data"})
            return full
        if urlpart.startswith("http://") or urlpart.startswith("https://"):
            return full
        pure, tail = _split_target(urlpart)
        p = _resolve_path(pure)
        if not p or not p.exists():
            mappings.append({"from": pure, "to": None, "ok": False, "type": "md_link"})
            return full
        new = _upload(p)
        if not new:
            mappings.append({"from": pure, "to": None, "ok": False, "type": "md_link"})
            return full
        mappings.append({"from": pure, "to": new, "ok": True, "type": "md_link"})
        return full.replace(urlpart, f"{new}{tail}")

    text = re.sub(r"!\[[^\]]*\]\(([^)]+)\)", _replace_md, text)

    def _replace_mdlink(m: re.Match) -> str:
        full = m.group(0)
        urlpart = m.group(1).strip()
        if urlpart.startswith("http://") or urlpart.startswith("https://") or urlpart.startswith("data:"):
            return full
        pure, tail = _split_target(urlpart)
        p = _resolve_path(pure)
        if not p or not p.exists():
            mappings.append({"from": pure, "to": None, "ok": False, "type": "md_link"})
            return full
        new = _upload(p)
        if not new:
            mappings.append({"from": pure, "to": None, "ok": False, "type": "md_link"})
            return full
        mappings.append({"from": pure, "to": new, "ok": True, "type": "md_link"})
        return full.replace(urlpart, f"{new}{tail}")

    text = re.sub(r"(?<!!)\[[^\]]*\]\(([^)]+)\)", _replace_mdlink, text)

    def _make_attr_replacer(map_type: str, upload_data: bool = False):
        # Shared replacer for <img>/<a>/<video>/<audio>/<source> attributes;
        # only <img> uploads inline data: URIs, the rest pass them through.
        def _replace(m: re.Match) -> str:
            src = m.group(1).strip()
            if src.startswith("data:"):
                if not upload_data:
                    return m.group(0)
                new = _upload_data_uri(src)
                if new:
                    mappings.append({"from": "data_uri", "to": new, "ok": True, "type": f"{map_type}_data"})
                    return m.group(0).replace(src, new)
                mappings.append({"from": "data_uri", "to": None, "ok": False, "type": f"{map_type}_data"})
                return m.group(0)
            if src.startswith("http://") or src.startswith("https://"):
                return m.group(0)
            p = _resolve_path(src)
            if not p or not p.exists():
                mappings.append({"from": src, "to": None, "ok": False, "type": map_type})
                return m.group(0)
            new = _upload(p)
            if not new:
                mappings.append({"from": src, "to": None, "ok": False, "type": map_type})
                return m.group(0)
            mappings.append({"from": src, "to": new, "ok": True, "type": map_type})
            return m.group(0).replace(src, new)
        return _replace

    for tag, attr, map_type, upload_data in (
        ("img", "src", "html_img", True),
        ("a", "href", "html_href", False),
        ("video", "src", "html_video", False),
        ("audio", "src", "html_audio", False),
        ("source", "src", "html_source", False),
    ):
        repl = _make_attr_replacer(map_type, upload_data)
        text = re.sub(rf"<{tag}[^>]+{attr}=\"([^\"]+)\"", repl, text)
        text = re.sub(rf"<{tag}[^>]+{attr}='([^']+)'", repl, text)

    return text, mappings

def _uplift_rel_path(rel: Path, md_dir: Path, root: Optional[Path], mappings: List[Dict[str, str]]) -> Path:
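    """Heuristically lift a rewritten file's relative output path one level
    up by dropping its immediate parent directory. The _has_asset_sibling
    and _mappings_indicate_local_assets helpers defined inside are currently
    unused."""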
    try:
        parts = list(rel.parts)
        if len(parts) < 2:
            return rel
        exts = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg", ".webp"}

        def _is_asset_dir(name: str) -> bool:
            n = name.strip().lower()
            return n in {"image", "images", "img", "imgs", "media", "assets", "pic", "pics", "picture", "pictures", "visio pic", "visio_pic", "visio", "图片", "图像"}

        def _has_asset_sibling() -> bool:
            try:
                for ch in md_dir.iterdir():
                    if ch.is_dir() and _is_asset_dir(ch.name):
                        for f in ch.rglob("*"):
                            if f.is_file() and f.suffix.lower() in exts:
                                return True
                for f in md_dir.iterdir():
                    if f.is_file() and f.suffix.lower() in exts:
                        return True
            except Exception:
                pass
            return False

        def _mappings_indicate_local_assets() -> bool:
            try:
                for m in mappings or []:
                    if isinstance(m.get("from"), str):
                        s = str(m.get("from") or "").strip()
                        if s and not (s.startswith("http://") or s.startswith("https://") or s.startswith("data:") or s.startswith("file://")):
                            return True
            except Exception:
                pass
            return False

        try:
            if len(parts) >= 2:
                new_parts = parts[:-2] + [parts[-1]]
                return Path("/".join(new_parts))
        except Exception:
            pass
        return rel
    except Exception:
        return rel

def _inject_image_urls_for_markers(text: str, urls: List[str]) -> str:
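    """Replace successive `<!-- image -->` markers with markdown image
    references to the given URLs, in document order; markers beyond
    len(urls) are left untouched."""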
    if not urls:
        return text
    out = []
    i = 0
    for line in text.splitlines():
        if "<!-- image -->" in line and i < len(urls):
            # The replacement literal was garbled in the source (an empty
            # f-string); a markdown image reference is the evident intent.
            line = line.replace("<!-- image -->", f"![image]({urls[i]})")
            i += 1
        out.append(line)
    return "\n".join(out)

def _extract_pdf_images(pdf_path: Path) -> List[Tuple[str, bytes]]:
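    """Extract embedded images from a PDF using PyMuPDF (fitz); returns
    (extension, bytes) pairs, or an empty list when fitz is unavailable."""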
    imgs: List[Tuple[str, bytes]] = []
    if fitz is None:
        return imgs
    try:
        doc = fitz.open(pdf_path)
        for page in doc:
            for xref in page.get_images(full=True):
                try:
                    info = doc.extract_image(xref[0])
                    ext = info.get("ext", "png")
                    data = info.get("image", b"")
                    if data:
                        imgs.append((ext, data))
                except Exception:
                    continue
        doc.close()
    except Exception:
        pass
    return imgs

def _bulk_upload_assets(root: Path, client: object, bucket: str, public_base: str, prefix: str) -> List[str]:
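    """Upload every image-like file under `root` to MinIO beneath an `abs/`
    key derived from its absolute path; returns the public object URLs."""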
    urls: List[str] = []
    exts = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg", ".webp", ".tif", ".tiff", ".ico", ".jfif", ".heic", ".heif", ".emf", ".wmf", ".eps", ".psd"}
    for f in root.rglob("*"):
        try:
            if not f.is_file():
                continue
            if f.suffix.lower() not in exts:
                continue
            data = f.read_bytes()
            mime = detect_mime(f.name, data)
            k = f.resolve().as_posix().lstrip("/").replace(":", "")
            obj = join_prefix(prefix, f"abs/{k}")
            bio = io.BytesIO(data)
            client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(data), content_type=mime)  # type: ignore
            urls.append(f"{public_base}/{bucket}/{obj}")
        except Exception:
            pass
    return urls

@app.post("/md/convert-folder")
|
|
|
|
|
|
async def md_convert_folder(folder_path: str = Form(...), prefix: Optional[str] = Form(None)):
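    """Walk an existing local directory, upload its image assets to MinIO,
    rewrite asset references in every .md file found, and store the rewritten
    markdown under a `rewritten/` prefix; returns a per-file report."""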
    p = Path(folder_path).expanduser().resolve()
    if not p.exists() or not p.is_dir():
        raise HTTPException(status_code=400, detail="folder_path must be an existing directory")
    client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG)
    if client is None or bucket is None or not public_base:
        raise HTTPException(status_code=400, detail="MinIO is not configured")
    use_prefix = (prefix or env_prefix or "").strip()
    processed: List[Dict[str, str]] = []
    try:
        _bulk_upload_assets(p, client, bucket, public_base, use_prefix)
    except Exception:
        pass
    for md_file in p.rglob("*.md"):
        rel_md = md_file.relative_to(p)
        rel_uplift_path = rel_md
        minio_url: Optional[str] = None
        minio_presigned_url: Optional[str] = None
        mappings: List[Dict[str, str]] = []
        try:
            content = md_file.read_text("utf-8", errors="ignore")
            new_text, mappings = _rewrite_md_assets_to_minio(content, md_file.parent, client, bucket, public_base, use_prefix, search_root=p)
            rel_uplift_path = _uplift_rel_path(rel_md, md_file.parent, p, mappings)
            # upload rewritten md to MinIO
            obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift_path.as_posix()}".lstrip("/")
            raw = new_text.encode("utf-8")
            bio = io.BytesIO(raw)
            client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type="text/markdown; charset=utf-8")  # type: ignore
            try:
                from urllib.parse import quote as _quote
                minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
            except Exception:
                minio_url = f"{public_base}/{bucket}/{obj}"
            minio_url_display = unquote(minio_url)
            try:
                exp = int(timedelta(hours=12).total_seconds())
                minio_presigned_url = presigned_read(client, bucket, obj, exp) if client is not None else None
            except Exception:
                minio_presigned_url = None
        except Exception as e:
            logging.error(str(e))
        okc = sum(1 for m in mappings if m.get("ok"))
        frc = sum(1 for m in mappings if not m.get("ok"))
        asset_urls = [m.get("to") for m in mappings if m.get("ok") and m.get("to")]
        processed.append({
            "source": rel_uplift_path.as_posix(),
            "output": None,
            "minio_url": minio_url,
            "minio_presigned_url": minio_presigned_url,
            "mappings": mappings,
            "asset_ok": okc,
            "asset_fail": frc,
            "asset_urls": asset_urls,
        })
    return {"ok": True, "count": len(processed), "files": processed}

@app.post("/md/upload-folder")
|
|
|
|
|
|
async def md_upload_folder(folder_files: List[UploadFile] = File(None), folder_paths: List[str] = Form(None), prefix: Optional[str] = Form(None)):
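    """Same as /md/convert-folder, but the folder arrives as parallel
    folder_files/folder_paths form fields and is staged in a temp directory
    that is removed afterwards."""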
    if not folder_files or not folder_paths or len(folder_files) != len(folder_paths):
        raise HTTPException(status_code=400, detail="folder_files and folder_paths are required and must match in length")
    client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG)
    if client is None or bucket is None or not public_base:
        raise HTTPException(status_code=400, detail="MinIO is not configured")
    use_prefix = (prefix or env_prefix or "").strip()
    staging = Path(tempfile.mkdtemp(prefix="folder_stage_"))
    try:
        for f, rel in zip(folder_files, folder_paths):
            rel_norm = rel.replace("\\", "/")
            dest = staging / rel_norm
            dest.parent.mkdir(parents=True, exist_ok=True)
            dest.write_bytes(await f.read())
        base = staging
        try:
            _bulk_upload_assets(base, client, bucket, public_base, use_prefix)
        except Exception:
            pass
        processed: List[Dict[str, str]] = []
        for md_file in base.rglob("*.md"):
            try:
                content = md_file.read_text("utf-8", errors="ignore")
                new_text, mappings = _rewrite_md_assets_to_minio(content, md_file.parent, client, bucket, public_base, use_prefix, search_root=base)
                rel_md = md_file.relative_to(base)
                rel_uplift = _uplift_rel_path(rel_md, md_file.parent, base, mappings)
                try:
                    obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
                    bio = io.BytesIO(new_text.encode("utf-8"))
                    client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(new_text.encode("utf-8")), content_type="text/markdown; charset=utf-8")  # type: ignore
                    try:
                        from urllib.parse import quote as _quote
                        minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
                    except Exception:
                        minio_url = f"{public_base}/{bucket}/{obj}"
                    minio_presigned_url = None
                    try:
                        exp = int(timedelta(hours=12).total_seconds())
                        minio_presigned_url = presigned_read(client, bucket, obj, exp) if client is not None else None
                    except Exception:
                        minio_presigned_url = None
                except Exception:
                    minio_url = None
                    minio_presigned_url = None
                okc = sum(1 for m in mappings if m.get("ok"))
                frc = sum(1 for m in mappings if not m.get("ok"))
                asset_urls = [m.get("to") for m in mappings if m.get("ok") and m.get("to")]
                processed.append({
                    "source": rel_uplift.as_posix(),
                    "output": None,
                    "minio_url": minio_url,
                    "minio_presigned_url": minio_presigned_url,
                    "mappings": mappings,
                    "asset_ok": okc,
                    "asset_fail": frc,
                    "asset_urls": asset_urls,
                })
            except Exception as e:
                logging.error(str(e))
        return {"ok": True, "count": len(processed), "files": processed}
    finally:
        try:
            shutil.rmtree(staging)
        except Exception:
            pass


logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

def _is_debug(request: Request) -> bool:
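    """True when debug output is requested via a `?debug=1` query parameter,
    an `X-Debug` request header, or the APP_DEBUG environment variable."""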
    try:
        q = request.query_params.get("debug")
        if q and str(q).lower() in ("1", "true", "yes", "on"):
            return True
    except Exception:
        pass
    h = request.headers.get("X-Debug")
    if h and str(h).lower() in ("1", "true", "yes", "on"):
        return True
    env = os.environ.get("APP_DEBUG")
    if env and str(env).lower() in ("1", "true", "yes", "on"):
        return True
    return False


@app.middleware("http")
async def logging_middleware(request: Request, call_next):
    start = time.time()
    try:
        response = await call_next(request)
        duration = int((time.time() - start) * 1000)
        logging.info(f"{request.method} {request.url.path} -> {response.status_code} {duration}ms")
        return response
    except Exception as exc:
        duration = int((time.time() - start) * 1000)
        tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
        logging.error(f"{request.method} {request.url.path} FAILED {duration}ms: {exc}\n{tb}")
        raise


@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
    logging.error(f"HTTP error on {request.method} {request.url.path}: {exc}\n{tb}")
    debug = _is_debug(request)
    body = {"error": "http_error", "detail": exc.detail}
    if debug:
        body["trace"] = tb
    return JSONResponse(status_code=exc.status_code, content=body)


@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
    logging.error(f"Unhandled error on {request.method} {request.url.path}: {exc}\n{tb}")
    debug = _is_debug(request)
    body = {"error": "internal_error", "detail": str(exc)}
    if debug:
        body["trace"] = tb
    return JSONResponse(status_code=500, content=body)

def _safe_http_url(u: str) -> str:
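    """Percent-encode the path/query/fragment and IDNA-encode the hostname so
    that urlopen accepts URLs containing non-ASCII characters; returns the
    input unchanged if anything goes wrong."""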
    try:
        parts = urlsplit(u)
        path = quote(parts.path, safe="/:%")
        query = quote(parts.query, safe="=&%")
        frag = quote(parts.fragment, safe="")
        netloc = parts.netloc
        try:
            userinfo = ''
            hostport = netloc
            if '@' in netloc:
                userinfo, hostport = netloc.split('@', 1)
                userinfo += '@'
            if hostport.startswith('['):
                netloc = userinfo + hostport
            else:
                port = ''
                host = hostport
                if ':' in hostport:
                    host, port = hostport.rsplit(':', 1)
                    if port and not port.isdigit():
                        host = hostport
                        port = ''
                try:
                    host_idna = host.encode('idna').decode('ascii')
                except Exception:
                    host_idna = host
                netloc = f"{userinfo}{host_idna}{(':' + port) if port else ''}"
        except Exception:
            pass
        return urlunsplit((parts.scheme, netloc, path, query, frag))
    except Exception:
        return u


# ──────────────────────────────────────────────────────────────────────────────
# API v2 endpoints with standard code/msg/data
# ──────────────────────────────────────────────────────────────────────────────

_converter_v2 = FormatConverter()


def _ok(data: dict, msg: str = "ok"):
    return JSONResponse({"code": 0, "msg": msg, "data": data})


def _err(msg: str, code: int = 500, detail: object = None):
    payload = {"code": code, "msg": msg, "data": None}
    if detail is not None:
        payload["detail"] = detail
    return JSONResponse(payload, status_code=200)


@app.post("/api/convert")
async def api_convert(
    file: Optional[UploadFile] = File(None),
    source_url: Optional[str] = Form(None),
    export: str = Form("markdown"),
    engine: Optional[str] = Form(None),
    save: Optional[bool] = Form(False),
    filename: Optional[str] = Form(None),
):
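    """Convert an uploaded file or a remote/local source_url to the requested
    export format, optionally persisting the result (and rewritten image
    assets) to MinIO. Exactly one of `file` and `source_url` must be given.

    A minimal usage sketch (host, port, and the sample URL are assumptions):

        curl -F source_url=https://example.com/doc.pdf \\
             -F export=markdown -F save=true \\
             http://localhost:8000/api/convert
    """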
    def _postprocess_markdown(content: str, artifacts_dir, mappings, trace, base: str, source_hint: Optional[str]):
        # Shared markdown post-processing for both input branches: rewrite
        # local asset references to MinIO URLs, bulk-upload extracted
        # artifacts, pull embedded images out of PDF sources, and fill
        # `<!-- image -->` markers. Leaves the inputs untouched when MinIO
        # is not configured or anything fails.
        try:
            client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
            if client is None or not bucket or not public_base:
                return content, mappings
            trace.append(f"minio bucket={bucket} public={public_base} prefix={(prefix or '').strip('/')}")
            base_dir = Path(artifacts_dir) if artifacts_dir else Path(tempfile.mkdtemp(prefix="md_assets_"))
            new_text, ms = _rewrite_md_assets_to_minio(content, base_dir, client, bucket, public_base, prefix, search_root=(Path(artifacts_dir) if artifacts_dir else None))
            urls: List[str] = []
            if artifacts_dir:
                try:
                    urls = _bulk_upload_assets(Path(artifacts_dir), client, bucket, public_base, prefix)
                except Exception:
                    urls = []
            trace.append(f"asset_urls={len(urls)}")
            try:
                src_path: Optional[Path] = None
                cleanup_src = False
                if source_hint:
                    if source_hint.startswith('file://') or Path(source_hint).exists():
                        src_path = Path(source_hint.replace('file://', ''))
                    elif source_hint.startswith('http://') or source_hint.startswith('https://'):
                        with tempfile.NamedTemporaryFile(delete=False, suffix=Path(infer_basename(source_hint, None)).suffix or '.bin') as _tmp:
                            try:
                                with urlopen(source_hint) as r:
                                    _tmp.write(r.read())
                            finally:
                                _tmp.flush()
                                _tmp.close()
                        src_path = Path(_tmp.name)
                        cleanup_src = True
                if src_path and src_path.exists() and str(src_path).lower().endswith('.pdf'):
                    pdf_imgs = _extract_pdf_images(src_path)
                    extra_urls: List[str] = []
                    for idx, (img_ext, data) in enumerate(pdf_imgs):
                        obj = join_prefix(prefix, f"converted/{base}_img_{idx}.{img_ext}")
                        bio = io.BytesIO(data)
                        mime = "image/png" if img_ext.lower() == "png" else "image/jpeg"
                        client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(data), content_type=mime)  # type: ignore
                        try:
                            from urllib.parse import quote as _quote
                            extra_urls.append(f"{public_base}/{bucket}/{_quote(obj, safe='/')}")
                        except Exception:
                            extra_urls.append(f"{public_base}/{bucket}/{obj}")
                    urls.extend(extra_urls)
                    trace.append(f"pdf_imgs_uploaded={len(extra_urls)}")
                if cleanup_src and src_path is not None:
                    try:
                        os.unlink(str(src_path))
                    except Exception:
                        pass
            except Exception:
                pass
            before = new_text.count("<!-- image -->")
            new_text = _inject_image_urls_for_markers(new_text, urls)
            after = new_text.count("<!-- image -->")
            trace.append(f"image_placeholders_before={before} after={after}")
            return new_text, ms
        except Exception:
            return content, mappings

    def _finish(enc, content, base, out_ext, ct, mappings, trace, artifacts_dir):
        # Shared response path: return the content inline when save is falsy,
        # otherwise persist the result to MinIO and return the object URLs.
        if not save:
            resp = _ok({"encoding": enc, "content": content, "name": f"{base}{out_ext}", "media_type": ct, "mappings": mappings, "trace": trace})
            if artifacts_dir:
                shutil.rmtree(artifacts_dir, ignore_errors=True)
            return resp
        client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
        if client is None or not bucket or not public_base:
            return _err("MinIO is not configured; cannot save")
        out_name = f"{base}{out_ext}"
        if export.lower() == "markdown" and not out_name.lower().endswith(".md"):
            out_name = f"{base}.md"
        obj = join_prefix(prefix, f"converted/{out_name}")
        raw = content.encode("utf-8")
        bio = io.BytesIO(raw)
        client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type=ct)  # type: ignore
        try:
            from urllib.parse import quote as _quote
            minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
        except Exception:
            minio_url = f"{public_base}/{bucket}/{obj}"
        minio_url_display = unquote(minio_url)
        try:
            trace.append(f"save out_name={out_name}")
            trace.append(f"save obj={obj}")
            trace.append(f"save minio_url={minio_url}")
        except Exception:
            pass
        exp = int(timedelta(hours=12).total_seconds())
        minio_presigned_url = presigned_read(client, bucket, obj, exp)
        resp = _ok({
            "encoding": enc,
            "name": out_name,
            "media_type": ct,
            "minio_url": minio_url,
            "minio_presigned_url": minio_presigned_url,
            "minio_url_display": minio_url_display,
            "mappings": mappings,
            "trace": trace,
        })
        if artifacts_dir:
            shutil.rmtree(artifacts_dir, ignore_errors=True)
        return resp

    try:
        if (file is None and not source_url) or (file is not None and source_url):
            return _err("Invalid arguments: provide exactly one of file or source_url")
        export = _normalize_export(export)
        engine = _normalize_engine(engine)
        if source_url:
            enc, content, artifacts_dir = await asyncio.to_thread(_converter_v2.convert, source_url, export=export, engine=engine)
            base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(source_url, None))
            out_ext = _export_ext(export)
            ct = _media_type(export)
            mappings: list[dict[str, str]] = []
            trace: List[str] = []
            trace.append(f"source_url={source_url}")
            trace.append(f"export={export}")
            if artifacts_dir:
                trace.append(f"artifacts_dir={artifacts_dir}")
            if export.lower() == "markdown":
                content, mappings = _postprocess_markdown(content, artifacts_dir, mappings, trace, base, source_url)
            return _finish(enc, content, base, out_ext, ct, mappings, trace, artifacts_dir)
        assert file is not None
        suffix = ""
        if file.filename and "." in file.filename:
            suffix = "." + file.filename.rsplit(".", 1)[-1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(await file.read())
            tmp_path = tmp.name
        try:
            enc, content, artifacts_dir = await asyncio.to_thread(_converter_v2.convert, tmp_path, export=export, engine=engine)
            base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(None, file.filename))
            out_ext = _export_ext(export)
            ct = _media_type(export)
            mappings = []
            trace = []
            trace.append(f"file={file.filename}")
            trace.append(f"tmp_path={tmp_path}")
            trace.append(f"export={export}")
            if artifacts_dir:
                trace.append(f"artifacts_dir={artifacts_dir}")
            if export.lower() == "markdown":
                content, mappings = _postprocess_markdown(content, artifacts_dir, mappings, trace, base, tmp_path)
            return _finish(enc, content, base, out_ext, ct, mappings, trace, artifacts_dir)
        finally:
            try:
                os.remove(tmp_path)
            except Exception:
                pass
    except HTTPException as e:
        return _err(str(e.detail), 400)
    except Exception as e:
        return _err(str(e))

@app.post("/api/pdf/convert")
|
|
|
|
|
|
async def api_pdf_convert(
|
|
|
|
|
|
file: Optional[UploadFile] = File(None),
|
|
|
|
|
|
file_path: Optional[str] = Form(None),
|
|
|
|
|
|
markdown_content: Optional[str] = Form(None),
|
|
|
|
|
|
toc: bool = Form(False),
|
|
|
|
|
|
header_text: Optional[str] = Form(None),
|
|
|
|
|
|
footer_text: Optional[str] = Form(None),
|
|
|
|
|
|
logo_url: Optional[str] = Form(None),
|
|
|
|
|
|
copyright_text: Optional[str] = Form(None),
|
|
|
|
|
|
filename_text: Optional[str] = Form(None),
|
|
|
|
|
|
cover_src: Optional[str] = Form(None),
|
|
|
|
|
|
product_name: Optional[str] = Form(None),
|
|
|
|
|
|
document_name: Optional[str] = Form(None),
|
|
|
|
|
|
product_version: Optional[str] = Form(None),
|
|
|
|
|
|
document_version: Optional[str] = Form(None),
|
|
|
|
|
|
css_name: Optional[str] = Form(None),
|
|
|
|
|
|
css_text: Optional[str] = Form(None),
|
|
|
|
|
|
download: bool = Form(True),
|
|
|
|
|
|
):
|
|
|
|
|
|
"""
|
|
|
|
|
|
Convert Word or Markdown to PDF
|
|
|
|
|
|
|
|
|
|
|
|
Supports three input methods:
|
|
|
|
|
|
1. Upload file (Word .doc/.docx or Markdown .md)
|
|
|
|
|
|
2. Specify file_path (local file path)
|
|
|
|
|
|
3. Provide markdown_content directly
|
|
|
|
|
|
|
|
|
|
|
|
Returns PDF file as download by default
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
pdf_bytes: bytes = b""
|
|
|
|
|
|
output_filename: str = "document.pdf"
|
|
|
|
|
|
|
|
|
|
|
|
# Determine input source
|
|
|
|
|
|
if file:
|
|
|
|
|
|
# Handle uploaded file
|
|
|
|
|
|
filename = file.filename or "upload"
|
|
|
|
|
|
suffix = Path(filename).suffix.lower()
|
|
|
|
|
|
|
|
|
|
|
|
# Save uploaded file to temp
|
|
|
|
|
|
tmp_path = Path(tempfile.mktemp(suffix=suffix))
|
|
|
|
|
|
try:
|
|
|
|
|
|
content = await file.read()
|
|
|
|
|
|
tmp_path.write_bytes(content)
|
|
|
|
|
|
|
|
|
|
|
|
if suffix in {".doc", ".docx"}:
|
|
|
|
|
|
# Convert Word to PDF
|
|
|
|
|
|
output_filename = f"{Path(filename).stem}.pdf"
|
|
|
|
|
|
pdf_bytes = await asyncio.to_thread(
|
|
|
|
|
|
word_to_pdf_bytes,
|
|
|
|
|
|
tmp_path,
|
|
|
|
|
|
toc=toc,
|
|
|
|
|
|
header_text=header_text,
|
|
|
|
|
|
footer_text=footer_text,
|
|
|
|
|
|
logo_url=logo_url,
|
|
|
|
|
|
copyright_text=copyright_text,
|
|
|
|
|
|
filename_text=filename_text or Path(filename).stem,
|
|
|
|
|
|
cover_src=cover_src,
|
|
|
|
|
|
product_name=product_name,
|
|
|
|
|
|
document_name=document_name,
|
|
|
|
|
|
product_version=product_version,
|
|
|
|
|
|
document_version=document_version,
|
|
|
|
|
|
)
|
|
|
|
|
|
elif suffix in {".md", ".markdown"}:
|
|
|
|
|
|
# Convert Markdown file to PDF
|
|
|
|
|
|
output_filename = f"{Path(filename).stem}.pdf"
|
|
|
|
|
|
pdf_bytes = await asyncio.to_thread(
|
|
|
|
|
|
markdown_file_to_pdf_bytes,
|
|
|
|
|
|
tmp_path,
|
|
|
|
|
|
toc=toc,
|
|
|
|
|
|
header_text=header_text,
|
|
|
|
|
|
footer_text=footer_text,
|
|
|
|
|
|
logo_url=logo_url,
|
|
|
|
|
|
copyright_text=copyright_text,
|
|
|
|
|
|
filename_text=filename_text or Path(filename).stem,
|
|
|
|
|
|
cover_src=cover_src,
|
|
|
|
|
|
product_name=product_name,
|
|
|
|
|
|
document_name=document_name,
|
|
|
|
|
|
product_version=product_version,
|
|
|
|
|
|
document_version=document_version,
|
|
|
|
|
|
css_name=css_name,
|
|
|
|
|
|
css_text=css_text,
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
return _err(f"不支持的文件格式: {suffix}。支持的格式: .doc, .docx, .md")
|
|
|
|
|
|
finally:
|
|
|
|
|
|
try:
|
|
|
|
|
|
tmp_path.unlink(missing_ok=True)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
elif file_path:
|
|
|
|
|
|
# Handle local file path
|
|
|
|
|
|
path = Path(file_path).expanduser()
|
|
|
|
|
|
if not path.exists():
|
|
|
|
|
|
return _err(f"文件不存在: {file_path}")
|
|
|
|
|
|
|
|
|
|
|
|
suffix = path.suffix.lower()
|
|
|
|
|
|
output_filename = f"{path.stem}.pdf"
|
|
|
|
|
|
|
|
|
|
|
|
if suffix in {".doc", ".docx"}:
|
|
|
|
|
|
pdf_bytes = await asyncio.to_thread(
|
|
|
|
|
|
word_to_pdf_bytes,
|
|
|
|
|
|
path,
|
|
|
|
|
|
toc=toc,
|
|
|
|
|
|
header_text=header_text,
|
|
|
|
|
|
footer_text=footer_text,
|
|
|
|
|
|
logo_url=logo_url,
|
|
|
|
|
|
copyright_text=copyright_text,
|
|
|
|
|
|
filename_text=filename_text or path.stem,
|
|
|
|
|
|
cover_src=cover_src,
|
|
|
|
|
|
product_name=product_name,
|
|
|
|
|
|
document_name=document_name,
|
|
|
|
|
|
product_version=product_version,
|
|
|
|
|
|
document_version=document_version,
|
|
|
|
|
|
)
|
|
|
|
|
|
elif suffix in {".md", ".markdown"}:
|
|
|
|
|
|
pdf_bytes = await asyncio.to_thread(
|
|
|
|
|
|
markdown_file_to_pdf_bytes,
|
|
|
|
|
|
path,
|
|
|
|
|
|
toc=toc,
|
|
|
|
|
|
header_text=header_text,
|
|
|
|
|
|
footer_text=footer_text,
|
|
|
|
|
|
logo_url=logo_url,
|
|
|
|
|
|
copyright_text=copyright_text,
|
|
|
|
|
|
filename_text=filename_text or path.stem,
|
|
|
|
|
|
cover_src=cover_src,
|
|
|
|
|
|
product_name=product_name,
|
|
|
|
|
|
document_name=document_name,
|
|
|
|
|
|
product_version=product_version,
|
|
|
|
|
|
document_version=document_version,
|
|
|
|
|
|
css_name=css_name,
|
|
|
|
|
|
css_text=css_text,
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
return _err(f"不支持的文件格式: {suffix}。支持的格式: .doc, .docx, .md")
|
|
|
|
|
|
|
|
|
|
|
|
elif markdown_content:
|
|
|
|
|
|
# Handle direct markdown content
|
|
|
|
|
|
output_filename = f"{filename_text or 'document'}.pdf"
|
|
|
|
|
|
pdf_bytes = await asyncio.to_thread(
|
|
|
|
|
|
markdown_to_pdf_bytes,
|
|
|
|
|
|
markdown_content,
|
|
|
|
|
|
toc=toc,
|
|
|
|
|
|
header_text=header_text,
|
|
|
|
|
|
footer_text=footer_text,
|
|
|
|
|
|
logo_url=logo_url,
|
|
|
|
|
|
copyright_text=copyright_text,
|
|
|
|
|
|
filename_text=filename_text,
|
|
|
|
|
|
cover_src=cover_src,
|
|
|
|
|
|
product_name=product_name,
|
|
|
|
|
|
document_name=document_name,
|
|
|
|
|
|
product_version=product_version,
|
|
|
|
|
|
document_version=document_version,
|
|
|
|
|
|
css_name=css_name,
|
|
|
|
|
|
css_text=css_text,
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
return _err("必须提供 file、file_path 或 markdown_content 中的一个")
|
|
|
|
|
|
|
|
|
|
|
|
if not pdf_bytes:
|
|
|
|
|
|
return _err("PDF 转换失败,未生成内容")
|
|
|
|
|
|
|
|
|
|
|
|
# Return PDF file
|
|
|
|
|
|
if download:
|
|
|
|
|
|
from fastapi.responses import StreamingResponse
|
2026-01-15 23:45:46 +08:00
|
|
|
|
import urllib.parse
|
|
|
|
|
|
|
|
|
|
|
|
# 处理中文文件名 - 使用 URL 编码确保只包含 ASCII 字符
|
|
|
|
|
|
# 先将中文文件名进行百分比编码
|
|
|
|
|
|
safe_filename = urllib.parse.quote(output_filename, safe='')
|
|
|
|
|
|
|
2026-01-13 22:56:22 +08:00
|
|
|
|
return StreamingResponse(
|
|
|
|
|
|
io.BytesIO(pdf_bytes),
|
|
|
|
|
|
media_type="application/pdf",
|
|
|
|
|
|
headers={
|
2026-01-15 23:45:46 +08:00
|
|
|
|
"Content-Disposition": f"attachment; filename={safe_filename}"
|
2026-01-13 22:56:22 +08:00
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Return as base64 in JSON
|
|
|
|
|
|
import base64
|
|
|
|
|
|
return _ok({
|
|
|
|
|
|
"pdf_base64": base64.b64encode(pdf_bytes).decode("ascii"),
|
|
|
|
|
|
"filename": output_filename,
|
|
|
|
|
|
"size": len(pdf_bytes)
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logging.exception("PDF conversion error")
|
|
|
|
|
|
return _err(f"PDF 转换失败: {str(e)}")
@app.post("/api/import/convert")
|
|
|
|
|
|
async def api_import_convert(json_file: UploadFile = File(None), json_text: Optional[str] = Form(None), path: Optional[str] = Form(None), versionId: Optional[int] = Form(1001), download: Optional[bool] = Form(False)):
|
|
|
|
|
|
try:
|
|
|
|
|
|
raw_text: Optional[str] = None
|
|
|
|
|
|
if json_file is not None:
|
|
|
|
|
|
raw = await json_file.read()
|
|
|
|
|
|
raw_text = raw.decode("utf-8", errors="ignore")
|
|
|
|
|
|
elif json_text:
|
|
|
|
|
|
raw_text = json_text
|
|
|
|
|
|
else:
|
|
|
|
|
|
use_path = (path or "import.json").strip()
|
|
|
|
|
|
p = Path(use_path).expanduser()
|
|
|
|
|
|
if not p.exists():
|
|
|
|
|
|
return _err(f"未找到文件: {use_path}")
|
|
|
|
|
|
raw_text = p.read_text("utf-8", errors="ignore")
|
|
|
|
|
|
import json as _json
|
|
|
|
|
|
data = _json.loads(raw_text or "{}")
|
|
|
|
|
|
files = data.get("files", [])
|
|
|
|
|
|
if not isinstance(files, list):
|
|
|
|
|
|
return _err("JSON结构不合法:缺少 files 数组")
|
|
|
|
|
|
imp = _build_import_tree(files, int(versionId or 1001))
|
|
|
|
|
|
if download:
|
|
|
|
|
|
from fastapi.responses import StreamingResponse
|
|
|
|
|
|
b = _json.dumps(imp, ensure_ascii=False, indent=2).encode("utf-8")
|
|
|
|
|
|
return StreamingResponse(io.BytesIO(b), media_type="application/json; charset=utf-8", headers={"Content-Disposition": "attachment; filename=import.json"})
|
|
|
|
|
|
return _ok({"import": imp})
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
return _err(str(e))
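
# Expected input for /api/import/convert (a sketch; paths and sizes are
# illustrative). Each entry mirrors what the archive endpoints below emit:
#   {
#     "files": [
#       {"source": "guide/intro.md", "object_name": "prefix/rewritten/guide/intro.md", "size": 1024},
#       {"source": "guide/setup.md", "object_name": "prefix/rewritten/guide/setup.md", "size": 2048}
#     ]
#   }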
@app.post("/api/upload-archive")
|
|
|
|
|
|
async def api_upload_archive(file: UploadFile = File(...), prefix: Optional[str] = Form(None)):
|
|
|
|
|
|
try:
|
|
|
|
|
|
client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client is None or bucket is None or not public_base:
|
|
|
|
|
|
return _err("MinIO 未配置")
|
|
|
|
|
|
use_prefix = (prefix or env_prefix or "").strip()
|
|
|
|
|
|
suffix = (file.filename or "").lower()
|
|
|
|
|
|
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
|
|
|
|
|
|
data = await file.read()
|
|
|
|
|
|
tmp.write(data)
|
|
|
|
|
|
tmp.flush(); tmp.close()
|
|
|
|
|
|
root = Path(tempfile.mkdtemp(prefix="extract_"))
|
|
|
|
|
|
try:
|
|
|
|
|
|
if suffix.endswith(".zip"):
|
|
|
|
|
|
import zipfile
|
|
|
|
|
|
with zipfile.ZipFile(tmp.name, "r") as zf:
|
|
|
|
|
|
_zip_extract_safely(zf, root)
|
|
|
|
|
|
elif ".tar" in suffix or suffix.endswith(".tgz") or suffix.endswith(".tar.gz") or suffix.endswith(".tar.bz2") or suffix.endswith(".tar.xz"):
|
|
|
|
|
|
import tarfile
|
|
|
|
|
|
with tarfile.open(tmp.name, "r:*") as tf:
|
|
|
|
|
|
_tar_extract_safely(tf, root)
|
|
|
|
|
|
else:
|
|
|
|
|
|
return _err("不支持的压缩格式")
|
|
|
|
|
|
try:
|
|
|
|
|
|
_bulk_upload_assets(root, client, bucket, public_base, use_prefix)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
files = []
|
|
|
|
|
|
# Process Markdown files as-is
|
|
|
|
|
|
for md in root.rglob("*.md"):
|
|
|
|
|
|
try:
|
|
|
|
|
|
text = md.read_text("utf-8", errors="ignore")
|
|
|
|
|
|
new_text, mappings = _rewrite_md_assets_to_minio(text, md.parent, client, bucket, public_base, use_prefix, search_root=root)
|
|
|
|
|
|
rel_md = md.relative_to(root)
|
|
|
|
|
|
rel_uplift = _uplift_rel_path(rel_md, md.parent, root, mappings)
|
|
|
|
|
|
obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
|
|
|
|
|
|
bio = io.BytesIO(new_text.encode("utf-8"))
|
|
|
|
|
|
client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(new_text.encode("utf-8")), content_type="text/markdown; charset=utf-8") # type: ignore
|
|
|
|
|
|
try:
|
|
|
|
|
|
url = f"{public_base}/{bucket}/{quote(obj, safe='/')}"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
url = f"{public_base}/{bucket}/{obj}"
|
|
|
|
|
|
url_display = unquote(url)
|
|
|
|
|
|
url_display = unquote(url)
|
|
|
|
|
|
exp = int(timedelta(hours=12).total_seconds())
|
|
|
|
|
|
ps = presigned_read(client, bucket, obj, exp) if client is not None else None
|
|
|
|
|
|
raw = new_text.encode("utf-8")
|
|
|
|
|
|
files.append({
|
|
|
|
|
|
"source": rel_uplift.as_posix(),
|
|
|
|
|
|
"minio_url": url,
|
|
|
|
|
|
"minio_presigned_url": ps,
|
|
|
|
|
|
"minio_url_display": url_display,
|
|
|
|
|
|
"mappings": mappings,
|
|
|
|
|
|
"object_name": obj,
|
|
|
|
|
|
"size": len(raw),
|
|
|
|
|
|
})
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
files.append({"source": (md.relative_to(root).as_posix()), "minio_url": None, "minio_presigned_url": None, "mappings": [], "object_name": None, "size": 0})
|
|
|
|
|
|
|
|
|
|
|
|
# Convert HTML files to Markdown and process similarly
|
|
|
|
|
|
for html in [p for p in root.rglob("*") if p.is_file() and p.suffix.lower() in {".html", ".htm"}]:
|
|
|
|
|
|
try:
|
|
|
|
|
|
# Skip if a sibling Markdown already exists for the same base name
|
|
|
|
|
|
rel_html = html.relative_to(root)
|
|
|
|
|
|
md_target_rel = rel_html.with_suffix(".md")
|
|
|
|
|
|
md_sibling = (root / md_target_rel).exists()
|
|
|
|
|
|
if md_sibling:
|
|
|
|
|
|
continue
|
|
|
|
|
|
html_src = html.read_text("utf-8", errors="ignore")
|
|
|
|
|
|
html_rew, mappings = _rewrite_md_assets_to_minio(html_src, html.parent, client, bucket, public_base, use_prefix, search_root=root)
|
|
|
|
|
|
tmpd = Path(tempfile.mkdtemp(prefix="rew_html_"))
|
|
|
|
|
|
tmpf = tmpd / html.name
|
|
|
|
|
|
tmpf.write_text(html_rew, "utf-8")
|
|
|
|
|
|
enc, md_text, _art = _converter_v2.convert(str(tmpf), export="markdown")
|
|
|
|
|
|
md_text2, mappings2 = _rewrite_md_assets_to_minio(md_text, html.parent, client, bucket, public_base, use_prefix, search_root=root)
|
|
|
|
|
|
mappings = (mappings or []) + (mappings2 or [])
|
|
|
|
|
|
new_text = md_text2
|
|
|
|
|
|
rel_uplift = _uplift_rel_path(md_target_rel, html.parent, root, mappings)
|
|
|
|
|
|
obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
|
|
|
|
|
|
raw = new_text.encode(enc or "utf-8")
|
|
|
|
|
|
bio = io.BytesIO(raw)
|
|
|
|
|
|
client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type="text/markdown; charset=utf-8") # type: ignore
|
|
|
|
|
|
try:
|
|
|
|
|
|
url = f"{public_base}/{bucket}/{quote(obj, safe='/')}"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
url = f"{public_base}/{bucket}/{obj}"
|
|
|
|
|
|
exp = int(timedelta(hours=12).total_seconds())
|
|
|
|
|
|
ps = presigned_read(client, bucket, obj, exp) if client is not None else None
|
|
|
|
|
|
files.append({
|
|
|
|
|
|
"source": rel_uplift.as_posix(),
|
|
|
|
|
|
"minio_url": url,
|
|
|
|
|
|
"minio_presigned_url": ps,
|
|
|
|
|
|
"minio_url_display": url_display,
|
|
|
|
|
|
"mappings": mappings,
|
|
|
|
|
|
"object_name": obj,
|
|
|
|
|
|
"size": len(raw),
|
|
|
|
|
|
})
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
files.append({"source": (html.relative_to(root).as_posix()), "minio_url": None, "minio_presigned_url": None, "mappings": [], "object_name": None, "size": 0})
|
|
|
|
|
|
finally:
|
|
|
|
|
|
try:
|
|
|
|
|
|
shutil.rmtree(tmpd, ignore_errors=True)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
imp = _build_import_tree(files, int(1001))
|
|
|
|
|
|
return _ok({"count": len(files), "files": files, "import": imp})
|
|
|
|
|
|
finally:
|
|
|
|
|
|
try:
|
|
|
|
|
|
os.unlink(tmp.name)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
try:
|
|
|
|
|
|
shutil.rmtree(root)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
return _err(str(e))
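
# One-shot variant of the staged flow defined below: a single multipart POST,
# e.g. (host and names are placeholders)
#   curl -F "file=@docs.zip" -F "prefix=docs" http://localhost:8000/api/upload-archive
# The response carries the rewritten file list plus the same import tree that
# /api/archive/process produces.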


STAGED_ARCHIVES: Dict[str, Dict[str, object]] = {}


def _build_import_tree(processed: List[Dict[str, object]], version_id: int) -> Dict[str, object]:
    """Fold a flat list of processed files into a nested FOLDER/FILE import tree."""
    def ensure_folder(children: list, name: str) -> Dict[str, object]:
        for n in children:
            if isinstance(n, dict) and n.get("name") == name and n.get("type") == "FOLDER":
                return n
        node = {"name": name, "type": "FOLDER", "children": [], "sortOrder": 100}
        children.append(node)
        return node

    tree: List[Dict[str, object]] = []
    for idx, f in enumerate(processed):
        src = str(f.get("source") or "")
        obj = str(f.get("object_name") or "")
        size = int(f.get("size") or 0)
        parts = [p for p in src.split("/") if p]
        if not parts:
            continue
        cur = tree
        for d in parts[:-1]:
            folder = ensure_folder(cur, d)
            cur = folder.setdefault("children", [])  # type: ignore
        fname = parts[-1]
        base = fname.rsplit(".", 1)[0]
        file_node = {"name": base, "type": "FILE", "sortOrder": 100 + idx, "files": [{"languageId": 1, "objectName": obj, "fileName": fname, "fileSize": size}]}
        cur.append(file_node)  # type: ignore
    return {"versionId": version_id, "tree": tree}
@app.post("/api/archive/stage")
|
|
|
|
|
|
async def api_archive_stage(file: UploadFile = File(...), prefix: Optional[str] = Form(None)):
|
|
|
|
|
|
try:
|
|
|
|
|
|
suffix = (file.filename or "").lower()
|
|
|
|
|
|
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
|
|
|
|
|
|
data = await file.read()
|
|
|
|
|
|
tmp.write(data)
|
|
|
|
|
|
tmp.flush(); tmp.close()
|
|
|
|
|
|
sid = uuid.uuid4().hex
|
|
|
|
|
|
STAGED_ARCHIVES[sid] = {"path": tmp.name, "prefix": (prefix or "")}
|
|
|
|
|
|
return _ok({"id": sid, "name": file.filename, "size": len(data)})
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
return _err(str(e))
@app.post("/api/archive/process")
|
|
|
|
|
|
async def api_archive_process(id: str = Form(...), prefix: Optional[str] = Form(None), versionId: Optional[int] = Form(1001)):
|
|
|
|
|
|
try:
|
|
|
|
|
|
st = STAGED_ARCHIVES.get(id)
|
|
|
|
|
|
if not st:
|
|
|
|
|
|
return _err("未找到已上传的压缩包")
|
|
|
|
|
|
tmp_path = Path(str(st.get("path")))
|
|
|
|
|
|
use_prefix_param = (prefix or str(st.get("prefix") or "")).strip()
|
|
|
|
|
|
client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client is None or bucket is None or not public_base:
|
|
|
|
|
|
return _err("MinIO 未配置")
|
|
|
|
|
|
use_prefix = (use_prefix_param or env_prefix or "").strip()
|
|
|
|
|
|
root = Path(tempfile.mkdtemp(prefix="extract_"))
|
|
|
|
|
|
try:
|
|
|
|
|
|
sfx = tmp_path.name.lower()
|
|
|
|
|
|
if sfx.endswith(".zip"):
|
|
|
|
|
|
import zipfile
|
|
|
|
|
|
with zipfile.ZipFile(str(tmp_path), "r") as zf:
|
|
|
|
|
|
_zip_extract_safely(zf, root)
|
|
|
|
|
|
elif ".tar" in sfx or sfx.endswith(".tgz") or sfx.endswith(".tar.gz") or sfx.endswith(".tar.bz2") or sfx.endswith(".tar.xz"):
|
|
|
|
|
|
import tarfile
|
|
|
|
|
|
with tarfile.open(str(tmp_path), "r:*") as tf:
|
|
|
|
|
|
_tar_extract_safely(tf, root)
|
|
|
|
|
|
else:
|
|
|
|
|
|
return _err("不支持的压缩格式")
|
|
|
|
|
|
try:
|
|
|
|
|
|
_bulk_upload_assets(root, client, bucket, public_base, use_prefix)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
processed: List[Dict[str, object]] = []
|
|
|
|
|
|
# Process existing Markdown files
|
|
|
|
|
|
for md in root.rglob("*.md"):
|
|
|
|
|
|
try:
|
|
|
|
|
|
text = md.read_text("utf-8", errors="ignore")
|
|
|
|
|
|
new_text, mappings = _rewrite_md_assets_to_minio(text, md.parent, client, bucket, public_base, use_prefix, search_root=root)
|
|
|
|
|
|
rel_md = md.relative_to(root)
|
|
|
|
|
|
rel_uplift = _uplift_rel_path(rel_md, md.parent, root, mappings)
|
|
|
|
|
|
obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
|
|
|
|
|
|
raw = new_text.encode("utf-8")
|
|
|
|
|
|
bio = io.BytesIO(raw)
|
|
|
|
|
|
client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type="text/markdown; charset=utf-8") # type: ignore
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import quote as _quote
|
|
|
|
|
|
url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
url = f"{public_base}/{bucket}/{obj}"
|
|
|
|
|
|
exp = int(timedelta(hours=12).total_seconds())
|
|
|
|
|
|
ps = presigned_read(client, bucket, obj, exp) if client is not None else None
|
|
|
|
|
|
processed.append({"source": rel_uplift.as_posix(), "minio_url": url, "minio_presigned_url": ps, "mappings": mappings, "object_name": obj, "size": len(raw)})
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
processed.append({"source": (md.relative_to(root).as_posix()), "minio_url": None, "minio_presigned_url": None, "mappings": [], "object_name": None, "size": 0})
|
|
|
|
|
|
|
|
|
|
|
|
# Convert HTML files to Markdown and process
|
|
|
|
|
|
for html in [p for p in root.rglob("*") if p.is_file() and p.suffix.lower() in {".html", ".htm"}]:
|
|
|
|
|
|
try:
|
|
|
|
|
|
rel_html = html.relative_to(root)
|
|
|
|
|
|
md_target_rel = rel_html.with_suffix(".md")
|
|
|
|
|
|
md_sibling = (root / md_target_rel).exists()
|
|
|
|
|
|
if md_sibling:
|
|
|
|
|
|
continue
|
|
|
|
|
|
html_src = html.read_text("utf-8", errors="ignore")
|
|
|
|
|
|
html_rew, mappings = _rewrite_md_assets_to_minio(html_src, html.parent, client, bucket, public_base, use_prefix, search_root=root)
|
|
|
|
|
|
tmpd = Path(tempfile.mkdtemp(prefix="rew_html_"))
|
|
|
|
|
|
tmpf = tmpd / html.name
|
|
|
|
|
|
tmpf.write_text(html_rew, "utf-8")
|
|
|
|
|
|
enc, md_text, _art = _converter_v2.convert(str(tmpf), export="markdown")
|
|
|
|
|
|
md_text2, mappings2 = _rewrite_md_assets_to_minio(md_text, html.parent, client, bucket, public_base, use_prefix, search_root=root)
|
|
|
|
|
|
mappings = (mappings or []) + (mappings2 or [])
|
|
|
|
|
|
new_text = md_text2
|
|
|
|
|
|
rel_uplift = _uplift_rel_path(md_target_rel, html.parent, root, mappings)
|
|
|
|
|
|
obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
|
|
|
|
|
|
raw = new_text.encode(enc or "utf-8")
|
|
|
|
|
|
bio = io.BytesIO(raw)
|
|
|
|
|
|
client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type="text/markdown; charset=utf-8") # type: ignore
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import quote as _quote
|
|
|
|
|
|
url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
url = f"{public_base}/{bucket}/{obj}"
|
|
|
|
|
|
exp = int(timedelta(hours=12).total_seconds())
|
|
|
|
|
|
ps = presigned_read(client, bucket, obj, exp) if client is not None else None
|
|
|
|
|
|
processed.append({"source": rel_uplift.as_posix(), "minio_url": url, "minio_presigned_url": ps, "mappings": mappings, "object_name": obj, "size": len(raw)})
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
processed.append({"source": (html.relative_to(root).as_posix()), "minio_url": None, "minio_presigned_url": None, "mappings": [], "object_name": None, "size": 0})
|
|
|
|
|
|
finally:
|
|
|
|
|
|
try:
|
|
|
|
|
|
shutil.rmtree(tmpd, ignore_errors=True)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
imp = _build_import_tree(processed, int(versionId or 1001))
|
|
|
|
|
|
return _ok({"count": len(processed), "files": processed, "import": imp})
|
|
|
|
|
|
finally:
|
|
|
|
|
|
try:
|
|
|
|
|
|
os.unlink(str(tmp_path))
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
try:
|
|
|
|
|
|
shutil.rmtree(root)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
try:
|
|
|
|
|
|
STAGED_ARCHIVES.pop(id, None)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
return _err(str(e))
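
# Usage sketch for the two-step archive flow (illustration only; never called
# by the app). The host and the `requests` dependency are assumptions.
def _example_archive_stage_then_process():  # pragma: no cover
    import requests
    base = "http://localhost:8000"
    # 1) stage the archive; nothing is extracted yet, only a staging id comes back
    staged = requests.post(f"{base}/api/archive/stage",
                           files={"file": open("docs.zip", "rb")}).json()
    sid = staged.get("id") or staged.get("data", {}).get("id")  # envelope shape of _ok() assumed
    # 2) process it: extract, rewrite asset links to MinIO, build the import tree
    result = requests.post(f"{base}/api/archive/process",
                           data={"id": sid, "versionId": 1001}).json()
    print(result)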
@app.post("/api/upload-list")
|
|
|
|
|
|
async def api_upload_list(list_file: UploadFile = File(...), prefix: Optional[str] = Form(None), versionId: Optional[int] = Form(1001)):
|
|
|
|
|
|
try:
|
|
|
|
|
|
client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client is None or bucket is None or not public_base:
|
|
|
|
|
|
return _err("MinIO 未配置")
|
|
|
|
|
|
use_prefix = (prefix or env_prefix or "").strip()
|
|
|
|
|
|
raw = await list_file.read()
|
|
|
|
|
|
text = raw.decode("utf-8", errors="ignore")
|
|
|
|
|
|
lines = [l.strip() for l in text.splitlines()]
|
|
|
|
|
|
paths: List[str] = [l for l in lines if l and not l.startswith("#")]
|
|
|
|
|
|
locals: List[Path] = []
|
|
|
|
|
|
for p in paths:
|
|
|
|
|
|
if p.startswith("http://") or p.startswith("https://"):
|
|
|
|
|
|
pass
|
|
|
|
|
|
else:
|
|
|
|
|
|
lp = Path(p).expanduser()
|
|
|
|
|
|
if lp.exists() and lp.is_file():
|
|
|
|
|
|
locals.append(lp.resolve())
|
|
|
|
|
|
base_root = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
if locals:
|
|
|
|
|
|
base_root = Path(os.path.commonpath([str(x) for x in locals]))
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
base_root = None
|
|
|
|
|
|
processed: List[Dict[str, object]] = []
|
|
|
|
|
|
for p in locals:
|
|
|
|
|
|
try:
|
|
|
|
|
|
content = p.read_text("utf-8", errors="ignore")
|
|
|
|
|
|
new_text, mappings = _rewrite_md_assets_to_minio(content, p.parent, client, bucket, public_base, use_prefix, search_root=base_root)
|
|
|
|
|
|
rel0 = p.relative_to(base_root) if base_root else Path(p.name)
|
|
|
|
|
|
rel_uplift = _uplift_rel_path(rel0, p.parent, base_root, mappings)
|
|
|
|
|
|
obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
|
|
|
|
|
|
raw_md = new_text.encode("utf-8")
|
|
|
|
|
|
bio = io.BytesIO(raw_md)
|
|
|
|
|
|
client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw_md), content_type="text/markdown; charset=utf-8") # type: ignore
|
|
|
|
|
|
try:
|
|
|
|
|
|
from urllib.parse import quote as _quote
|
|
|
|
|
|
url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
url = f"{public_base}/{bucket}/{obj}"
|
|
|
|
|
|
exp = int(timedelta(hours=12).total_seconds())
|
|
|
|
|
|
ps = presigned_read(client, bucket, obj, exp) if client is not None else None
|
|
|
|
|
|
processed.append({"source": rel_uplift.as_posix(), "minio_url": url, "minio_presigned_url": ps, "mappings": mappings, "object_name": obj, "size": len(raw_md)})
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
processed.append({"source": p.name, "minio_url": None, "minio_presigned_url": None, "mappings": [], "object_name": None, "size": 0})
|
|
|
|
|
|
imp = _build_import_tree(processed, int(versionId or 1001))
|
|
|
|
|
|
return _ok({"count": len(processed), "files": processed, "import": imp})
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
return _err(str(e))
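
# Example list file for /api/upload-list (one path per line; paths are
# illustrative). Blank lines and lines starting with "#" are skipped, and
# http(s) URLs are ignored — only existing local files are uploaded:
#   # docs to publish
#   ~/docs/guide/intro.md
#   ~/docs/guide/setup.md
#   https://example.com/remote.md    (skipped)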
@app.get("/config/minio/policy")
|
|
|
|
|
|
async def get_minio_policy(bucket: Optional[str] = None):
|
|
|
|
|
|
client, cfg_bucket, _, _ = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client is None:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="MinIO 未配置")
|
|
|
|
|
|
bkt = (bucket or cfg_bucket or "").strip()
|
|
|
|
|
|
if not bkt:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="bucket 不能为空")
|
|
|
|
|
|
try:
|
|
|
|
|
|
pol = client.get_bucket_policy(bucket_name=bkt) # type: ignore
|
|
|
|
|
|
try:
|
|
|
|
|
|
import json as _json
|
|
|
|
|
|
data = _json.loads(pol)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
data = {"raw": pol}
|
|
|
|
|
|
return {"ok": True, "bucket": bkt, "policy": data}
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
try:
|
|
|
|
|
|
try:
|
|
|
|
|
|
region = client._get_region(bkt) # type: ignore
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
region = "us-east-1"
|
|
|
|
|
|
resp = client._url_open(method="GET", region=region, bucket_name=bkt, query_params={"policy": ""}) # type: ignore
|
|
|
|
|
|
raw = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
raw = getattr(resp, "data", None)
|
|
|
|
|
|
if raw is not None and hasattr(raw, "decode"):
|
|
|
|
|
|
raw = raw.decode("utf-8")
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
raw = None
|
|
|
|
|
|
if raw is None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
raw = resp.read().decode("utf-8") # type: ignore
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
raw = ""
|
|
|
|
|
|
try:
|
|
|
|
|
|
import json as _json
|
|
|
|
|
|
data = _json.loads(raw)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
data = {"raw": raw}
|
|
|
|
|
|
return {"ok": True, "bucket": bkt, "policy": data}
|
|
|
|
|
|
except Exception as e2:
|
|
|
|
|
|
return {"ok": False, "bucket": bkt, "error": str(e2)}
@app.post("/config/minio/apply_public_read")
|
|
|
|
|
|
async def apply_public_read(bucket: Optional[str] = Form(None), enable: Optional[str] = Form("true")):
|
|
|
|
|
|
client, cfg_bucket, _, _ = minio_current(RUNTIME_CONFIG)
|
|
|
|
|
|
if client is None:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="MinIO 未配置")
|
|
|
|
|
|
bkt = (bucket or cfg_bucket or "").strip()
|
|
|
|
|
|
if not bkt:
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="bucket 不能为空")
|
|
|
|
|
|
try:
|
|
|
|
|
|
import json as _json
|
|
|
|
|
|
if str(enable or "true").lower() in {"1","true","yes","on"}:
|
|
|
|
|
|
policy = {
|
|
|
|
|
|
"Version": "2012-10-17",
|
|
|
|
|
|
"Statement": [
|
|
|
|
|
|
{"Effect": "Allow", "Principal": "*", "Action": ["s3:GetBucketLocation", "s3:ListBucket"], "Resource": [f"arn:aws:s3:::{bkt}"]},
|
|
|
|
|
|
{"Effect": "Allow", "Principal": "*", "Action": ["s3:GetObject"], "Resource": [f"arn:aws:s3:::{bkt}/*"]},
|
|
|
|
|
|
],
|
|
|
|
|
|
}
|
|
|
|
|
|
try:
|
|
|
|
|
|
client.set_bucket_policy(bucket_name=bkt, policy=_json.dumps(policy)) # type: ignore
|
|
|
|
|
|
return {"ok": True, "bucket": bkt, "applied": True}
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
try:
|
|
|
|
|
|
try:
|
|
|
|
|
|
region = client._get_region(bkt) # type: ignore
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
region = "us-east-1"
|
|
|
|
|
|
raw = _json.dumps(policy).encode("utf-8")
|
|
|
|
|
|
client._url_open(method="PUT", region=region, bucket_name=bkt, query_params={"policy": ""}, body=raw) # type: ignore
|
|
|
|
|
|
return {"ok": True, "bucket": bkt, "applied": True}
|
|
|
|
|
|
except Exception as e2:
|
|
|
|
|
|
return {"ok": False, "bucket": bkt, "error": str(e2)}
|
|
|
|
|
|
try:
|
|
|
|
|
|
client.delete_bucket_policy(bkt) # type: ignore
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return {"ok": True, "bucket": bkt, "applied": False}
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
return {"ok": False, "bucket": bkt, "error": str(e)}