from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request, Query
from fastapi.responses import Response, HTMLResponse, JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pathlib import Path
import tempfile
import os
import asyncio
from typing import Optional, List, Dict, Tuple
from datetime import timedelta
import mimetypes
# NOTE: urllib's Request below shadows fastapi.Request imported above; the
# HEAD probes in the time-check endpoints rely on the urllib one.
from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError
from urllib.parse import urlsplit, urlunsplit, quote, unquote
import logging
import traceback
import time
import re
import io
import shutil
import uuid
import subprocess
import sys
import json

try:
    from minio import Minio  # type: ignore
    import urllib3  # type: ignore
except Exception:
    Minio = None
    urllib3 = None  # type: ignore

from pydantic import BaseModel

class ConvertResponse(BaseModel):
    minio_url: Optional[str]
    minio_presigned_url: Optional[str]
    name: str
    media_type: str


class MinioPresignResponse(BaseModel):
    bucket: str
    object: str
    minio_url: Optional[str]
    minio_presigned_url: Optional[str]
    expires: int

try:
    import fitz  # type: ignore
except Exception:
    fitz = None  # type: ignore

from app.services.docling_adapter import (
    convert_source,
    md_to_docx_bytes,
    md_to_pdf_bytes_with_renderer,
    infer_basename,
    sanitize_filename,
    load_linkmap,
    save_linkmap,
)
from app.services.unified_converter import FormatConverter
from app.services.minio_utils import minio_current, join_prefix, presigned_read

"""
|
||
@api Server Application
|
||
@description FastAPI server providing document conversion endpoints and MinIO integration
|
||
"""
|
||
|
||
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

try:
    _ui_dir = Path(__file__).resolve().parents[2] / "frontend" / "dist"
    if _ui_dir.exists():
        app.mount("/ui", StaticFiles(directory=str(_ui_dir), html=True), name="ui")
        try:
            assets_dir = _ui_dir / "assets"
            if assets_dir.exists():
                app.mount("/assets", StaticFiles(directory=str(assets_dir)), name="assets")
        except Exception:
            pass
        try:
            svg_path = _ui_dir / "vite.svg"
            if svg_path.exists():
                @app.get("/vite.svg")
                def _vite_svg():
                    return FileResponse(str(svg_path), media_type="image/svg+xml")
        except Exception:
            pass
except Exception:
    pass


@app.get("/health")
|
||
def health():
|
||
"""
|
||
@function health
|
||
@description Health check endpoint
|
||
@return {"status": "ok"}
|
||
"""
|
||
return {"status": "ok"}
|
||
|
||
@app.post("/convert")
|
||
async def convert(
|
||
file: Optional[UploadFile] = File(None),
|
||
source_url: Optional[str] = Form(None),
|
||
export: str = Form("markdown"),
|
||
save: Optional[bool] = Form(False),
|
||
filename: Optional[str] = Form(None),
|
||
):
|
||
"""
|
||
@function convert
|
||
@description Convert various document formats to Markdown/HTML/JSON
|
||
@param file Uploaded file (optional)
|
||
@param source_url URL of the source document (optional)
|
||
@param export Target export format (default: markdown)
|
||
@param save Whether to save to MinIO (default: False)
|
||
@param filename Custom filename for the output
|
||
@return JSON response with conversion result or MinIO URL
|
||
"""
|
||
if (file is None and not source_url) or (file is not None and source_url):
|
||
raise HTTPException(status_code=400, detail="provide exactly one of file or source_url")
|
||
export = _normalize_export(export)
|
||
if source_url:
|
||
enc, content, artifacts_dir = await asyncio.to_thread(_converter_v2.convert, source_url, export=export)
|
||
base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(source_url, None))
|
||
out_ext = _export_ext(export)
|
||
ct = _media_type(export)
|
||
if export.lower() == "markdown":
|
||
try:
|
||
client_rw, bucket_rw, public_rw, prefix_rw = minio_current(RUNTIME_CONFIG)
|
||
if client_rw is not None and bucket_rw and public_rw:
|
||
base_dir = Path(artifacts_dir) if artifacts_dir else Path(tempfile.mkdtemp(prefix="md_assets_"))
|
||
new_text, _ms = _rewrite_md_assets_to_minio(
|
||
content,
|
||
base_dir,
|
||
client_rw,
|
||
bucket_rw,
|
||
public_rw,
|
||
prefix_rw,
|
||
search_root=(Path(artifacts_dir) if artifacts_dir else None),
|
||
)
|
||
content = new_text
|
||
try:
|
||
if artifacts_dir:
|
||
_bulk_upload_assets(Path(artifacts_dir), client_rw, bucket_rw, public_rw, prefix_rw)
|
||
except Exception:
|
||
pass
|
||
except Exception:
|
||
pass
|
||
client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
|
||
if client is None or not bucket or not public_base:
|
||
raise HTTPException(status_code=400, detail="MinIO is not configured for save")
|
||
rc_store_final = str(RUNTIME_CONFIG.get("minio", {}).get("store_final") or "true").lower() in {"1","true","yes","on"}
|
||
if not rc_store_final:
|
||
raise HTTPException(status_code=400, detail="Saving to MinIO is disabled by configuration")
|
||
out_name = f"{base}{out_ext}"
|
||
obj = join_prefix(prefix, f"converted/{out_name}")
|
||
raw = content.encode(enc or "utf-8")
|
||
bio = io.BytesIO(raw)
|
||
client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type=ct) # type: ignore
|
||
try:
|
||
from urllib.parse import quote as _quote
|
||
minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
|
||
except Exception:
|
||
minio_url = f"{public_base}/{bucket}/{obj}"
|
||
exp = int(timedelta(hours=12).total_seconds())
|
||
minio_presigned_url = presigned_read(client, bucket, obj, exp)
|
||
resp = JSONResponse({
|
||
"minio_url": minio_url,
|
||
"minio_presigned_url": minio_presigned_url,
|
||
"name": out_name,
|
||
"export": export,
|
||
"media_type": ct
|
||
})
|
||
try:
|
||
if artifacts_dir:
|
||
shutil.rmtree(artifacts_dir, ignore_errors=True)
|
||
except Exception:
|
||
pass
|
||
return resp
|
||
assert file is not None
|
||
suffix = ""
|
||
if file.filename and "." in file.filename:
|
||
suffix = "." + file.filename.rsplit(".", 1)[-1]
|
||
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
|
||
tmp.write(await file.read())
|
||
tmp_path = tmp.name
|
||
try:
|
||
enc, content, artifacts_dir = await asyncio.to_thread(_converter_v2.convert, tmp_path, export=export)
|
||
base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(None, file.filename))
|
||
out_ext = _export_ext(export)
|
||
ct = _media_type(export)
|
||
if export.lower() == "markdown":
|
||
try:
|
||
client_rw, bucket_rw, public_rw, prefix_rw = minio_current(RUNTIME_CONFIG)
|
||
if client_rw is not None and bucket_rw and public_rw:
|
||
base_dir = Path(artifacts_dir) if artifacts_dir else Path(tempfile.mkdtemp(prefix="md_assets_"))
|
||
new_text, _ms = _rewrite_md_assets_to_minio(
|
||
content,
|
||
base_dir,
|
||
client_rw,
|
||
bucket_rw,
|
||
public_rw,
|
||
prefix_rw,
|
||
search_root=(Path(artifacts_dir) if artifacts_dir else None),
|
||
)
|
||
content = new_text
|
||
try:
|
||
if artifacts_dir:
|
||
_bulk_upload_assets(Path(artifacts_dir), client_rw, bucket_rw, public_rw, prefix_rw)
|
||
except Exception:
|
||
pass
|
||
except Exception:
|
||
pass
|
||
client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
|
||
if client is None or not bucket or not public_base:
|
||
raise HTTPException(status_code=400, detail="MinIO is not configured for save")
|
||
rc_store_final = str(RUNTIME_CONFIG.get("minio", {}).get("store_final") or "true").lower() in {"1","true","yes","on"}
|
||
if not rc_store_final:
|
||
raise HTTPException(status_code=400, detail="Saving to MinIO is disabled by configuration")
|
||
out_name = f"{base}{out_ext}"
|
||
obj = join_prefix(prefix, f"converted/{out_name}")
|
||
raw = content.encode(enc or "utf-8")
|
||
bio = io.BytesIO(raw)
|
||
client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type=ct) # type: ignore
|
||
minio_url = f"{public_base}/{bucket}/{obj}"
|
||
exp = int(timedelta(hours=12).total_seconds())
|
||
minio_presigned_url = presigned_read(client, bucket, obj, exp)
|
||
resp = JSONResponse({
|
||
"minio_url": minio_url,
|
||
"minio_presigned_url": minio_presigned_url,
|
||
"name": out_name,
|
||
"export": export,
|
||
"media_type": ct
|
||
})
|
||
try:
|
||
if artifacts_dir:
|
||
shutil.rmtree(artifacts_dir, ignore_errors=True)
|
||
except Exception:
|
||
pass
|
||
return resp
|
||
finally:
|
||
try:
|
||
os.remove(tmp_path)
|
||
except Exception:
|
||
pass
|
||
|
||
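# Illustrative sketch (not part of the server): calling /convert with the
# `requests` library. The host, file name, and field values are placeholders;
# adjust for your deployment. Defined but never called, so importing this
# module has no side effects.
def _example_convert_client():
    import requests  # assumed available in the caller's environment
    with open("report.docx", "rb") as fh:
        r = requests.post(
            "http://localhost:8000/convert",
            files={"file": ("report.docx", fh)},
            data={"export": "markdown", "filename": "report"},
            timeout=60,
        )
    r.raise_for_status()
    # -> {"minio_url": ..., "minio_presigned_url": ..., "name": ..., "export": ..., "media_type": ...}
    return r.json()
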
profiles_dir = Path(__file__).parent / "configs"
profiles_dir.mkdir(parents=True, exist_ok=True)

@app.get("/")
|
||
def index():
|
||
return JSONResponse({"ok": True, "service": "docling-api", "version": "v2"})
|
||
|
||
@app.get("/@vite/client")
|
||
def vite_client_stub():
|
||
return JSONResponse({"ok": True})
|
||
|
||
@app.get("/refresh.js")
|
||
def refresh_js_stub():
|
||
return Response(content="window.initClient=function(){},window.addRefresh=function(){};", media_type="application/javascript")
|
||
|
||
|
||
|
||
RUNTIME_CONFIG: Dict[str, Dict[str, Optional[str]]] = {
    "minio": {
        "endpoint": None,
        "public": None,
        "access": None,
        "secret": None,
        "bucket": None,
        "secure": None,
        "prefix": None,
        "store_final": "true",
        "public_read": "true",
    },
    "db": {
        "webhook_url": None,
        "token": None,
    },
}

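# Example shape of a populated RUNTIME_CONFIG["minio"] (illustrative values only;
# the keys match the dict above, the values are placeholders):
#   {
#       "endpoint": "minio.internal:9000",      # S3 API port, not the 9001 console
#       "public":   "http://minio.example.com",
#       "access":   "minio-access-key",
#       "secret":   "minio-secret-key",
#       "bucket":   "docs",
#       "secure":   "false",
#       "prefix":   "docling",
#       "store_final": "true",
#       "public_read": "true",
#   }
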
def _normalize_export(export: str) -> str:
    e = (export or "").strip().lower()
    allowed = {"markdown", "html", "json", "doctags"}
    if e not in allowed:
        raise HTTPException(status_code=422, detail="unsupported export")
    return e

def _normalize_engine(engine: Optional[str]) -> Optional[str]:
    if engine is None:
        return None
    e = (engine or "").strip().lower()
    allowed = {"docling", "word2markdown", "pandoc", "custom"}
    if e not in allowed:
        raise HTTPException(status_code=422, detail="unsupported engine")
    return e

def _fix_garbled_name(name: str) -> str:
    try:
        s = name
        t = s.strip()
        # If pure ASCII, no fix needed
        if all(ord(c) < 128 for c in t):
            return name
        # Try to reconstruct original bytes assuming CP437 (Zip default when UTF-8 flag not set)
        try:
            raw = s.encode("cp437", errors="strict")
        except UnicodeEncodeError:
            # Not CP437 mojibake, keep original
            return name
        encs = [
            "gb18030",
            "gbk",
            "cp936",
            "utf-8",
            "big5",
            "cp950",
            "shift_jis",
            "cp932",
            "cp949",
            "euc-kr",
            "euc-jp",
        ]
        for e in encs:
            try:
                fixed = raw.decode(e)
                if fixed:
                    return fixed
            except Exception:
                continue
    except Exception:
        pass
    return name

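# Illustrative example of the repair above: a GBK filename "中文"
# (bytes d6 d0 ce c4) decoded as CP437 shows up as "╓╨╬─"; re-encoding to
# CP437 and decoding with gb18030 recovers the original. Values checked
# against the stdlib cp437/gb18030 codecs.
#   _fix_garbled_name("╓╨╬─")  -> "中文"
#   _fix_garbled_name("ascii") -> "ascii"   # pure ASCII is returned unchanged
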
def _safe_target(base: Path, name: str) -> Optional[Path]:
    try:
        n = name.replace("\\", "/").lstrip("/")
        parts = [p for p in n.split("/") if p and p not in {".", ".."}]
        tgt = base / "/".join(parts)
        rp = tgt.resolve()
        rb = base.resolve()
        try:
            rp.relative_to(rb)
        except Exception:
            return None
        return rp
    except Exception:
        return None

def _zip_extract_safely(zf: object, dest: Path) -> None:
    try:
        for zi in zf.infolist():  # type: ignore
            try:
                name = str(getattr(zi, "filename", ""))
                flag = int(getattr(zi, "flag_bits", 0))
                use = name
                if (flag & 0x800) == 0:
                    # UTF-8 flag not set: the entry name may be CP437 mojibake
                    use = _fix_garbled_name(name)
                target = _safe_target(dest, use)
                if target is None:
                    continue
                if hasattr(zi, "is_dir") and zi.is_dir():  # type: ignore
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                target.parent.mkdir(parents=True, exist_ok=True)
                with zf.open(zi, "r") as src:  # type: ignore
                    data = src.read()
                with open(target, "wb") as out:
                    out.write(data)
            except Exception:
                continue
    except Exception:
        pass

def _tar_extract_safely(tf: object, dest: Path) -> None:
    try:
        for m in tf.getmembers():  # type: ignore
            try:
                name = str(getattr(m, "name", ""))
                use = _fix_garbled_name(name)
                target = _safe_target(dest, use)
                if target is None:
                    continue
                if getattr(m, "isdir", lambda: False)():
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                target.parent.mkdir(parents=True, exist_ok=True)
                f = tf.extractfile(m)  # type: ignore
                if f is None:
                    continue
                data = f.read()
                with open(target, "wb") as out:
                    out.write(data)
            except Exception:
                continue
    except Exception:
        pass

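# Illustrative sketch: driving the safe extractors with stdlib archive
# objects. The archive paths are placeholders. Both helpers silently skip
# entries whose resolved path would escape `dest` (zip-slip/path traversal).
def _example_safe_extract(dest: Path) -> None:
    import zipfile
    import tarfile
    with zipfile.ZipFile("archive.zip") as zf:
        _zip_extract_safely(zf, dest)
    with tarfile.open("archive.tar.gz", "r:gz") as tf:
        _tar_extract_safely(tf, dest)
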
def _minio_head_bucket(client: object, bucket: str) -> bool:
    try:
        if hasattr(client, "bucket_exists"):
            try:
                return bool(client.bucket_exists(bucket))  # type: ignore
            except Exception:
                pass
        try:
            region = client._get_region(bucket)  # type: ignore
        except Exception:
            region = "us-east-1"
        client._url_open(method="HEAD", region=region, bucket_name=bucket)  # type: ignore
        return True
    except Exception:
        try:
            names = [getattr(b, "name", None) for b in client.list_buckets()]  # type: ignore
            return bucket in set(n for n in names if n)
        except Exception:
            return False

def _minio_create_bucket(client: object, bucket: str) -> bool:
    # Prefer SDK methods, fallback to low-level call
    try:
        if hasattr(client, "bucket_exists"):
            try:
                if client.bucket_exists(bucket):  # type: ignore
                    return True
            except Exception:
                pass
        if hasattr(client, "make_bucket"):
            try:
                client.make_bucket(bucket)  # type: ignore
                return True
            except Exception:
                try:
                    region = client._get_region(bucket)  # type: ignore
                except Exception:
                    region = "us-east-1"
                try:
                    client.make_bucket(bucket, location=region)  # type: ignore
                    return True
                except Exception:
                    pass
        try:
            try:
                region = client._get_region(bucket)  # type: ignore
            except Exception:
                region = "us-east-1"
            client._url_open(method="PUT", region=region, bucket_name=bucket)  # type: ignore
            return True
        except Exception as ce:
            if "BucketAlreadyOwnedByYou" in str(ce) or "BucketAlreadyExists" in str(ce):
                return True
            raise
    except Exception as e:
        raise e

def _minio_client(endpoint: str, access: str, secret: str, secure: bool):
    if urllib3 is not None:
        try:
            http = urllib3.PoolManager(timeout=urllib3.Timeout(connect=3.0, read=20.0))
            return Minio(endpoint=endpoint, access_key=access, secret_key=secret, secure=secure, http_client=http)  # type: ignore
        except Exception:
            return Minio(endpoint=endpoint, access_key=access, secret_key=secret, secure=secure)  # type: ignore
    return Minio(endpoint=endpoint, access_key=access, secret_key=secret, secure=secure)  # type: ignore

def _minio_time_hint(endpoint: str, secure: bool) -> Optional[str]:
    try:
        scheme = "https" if secure else "http"
        r = urlopen(f"{scheme}://{endpoint}", timeout=3)
        srv_date = r.headers.get("Date")
        if not srv_date:
            return None
        from email.utils import parsedate_to_datetime
        from datetime import datetime, timezone
        dt = parsedate_to_datetime(srv_date)
        now = datetime.now(timezone.utc)
        diff = abs((now - dt).total_seconds())
        return f"Server time differs from local time by about {int(diff)} seconds"
    except Exception:
        return None


def _db_notify(payload: Dict[str, object]):
    try:
        import requests  # type: ignore
    except Exception:
        return
    url = (RUNTIME_CONFIG.get("db", {}).get("webhook_url") or "").strip()
    if not url:
        return
    token = (RUNTIME_CONFIG.get("db", {}).get("token") or "")
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    try:
        requests.post(url, json=payload, headers=headers, timeout=5)
    except Exception:
        pass

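# Illustrative payload shape posted to the configured webhook (the fields are
# the ones md_convert sends below; values are placeholders):
#   {"type": "md_convert", "base": "report", "target": "pdf",
#    "local_url": null, "minio_url": "...", "minio_presigned_url": "...",
#    "mappings": [...], "time": 1700000000}
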
@app.post("/config/minio")
|
||
async def set_minio_config(
|
||
endpoint: str = Form(...),
|
||
public: Optional[str] = Form(None),
|
||
access: str = Form(...),
|
||
secret: str = Form(...),
|
||
bucket: str = Form(...),
|
||
secure: Optional[str] = Form("false"),
|
||
prefix: Optional[str] = Form(None),
|
||
store_final: Optional[str] = Form("true"),
|
||
public_read: Optional[str] = Form("true"),
|
||
):
|
||
ep_raw = (endpoint or "").strip()
|
||
ep_host = ep_raw
|
||
try:
|
||
from urllib.parse import urlsplit
|
||
u = urlsplit(ep_raw)
|
||
if u.scheme:
|
||
ep_host = (u.netloc or ep_raw).split("/")[0]
|
||
else:
|
||
ep_host = ep_raw.split("/")[0]
|
||
except Exception:
|
||
ep_host = ep_raw.split("/")[0]
|
||
# reject console port or console paths for endpoint
|
||
try:
|
||
if (":9001" in ep_host) or ("/browser" in ep_raw) or ("/minio" in ep_raw):
|
||
return {"ok": False, "error": "请使用 MinIO API 端口 9000(而非 9001 控制台)"}
|
||
except Exception:
|
||
pass
|
||
pub_val = public
|
||
try:
|
||
from urllib.parse import urlsplit
|
||
pu = urlsplit((public or "").strip())
|
||
if (pu.netloc.endswith(":9001") or "/browser" in (public or "") or "/minio" in (public or "")):
|
||
pub_val = None
|
||
except Exception:
|
||
if public and (":9001" in public or "/browser" in public or "/minio" in public):
|
||
pub_val = None
|
||
# ensure public has scheme
|
||
try:
|
||
if pub_val:
|
||
from urllib.parse import urlsplit
|
||
pu = urlsplit(pub_val.strip())
|
||
scheme = pu.scheme or ("https" if str(secure or "false").lower() in {"1","true","yes","on"} else "http")
|
||
host = pu.netloc or pu.path.split("/")[0]
|
||
pub_val = f"{scheme}://{host}"
|
||
except Exception:
|
||
try:
|
||
if pub_val:
|
||
host = pub_val.strip().split("/")[0]
|
||
scheme = "https" if str(secure or "false").lower() in {"1","true","yes","on"} else "http"
|
||
pub_val = f"{scheme}://{host}"
|
||
except Exception:
|
||
pass
|
||
RUNTIME_CONFIG["minio"].update({
|
||
"endpoint": ep_host,
|
||
"public": pub_val,
|
||
"access": access,
|
||
"secret": secret,
|
||
"bucket": bucket,
|
||
"secure": secure,
|
||
"prefix": prefix,
|
||
"store_final": store_final,
|
||
"public_read": public_read,
|
||
})
|
||
client, bkt, pub, _ = minio_current(RUNTIME_CONFIG)
|
||
if client is None or not bkt or not pub:
|
||
return {"ok": False, "error": "MinIO config invalid"}
|
||
try:
|
||
pr = str(public_read or "true").lower() in {"1","true","yes","on"}
|
||
if pr:
|
||
policy = {
|
||
"Version": "2012-10-17",
|
||
"Statement": [
|
||
{"Effect": "Allow", "Principal": "*", "Action": ["s3:GetBucketLocation", "s3:ListBucket"], "Resource": [f"arn:aws:s3:::{bkt}"]},
|
||
{"Effect": "Allow", "Principal": "*", "Action": ["s3:GetObject"], "Resource": [f"arn:aws:s3:::{bkt}/*"]},
|
||
],
|
||
}
|
||
import json as _json
|
||
client.set_bucket_policy(bucket_name=bkt, policy=_json.dumps(policy)) # type: ignore
|
||
else:
|
||
try:
|
||
client.delete_bucket_policy(bkt) # type: ignore
|
||
except Exception:
|
||
pass
|
||
except Exception:
|
||
pass
|
||
return {"ok": True}
|
||
|
||
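# Illustrative usage (host, credentials, and bucket are placeholders):
#   curl -X POST http://localhost:8000/config/minio \
#     -F endpoint=minio.internal:9000 \
#     -F access=minio-access-key -F secret=minio-secret-key \
#     -F bucket=docs -F secure=false -F public_read=true
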
@app.post("/config/minio/test")
|
||
async def test_minio_config(
|
||
endpoint: str = Form(...),
|
||
public: Optional[str] = Form(None),
|
||
access: str = Form(...),
|
||
secret: str = Form(...),
|
||
bucket: str = Form(...),
|
||
secure: Optional[str] = Form("false"),
|
||
create_if_missing: Optional[str] = Form("true"),
|
||
public_read: Optional[str] = Form("false"),
|
||
):
|
||
if Minio is None:
|
||
return {"ok": False, "connected": False, "bucket_exists": False, "error": "minio client not available"}
|
||
try:
|
||
sec = str(secure or "false").lower() in {"1","true","yes","on"}
|
||
ep_raw = (endpoint or "").strip()
|
||
ep_host = ep_raw
|
||
try:
|
||
from urllib.parse import urlsplit
|
||
u = urlsplit(ep_raw)
|
||
if u.scheme:
|
||
ep_host = (u.netloc or ep_raw).split("/")[0]
|
||
else:
|
||
ep_host = ep_raw.split("/")[0]
|
||
except Exception:
|
||
ep_host = ep_raw.split("/")[0]
|
||
if ":9001" in ep_host or "/browser" in ep_raw or "/minio" in ep_raw:
|
||
return {"ok": False, "connected": False, "bucket_exists": False, "error": "请使用 MinIO API 端口 9000(而非 9001 控制台)"}
|
||
client = _minio_client(endpoint=ep_host, access=access, secret=secret, secure=sec)
|
||
# handshake fallback
|
||
try:
|
||
try:
|
||
client.list_buckets() # type: ignore
|
||
except Exception as e:
|
||
if sec and ("SSL" in str(e) or "HTTPSConnectionPool" in str(e) or "SSLError" in str(e)):
|
||
client = _minio_client(endpoint=ep_host, access=access, secret=secret, secure=False)
|
||
sec = False
|
||
except Exception:
|
||
pass
|
||
exists = False
|
||
created = False
|
||
exists = _minio_head_bucket(client, bucket)
|
||
if not exists and str(create_if_missing or "true").lower() in {"1","true","yes","on"}:
|
||
if _minio_create_bucket(client, bucket):
|
||
exists = True
|
||
created = True
|
||
# 始终根据 public_read 应用/移除策略(即使桶已存在)
|
||
try:
|
||
import json as _json
|
||
if str(public_read or "false").lower() in {"1","true","yes","on"}:
|
||
policy = {
|
||
"Version": "2012-10-17",
|
||
"Statement": [
|
||
{"Effect": "Allow", "Principal": "*", "Action": ["s3:GetBucketLocation", "s3:ListBucket"], "Resource": [f"arn:aws:s3:::{bucket}"]},
|
||
{"Effect": "Allow", "Principal": "*", "Action": ["s3:GetObject"], "Resource": [f"arn:aws:s3:::{bucket}/*"]},
|
||
],
|
||
}
|
||
client.set_bucket_policy(bucket_name=bucket, policy=_json.dumps(policy)) # type: ignore
|
||
else:
|
||
try:
|
||
client.delete_bucket_policy(bucket) # type: ignore
|
||
except Exception:
|
||
pass
|
||
except Exception:
|
||
pass
|
||
return {"ok": True, "connected": True, "bucket_exists": exists, "created": created, "hint": ("使用 HTTPS 访问 9000 端口可能失败,请确认启用 HTTPS 与证书配置匹配" if sec and (public or "").startswith("http://") else None)}
|
||
except Exception as e:
|
||
hint = None
|
||
if "RequestTimeTooSkewed" in str(e):
|
||
hint = _minio_time_hint(ep_host, sec)
|
||
return {"ok": False, "connected": False, "bucket_exists": False, "error": str(e), "hint": hint}
|
||
|
||
@app.get("/config/profile/list")
|
||
async def list_profiles():
|
||
names: List[str] = []
|
||
try:
|
||
for p in profiles_dir.rglob("*.json"):
|
||
try:
|
||
names.append(p.stem)
|
||
except Exception:
|
||
continue
|
||
except Exception:
|
||
pass
|
||
return {"ok": True, "profiles": sorted(set(names))}
|
||
|
||
@app.post("/config/profile/activate")
|
||
async def activate_profile(name: str = Form(...)):
|
||
target = None
|
||
try:
|
||
for p in profiles_dir.rglob("*.json"):
|
||
if p.stem.lower() == (name or "").strip().lower():
|
||
target = p
|
||
break
|
||
if target is None:
|
||
raise HTTPException(status_code=404, detail="profile not found")
|
||
active_path = profiles_dir / "active.json"
|
||
data = json.loads(target.read_text("utf-8"))
|
||
# 应用并覆盖到运行时配置
|
||
try:
|
||
minio_cfg = data.get("minio", {})
|
||
if isinstance(minio_cfg, dict) and minio_cfg:
|
||
sanitized = dict(minio_cfg)
|
||
try:
|
||
ep = str(sanitized.get("endpoint") or "").strip()
|
||
if ep and ":9001" in ep:
|
||
h = ep.split("/")[0]
|
||
if ":" in h:
|
||
parts = h.split(":")
|
||
sanitized["endpoint"] = f"{parts[0]}:9000"
|
||
else:
|
||
sanitized["endpoint"] = h
|
||
except Exception:
|
||
pass
|
||
try:
|
||
pub = str(sanitized.get("public") or "").strip()
|
||
if pub and (":9001" in pub or "/browser" in pub or "/minio" in pub):
|
||
host = pub.split("/")[0]
|
||
sec = str(sanitized.get("secure") or RUNTIME_CONFIG.get("minio", {}).get("secure") or "false").lower() in {"1","true","yes","on"}
|
||
scheme = "https" if sec else "http"
|
||
if ":" in host:
|
||
base_host = host.split(":")[0]
|
||
sanitized["public"] = f"{scheme}://{base_host}:9000"
|
||
else:
|
||
sanitized["public"] = f"{scheme}://{host}:9000"
|
||
except Exception:
|
||
pass
|
||
RUNTIME_CONFIG["minio"].update(sanitized)
|
||
except Exception:
|
||
pass
|
||
try:
|
||
db_cfg = data.get("db", {})
|
||
if isinstance(db_cfg, dict) and db_cfg:
|
||
RUNTIME_CONFIG["db"].update(db_cfg)
|
||
except Exception:
|
||
pass
|
||
# 写入 active.json 以便后续观察者检测到变更
|
||
active_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), "utf-8")
|
||
return {"ok": True, "active": target.stem}
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
raise HTTPException(status_code=400, detail=str(e))
|
||
|
||
@app.get("/system/time/check")
|
||
def system_time_check(
|
||
endpoint: Optional[str] = Query(None),
|
||
public: Optional[str] = Query(None),
|
||
secure: Optional[str] = Query(None),
|
||
):
|
||
try:
|
||
rc = RUNTIME_CONFIG.get("minio", {})
|
||
ep_raw = (endpoint or rc.get("endpoint") or "").strip()
|
||
pub_raw = (public or rc.get("public") or "").strip()
|
||
sec_flag = secure if secure is not None else (rc.get("secure") or "false")
|
||
sec = str(sec_flag or "false").lower() in {"1","true","yes","on"}
|
||
scheme = "https" if sec else "http"
|
||
# 解析 host(优先 public,其次 endpoint)
|
||
def _host(s: str) -> str:
|
||
try:
|
||
from urllib.parse import urlsplit
|
||
u = urlsplit(s)
|
||
return (u.netloc or s).split("/")[0] if u.scheme else s.split("/")[0]
|
||
except Exception:
|
||
return s.split("/")[0]
|
||
base_host = _host(pub_raw or ep_raw)
|
||
if not base_host:
|
||
from datetime import datetime, timezone
|
||
now = datetime.now(timezone.utc)
|
||
return {"ok": True, "server_time": None, "local_time": now.isoformat(), "diff_sec": None, "hint": "未配置 MinIO 端点"}
|
||
# 构造候选检测 URL(尽量使用 MinIO 健康端点以获取标准 Date 头)
|
||
base = f"{scheme}://{base_host}"
|
||
candidates = [
|
||
base,
|
||
base + "/minio/health/live",
|
||
base + "/minio/health/ready",
|
||
base + "/minio/health/version",
|
||
]
|
||
srv_date = None
|
||
for url in candidates:
|
||
try:
|
||
req = Request(url, method="HEAD")
|
||
r = urlopen(req, timeout=3)
|
||
d = r.headers.get("Date") or r.headers.get("date")
|
||
if d:
|
||
srv_date = d
|
||
break
|
||
except Exception:
|
||
try:
|
||
r = urlopen(url, timeout=3)
|
||
d = r.headers.get("Date") or r.headers.get("date")
|
||
if d:
|
||
srv_date = d
|
||
break
|
||
except Exception:
|
||
pass
|
||
# 如果按当前 scheme 获取失败,尝试切换 scheme 再试一次
|
||
if not srv_date:
|
||
alt_scheme = "http" if scheme == "https" else "https"
|
||
alt_base = f"{alt_scheme}://{base_host}"
|
||
alt_candidates = [
|
||
alt_base,
|
||
alt_base + "/minio/health/live",
|
||
alt_base + "/minio/health/ready",
|
||
alt_base + "/minio/health/version",
|
||
]
|
||
for url in alt_candidates:
|
||
try:
|
||
req = Request(url, method="HEAD")
|
||
r = urlopen(req, timeout=3)
|
||
d = r.headers.get("Date") or r.headers.get("date")
|
||
if d:
|
||
srv_date = d
|
||
break
|
||
except Exception:
|
||
try:
|
||
r = urlopen(url, timeout=3)
|
||
d = r.headers.get("Date") or r.headers.get("date")
|
||
if d:
|
||
srv_date = d
|
||
break
|
||
except Exception:
|
||
pass
|
||
from datetime import datetime, timezone
|
||
now = datetime.now(timezone.utc)
|
||
diff = None
|
||
if srv_date:
|
||
from email.utils import parsedate_to_datetime
|
||
try:
|
||
dt = parsedate_to_datetime(srv_date)
|
||
diff = int(abs((now - dt).total_seconds()))
|
||
except Exception:
|
||
diff = None
|
||
hint = _minio_time_hint(base_host, sec)
|
||
return {"ok": True, "server_time": srv_date, "local_time": now.isoformat(), "diff_sec": diff, "hint": hint}
|
||
except Exception as e:
|
||
return {"ok": False, "error": str(e)}
|
||
|
||
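# Example response shape (illustrative values only):
#   {"ok": true, "server_time": "Tue, 01 Oct 2024 08:00:03 GMT",
#    "local_time": "2024-10-01T08:00:00+00:00", "diff_sec": 3, "hint": null}
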
@app.post("/system/time/sync")
|
||
async def system_time_sync(method: Optional[str] = Form("auto"), ntp_server: Optional[str] = Form(None)):
|
||
cmds = []
|
||
servers = [s for s in [ntp_server, "time.apple.com", "pool.ntp.org"] if s]
|
||
for srv in servers:
|
||
if (method or "auto") in {"auto", "sntp"}:
|
||
cmds.append(["sntp", "-sS", srv])
|
||
if (method or "auto") in {"auto", "ntpdate"}:
|
||
cmds.append(["ntpdate", "-u", srv])
|
||
outputs = []
|
||
success = False
|
||
for cmd in cmds:
|
||
try:
|
||
p = subprocess.run(cmd, capture_output=True, text=True, timeout=8)
|
||
outputs.append({"cmd": " ".join(cmd), "code": p.returncode, "out": p.stdout, "err": p.stderr})
|
||
if p.returncode == 0:
|
||
success = True
|
||
break
|
||
except Exception as e:
|
||
outputs.append({"cmd": " ".join(cmd), "code": -1, "out": "", "err": str(e)})
|
||
if not success and sys.platform == "darwin":
|
||
elev_cmds = []
|
||
for srv in servers:
|
||
elev_cmds.append(["osascript", "-e", f'do shell script "sntp -sS {srv}" with administrator privileges'])
|
||
elev_cmds.append(["osascript", "-e", f'do shell script "ntpdate -u {srv}" with administrator privileges'])
|
||
elev_cmds.append(["osascript", "-e", f'do shell script "/usr/sbin/systemsetup -setnetworktimeserver {srv}" with administrator privileges'])
|
||
elev_cmds.append(["osascript", "-e", 'do shell script "/usr/sbin/systemsetup -setusingnetworktime on" with administrator privileges'])
|
||
for cmd in elev_cmds:
|
||
try:
|
||
p = subprocess.run(cmd, capture_output=True, text=True, timeout=12)
|
||
outputs.append({"cmd": " ".join(cmd), "code": p.returncode, "out": p.stdout, "err": p.stderr})
|
||
if p.returncode == 0:
|
||
success = True
|
||
break
|
||
except Exception as e:
|
||
outputs.append({"cmd": " ".join(cmd), "code": -1, "out": "", "err": str(e)})
|
||
chk = system_time_check()
|
||
return {"ok": success, "result": outputs, "check": chk}
|
||
|
||
@app.get("/api/system/time/check")
|
||
def system_time_check_api(
|
||
endpoint: Optional[str] = Query(None),
|
||
public: Optional[str] = Query(None),
|
||
secure: Optional[str] = Query(None),
|
||
):
|
||
return system_time_check(endpoint=endpoint, public=public, secure=secure)
|
||
|
||
@app.post("/api/system/time/sync")
|
||
async def system_time_sync_api(method: Optional[str] = Form("auto"), ntp_server: Optional[str] = Form(None)):
|
||
return await system_time_sync(method=method, ntp_server=ntp_server)
|
||
|
||
async def _auto_time_calibration():
    try:
        await asyncio.sleep(1.0)
        chk = system_time_check()
        try:
            diff = int((chk or {}).get("diff_sec") or 0)
        except Exception:
            diff = 0
        if diff and diff > 120:
            try:
                await system_time_sync(method="auto", ntp_server=None)
            except Exception:
                pass
    except Exception:
        pass

@app.get("/config/minio/buckets")
|
||
def list_minio_buckets(
|
||
endpoint: str,
|
||
access: str,
|
||
secret: str,
|
||
secure: Optional[str] = "false",
|
||
):
|
||
if Minio is None:
|
||
return {"ok": False, "error": "minio client not available", "buckets": []}
|
||
try:
|
||
sec = str(secure or "false").lower() in {"1","true","yes","on"}
|
||
client = _minio_client(endpoint=endpoint, access=access, secret=secret, secure=sec)
|
||
names = [b.name for b in client.list_buckets()]
|
||
return {"ok": True, "buckets": names}
|
||
except Exception as e:
|
||
return {"ok": False, "error": str(e), "buckets": []}
|
||
|
||
@app.post("/config/minio/create-bucket")
|
||
async def create_minio_bucket(
|
||
endpoint: str = Form(...),
|
||
access: str = Form(...),
|
||
secret: str = Form(...),
|
||
bucket: str = Form(...),
|
||
secure: Optional[str] = Form("false"),
|
||
public_read: Optional[str] = Form("false"),
|
||
):
|
||
if Minio is None:
|
||
return {"ok": False, "error": "minio client not available"}
|
||
try:
|
||
sec = str(secure or "false").lower() in {"1","true","yes","on"}
|
||
ep_raw = (endpoint or "").strip()
|
||
ep_host = ep_raw
|
||
try:
|
||
from urllib.parse import urlsplit
|
||
u = urlsplit(ep_raw)
|
||
if u.scheme:
|
||
ep_host = (u.netloc or ep_raw).split("/")[0]
|
||
else:
|
||
ep_host = ep_raw.split("/")[0]
|
||
except Exception:
|
||
ep_host = ep_raw.split("/")[0]
|
||
if ":9001" in ep_host or "/browser" in ep_raw or "/minio" in ep_raw:
|
||
return {"ok": False, "error": "请使用 MinIO API 端口 9000(而非 9001 控制台)"}
|
||
client = _minio_client(endpoint=ep_host, access=access, secret=secret, secure=sec)
|
||
try:
|
||
try:
|
||
client.list_buckets() # type: ignore
|
||
except Exception as e:
|
||
if sec and ("SSL" in str(e) or "HTTPSConnectionPool" in str(e) or "SSLError" in str(e)):
|
||
client = _minio_client(endpoint=ep_host, access=access, secret=secret, secure=False)
|
||
sec = False
|
||
except Exception:
|
||
pass
|
||
_minio_create_bucket(client, bucket)
|
||
try:
|
||
pr = str(public_read or "false").lower() in {"1","true","yes","on"}
|
||
if pr:
|
||
policy = {
|
||
"Version": "2012-10-17",
|
||
"Statement": [
|
||
{"Effect": "Allow", "Principal": {"AWS": ["*"]}, "Action": ["s3:GetBucketLocation", "s3:ListBucket"], "Resource": [f"arn:aws:s3:::{bucket}"]},
|
||
{"Effect": "Allow", "Principal": {"AWS": ["*"]}, "Action": ["s3:GetObject"], "Resource": [f"arn:aws:s3:::{bucket}/*"]},
|
||
],
|
||
}
|
||
import json as _json
|
||
client.set_bucket_policy(bucket, _json.dumps(policy)) # type: ignore
|
||
except Exception:
|
||
pass
|
||
return {"ok": True, "bucket_exists": True}
|
||
except Exception as e:
|
||
hint = None
|
||
if "RequestTimeTooSkewed" in str(e):
|
||
hint = _minio_time_hint(ep_host, sec)
|
||
return {"ok": False, "error": str(e), "hint": hint}
|
||
|
||
@app.post("/minio/presign", response_model=MinioPresignResponse)
|
||
async def minio_presign(
|
||
url: Optional[str] = Form(None),
|
||
object_name: Optional[str] = Form(None),
|
||
bucket: Optional[str] = Form(None),
|
||
expires: Optional[int] = Form(3600),
|
||
):
|
||
client, cfg_bucket, public_base, _ = minio_current(RUNTIME_CONFIG)
|
||
if client is None:
|
||
raise HTTPException(status_code=400, detail="MinIO 未配置")
|
||
obj = (object_name or "").strip()
|
||
bkt = (bucket or cfg_bucket or "").strip()
|
||
if (not obj) and url:
|
||
try:
|
||
from urllib.parse import urlsplit, unquote
|
||
u = urlsplit((url or "").strip())
|
||
path = u.path or ""
|
||
parts = [p for p in path.split("/") if p]
|
||
if parts:
|
||
if not bkt:
|
||
bkt = parts[0]
|
||
obj = "/".join(parts[1:])
|
||
obj = unquote(obj)
|
||
except Exception:
|
||
obj = obj
|
||
if not bkt or not obj:
|
||
raise HTTPException(status_code=400, detail="bucket 与 object_name/URL 不能为空")
|
||
exp = int(expires or 3600)
|
||
ps = presigned_read(client, bkt, obj, exp) if client is not None else None
|
||
pub_url = None
|
||
try:
|
||
from urllib.parse import quote as _quote
|
||
if public_base:
|
||
pub_url = f"{public_base}/{bkt}/{_quote(obj, safe='/')}"
|
||
except Exception:
|
||
pub_url = None
|
||
return MinioPresignResponse(
|
||
bucket=bkt,
|
||
object=obj,
|
||
minio_url=pub_url,
|
||
minio_presigned_url=ps,
|
||
expires=exp,
|
||
)
|
||
|
||
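# Illustrative usage: presign by full URL or by bucket/object (host and values
# are placeholders):
#   curl -X POST http://localhost:8000/minio/presign \
#     -F url=http://minio.example.com/docs/converted/report.md -F expires=3600
#   curl -X POST http://localhost:8000/minio/presign \
#     -F bucket=docs -F object_name=converted/report.md
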
@app.get("/minio/object")
|
||
def minio_object(bucket: Optional[str] = None, object: str = ""):
|
||
client, cfg_bucket, public_base, _ = minio_current(RUNTIME_CONFIG)
|
||
if client is None:
|
||
raise HTTPException(status_code=400, detail="MinIO 未配置")
|
||
bkt = (bucket or cfg_bucket or "").strip()
|
||
obj_in = (object or "").strip()
|
||
try:
|
||
from urllib.parse import unquote as _unquote
|
||
obj = _unquote(obj_in)
|
||
except Exception:
|
||
obj = obj_in
|
||
if not bkt or not obj:
|
||
raise HTTPException(status_code=400, detail="bucket 与 object 不能为空")
|
||
ct = None
|
||
try:
|
||
try:
|
||
st = client.stat_object(bucket_name=bkt, object_name=obj) # type: ignore
|
||
except TypeError:
|
||
st = client.stat_object(bkt, obj) # type: ignore
|
||
ct = getattr(st, "content_type", None)
|
||
except Exception:
|
||
ct = None
|
||
data = b""
|
||
try:
|
||
try:
|
||
resp = client.get_object(bucket_name=bkt, object_name=obj) # type: ignore
|
||
except TypeError:
|
||
resp = client.get_object(bkt, obj) # type: ignore
|
||
try:
|
||
data = resp.read() # type: ignore
|
||
finally:
|
||
try:
|
||
resp.close() # type: ignore
|
||
except Exception:
|
||
pass
|
||
except Exception as e:
|
||
raise HTTPException(status_code=403, detail=str(e))
|
||
media = ct or detect_mime(obj, data)
|
||
headers = {"Content-Disposition": f"inline; filename*=UTF-8''" + quote(Path(obj).name)}
|
||
return Response(content=data, media_type=media, headers=headers)
|
||
|
||
@app.post("/config/db")
|
||
async def set_db_config(webhook_url: Optional[str] = Form(None), token: Optional[str] = Form(None)):
|
||
RUNTIME_CONFIG["db"].update({"webhook_url": webhook_url, "token": token})
|
||
return {"ok": True}
|
||
|
||
@app.get("/config")
|
||
def get_config_snapshot():
|
||
safe = {
|
||
"minio": {
|
||
k: ("***" if k == "secret" and v else v)
|
||
for k, v in RUNTIME_CONFIG.get("minio", {}).items()
|
||
},
|
||
"db": RUNTIME_CONFIG.get("db", {}),
|
||
}
|
||
return safe
|
||
|
||
@app.get("/config/profiles")
|
||
def list_profiles():
|
||
names = []
|
||
try:
|
||
for p in profiles_dir.glob("*.json"):
|
||
names.append(p.stem)
|
||
except Exception:
|
||
names = []
|
||
return {"ok": True, "profiles": sorted(names)}
|
||
|
||
@app.post("/config/save_profile")
|
||
async def save_profile(name: str = Form(...)):
|
||
if not name.strip():
|
||
raise HTTPException(status_code=400, detail="name required")
|
||
data = {
|
||
"minio": RUNTIME_CONFIG.get("minio", {}),
|
||
"db": RUNTIME_CONFIG.get("db", {}),
|
||
}
|
||
import json as _json
|
||
path = profiles_dir / f"{sanitize_filename(name)}.json"
|
||
try:
|
||
path.write_text(_json.dumps(data, ensure_ascii=False, indent=2), "utf-8")
|
||
return {"ok": True, "name": path.stem}
|
||
except Exception as e:
|
||
raise HTTPException(status_code=400, detail=str(e))
|
||
|
||
@app.get("/config/load_profile")
|
||
def load_profile(name: str):
|
||
import json as _json
|
||
path = profiles_dir / f"{sanitize_filename(name)}.json"
|
||
if not path.exists():
|
||
raise HTTPException(status_code=404, detail="profile not found")
|
||
try:
|
||
data = _json.loads(path.read_text("utf-8"))
|
||
m = data.get("minio", {})
|
||
d = data.get("db", {})
|
||
RUNTIME_CONFIG["minio"].update(m)
|
||
RUNTIME_CONFIG["db"].update(d)
|
||
client, bkt, pub, _ = minio_current(RUNTIME_CONFIG)
|
||
if client is None or not bkt or not pub:
|
||
raise HTTPException(status_code=400, detail="MinIO config invalid")
|
||
return {"ok": True, "config": data}
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
raise HTTPException(status_code=400, detail=str(e))
|
||
|
||
# ──────────────────────────────────────────────────────────────────────────────
# Auto-load DB config from app/configs without restart or page refresh
# ──────────────────────────────────────────────────────────────────────────────

def _choose_default_config_file() -> Optional[Path]:
    try:
        candidates: List[Path] = []
        for p in profiles_dir.rglob("*.json"):
            candidates.append(p)
        if not candidates:
            return None
        by_name = {x.stem.lower(): x for x in candidates}
        for prefer in ("active", "default", "test"):
            if prefer in by_name:
                return by_name[prefer]
        return sorted(candidates, key=lambda x: x.stat().st_mtime, reverse=True)[0]
    except Exception:
        return None

def _apply_configs_from_file(path: Path) -> None:
    try:
        import json as _json
        data = _json.loads(path.read_text("utf-8"))
        db_cfg = data.get("db", {})
        if isinstance(db_cfg, dict) and db_cfg:
            RUNTIME_CONFIG["db"].update(db_cfg)
        minio_cfg = data.get("minio", {})
        if isinstance(minio_cfg, dict) and minio_cfg:
            sanitized = dict(minio_cfg)
            try:
                ep = str(sanitized.get("endpoint") or "").strip()
                if ep and ":9001" in ep:
                    h = ep.split("/")[0]
                    if ":" in h:
                        parts = h.split(":")
                        sanitized["endpoint"] = f"{parts[0]}:9000"
                    else:
                        sanitized["endpoint"] = h
            except Exception:
                pass
            try:
                pub = str(sanitized.get("public") or "").strip()
                if pub and (":9001" in pub or "/browser" in pub or "/minio" in pub):
                    host = pub.split("/")[0]
                    sec = str(sanitized.get("secure") or RUNTIME_CONFIG.get("minio", {}).get("secure") or "false").lower() in {"1", "true", "yes", "on"}
                    scheme = "https" if sec else "http"
                    if ":" in host:
                        base_host = host.split(":")[0]
                        sanitized["public"] = f"{scheme}://{base_host}:9000"
                    else:
                        sanitized["public"] = f"{scheme}://{host}:9000"
            except Exception:
                pass
            # Only fill keys that are still unset, so explicit runtime config wins
            for k, v in sanitized.items():
                try:
                    cur = RUNTIME_CONFIG["minio"].get(k)
                    if cur in (None, ""):
                        RUNTIME_CONFIG["minio"][k] = v
                except Exception:
                    RUNTIME_CONFIG["minio"][k] = v
    except Exception:
        pass

async def _watch_db_config_changes(interval_sec: float = 3.0) -> None:
    last_path: Optional[Path] = _choose_default_config_file()
    last_mtime: float = (last_path.stat().st_mtime if last_path and last_path.exists() else 0.0)
    # Apply once at startup
    if last_path:
        _apply_configs_from_file(last_path)
    while True:
        try:
            cur = _choose_default_config_file()
            if cur and cur.exists():
                mt = cur.stat().st_mtime
                if cur != last_path or mt > last_mtime:
                    _apply_configs_from_file(cur)
                    last_path = cur
                    last_mtime = mt
        except Exception:
            pass
        await asyncio.sleep(interval_sec)

@app.on_event("startup")
|
||
async def _startup_autoload_configs():
|
||
try:
|
||
asyncio.create_task(_watch_db_config_changes(interval_sec=3.0))
|
||
except Exception:
|
||
pass
|
||
try:
|
||
asyncio.create_task(_auto_time_calibration())
|
||
except Exception:
|
||
pass
|
||
|
||
@app.post("/md/convert", response_model=ConvertResponse)
|
||
async def md_convert(
|
||
md_file: Optional[UploadFile] = File(None),
|
||
markdown_text: Optional[str] = Form(None),
|
||
markdown_url: Optional[str] = Form(None),
|
||
target: str = Form("docx"),
|
||
save: Optional[bool] = Form(False),
|
||
filename: Optional[str] = Form(None),
|
||
css_name: Optional[str] = Form(None),
|
||
css_text: Optional[str] = Form(None),
|
||
toc: Optional[bool] = Form(True),
|
||
header_text: Optional[str] = Form(None),
|
||
footer_text: Optional[str] = Form(None),
|
||
logo_url: Optional[str] = Form(None),
|
||
logo_file: Optional[UploadFile] = File(None),
|
||
cover_url: Optional[str] = Form(None),
|
||
cover_file: Optional[UploadFile] = File(None),
|
||
product_name: Optional[str] = Form(None),
|
||
document_name: Optional[str] = Form(None),
|
||
product_version: Optional[str] = Form(None),
|
||
document_version: Optional[str] = Form(None),
|
||
copyright_text: Optional[str] = Form(None),
|
||
):
|
||
"""
|
||
@function md_convert
|
||
@description Advanced Markdown conversion endpoint supporting custom styling, logos, and metadata
|
||
@param md_file Uploaded Markdown file (optional)
|
||
@param markdown_text Raw Markdown text (optional)
|
||
@param markdown_url URL to Markdown file (optional)
|
||
@param target Output format (docx/pdf)
|
||
@param save Save to MinIO
|
||
@param filename Output filename
|
||
@param css_name Predefined CSS profile name
|
||
@param css_text Custom CSS content
|
||
@param toc Include Table of Contents
|
||
@param header_text Custom header text
|
||
@param footer_text Custom footer text
|
||
@param logo_url URL for logo image
|
||
@param logo_file Uploaded logo file
|
||
@param cover_url URL for cover image
|
||
@param cover_file Uploaded cover file
|
||
@param product_name Product name for cover
|
||
@param document_name Document name for cover
|
||
@param product_version Product version for cover
|
||
@param document_version Document version for cover
|
||
@param copyright_text Copyright text
|
||
@return File download or JSON response
|
||
"""
|
||
logging.info(f"md_convert start target={target} save={save} filename={filename}")
|
||
provided = 0
|
||
if md_file is not None:
|
||
provided += 1
|
||
if markdown_text:
|
||
provided += 1
|
||
if markdown_url:
|
||
provided += 1
|
||
if provided != 1:
|
||
raise HTTPException(status_code=400, detail="provide exactly one of md_file, markdown_text, markdown_url")
|
||
if target.lower() not in {"docx", "pdf"}:
|
||
raise HTTPException(status_code=400, detail="target must be docx or pdf")
|
||
mappings: List[Dict[str, str]] = []
|
||
base_dir = Path(".").resolve()
|
||
if md_file is not None:
|
||
content = (await md_file.read()).decode("utf-8", errors="ignore")
|
||
base_dir = Path(md_file.filename or ".").resolve().parent if md_file and md_file.filename else Path(".")
|
||
base = sanitize_filename(filename) if filename else sanitize_filename(os.path.splitext(md_file.filename or "document")[0])
|
||
elif markdown_url:
|
||
src = markdown_url.strip()
|
||
try:
|
||
if src.lower().startswith("http"):
|
||
already_escaped = "%" in src
|
||
safe = src if already_escaped else _safe_http_url(src)
|
||
try:
|
||
with urlopen(safe, timeout=10) as r:
|
||
raw = r.read()
|
||
try:
|
||
logging.info(f"md_convert fetched markdown_url len={len(raw)} url={safe}")
|
||
except Exception:
|
||
pass
|
||
except UnicodeEncodeError:
|
||
alt = quote(src, safe=':/?&=%#')
|
||
with urlopen(_safe_http_url(alt), timeout=10) as r:
|
||
raw = r.read()
|
||
try:
|
||
logging.info(f"md_convert fetched markdown_url(len={len(raw)}) with alt url")
|
||
except Exception:
|
||
pass
|
||
except HTTPError as err:
|
||
raise HTTPException(status_code=400, detail={"error": "fetch_failed", "status": err.code, "url": getattr(err, 'url', src)})
|
||
except URLError as err:
|
||
raise HTTPException(status_code=400, detail={"error": "fetch_failed", "status": None, "url": src, "reason": str(getattr(err, 'reason', err))})
|
||
try:
|
||
content = raw.decode("utf-8")
|
||
except Exception:
|
||
content = raw.decode("latin-1", errors="ignore")
|
||
else:
|
||
with open(src, "r", encoding="utf-8", errors="ignore") as f:
|
||
content = f.read()
|
||
base_dir = Path(src).resolve().parent
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
raise HTTPException(status_code=400, detail={"error": "fetch_failed", "url": src, "message": str(e)})
|
||
base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(src, None))
|
||
else:
|
||
content = markdown_text or ""
|
||
base = sanitize_filename(filename) if filename else "document"
|
||
    # Rewrite local assets to MinIO URLs if configured
    client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
    if client is not None and bucket and public_base and base_dir:
        try:
            content, mappings = _rewrite_md_assets_to_minio(content, base_dir, client, bucket, public_base, prefix)
        except Exception:
            pass
    # Prepare common assets (logo, cover) for both DOCX and PDF
    logo_src = None
    try:
        client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
        if logo_file is not None and getattr(logo_file, "filename", None):
            lb = await logo_file.read()
            mime = detect_image_mime(logo_file.filename, lb)
            safe_logo = sanitize_filename(os.path.splitext(logo_file.filename or "logo")[0])
            extl = "." + (logo_file.filename.rsplit(".", 1)[-1].lower() if "." in (logo_file.filename or "") else "png")
            obj_logo = join_prefix(prefix, f"uploads/logo/{int(time.time())}-{safe_logo}{extl}")
            bio = io.BytesIO(lb)
            if client is not None and bucket and public_base:
                client.put_object(bucket_name=bucket, object_name=obj_logo, data=bio, length=len(lb), content_type=mime)  # type: ignore
                try:
                    from urllib.parse import quote as _quote
                    enc = _quote(obj_logo, safe="/")
                    exp = int(timedelta(hours=12).total_seconds())
                    ps = presigned_read(client, bucket, obj_logo, exp) if client is not None else None
                    logo_src = ps or f"{public_base}/{bucket}/{enc}"
                except Exception:
                    logo_src = f"{public_base}/{bucket}/{obj_logo}"
            try:
                if not save:
                    import base64 as _b64
                    logo_src = f"data:{mime};base64," + _b64.b64encode(lb).decode("ascii")
            except Exception:
                pass
        elif logo_url:
            u = logo_url.strip()
            if u.lower().startswith("http://") or u.lower().startswith("https://"):
                logo_src = u
            elif u.startswith("/"):
                p = Path(u)
                try:
                    lb = p.read_bytes()
                    mime = detect_image_mime(p.name, lb)
                    obj_logo = join_prefix(prefix, f"uploads/logo/{int(time.time())}-{sanitize_filename(p.stem)}{p.suffix or '.png'}")
                    bio = io.BytesIO(lb)
                    if client is not None and bucket and public_base:
                        client.put_object(bucket_name=bucket, object_name=obj_logo, data=bio, length=len(lb), content_type=mime)  # type: ignore
                        try:
                            from urllib.parse import quote as _quote
                            enc = _quote(obj_logo, safe="/")
                            exp = int(timedelta(hours=12).total_seconds())
                            ps = presigned_read(client, bucket, obj_logo, exp) if client is not None else None
                            logo_src = ps or f"{public_base}/{bucket}/{enc}"
                        except Exception:
                            logo_src = f"{public_base}/{bucket}/{obj_logo}"
                    try:
                        if not save:
                            import base64 as _b64
                            logo_src = f"data:{mime};base64," + _b64.b64encode(lb).decode("ascii")
                    except Exception:
                        pass
                except Exception:
                    logo_src = p.resolve().as_uri()
            else:
                p = Path(u)
                try:
                    lb = p.read_bytes()
                    mime = detect_image_mime(p.name, lb)
                    obj_logo = join_prefix(prefix, f"uploads/logo/{int(time.time())}-{sanitize_filename(p.stem)}{p.suffix or '.png'}")
                    bio = io.BytesIO(lb)
                    if client is not None and bucket and public_base:
                        client.put_object(bucket_name=bucket, object_name=obj_logo, data=bio, length=len(lb), content_type=mime)  # type: ignore
                        try:
                            from urllib.parse import quote as _quote
                            enc = _quote(obj_logo, safe="/")
                            exp = int(timedelta(hours=12).total_seconds())
                            ps = presigned_read(client, bucket, obj_logo, exp) if client is not None else None
                            logo_src = ps or f"{public_base}/{bucket}/{enc}"
                        except Exception:
                            logo_src = f"{public_base}/{bucket}/{obj_logo}"
                    try:
                        if not save:
                            import base64 as _b64
                            logo_src = f"data:{mime};base64," + _b64.b64encode(lb).decode("ascii")
                    except Exception:
                        pass
                except Exception:
                    logo_src = p.resolve().as_uri()
    except Exception:
        logo_src = None

    cover_src = None
    try:
        # Cover images are limited to 2 MB
        limit = 2 * 1024 * 1024
        if cover_file is not None and getattr(cover_file, "filename", None):
            cb = await cover_file.read()
            if len(cb) > limit:
                raise HTTPException(status_code=400, detail="cover image exceeds 2MB limit")
            client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
            mime = detect_image_mime(cover_file.filename, cb)
            safe_cov = sanitize_filename(os.path.splitext(cover_file.filename or "cover")[0])
            extc = "." + (cover_file.filename.rsplit(".", 1)[-1].lower() if "." in (cover_file.filename or "") else "png")
            obj_cov = join_prefix(prefix, f"uploads/cover/{int(time.time())}-{safe_cov}{extc}")
            bio = io.BytesIO(cb)
            if client is not None and bucket and public_base:
                client.put_object(bucket_name=bucket, object_name=obj_cov, data=bio, length=len(cb), content_type=mime)  # type: ignore
                try:
                    from urllib.parse import quote as _quote
                    enc = _quote(obj_cov, safe="/")
                    exp = int(timedelta(hours=12).total_seconds())
                    ps = presigned_read(client, bucket, obj_cov, exp) if client is not None else None
                    cover_src = ps or f"{public_base}/{bucket}/{enc}"
                except Exception:
                    cover_src = f"{public_base}/{bucket}/{obj_cov}"
            try:
                if not save:
                    import base64 as _b64
                    cover_src = f"data:{mime};base64," + _b64.b64encode(cb).decode("ascii")
            except Exception:
                pass
        elif cover_url:
            cu = cover_url.strip()
            if cu.lower().startswith("http://") or cu.lower().startswith("https://"):
                cover_src = cu
            else:
                p = Path(cu)
                rb = p.read_bytes()
                if len(rb) > limit:
                    raise HTTPException(status_code=400, detail="cover image exceeds 2MB limit")
                client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
                mime = detect_image_mime(cu, rb)
                obj_cov = join_prefix(prefix, f"uploads/cover/{int(time.time())}-{sanitize_filename(p.stem)}{p.suffix or '.png'}")
                bio = io.BytesIO(rb)
                if client is not None and bucket and public_base:
                    client.put_object(bucket_name=bucket, object_name=obj_cov, data=bio, length=len(rb), content_type=mime)  # type: ignore
                    try:
                        from urllib.parse import quote as _quote
                        enc = _quote(obj_cov, safe="/")
                        exp = int(timedelta(hours=12).total_seconds())
                        ps = presigned_read(client, bucket, obj_cov, exp) if client is not None else None
                        cover_src = ps or f"{public_base}/{bucket}/{enc}"
                    except Exception:
                        cover_src = f"{public_base}/{bucket}/{obj_cov}"
                try:
                    if not save:
                        import base64 as _b64
                        cover_src = f"data:{mime};base64," + _b64.b64encode(rb).decode("ascii")
                except Exception:
                    pass
    except HTTPException:
        raise
    except Exception:
        cover_src = None
logging.info(f"md_convert assets prepared logo_src={bool(logo_src)} cover_src={bool(cover_src)} css_name={css_name} css_text_len={(len(css_text) if css_text else 0)}")
|
||
if target.lower() == "docx":
|
||
        data = md_to_docx_bytes(
            content,
            toc=bool(toc),
            header_text=header_text,
            footer_text=footer_text,
            logo_url=logo_src or logo_url,
            copyright_text=copyright_text,
            filename_text=base,
            cover_src=cover_src,
            product_name=product_name,
            document_name=document_name,
            product_version=product_version,
            document_version=document_version,
        )
        media = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        ext = ".docx"
        # Upload final docx to MinIO
        client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
        minio_url = None
        minio_presigned_url = None
        try:
            rc_store_final = str(RUNTIME_CONFIG.get("minio", {}).get("store_final") or "true").lower() in {"1", "true", "yes", "on"}
            if client is not None and bucket and public_base and rc_store_final:
                out_name = f"{base}{ext}"
                obj = f"{(prefix or '').strip('/')}/converted/{out_name}".lstrip("/")
                bio = io.BytesIO(data)
                ct = media or "application/octet-stream"
                client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(data), content_type=ct)  # type: ignore
                try:
                    from urllib.parse import quote as _quote
                    minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
                except Exception:
                    minio_url = f"{public_base}/{bucket}/{obj}"
                try:
                    exp = int(timedelta(hours=12).total_seconds())
                    minio_presigned_url = presigned_read(client, bucket, obj, exp) if client is not None else None
                except Exception:
                    minio_presigned_url = None
        except Exception:
            minio_url = None
        logging.info(f"md_convert done docx name={base}{ext} size={len(data)}")
        _db_notify({
            "type": "md_convert",
            "base": base,
            "target": target.lower(),
            "local_url": None,
            "minio_url": minio_url,
            "minio_presigned_url": minio_presigned_url,
            "mappings": mappings,
            "time": int(time.time()),
        })
        return ConvertResponse(
            minio_url=minio_url,
            minio_presigned_url=minio_presigned_url,
            name=f"{base}{ext}",
            media_type=media,
        )
    else:
        use_css_name = css_name if css_name else ("default" if not css_text else None)
        data = md_to_pdf_bytes_with_renderer(
            content,
            "weasyprint",
            css_name=use_css_name,
            css_text=css_text,
            toc=bool(toc),
            header_text=header_text,
            footer_text=footer_text,
            logo_url=logo_src or logo_url,
            copyright_text=copyright_text,
            filename_text=base,
            cover_src=cover_src,
            product_name=product_name,
            document_name=document_name,
            product_version=product_version,
            document_version=document_version,
        )
        media = "application/pdf"
        ext = ".pdf"
        # Resolve the MinIO target here as well, mirroring the docx branch,
        # so the upload below does not depend on state set in the other branch.
        client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
        minio_url = None
        minio_presigned_url = None
        try:
            rc_store_final = str(RUNTIME_CONFIG.get("minio", {}).get("store_final") or "true").lower() in {"1", "true", "yes", "on"}
            if client is not None and bucket and public_base and rc_store_final:
                out_name = f"{base}{ext}"
                obj = f"{(prefix or '').strip('/')}/converted/{out_name}".lstrip("/")
                bio = io.BytesIO(data)
                ct = media or "application/octet-stream"
                try:
                    if ct.startswith("text/") and "charset" not in ct.lower():
                        ct = ct + "; charset=utf-8"
                except Exception:
                    pass
                client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(data), content_type=ct)  # type: ignore
                try:
                    from urllib.parse import quote as _quote
                    minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
                except Exception:
                    minio_url = f"{public_base}/{bucket}/{obj}"
                try:
                    exp = int(timedelta(hours=12).total_seconds())
                    minio_presigned_url = presigned_read(client, bucket, obj, exp) if client is not None else None
                except Exception:
                    minio_presigned_url = None
        except Exception:
            minio_url = None
        logging.info(f"md_convert done pdf name={base}{ext} size={len(data)}")
        _db_notify({
            "type": "md_convert",
            "base": base,
            "target": target.lower(),
            "local_url": None,
            "minio_url": minio_url,
            "minio_presigned_url": minio_presigned_url,
            "mappings": mappings,
            "time": int(time.time()),
        })
        return ConvertResponse(
            minio_url=minio_url,
            minio_presigned_url=minio_presigned_url,
            name=f"{base}{ext}",
            media_type=media,
        )


@app.get("/config/linkmap")
def get_linkmap():
    """
    @function get_linkmap
    @description Return the current link-map configuration
    """
    return load_linkmap()


@app.post("/config/linkmap")
async def set_linkmap(mapping: dict):
    """
    @function set_linkmap
    @description Persist a new link-map configuration
    @param mapping JSON object body with the mapping entries
    """
    try:
        save_linkmap(mapping)
        return {"ok": True}
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
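

# Usage sketch for the two linkmap endpoints above (host/port assumed; the
# sample mapping keys are hypothetical):
#   curl http://localhost:8000/config/linkmap
#   curl -X POST http://localhost:8000/config/linkmap \
#        -H "Content-Type: application/json" -d '{"docs/old.md": "docs/new.md"}'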


def detect_image_mime(filename: Optional[str], data: bytes) -> str:
    # Prefer the file extension, then magic bytes, then mimetypes, then PNG.
    ext = (os.path.splitext(filename or "")[1] or "").lower()
    if ext == ".png":
        return "image/png"
    if ext in {".jpg", ".jpeg"}:
        return "image/jpeg"
    if ext == ".svg":
        return "image/svg+xml"
    if ext == ".webp":
        return "image/webp"
    if data.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if data.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP":
        return "image/webp"
    try:
        head = data[:512].decode("utf-8", errors="ignore")
        if "<svg" in head:
            return "image/svg+xml"
    except Exception:
        pass
    guessed, _ = mimetypes.guess_type(filename or "")
    if guessed:
        return guessed
    return "image/png"


def detect_mime(filename: Optional[str], data: bytes) -> str:
    ext = (os.path.splitext(filename or "")[1] or "").lower()
    if ext in {".png", ".jpg", ".jpeg", ".svg", ".webp"}:
        return detect_image_mime(filename, data)
    sig_png = data.startswith(b"\x89PNG\r\n\x1a\n")
    sig_jpg = data.startswith(b"\xff\xd8\xff")
    sig_webp = len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP"
    if sig_png or sig_jpg or sig_webp:
        return detect_image_mime(filename, data)
    guessed, _ = mimetypes.guess_type(filename or "")
    if guessed:
        return guessed
    return "application/octet-stream"


@app.post("/proxy/download")
async def proxy_download(url: str = Form(...)):
    """
    @function proxy_download
    @description Fetch a remote URL (or read a local file path) and stream it back as an attachment
    @param url HTTP(S) URL or local file path
    """
    u = (url or "").strip()
    if not u:
        raise HTTPException(status_code=400, detail="url required")
    try:
        data: bytes
        ct: str
        name: str
        if u.lower().startswith("http://") or u.lower().startswith("https://"):
            already_escaped = "%" in u
            safe = u if already_escaped else _safe_http_url(u)
            with urlopen(safe, timeout=15) as r:
                data = r.read()
                ct = r.headers.get("Content-Type") or detect_mime(None, data)
            from urllib.parse import urlparse, unquote
            import os as _os
            parsed = urlparse(u)
            path = unquote(parsed.path or "")
            last = (_os.path.basename(path) or "download").split("?")[0]
            if "." in last:
                name = last
            else:
                import mimetypes as _m
                ext = _m.guess_extension((ct or "").split(";")[0].strip()) or ".md"
                name = last + ext
        else:
            p = Path(u)
            if not p.exists() or not p.is_file():
                raise HTTPException(status_code=404, detail="local path not found")
            data = p.read_bytes()
            ct = detect_mime(p.name, data)
            name = p.name
        disp = f"attachment; filename=\"{name}\"; filename*=UTF-8''" + quote(name)
        headers = {"Content-Disposition": disp}
        return Response(content=data, media_type=ct, headers=headers)
    except HTTPError as err:
        raise HTTPException(status_code=err.code, detail=f"download failed: {err}")
    except URLError as err:
        raise HTTPException(status_code=400, detail=f"download failed: {err}")
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
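

# Usage sketch for /proxy/download above (host/port assumed):
#   curl -X POST http://localhost:8000/proxy/download -F "url=https://example.com/a.png" -OJ
# Local paths are also accepted, which means the endpoint can read any file
# visible to the server process; it should not be exposed to untrusted callers.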


def _minio_from_env() -> Tuple[Optional[object], Optional[str], Optional[str], str]:
    endpoint = os.environ.get("MINIO_ENDPOINT")
    access = os.environ.get("MINIO_ACCESS_KEY")
    secret = os.environ.get("MINIO_SECRET_KEY")
    bucket = os.environ.get("MINIO_BUCKET")
    secure = str(os.environ.get("MINIO_SECURE", "false")).lower() in {"1", "true", "yes", "on"}
    public_base = os.environ.get("MINIO_PUBLIC_ENDPOINT") or ((f"https://{endpoint}" if secure else f"http://{endpoint}") if endpoint else None)
    if Minio is None or not endpoint or not access or not secret or not bucket or not public_base:
        return None, None, None, ""
    client = Minio(endpoint, access_key=access, secret_key=secret, secure=secure)
    try:
        _minio_create_bucket(client, bucket)
    except Exception:
        pass
    return client, bucket, public_base, os.environ.get("MINIO_PREFIX", "")
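

# Environment variables read by _minio_from_env above: MINIO_ENDPOINT,
# MINIO_ACCESS_KEY, MINIO_SECRET_KEY, MINIO_BUCKET, MINIO_SECURE,
# MINIO_PUBLIC_ENDPOINT (optional; derived from MINIO_ENDPOINT when absent),
# and MINIO_PREFIX (optional object-name prefix). If the required ones are
# missing, or the minio package is unavailable, (None, None, None, "") is
# returned and callers treat MinIO as unconfigured.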


def _export_ext(export: str) -> str:
    e = (export or "").lower()
    if e == "markdown":
        return ".md"
    if e == "html":
        return ".html"
    if e in {"json", "doctags"}:
        return ".json"
    return ".txt"


def _media_type(export: str) -> str:
    e = (export or "").lower()
    if e == "markdown":
        return "text/markdown; charset=utf-8"
    if e == "html":
        return "text/html; charset=utf-8"
    if e in {"json", "doctags"}:
        return "application/json"
    return "text/plain; charset=utf-8"


def _rewrite_md_assets_to_minio(text: str, base_dir: Path, client: object, bucket: str, public_base: str, prefix: str, search_root: Optional[Path] = None) -> Tuple[str, List[Dict[str, str]]]:
    mappings: List[Dict[str, str]] = []

    def _abs_key(p: Path) -> str:
        k = p.resolve().as_posix().lstrip("/")
        return k.replace(":", "")

    def _public_url(obj: str) -> str:
        try:
            from urllib.parse import quote as _quote
            return f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
        except Exception:
            return f"{public_base}/{bucket}/{obj}"

    def _upload_data_uri(uri: str) -> Optional[str]:
        try:
            import base64, hashlib
            head, _, b64 = uri.partition(",")
            if not b64:
                return None
            b = base64.b64decode(b64, validate=False)
            mime = ""
            try:
                low = head.lower()
                pos = low.find("data:")
                if pos != -1:
                    rest = head[pos + 5:]
                    semi = rest.find(";")
                    mime = rest[:semi] if semi != -1 else rest
            except Exception:
                mime = ""
            if not mime:
                mime = detect_image_mime(None, b)
            ext = ".png"
            if mime.lower() in {"image/jpeg", "image/jpg"}:
                ext = ".jpg"
            elif mime.lower() == "image/webp":
                ext = ".webp"
            elif mime.lower() == "image/svg+xml":
                ext = ".svg"
            elif mime.lower() == "image/gif":
                ext = ".gif"
            h = hashlib.sha256(b).hexdigest()[:16]
            obj = join_prefix(prefix, f"embed/{h}{ext}")
            bio = io.BytesIO(b)
            client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(b), content_type=mime or detect_image_mime(None, b))  # type: ignore
            return _public_url(obj)
        except Exception:
            return None

    def _upload(path: Path) -> Optional[str]:
        try:
            data = path.read_bytes()
            mime = detect_mime(path.name, data)
            obj = join_prefix(prefix, f"abs/{_abs_key(path)}")
            bio = io.BytesIO(data)
            client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(data), content_type=mime)  # type: ignore
            return _public_url(obj)
        except Exception:
            return None

    def _resolve_path(pure: str) -> Optional[Path]:
        q = pure.replace("\\", "/")
        if q.startswith("/"):
            try:
                rel = q.lstrip("/")
                base = (search_root or base_dir)
                p0 = (base / rel).resolve()
            except Exception:
                p0 = (search_root or base_dir) / q.lstrip("/")
            if p0.exists():
                return p0
        try:
            p = (base_dir / q).resolve()
        except Exception:
            p = (base_dir / q)
        if p.exists():
            return p
        try:
            name = Path(q).name
            search = (search_root or base_dir)
            for hit in search.rglob(name):
                if hit.exists():
                    return hit
        except Exception:
            pass
        return None

    def _split_target(s: str) -> Tuple[str, str]:
        # Split a markdown link target into the URL part and an optional
        # trailing title: <url> "title" / url "title" / url 'title'.
        pure, tail = s, ""
        if s.startswith("<"):
            gt = s.find(">")
            if gt != -1:
                pure = s[1:gt].strip()
                tail = s[gt + 1:]
        else:
            dq = s.find('"')
            sq = s.find("'")
            qpos = -1
            if dq != -1 and sq != -1:
                qpos = min(dq, sq)
            elif dq != -1:
                qpos = dq
            elif sq != -1:
                qpos = sq
            if qpos != -1:
                pure = s[:qpos].rstrip()
                tail = s[qpos:]
        return pure, tail

    def _rewrite_local(full: str, urlpart: str, mtype: str) -> str:
        # Resolve a local reference, upload it, and swap in the public URL.
        pure, tail = _split_target(urlpart)
        p = _resolve_path(pure)
        if not p or not p.exists():
            mappings.append({"from": pure, "to": None, "ok": False, "type": mtype})
            return full
        new = _upload(p)
        if not new:
            mappings.append({"from": pure, "to": None, "ok": False, "type": mtype})
            return full
        mappings.append({"from": pure, "to": new, "ok": True, "type": mtype})
        return full.replace(urlpart, f"{new}{tail}")

    def _replace_md(m: re.Match) -> str:
        full = m.group(0)
        urlpart = m.group(1).strip()
        if urlpart.startswith("data:"):
            new = _upload_data_uri(urlpart)
            if new:
                mappings.append({"from": "data_uri", "to": new, "ok": True, "type": "md_image_data"})
                return full.replace(urlpart, new)
            mappings.append({"from": "data_uri", "to": None, "ok": False, "type": "md_image_data"})
            return full
        if urlpart.startswith("http://") or urlpart.startswith("https://"):
            return full
        return _rewrite_local(full, urlpart, "md_link")

    text = re.sub(r"!\[[^\]]*\]\(([^)]+)\)", _replace_md, text)

    def _replace_mdlink(m: re.Match) -> str:
        full = m.group(0)
        urlpart = m.group(1).strip()
        if urlpart.startswith("http://") or urlpart.startswith("https://") or urlpart.startswith("data:"):
            return full
        return _rewrite_local(full, urlpart, "md_link")

    text = re.sub(r"(?<!!)\[[^\]]*\]\(([^)]+)\)", _replace_mdlink, text)

    def _make_attr_replacer(mtype: str, upload_data: bool):
        # One replacer per HTML tag/attribute pair; <img> additionally
        # uploads data: URIs, the other tags leave them untouched.
        def _repl(m: re.Match) -> str:
            src = m.group(1).strip()
            if src.startswith("data:"):
                if upload_data:
                    new = _upload_data_uri(src)
                    if new:
                        mappings.append({"from": "data_uri", "to": new, "ok": True, "type": f"{mtype}_data"})
                        return m.group(0).replace(src, new)
                    mappings.append({"from": "data_uri", "to": None, "ok": False, "type": f"{mtype}_data"})
                return m.group(0)
            if src.startswith("http://") or src.startswith("https://"):
                return m.group(0)
            p = _resolve_path(src)
            if not p or not p.exists():
                mappings.append({"from": src, "to": None, "ok": False, "type": mtype})
                return m.group(0)
            new = _upload(p)
            if not new:
                mappings.append({"from": src, "to": None, "ok": False, "type": mtype})
                return m.group(0)
            mappings.append({"from": src, "to": new, "ok": True, "type": mtype})
            return m.group(0).replace(src, new)
        return _repl

    for tag, attr, mtype, upload_data in (
        ("img", "src", "html_img", True),
        ("a", "href", "html_href", False),
        ("video", "src", "html_video", False),
        ("audio", "src", "html_audio", False),
        ("source", "src", "html_source", False),
    ):
        repl = _make_attr_replacer(mtype, upload_data)
        text = re.sub(rf'<{tag}[^>]+{attr}="([^"]+)"', repl, text)
        text = re.sub(rf"<{tag}[^>]+{attr}='([^']+)'", repl, text)

    return text, mappings
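

# How _rewrite_md_assets_to_minio above behaves on a small document, as a
# rough sketch (object names depend on the resolved absolute path and the
# configured prefix, so the exact URL below is illustrative only):
#   in:  ![diagram](images/arch.png)
#   out: ![diagram](http://<public_base>/<bucket>/<prefix>/abs/.../images/arch.png)
# Remote http(s) targets are left alone; data: URIs inside markdown images
# and <img> tags are decoded and stored under <prefix>/embed/<sha256-16>.<ext>.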


def _uplift_rel_path(rel: Path, md_dir: Path, root: Optional[Path], mappings: List[Dict[str, str]]) -> Path:
    """
    @function _uplift_rel_path
    @description Lift a markdown file's relative path one directory up (drop its immediate parent folder) when building rewritten object names
    """
    try:
        parts = list(rel.parts)
        if len(parts) < 2:
            return rel
        exts = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg", ".webp"}

        # NOTE: the three helpers below are never called in this function;
        # they look like the start of a heuristic for only uplifting files
        # that ship with a sibling asset folder, kept here for reference.
        def _is_asset_dir(name: str) -> bool:
            n = name.strip().lower()
            return n in {"image", "images", "img", "imgs", "media", "assets", "pic", "pics", "picture", "pictures", "visio pic", "visio_pic", "visio", "图片", "图像"}

        def _has_asset_sibling() -> bool:
            try:
                for ch in md_dir.iterdir():
                    if ch.is_dir() and _is_asset_dir(ch.name):
                        for f in ch.rglob("*"):
                            if f.is_file() and f.suffix.lower() in exts:
                                return True
                for f in md_dir.iterdir():
                    if f.is_file() and f.suffix.lower() in exts:
                        return True
            except Exception:
                pass
            return False

        def _mappings_indicate_local_assets() -> bool:
            try:
                for m in mappings or []:
                    if isinstance(m.get("from"), str):
                        s = str(m.get("from") or "").strip()
                        if s and not (s.startswith("http://") or s.startswith("https://") or s.startswith("data:") or s.startswith("file://")):
                            return True
            except Exception:
                pass
            return False

        try:
            # Drop the second-to-last component: "a/b/file.md" -> "a/file.md".
            new_parts = parts[:-2] + [parts[-1]]
            return Path("/".join(new_parts))
        except Exception:
            pass
        return rel
    except Exception:
        return rel


def _inject_image_urls_for_markers(text: str, urls: List[str]) -> str:
    if not urls:
        return text
    out = []
    i = 0
    for line in text.splitlines():
        if "<!-- image -->" in line and i < len(urls):
            # Assumed intent: swap each placeholder for a markdown image
            # reference to the next uploaded URL (callers count the remaining
            # "<!-- image -->" markers before and after this call).
            line = line.replace("<!-- image -->", f"![]({urls[i]})")
            i += 1
        out.append(line)
    return "\n".join(out)


def _extract_pdf_images(pdf_path: Path) -> List[Tuple[str, bytes]]:
    imgs: List[Tuple[str, bytes]] = []
    if fitz is None:
        return imgs
    try:
        doc = fitz.open(pdf_path)
        for page in doc:
            for xref in page.get_images(full=True):
                try:
                    info = doc.extract_image(xref[0])
                    ext = info.get("ext", "png")
                    data = info.get("image", b"")
                    if data:
                        imgs.append((ext, data))
                except Exception:
                    continue
        doc.close()
    except Exception:
        pass
    return imgs
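

# _extract_pdf_images above walks every page with PyMuPDF (fitz) and pulls
# each embedded image's bytes via Document.extract_image(xref); the returned
# (ext, bytes) pairs are later uploaded as "converted/<base>_img_<n>.<ext>"
# objects. It degrades to an empty list when PyMuPDF is not installed.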


def _bulk_upload_assets(root: Path, client: object, bucket: str, public_base: str, prefix: str) -> List[str]:
    urls: List[str] = []
    exts = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg", ".webp", ".tif", ".tiff", ".ico", ".jfif", ".heic", ".heif", ".emf", ".wmf", ".eps", ".psd"}
    for f in root.rglob("*"):
        try:
            if not f.is_file():
                continue
            if f.suffix.lower() not in exts:
                continue
            data = f.read_bytes()
            mime = detect_mime(f.name, data)
            k = f.resolve().as_posix().lstrip("/").replace(":", "")
            obj = join_prefix(prefix, f"abs/{k}")
            bio = io.BytesIO(data)
            client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(data), content_type=mime)  # type: ignore
            try:
                # Percent-encode the object key, matching the URL style used
                # by the other upload helpers in this module.
                from urllib.parse import quote as _quote
                urls.append(f"{public_base}/{bucket}/{_quote(obj, safe='/')}")
            except Exception:
                urls.append(f"{public_base}/{bucket}/{obj}")
        except Exception:
            pass
    return urls
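

# _bulk_upload_assets above mirrors every raster/vector asset under `root`
# into the bucket under "<prefix>/abs/<absolute-path-key>", the exact object
# names that _rewrite_md_assets_to_minio's _upload() computes for resolved
# local paths, so both code paths converge on the same stored objects.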


@app.post("/md/convert-folder")
async def md_convert_folder(folder_path: str = Form(...), prefix: Optional[str] = Form(None)):
    """
    @function md_convert_folder
    @description Rewrite asset links in every .md under a local folder and upload the results to MinIO
    @param folder_path Existing local directory to scan
    @param prefix Optional object-name prefix (falls back to the configured one)
    """
    p = Path(folder_path).expanduser().resolve()
    if not p.exists() or not p.is_dir():
        raise HTTPException(status_code=400, detail="folder_path must be an existing directory")
    client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG)
    if client is None or bucket is None or not public_base:
        raise HTTPException(status_code=400, detail="MinIO is not configured")
    use_prefix = (prefix or env_prefix or "").strip()
    processed: List[Dict[str, str]] = []
    try:
        _bulk_upload_assets(p, client, bucket, public_base, use_prefix)
    except Exception:
        pass
    for md_file in p.rglob("*.md"):
        rel_md = md_file.relative_to(p)
        rel_uplift_path = rel_md
        minio_url: Optional[str] = None
        minio_presigned_url: Optional[str] = None
        mappings: List[Dict[str, str]] = []
        try:
            content = md_file.read_text("utf-8", errors="ignore")
            new_text, mappings = _rewrite_md_assets_to_minio(content, md_file.parent, client, bucket, public_base, use_prefix, search_root=p)
            rel_uplift_path = _uplift_rel_path(rel_md, md_file.parent, p, mappings)
            # Upload the rewritten md to MinIO
            obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift_path.as_posix()}".lstrip("/")
            raw = new_text.encode("utf-8")
            bio = io.BytesIO(raw)
            client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type="text/markdown; charset=utf-8")  # type: ignore
            try:
                from urllib.parse import quote as _quote
                minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
            except Exception:
                minio_url = f"{public_base}/{bucket}/{obj}"
            minio_url_display = unquote(minio_url)
            try:
                exp = int(timedelta(hours=12).total_seconds())
                minio_presigned_url = presigned_read(client, bucket, obj, exp) if client is not None else None
            except Exception:
                minio_presigned_url = None
        except Exception as e:
            logging.error(str(e))
        okc = sum(1 for m in mappings if m.get("ok"))
        frc = sum(1 for m in mappings if not m.get("ok"))
        asset_urls = [m.get("to") for m in mappings if m.get("ok") and m.get("to")]
        processed.append({
            "source": rel_uplift_path.as_posix(),
            "output": None,
            "minio_url": minio_url,
            "minio_presigned_url": minio_presigned_url,
            "mappings": mappings,
            "asset_ok": okc,
            "asset_fail": frc,
            "asset_urls": asset_urls,
        })
    return {"ok": True, "count": len(processed), "files": processed}


@app.post("/md/upload-folder")
async def md_upload_folder(folder_files: List[UploadFile] = File(None), folder_paths: List[str] = Form(None), prefix: Optional[str] = Form(None)):
    """
    @function md_upload_folder
    @description Stage an uploaded folder (files plus their relative paths), rewrite asset links in each .md, and upload the results to MinIO
    """
    if not folder_files or not folder_paths or len(folder_files) != len(folder_paths):
        raise HTTPException(status_code=400, detail="folder_files and folder_paths are required and must match in length")
    client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG)
    if client is None or bucket is None or not public_base:
        raise HTTPException(status_code=400, detail="MinIO is not configured")
    use_prefix = (prefix or env_prefix or "").strip()
    staging = Path(tempfile.mkdtemp(prefix="folder_stage_"))
    try:
        for f, rel in zip(folder_files, folder_paths):
            rel_norm = rel.replace("\\", "/")
            dest = staging / rel_norm
            dest.parent.mkdir(parents=True, exist_ok=True)
            dest.write_bytes(await f.read())
        base = staging
        try:
            _bulk_upload_assets(base, client, bucket, public_base, use_prefix)
        except Exception:
            pass
        processed: List[Dict[str, str]] = []
        for md_file in base.rglob("*.md"):
            try:
                content = md_file.read_text("utf-8", errors="ignore")
                new_text, mappings = _rewrite_md_assets_to_minio(content, md_file.parent, client, bucket, public_base, use_prefix, search_root=base)
                rel_md = md_file.relative_to(base)
                rel_uplift = _uplift_rel_path(rel_md, md_file.parent, base, mappings)
                try:
                    obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
                    raw = new_text.encode("utf-8")
                    bio = io.BytesIO(raw)
                    client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type="text/markdown; charset=utf-8")  # type: ignore
                    try:
                        from urllib.parse import quote as _quote
                        minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
                    except Exception:
                        minio_url = f"{public_base}/{bucket}/{obj}"
                    minio_presigned_url = None
                    try:
                        exp = int(timedelta(hours=12).total_seconds())
                        minio_presigned_url = presigned_read(client, bucket, obj, exp) if client is not None else None
                    except Exception:
                        minio_presigned_url = None
                except Exception:
                    minio_url = None
                    minio_presigned_url = None
                okc = sum(1 for m in mappings if m.get("ok"))
                frc = sum(1 for m in mappings if not m.get("ok"))
                asset_urls = [m.get("to") for m in mappings if m.get("ok") and m.get("to")]
                processed.append({
                    "source": rel_uplift.as_posix(),
                    "output": None,
                    "minio_url": minio_url,
                    "minio_presigned_url": minio_presigned_url,
                    "mappings": mappings,
                    "asset_ok": okc,
                    "asset_fail": frc,
                    "asset_urls": asset_urls,
                })
            except Exception as e:
                logging.error(str(e))
        return {"ok": True, "count": len(processed), "files": processed}
    finally:
        try:
            shutil.rmtree(staging)
        except Exception:
            pass


logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")


def _is_debug(request: Request) -> bool:
    try:
        q = request.query_params.get("debug")
        if q and str(q).lower() in ("1", "true", "yes", "on"):
            return True
    except Exception:
        pass
    h = request.headers.get("X-Debug")
    if h and str(h).lower() in ("1", "true", "yes", "on"):
        return True
    env = os.environ.get("APP_DEBUG")
    if env and str(env).lower() in ("1", "true", "yes", "on"):
        return True
    return False


@app.middleware("http")
async def logging_middleware(request: Request, call_next):
    start = time.time()
    try:
        response = await call_next(request)
        duration = int((time.time() - start) * 1000)
        logging.info(f"{request.method} {request.url.path} -> {response.status_code} {duration}ms")
        return response
    except Exception as exc:
        duration = int((time.time() - start) * 1000)
        tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
        logging.error(f"{request.method} {request.url.path} FAILED {duration}ms: {exc}\n{tb}")
        raise


@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
    logging.error(f"HTTP error on {request.method} {request.url.path}: {exc}\n{tb}")
    debug = _is_debug(request)
    body = {"error": "http_error", "detail": exc.detail}
    if debug:
        body["trace"] = tb
    return JSONResponse(status_code=exc.status_code, content=body)


@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
    logging.error(f"Unhandled error on {request.method} {request.url.path}: {exc}\n{tb}")
    debug = _is_debug(request)
    body = {"error": "internal_error", "detail": str(exc)}
    if debug:
        body["trace"] = tb
    return JSONResponse(status_code=500, content=body)


def _safe_http_url(u: str) -> str:
    # Percent-encode path/query/fragment and IDNA-encode the host so that
    # urlopen() accepts URLs containing spaces or non-ASCII characters.
    try:
        parts = urlsplit(u)
        path = quote(parts.path, safe="/:%")
        query = quote(parts.query, safe="=&%")
        frag = quote(parts.fragment, safe="")
        netloc = parts.netloc
        try:
            userinfo = ""
            hostport = netloc
            if "@" in netloc:
                userinfo, hostport = netloc.split("@", 1)
                userinfo += "@"
            if hostport.startswith("["):
                # Bracketed IPv6 literal: leave the host untouched.
                netloc = userinfo + hostport
            else:
                port = ""
                host = hostport
                if ":" in hostport:
                    host, port = hostport.rsplit(":", 1)
                    if port and not port.isdigit():
                        host = hostport
                        port = ""
                try:
                    host_idna = host.encode("idna").decode("ascii")
                except Exception:
                    host_idna = host
                netloc = f"{userinfo}{host_idna}{(':' + port) if port else ''}"
        except Exception:
            pass
        return urlunsplit((parts.scheme, netloc, path, query, frag))
    except Exception:
        return u
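

# Rough effect of _safe_http_url above (IDNA host, percent-encoded path):
#   _safe_http_url("http://例え.jp/a b?x=1")
#   -> "http://xn--r8jz45g.jp/a%20b?x=1"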


# ──────────────────────────────────────────────────────────────────────────────
# API v2 endpoints with a standard code/msg/data envelope
# ──────────────────────────────────────────────────────────────────────────────

_converter_v2 = FormatConverter()


def _ok(data: dict, msg: str = "ok"):
    return JSONResponse({"code": 0, "msg": msg, "data": data})


def _err(msg: str, code: int = 500, detail: object = None):
    # Errors are reported inside the envelope; the HTTP status stays 200 so
    # clients only need to inspect the "code" field.
    payload = {"code": code, "msg": msg, "data": None}
    if detail is not None:
        payload["detail"] = detail
    return JSONResponse(payload, status_code=200)
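

# Envelope shapes produced by the helpers above:
#   _ok({"name": "a.md"}) -> {"code": 0,   "msg": "ok",   "data": {"name": "a.md"}}
#   _err("boom", 500)     -> {"code": 500, "msg": "boom", "data": null}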


@app.post("/api/convert")
async def api_convert(
    file: Optional[UploadFile] = File(None),
    source_url: Optional[str] = Form(None),
    export: str = Form("markdown"),
    engine: Optional[str] = Form(None),
    save: Optional[bool] = Form(False),
    filename: Optional[str] = Form(None),
):
    """
    @function api_convert
    @description v2 conversion endpoint: convert an uploaded file or a source URL, rewriting markdown assets to MinIO and optionally saving the result
    @param export Target export format (default: markdown)
    @param engine Optional converter engine override
    @param save When true, persist the converted output to MinIO instead of returning the content inline
    """
    try:
        if (file is None and not source_url) or (file is not None and source_url):
            return _err("Invalid parameters: provide exactly one of file or source_url")
        export = _normalize_export(export)
        engine = _normalize_engine(engine)
        if source_url:
            enc, content, artifacts_dir = await asyncio.to_thread(_converter_v2.convert, source_url, export=export, engine=engine)
            base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(source_url, None))
            out_ext = _export_ext(export)
            ct = _media_type(export)
            mappings: List[Dict[str, str]] = []
            trace: List[str] = []
            trace.append(f"source_url={source_url}")
            trace.append(f"export={export}")
            if artifacts_dir:
                trace.append(f"artifacts_dir={artifacts_dir}")
            if export.lower() == "markdown":
                try:
                    client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
                    if client is not None and bucket and public_base:
                        trace.append(f"minio bucket={bucket} public={public_base} prefix={(prefix or '').strip('/')}")
                        base_dir = Path(artifacts_dir) if artifacts_dir else Path(tempfile.mkdtemp(prefix="md_assets_"))
                        new_text, ms = _rewrite_md_assets_to_minio(content, base_dir, client, bucket, public_base, prefix, search_root=(Path(artifacts_dir) if artifacts_dir else None))
                        urls: List[str] = []
                        if artifacts_dir:
                            try:
                                urls = _bulk_upload_assets(Path(artifacts_dir), client, bucket, public_base, prefix)
                            except Exception:
                                urls = []
                        trace.append(f"asset_urls={len(urls)}")
                        try:
                            if source_url:
                                src_path: Optional[Path] = None
                                if source_url.startswith('file://') or Path(source_url).exists():
                                    src_path = Path(source_url.replace('file://', ''))
                                elif source_url.startswith('http://') or source_url.startswith('https://'):
                                    import tempfile as _tf
                                    from urllib.request import urlopen
                                    with _tf.NamedTemporaryFile(delete=False, suffix=Path(infer_basename(source_url, None)).suffix or '.bin') as _tmp:
                                        try:
                                            with urlopen(source_url) as resp:
                                                _tmp.write(resp.read())
                                        finally:
                                            _tmp.flush(); _tmp.close()
                                    src_path = Path(_tmp.name)
                                if src_path and src_path.exists() and str(src_path).lower().endswith('.pdf'):
                                    pdf_imgs = _extract_pdf_images(src_path)
                                    base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(source_url, None))
                                    extra_urls: List[str] = []
                                    for idx, (img_ext, data) in enumerate(pdf_imgs):
                                        obj = join_prefix(prefix, f"converted/{base}_img_{idx}.{img_ext}")
                                        bio = io.BytesIO(data)
                                        mime = "image/png" if img_ext.lower() == "png" else "image/jpeg"
                                        client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(data), content_type=mime)  # type: ignore
                                        try:
                                            from urllib.parse import quote as _quote
                                            obj_enc = _quote(obj, safe="/")
                                            extra_urls.append(f"{public_base}/{bucket}/{obj_enc}")
                                        except Exception:
                                            extra_urls.append(f"{public_base}/{bucket}/{obj}")
                                    urls.extend(extra_urls)
                                    trace.append(f"pdf_imgs_uploaded={len(extra_urls)}")
                                    if source_url.startswith('http://') or source_url.startswith('https://'):
                                        try:
                                            os.unlink(str(src_path))
                                        except Exception:
                                            pass
                        except Exception:
                            pass
                        before = new_text.count("<!-- image -->")
                        new_text = _inject_image_urls_for_markers(new_text, urls)
                        after = new_text.count("<!-- image -->")
                        trace.append(f"image_placeholders_before={before} after={after}")
                        content = new_text
                        mappings = ms
                except Exception:
                    pass
            if not save:
                resp = _ok({"encoding": enc, "content": content, "name": f"{base}{out_ext}", "media_type": ct, "mappings": mappings, "trace": trace})
                try:
                    if artifacts_dir:
                        shutil.rmtree(artifacts_dir, ignore_errors=True)
                except Exception:
                    pass
                return resp
            client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
            if client is None or not bucket or not public_base:
                return _err("MinIO is not configured; cannot save")
            out_name = f"{base}{out_ext}"
            if export.lower() == "markdown" and not out_name.lower().endswith(".md"):
                out_name = f"{base}.md"
            obj = join_prefix(prefix, f"converted/{out_name}")
            raw = content.encode("utf-8")
            bio = io.BytesIO(raw)
            client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type=ct)  # type: ignore
            try:
                from urllib.parse import quote as _quote
                minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
            except Exception:
                minio_url = f"{public_base}/{bucket}/{obj}"
            minio_url_display = unquote(minio_url)
            try:
                trace.append(f"save out_name={out_name}")
                trace.append(f"save obj={obj}")
                trace.append(f"save minio_url={minio_url}")
            except Exception:
                pass
            exp = int(timedelta(hours=12).total_seconds())
            minio_presigned_url = presigned_read(client, bucket, obj, exp)
            resp = _ok({
                "encoding": enc,
                "name": out_name,
                "media_type": ct,
                "minio_url": minio_url,
                "minio_presigned_url": minio_presigned_url,
                "minio_url_display": minio_url_display,
                "mappings": mappings,
                "trace": trace,
            })
            try:
                if artifacts_dir:
                    shutil.rmtree(artifacts_dir, ignore_errors=True)
            except Exception:
                pass
            return resp
        assert file is not None
        suffix = ""
        if file.filename and "." in file.filename:
            suffix = "." + file.filename.rsplit(".", 1)[-1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(await file.read())
            tmp_path = tmp.name
        try:
            enc, content, artifacts_dir = await asyncio.to_thread(_converter_v2.convert, tmp_path, export=export, engine=engine)
            base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(None, file.filename))
            out_ext = _export_ext(export)
            ct = _media_type(export)
            mappings: List[Dict[str, str]] = []
            trace: List[str] = []
            trace.append(f"file={file.filename}")
            trace.append(f"tmp_path={tmp_path}")
            trace.append(f"export={export}")
            if artifacts_dir:
                trace.append(f"artifacts_dir={artifacts_dir}")
            if export.lower() == "markdown":
                try:
                    client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
                    if client is not None and bucket and public_base:
                        trace.append(f"minio bucket={bucket} public={public_base} prefix={(prefix or '').strip('/')}")
                        base_dir = Path(artifacts_dir) if artifacts_dir else Path(tempfile.mkdtemp(prefix="md_assets_"))
                        new_text, ms = _rewrite_md_assets_to_minio(content, base_dir, client, bucket, public_base, prefix, search_root=(Path(artifacts_dir) if artifacts_dir else None))
                        urls: List[str] = []
                        if artifacts_dir:
                            try:
                                urls = _bulk_upload_assets(Path(artifacts_dir), client, bucket, public_base, prefix)
                            except Exception:
                                urls = []
                        trace.append(f"asset_urls={len(urls)}")
                        try:
                            # tmp_path is a str; wrap it in Path before the
                            # exists()/suffix checks and the image extraction.
                            src_pdf = Path(tmp_path)
                            if src_pdf.exists() and src_pdf.suffix.lower() == '.pdf':
                                pdf_imgs = _extract_pdf_images(src_pdf)
                                base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(None, file.filename))
                                extra_urls: List[str] = []
                                for idx, (img_ext, data) in enumerate(pdf_imgs):
                                    obj = join_prefix(prefix, f"converted/{base}_img_{idx}.{img_ext}")
                                    bio = io.BytesIO(data)
                                    mime = "image/png" if img_ext.lower() == "png" else "image/jpeg"
                                    client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(data), content_type=mime)  # type: ignore
                                    try:
                                        from urllib.parse import quote as _quote
                                        extra_urls.append(f"{public_base}/{bucket}/{_quote(obj, safe='/')}")
                                    except Exception:
                                        extra_urls.append(f"{public_base}/{bucket}/{obj}")
                                urls.extend(extra_urls)
                                trace.append(f"pdf_imgs_uploaded={len(extra_urls)}")
                        except Exception:
                            pass
                        before = new_text.count("<!-- image -->")
                        new_text = _inject_image_urls_for_markers(new_text, urls)
                        after = new_text.count("<!-- image -->")
                        trace.append(f"image_placeholders_before={before} after={after}")
                        content = new_text
                        mappings = ms
                except Exception:
                    pass
            if not save:
                resp = _ok({"encoding": enc, "content": content, "name": f"{base}{out_ext}", "media_type": ct, "mappings": mappings, "trace": trace})
                try:
                    if artifacts_dir:
                        shutil.rmtree(artifacts_dir, ignore_errors=True)
                except Exception:
                    pass
                return resp
            client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
            if client is None or not bucket or not public_base:
                return _err("MinIO is not configured; cannot save")
            out_name = f"{base}{out_ext}"
            if export.lower() == "markdown" and not out_name.lower().endswith(".md"):
                out_name = f"{base}.md"
            obj = join_prefix(prefix, f"converted/{out_name}")
            raw = content.encode("utf-8")
            bio = io.BytesIO(raw)
            client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type=ct)  # type: ignore
            try:
                from urllib.parse import quote as _quote
                minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
            except Exception:
                minio_url = f"{public_base}/{bucket}/{obj}"
            minio_url_display = unquote(minio_url)
            try:
                trace.append(f"save out_name={out_name}")
                trace.append(f"save obj={obj}")
                trace.append(f"save minio_url={minio_url}")
            except Exception:
                pass
            exp = int(timedelta(hours=12).total_seconds())
            minio_presigned_url = presigned_read(client, bucket, obj, exp)
            resp = _ok({
                "encoding": enc,
                "name": out_name,
                "media_type": ct,
                "minio_url": minio_url,
                "minio_presigned_url": minio_presigned_url,
                "minio_url_display": minio_url_display,
                "mappings": mappings,
                "trace": trace,
            })
            try:
                if artifacts_dir:
                    shutil.rmtree(artifacts_dir, ignore_errors=True)
            except Exception:
                pass
            return resp
        finally:
            try:
                os.remove(tmp_path)
            except Exception:
                pass
    except HTTPException as e:
        return _err(str(e.detail), 400)
    except Exception as e:
        return _err(str(e))
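

# Usage sketch for /api/convert above (host/port assumed):
#   curl -X POST http://localhost:8000/api/convert \
#        -F "file=@report.pdf" -F "export=markdown" -F "save=true"
#   curl -X POST http://localhost:8000/api/convert \
#        -F "source_url=https://example.com/doc.docx" -F "export=html"
# Responses use the {code, msg, data} envelope defined by _ok/_err.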


@app.post("/api/import/convert")
async def api_import_convert(json_file: UploadFile = File(None), json_text: Optional[str] = Form(None), path: Optional[str] = Form(None), versionId: Optional[int] = Form(1001), download: Optional[bool] = Form(False)):
    """
    @function api_import_convert
    @description Build an import tree from a JSON description supplied as an upload, inline text, or a server-side path
    """
    try:
        raw_text: Optional[str] = None
        if json_file is not None:
            raw = await json_file.read()
            raw_text = raw.decode("utf-8", errors="ignore")
        elif json_text:
            raw_text = json_text
        else:
            use_path = (path or "import.json").strip()
            p = Path(use_path).expanduser()
            if not p.exists():
                return _err(f"File not found: {use_path}")
            raw_text = p.read_text("utf-8", errors="ignore")
        import json as _json
        data = _json.loads(raw_text or "{}")
        files = data.get("files", [])
        if not isinstance(files, list):
            return _err("Invalid JSON structure: missing 'files' array")
        imp = _build_import_tree(files, int(versionId or 1001))
        if download:
            from fastapi.responses import StreamingResponse
            b = _json.dumps(imp, ensure_ascii=False, indent=2).encode("utf-8")
            return StreamingResponse(io.BytesIO(b), media_type="application/json; charset=utf-8", headers={"Content-Disposition": "attachment; filename=import.json"})
        return _ok({"import": imp})
    except Exception as e:
        return _err(str(e))


@app.post("/api/upload-archive")
async def api_upload_archive(file: UploadFile = File(...), prefix: Optional[str] = Form(None)):
    """
    @function api_upload_archive
    @description Extract an uploaded zip/tar archive, upload its assets, rewrite every .md (and convert .html to .md), and return per-file MinIO URLs plus an import tree
    """
    try:
        client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG)
        if client is None or bucket is None or not public_base:
            return _err("MinIO is not configured")
        use_prefix = (prefix or env_prefix or "").strip()
        suffix = (file.filename or "").lower()
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        data = await file.read()
        tmp.write(data)
        tmp.flush(); tmp.close()
        root = Path(tempfile.mkdtemp(prefix="extract_"))
        try:
            if suffix.endswith(".zip"):
                import zipfile
                with zipfile.ZipFile(tmp.name, "r") as zf:
                    _zip_extract_safely(zf, root)
            elif ".tar" in suffix or suffix.endswith(".tgz") or suffix.endswith(".tar.gz") or suffix.endswith(".tar.bz2") or suffix.endswith(".tar.xz"):
                import tarfile
                with tarfile.open(tmp.name, "r:*") as tf:
                    _tar_extract_safely(tf, root)
            else:
                return _err("Unsupported archive format")
            try:
                _bulk_upload_assets(root, client, bucket, public_base, use_prefix)
            except Exception:
                pass
            files = []
            # Process Markdown files as-is
            for md in root.rglob("*.md"):
                try:
                    text = md.read_text("utf-8", errors="ignore")
                    new_text, mappings = _rewrite_md_assets_to_minio(text, md.parent, client, bucket, public_base, use_prefix, search_root=root)
                    rel_md = md.relative_to(root)
                    rel_uplift = _uplift_rel_path(rel_md, md.parent, root, mappings)
                    obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
                    raw = new_text.encode("utf-8")
                    bio = io.BytesIO(raw)
                    client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type="text/markdown; charset=utf-8")  # type: ignore
                    try:
                        url = f"{public_base}/{bucket}/{quote(obj, safe='/')}"
                    except Exception:
                        url = f"{public_base}/{bucket}/{obj}"
                    url_display = unquote(url)
                    exp = int(timedelta(hours=12).total_seconds())
                    ps = presigned_read(client, bucket, obj, exp) if client is not None else None
                    files.append({
                        "source": rel_uplift.as_posix(),
                        "minio_url": url,
                        "minio_presigned_url": ps,
                        "minio_url_display": url_display,
                        "mappings": mappings,
                        "object_name": obj,
                        "size": len(raw),
                    })
                except Exception:
                    files.append({"source": (md.relative_to(root).as_posix()), "minio_url": None, "minio_presigned_url": None, "mappings": [], "object_name": None, "size": 0})

            # Convert HTML files to Markdown and process similarly
            for html in [p for p in root.rglob("*") if p.is_file() and p.suffix.lower() in {".html", ".htm"}]:
                tmpd = None
                try:
                    # Skip if a sibling Markdown already exists for the same base name
                    rel_html = html.relative_to(root)
                    md_target_rel = rel_html.with_suffix(".md")
                    md_sibling = (root / md_target_rel).exists()
                    if md_sibling:
                        continue
                    html_src = html.read_text("utf-8", errors="ignore")
                    html_rew, mappings = _rewrite_md_assets_to_minio(html_src, html.parent, client, bucket, public_base, use_prefix, search_root=root)
                    tmpd = Path(tempfile.mkdtemp(prefix="rew_html_"))
                    tmpf = tmpd / html.name
                    tmpf.write_text(html_rew, "utf-8")
                    enc, md_text, _art = _converter_v2.convert(str(tmpf), export="markdown")
                    md_text2, mappings2 = _rewrite_md_assets_to_minio(md_text, html.parent, client, bucket, public_base, use_prefix, search_root=root)
                    mappings = (mappings or []) + (mappings2 or [])
                    new_text = md_text2
                    rel_uplift = _uplift_rel_path(md_target_rel, html.parent, root, mappings)
                    obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
                    raw = new_text.encode(enc or "utf-8")
                    bio = io.BytesIO(raw)
                    client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type="text/markdown; charset=utf-8")  # type: ignore
                    try:
                        url = f"{public_base}/{bucket}/{quote(obj, safe='/')}"
                    except Exception:
                        url = f"{public_base}/{bucket}/{obj}"
                    url_display = unquote(url)
                    exp = int(timedelta(hours=12).total_seconds())
                    ps = presigned_read(client, bucket, obj, exp) if client is not None else None
                    files.append({
                        "source": rel_uplift.as_posix(),
                        "minio_url": url,
                        "minio_presigned_url": ps,
                        "minio_url_display": url_display,
                        "mappings": mappings,
                        "object_name": obj,
                        "size": len(raw),
                    })
                except Exception:
                    files.append({"source": (html.relative_to(root).as_posix()), "minio_url": None, "minio_presigned_url": None, "mappings": [], "object_name": None, "size": 0})
                finally:
                    try:
                        if tmpd is not None:
                            shutil.rmtree(tmpd, ignore_errors=True)
                    except Exception:
                        pass
            imp = _build_import_tree(files, int(1001))
            return _ok({"count": len(files), "files": files, "import": imp})
        finally:
            try:
                os.unlink(tmp.name)
            except Exception:
                pass
            try:
                shutil.rmtree(root)
            except Exception:
                pass
    except Exception as e:
        return _err(str(e))


STAGED_ARCHIVES: Dict[str, Dict[str, object]] = {}


def _build_import_tree(processed: List[Dict[str, object]], version_id: int) -> Dict[str, object]:
    def ensure_folder(children: list, name: str) -> Dict[str, object]:
        for n in children:
            if isinstance(n, dict) and n.get("name") == name and n.get("type") == "FOLDER":
                return n
        node = {"name": name, "type": "FOLDER", "children": [], "sortOrder": 100}
        children.append(node)
        return node

    tree: List[Dict[str, object]] = []
    for idx, f in enumerate(processed):
        src = str(f.get("source") or "")
        obj = str(f.get("object_name") or "")
        size = int(f.get("size") or 0)
        parts = [p for p in src.split("/") if p]
        if not parts:
            continue
        cur = tree
        for d in parts[:-1]:
            folder = ensure_folder(cur, d)
            cur = folder.setdefault("children", [])  # type: ignore
        fname = parts[-1]
        base = fname.rsplit(".", 1)[0]
        file_node = {"name": base, "type": "FILE", "sortOrder": 100 + idx, "files": [{"languageId": 1, "objectName": obj, "fileName": fname, "fileSize": size}]}
        cur.append(file_node)  # type: ignore
    return {"versionId": version_id, "tree": tree}


@app.post("/api/archive/stage")
async def api_archive_stage(file: UploadFile = File(...), prefix: Optional[str] = Form(None)):
    try:
        suffix = (file.filename or "").lower()
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        data = await file.read()
        tmp.write(data)
        tmp.flush(); tmp.close()
        sid = uuid.uuid4().hex
        STAGED_ARCHIVES[sid] = {"path": tmp.name, "prefix": (prefix or "")}
        return _ok({"id": sid, "name": file.filename, "size": len(data)})
    except Exception as e:
        return _err(str(e))


@app.post("/api/archive/process")
async def api_archive_process(id: str = Form(...), prefix: Optional[str] = Form(None), versionId: Optional[int] = Form(1001)):
    try:
        st = STAGED_ARCHIVES.get(id)
        if not st:
            return _err("No staged archive found for this id")
        tmp_path = Path(str(st.get("path")))
        use_prefix_param = (prefix or str(st.get("prefix") or "")).strip()
        client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG)
        if client is None or bucket is None or not public_base:
            return _err("MinIO is not configured")
        use_prefix = (use_prefix_param or env_prefix or "").strip()
        root = Path(tempfile.mkdtemp(prefix="extract_"))
        try:
            sfx = tmp_path.name.lower()
            if sfx.endswith(".zip"):
                import zipfile
                with zipfile.ZipFile(str(tmp_path), "r") as zf:
                    _zip_extract_safely(zf, root)
            elif ".tar" in sfx or sfx.endswith(".tgz") or sfx.endswith(".tar.gz") or sfx.endswith(".tar.bz2") or sfx.endswith(".tar.xz"):
                import tarfile
                with tarfile.open(str(tmp_path), "r:*") as tf:
                    _tar_extract_safely(tf, root)
            else:
                return _err("Unsupported archive format")
            try:
                _bulk_upload_assets(root, client, bucket, public_base, use_prefix)
            except Exception:
                pass
            processed: List[Dict[str, object]] = []
            # Process existing Markdown files
            for md in root.rglob("*.md"):
                try:
                    text = md.read_text("utf-8", errors="ignore")
                    new_text, mappings = _rewrite_md_assets_to_minio(text, md.parent, client, bucket, public_base, use_prefix, search_root=root)
                    rel_md = md.relative_to(root)
                    rel_uplift = _uplift_rel_path(rel_md, md.parent, root, mappings)
                    obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
                    raw = new_text.encode("utf-8")
                    bio = io.BytesIO(raw)
                    client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type="text/markdown; charset=utf-8")  # type: ignore
                    try:
                        from urllib.parse import quote as _quote
                        url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
                    except Exception:
                        url = f"{public_base}/{bucket}/{obj}"
                    exp = int(timedelta(hours=12).total_seconds())
                    ps = presigned_read(client, bucket, obj, exp) if client is not None else None
                    processed.append({"source": rel_uplift.as_posix(), "minio_url": url, "minio_presigned_url": ps, "mappings": mappings, "object_name": obj, "size": len(raw)})
                except Exception:
                    processed.append({"source": (md.relative_to(root).as_posix()), "minio_url": None, "minio_presigned_url": None, "mappings": [], "object_name": None, "size": 0})

            # Convert HTML files to Markdown and process
            for html in [p for p in root.rglob("*") if p.is_file() and p.suffix.lower() in {".html", ".htm"}]:
                tmpd = None
                try:
                    rel_html = html.relative_to(root)
                    md_target_rel = rel_html.with_suffix(".md")
                    md_sibling = (root / md_target_rel).exists()
                    if md_sibling:
                        continue
                    html_src = html.read_text("utf-8", errors="ignore")
                    html_rew, mappings = _rewrite_md_assets_to_minio(html_src, html.parent, client, bucket, public_base, use_prefix, search_root=root)
                    tmpd = Path(tempfile.mkdtemp(prefix="rew_html_"))
                    tmpf = tmpd / html.name
                    tmpf.write_text(html_rew, "utf-8")
                    enc, md_text, _art = _converter_v2.convert(str(tmpf), export="markdown")
                    md_text2, mappings2 = _rewrite_md_assets_to_minio(md_text, html.parent, client, bucket, public_base, use_prefix, search_root=root)
                    mappings = (mappings or []) + (mappings2 or [])
                    new_text = md_text2
                    rel_uplift = _uplift_rel_path(md_target_rel, html.parent, root, mappings)
                    obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
                    raw = new_text.encode(enc or "utf-8")
                    bio = io.BytesIO(raw)
                    client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type="text/markdown; charset=utf-8")  # type: ignore
                    try:
                        from urllib.parse import quote as _quote
                        url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
                    except Exception:
                        url = f"{public_base}/{bucket}/{obj}"
                    exp = int(timedelta(hours=12).total_seconds())
                    ps = presigned_read(client, bucket, obj, exp) if client is not None else None
                    processed.append({"source": rel_uplift.as_posix(), "minio_url": url, "minio_presigned_url": ps, "mappings": mappings, "object_name": obj, "size": len(raw)})
                except Exception:
                    processed.append({"source": (html.relative_to(root).as_posix()), "minio_url": None, "minio_presigned_url": None, "mappings": [], "object_name": None, "size": 0})
                finally:
                    try:
                        if tmpd is not None:
                            shutil.rmtree(tmpd, ignore_errors=True)
                    except Exception:
                        pass
            imp = _build_import_tree(processed, int(versionId or 1001))
            return _ok({"count": len(processed), "files": processed, "import": imp})
        finally:
            try:
                os.unlink(str(tmp_path))
            except Exception:
                pass
            try:
                shutil.rmtree(root)
            except Exception:
                pass
            try:
                STAGED_ARCHIVES.pop(id, None)
            except Exception:
                pass
    except Exception as e:
        return _err(str(e))


@app.post("/api/upload-list")
async def api_upload_list(list_file: UploadFile = File(...), prefix: Optional[str] = Form(None), versionId: Optional[int] = Form(1001)):
    try:
        client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG)
        if client is None or bucket is None or not public_base:
            return _err("MinIO is not configured")
        use_prefix = (prefix or env_prefix or "").strip()
        raw = await list_file.read()
        text = raw.decode("utf-8", errors="ignore")
        lines = [l.strip() for l in text.splitlines()]
        paths: List[str] = [l for l in lines if l and not l.startswith("#")]
        # Collect existing local files; remote URLs in the list are ignored.
        local_paths: List[Path] = []
        for p in paths:
            if p.startswith("http://") or p.startswith("https://"):
                continue
            lp = Path(p).expanduser()
            if lp.exists() and lp.is_file():
                local_paths.append(lp.resolve())
        base_root = None
        try:
            if local_paths:
                base_root = Path(os.path.commonpath([str(x) for x in local_paths]))
        except Exception:
            base_root = None
        processed: List[Dict[str, object]] = []
        for p in local_paths:
            try:
                content = p.read_text("utf-8", errors="ignore")
                new_text, mappings = _rewrite_md_assets_to_minio(content, p.parent, client, bucket, public_base, use_prefix, search_root=base_root)
                rel0 = p.relative_to(base_root) if base_root else Path(p.name)
                rel_uplift = _uplift_rel_path(rel0, p.parent, base_root, mappings)
                obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
                raw_md = new_text.encode("utf-8")
                bio = io.BytesIO(raw_md)
                client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw_md), content_type="text/markdown; charset=utf-8")  # type: ignore
                try:
                    from urllib.parse import quote as _quote
                    url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}"
                except Exception:
                    url = f"{public_base}/{bucket}/{obj}"
                exp = int(timedelta(hours=12).total_seconds())
                ps = presigned_read(client, bucket, obj, exp) if client is not None else None
                processed.append({"source": rel_uplift.as_posix(), "minio_url": url, "minio_presigned_url": ps, "mappings": mappings, "object_name": obj, "size": len(raw_md)})
            except Exception:
                processed.append({"source": p.name, "minio_url": None, "minio_presigned_url": None, "mappings": [], "object_name": None, "size": 0})
        imp = _build_import_tree(processed, int(versionId or 1001))
        return _ok({"count": len(processed), "files": processed, "import": imp})
    except Exception as e:
        return _err(str(e))


@app.get("/config/minio/policy")
async def get_minio_policy(bucket: Optional[str] = None):
    """
    @function get_minio_policy
    @description Read the bucket policy, falling back to a raw GET ?policy request through the client's private internals when the SDK call fails
    """
    client, cfg_bucket, _, _ = minio_current(RUNTIME_CONFIG)
    if client is None:
        raise HTTPException(status_code=400, detail="MinIO is not configured")
    bkt = (bucket or cfg_bucket or "").strip()
    if not bkt:
        raise HTTPException(status_code=400, detail="bucket must not be empty")
    try:
        pol = client.get_bucket_policy(bucket_name=bkt)  # type: ignore
        try:
            import json as _json
            data = _json.loads(pol)
        except Exception:
            data = {"raw": pol}
        return {"ok": True, "bucket": bkt, "policy": data}
    except Exception:
        # Fallback: issue GET ?policy directly via minio-py private helpers.
        try:
            try:
                region = client._get_region(bkt)  # type: ignore
            except Exception:
                region = "us-east-1"
            resp = client._url_open(method="GET", region=region, bucket_name=bkt, query_params={"policy": ""})  # type: ignore
            raw = None
            try:
                raw = getattr(resp, "data", None)
                if raw is not None and hasattr(raw, "decode"):
                    raw = raw.decode("utf-8")
            except Exception:
                raw = None
            if raw is None:
                try:
                    raw = resp.read().decode("utf-8")  # type: ignore
                except Exception:
                    raw = ""
            try:
                import json as _json
                data = _json.loads(raw)
            except Exception:
                data = {"raw": raw}
            return {"ok": True, "bucket": bkt, "policy": data}
        except Exception as e2:
            return {"ok": False, "bucket": bkt, "error": str(e2)}


@app.post("/config/minio/apply_public_read")
async def apply_public_read(bucket: Optional[str] = Form(None), enable: Optional[str] = Form("true")):
    """
    @function apply_public_read
    @description Apply (or remove) an anonymous read-only policy on a bucket
    @param enable "true" to grant public read; anything else deletes the policy
    """
    client, cfg_bucket, _, _ = minio_current(RUNTIME_CONFIG)
    if client is None:
        raise HTTPException(status_code=400, detail="MinIO is not configured")
    bkt = (bucket or cfg_bucket or "").strip()
    if not bkt:
        raise HTTPException(status_code=400, detail="bucket must not be empty")
    try:
        import json as _json
        if str(enable or "true").lower() in {"1", "true", "yes", "on"}:
            policy = {
                "Version": "2012-10-17",
                "Statement": [
                    {"Effect": "Allow", "Principal": "*", "Action": ["s3:GetBucketLocation", "s3:ListBucket"], "Resource": [f"arn:aws:s3:::{bkt}"]},
                    {"Effect": "Allow", "Principal": "*", "Action": ["s3:GetObject"], "Resource": [f"arn:aws:s3:::{bkt}/*"]},
                ],
            }
            try:
                client.set_bucket_policy(bucket_name=bkt, policy=_json.dumps(policy))  # type: ignore
                return {"ok": True, "bucket": bkt, "applied": True}
            except Exception:
                # Fallback: PUT ?policy directly via minio-py private helpers.
                try:
                    try:
                        region = client._get_region(bkt)  # type: ignore
                    except Exception:
                        region = "us-east-1"
                    raw = _json.dumps(policy).encode("utf-8")
                    client._url_open(method="PUT", region=region, bucket_name=bkt, query_params={"policy": ""}, body=raw)  # type: ignore
                    return {"ok": True, "bucket": bkt, "applied": True}
                except Exception as e2:
                    return {"ok": False, "bucket": bkt, "error": str(e2)}
        try:
            client.delete_bucket_policy(bkt)  # type: ignore
        except Exception:
            pass
        return {"ok": True, "bucket": bkt, "applied": False}
    except Exception as e:
        return {"ok": False, "bucket": bkt, "error": str(e)}