"""
@api Server Application
@description FastAPI server providing document conversion endpoints and MinIO integration
"""
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request, Query
from fastapi.responses import Response, HTMLResponse, JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pathlib import Path
import tempfile
import os
import asyncio
from typing import Optional, List, Dict, Tuple
from datetime import timedelta
import mimetypes
# NOTE: this import rebinds ``Request`` to urllib's class, shadowing the
# FastAPI ``Request`` imported above. Kept as-is because code further down
# the file may rely on the current binding.
from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError
from urllib.parse import urlsplit, urlunsplit, quote, unquote
import logging
import traceback
import time
import re
import io
import shutil
import uuid
import subprocess
import sys
import json

try:
    from minio import Minio  # type: ignore
    import urllib3  # type: ignore
except Exception:
    Minio = None
    urllib3 = None  # type: ignore

from pydantic import BaseModel


class ConvertResponse(BaseModel):
    """Response model declared for the /md/convert endpoint."""

    minio_url: Optional[str]
    minio_presigned_url: Optional[str]
    name: str
    media_type: str


class MinioPresignResponse(BaseModel):
    """Response model returned by the /minio/presign endpoint."""

    bucket: str
    object: str
    minio_url: Optional[str]
    minio_presigned_url: Optional[str]
    expires: int


try:
    import fitz  # type: ignore
except Exception:
    fitz = None  # type: ignore

from app.services.docling_adapter import (
    convert_source,
    md_to_docx_bytes,
    md_to_pdf_bytes_with_renderer,
    infer_basename,
    sanitize_filename,
    load_linkmap,
    save_linkmap,
)
from app.services.unified_converter import FormatConverter
from app.services.minio_utils import minio_current, join_prefix, presigned_read
from app.services.pdf_converter import (
    word_to_pdf_bytes,
    markdown_to_pdf_bytes,
    markdown_file_to_pdf_bytes,
    read_file_content,
)

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve the built frontend when it is present; every step is best effort so a
# missing or broken UI build never prevents the API from starting.
try:
    _ui_dir = Path(__file__).resolve().parents[2] / "frontend" / "dist"
    if _ui_dir.exists():
        app.mount("/ui", StaticFiles(directory=str(_ui_dir), html=True), name="ui")
        try:
            assets_dir = _ui_dir / "assets"
            if assets_dir.exists():
                app.mount("/assets", StaticFiles(directory=str(assets_dir)), name="assets")
        except Exception:
            pass
        try:
            svg_path = _ui_dir / "vite.svg"
            if svg_path.exists():

                @app.get("/vite.svg")
                def _vite_svg():
                    return FileResponse(str(svg_path), media_type="image/svg+xml")

        except Exception:
            pass
except Exception:
    pass


# String values treated as "enabled" for the textual boolean flags used in
# forms and in RUNTIME_CONFIG.
_TRUE_FLAGS = frozenset({"1", "true", "yes", "on"})

# Returned (unchanged text) whenever a MinIO console address is supplied where
# the S3 API address is required.
_CONSOLE_ENDPOINT_ERROR = "请使用 MinIO API 端口 9000(而非 9001 控制台)"

# Host names / IP literals accepted as NTP servers. The first character must be
# alphanumeric so a value can never be parsed as a command-line option, and no
# shell metacharacter is allowed because the value is embedded in a shell
# command on macOS.
_NTP_HOST_RE = re.compile(r"[A-Za-z0-9][A-Za-z0-9.:\-]{0,252}")


def _flag_enabled(value: object, default: str = "false") -> bool:
    """
    @function _flag_enabled
    @description Interpret a textual flag ("1", "true", "yes", "on") as a boolean
    @param value Raw flag value; falsy values fall back to ``default``
    @param default Textual value used when ``value`` is falsy
    @return True when the effective value is one of the enabled spellings
    """
    return str(value or default).lower() in _TRUE_FLAGS


def _split_authority_path(raw: str) -> Tuple[str, str]:
    """
    @function _split_authority_path
    @description Split "scheme://host:port/path", "//host/path" or "host:port/path" into authority and path
    @param raw Endpoint or URL text, with or without a scheme
    @return (authority, path) where path is "" or starts with "/"
    """
    text = raw.strip()
    if "://" in text:
        text = text.split("://", 1)[1]
    elif text.startswith("//"):
        text = text[2:]
    authority, slash, tail = text.partition("/")
    return authority, slash + tail


def _api_endpoint_host(raw: str) -> str:
    """
    @function _api_endpoint_host
    @description Reduce an endpoint given as URL or "host:port/..." to its "host[:port]" part
    @param raw Endpoint text
    @return Authority component of the endpoint
    """
    return _split_authority_path(raw)[0]


def _is_console_endpoint(raw: str) -> bool:
    """
    @function _is_console_endpoint
    @description Detect MinIO console addresses (port 9001 or /browser, /minio paths)
    @param raw Endpoint text
    @return True when the value points at the console instead of the S3 API
    """
    authority, path = _split_authority_path(raw)
    # Only the path is searched for the markers: searching the whole URL would
    # also match hosts such as "http://minio:9000" ("//minio" contains "/minio").
    return ":9001" in authority or "/browser" in path or "/minio" in path


def _bare_hostname(raw: str) -> str:
    """
    @function _bare_hostname
    @description Extract the host name without scheme, credentials, port or path
    @param raw URL or "host:port" text
    @return Host name (bracketed form is kept for IPv6 literals)
    """
    authority = _split_authority_path(raw)[0].rsplit("@", 1)[-1]
    if authority.startswith("["):
        end = authority.find("]")
        return authority[: end + 1] if end != -1 else authority
    return authority.split(":", 1)[0]


def _normalize_public_base(public: Optional[str], secure: object) -> Optional[str]:
    """
    @function _normalize_public_base
    @description Normalise the public base URL to "scheme://host[:port]"
    @param public Raw public address from the form (may lack a scheme)
    @param secure Textual secure flag used to choose the default scheme
    @return Normalised base URL, None for console addresses, the input when it is None/blank
    """
    if public is None:
        return None
    text = public.strip()
    if not text:
        return ""
    authority, path = _split_authority_path(text)
    if authority.endswith(":9001") or "/browser" in path or "/minio" in path:
        return None
    explicit_scheme = text.split("://", 1)[0].lower() if "://" in text else ""
    scheme = explicit_scheme or ("https" if _flag_enabled(secure) else "http")
    return f"{scheme}://{authority}"


def _sanitize_minio_profile(cfg: Dict[str, object]) -> Dict[str, object]:
    """
    @function _sanitize_minio_profile
    @description Copy a stored MinIO profile, redirecting console addresses to API port 9000
    @param cfg The "minio" section of a profile file
    @return New dict with "endpoint"/"public" rewritten where they pointed at the console
    """
    cleaned = dict(cfg)
    endpoint = str(cleaned.get("endpoint") or "").strip()
    if endpoint and ":9001" in endpoint:
        cleaned["endpoint"] = f"{_bare_hostname(endpoint)}:9000"
    public = str(cleaned.get("public") or "").strip()
    if public and _is_console_endpoint(public):
        secure = cleaned.get("secure") or RUNTIME_CONFIG.get("minio", {}).get("secure")
        scheme = "https" if _flag_enabled(secure) else "http"
        cleaned["public"] = f"{scheme}://{_bare_hostname(public)}:9000"
    return cleaned


def _public_read_policy_json(bucket: str, principal: object = "*") -> str:
    """
    @function _public_read_policy_json
    @description Build the anonymous read-only bucket policy document
    @param bucket Bucket the policy applies to
    @param principal Value of the policy "Principal" field
    @return Policy serialised as JSON text
    """
    policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": principal,
                "Action": ["s3:GetBucketLocation", "s3:ListBucket"],
                "Resource": [f"arn:aws:s3:::{bucket}"],
            },
            {
                "Effect": "Allow",
                "Principal": principal,
                "Action": ["s3:GetObject"],
                "Resource": [f"arn:aws:s3:::{bucket}/*"],
            },
        ],
    }
    return json.dumps(policy)


def _apply_bucket_read_policy(client: object, bucket: str, public_read: bool) -> None:
    """
    @function _apply_bucket_read_policy
    @description Best-effort: set the public-read policy or remove the bucket policy
    @param client MinIO client
    @param bucket Target bucket
    @param public_read True to allow anonymous reads, False to delete the policy
    """
    try:
        if public_read:
            client.set_bucket_policy(bucket_name=bucket, policy=_public_read_policy_json(bucket))  # type: ignore
        else:
            client.delete_bucket_policy(bucket)  # type: ignore
    except Exception as exc:
        logging.warning("bucket policy update failed for %s: %s", bucket, exc)


def _fetch_http_date(url: str, timeout: float = 3.0) -> Optional[str]:
    """
    @function _fetch_http_date
    @description Read the HTTP "Date" response header of a URL, trying HEAD then GET
    @param url Absolute URL to probe
    @param timeout Per-request timeout in seconds
    @return Header value, or None when no request produced one
    """
    # Imported locally under private names so the probe does not depend on
    # which ``Request`` class the module-level name is bound to.
    from urllib.request import Request as _UrlRequest, urlopen as _urlopen

    for verb in ("HEAD", "GET"):
        try:
            with _urlopen(_UrlRequest(url, method=verb), timeout=timeout) as resp:
                headers = resp.headers
        except HTTPError as err:
            # Error responses (e.g. 403 for anonymous access) still carry Date.
            headers = err.headers
            err.close()
        except Exception:
            continue
        stamp = headers.get("Date") if headers is not None else None
        if stamp:
            return stamp
    return None


@app.get("/health")
def health():
    """
    @function health
    @description Health check endpoint
    @return {"status": "ok"}
    """
    return {"status": "ok"}


def _publish_conversion(
    enc: Optional[str],
    content: str,
    artifacts_dir: Optional[str],
    base: str,
    export: str,
) -> JSONResponse:
    """
    @function _publish_conversion
    @description Upload converted content to MinIO and build the /convert response (shared by both input modes)
    @param enc Text encoding reported by the converter (UTF-8 when empty)
    @param content Converted document text
    @param artifacts_dir Directory with extracted assets, removed when done
    @param base Sanitised output base name (no extension)
    @param export Normalised export format
    @return JSONResponse with public URL, presigned URL, name, export and media type
    @raise HTTPException 400 when MinIO is not configured or storing is disabled
    """
    scratch_dir: Optional[Path] = None
    try:
        out_ext = _export_ext(export)
        ct = _media_type(export)
        if export.lower() == "markdown":
            # Best effort: upload referenced assets and point the Markdown at
            # their MinIO URLs. Failures leave the text untouched.
            try:
                client_rw, bucket_rw, public_rw, prefix_rw = minio_current(RUNTIME_CONFIG)
                if client_rw is not None and bucket_rw and public_rw:
                    if artifacts_dir:
                        base_dir = Path(artifacts_dir)
                    else:
                        scratch_dir = Path(tempfile.mkdtemp(prefix="md_assets_"))
                        base_dir = scratch_dir
                    content, _ms = _rewrite_md_assets_to_minio(
                        content,
                        base_dir,
                        client_rw,
                        bucket_rw,
                        public_rw,
                        prefix_rw,
                        search_root=(Path(artifacts_dir) if artifacts_dir else None),
                    )
                    if artifacts_dir:
                        try:
                            _bulk_upload_assets(Path(artifacts_dir), client_rw, bucket_rw, public_rw, prefix_rw)
                        except Exception as exc:
                            logging.warning("bulk asset upload failed: %s", exc)
            except Exception as exc:
                logging.warning("markdown asset rewrite failed: %s", exc)

        client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
        if client is None or not bucket or not public_base:
            raise HTTPException(status_code=400, detail="MinIO is not configured for save")
        if not _flag_enabled(RUNTIME_CONFIG.get("minio", {}).get("store_final"), "true"):
            raise HTTPException(status_code=400, detail="Saving to MinIO is disabled by configuration")

        out_name = f"{base}{out_ext}"
        obj = join_prefix(prefix, f"converted/{out_name}")
        raw = content.encode(enc or "utf-8")
        client.put_object(  # type: ignore
            bucket_name=bucket,
            object_name=obj,
            data=io.BytesIO(raw),
            length=len(raw),
            content_type=ct,
        )
        # Percent-encode the key so names with spaces or non-ASCII characters
        # still form a valid URL.
        minio_url = f"{public_base}/{bucket}/{quote(obj, safe='/')}"
        exp = int(timedelta(hours=12).total_seconds())
        minio_presigned_url = presigned_read(client, bucket, obj, exp)
        return JSONResponse({
            "minio_url": minio_url,
            "minio_presigned_url": minio_presigned_url,
            "name": out_name,
            "export": export,
            "media_type": ct,
        })
    finally:
        # Remove working directories on success and on failure alike.
        for leftover in (artifacts_dir, scratch_dir):
            if leftover:
                shutil.rmtree(leftover, ignore_errors=True)


@app.post("/convert")
async def convert(
    file: Optional[UploadFile] = File(None),
    source_url: Optional[str] = Form(None),
    export: str = Form("markdown"),
    save: Optional[bool] = Form(False),
    filename: Optional[str] = Form(None),
):
    """
    @function convert
    @description Convert various document formats to Markdown/HTML/JSON and store the result in MinIO
    @param file Uploaded file (optional)
    @param source_url URL of the source document (optional)
    @param export Target export format (default: markdown)
    @param save Accepted for compatibility; currently not consulted, the output is always stored in MinIO
    @param filename Custom filename for the output
    @return JSON response with the MinIO URLs of the converted document
    @raise HTTPException 400 when not exactly one input is given or MinIO storage is unavailable
    """
    if (file is None and not source_url) or (file is not None and source_url):
        raise HTTPException(status_code=400, detail="provide exactly one of file or source_url")
    export = _normalize_export(export)

    if source_url:
        enc, content, artifacts_dir = await asyncio.to_thread(_converter_v2.convert, source_url, export=export)
        base = sanitize_filename(filename or infer_basename(source_url, None))
        return _publish_conversion(enc, content, artifacts_dir, base, export)

    assert file is not None  # guaranteed by the validation above; narrows the type
    suffix = ""
    if file.filename and "." in file.filename:
        suffix = "." + file.filename.rsplit(".", 1)[-1]
    # The converter needs a real path, so spool the upload to a temp file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name
    try:
        enc, content, artifacts_dir = await asyncio.to_thread(_converter_v2.convert, tmp_path, export=export)
        base = sanitize_filename(filename or infer_basename(None, file.filename))
        return _publish_conversion(enc, content, artifacts_dir, base, export)
    finally:
        try:
            os.remove(tmp_path)
        except OSError:
            pass


profiles_dir = Path(__file__).parent / "configs"
profiles_dir.mkdir(parents=True, exist_ok=True)


@app.get("/")
def index():
    """
    @function index
    @description Service banner
    @return JSON with service name and version
    """
    return JSONResponse({"ok": True, "service": "docling-api", "version": "v2"})


@app.get("/@vite/client")
def vite_client_stub():
    """
    @function vite_client_stub
    @description Stub answering requests for the Vite dev client
    @return {"ok": true}
    """
    return JSONResponse({"ok": True})


@app.get("/refresh.js")
def refresh_js_stub():
    """
    @function refresh_js_stub
    @description Stub script defining no-op initClient/addRefresh functions
    @return JavaScript response
    """
    return Response(
        content="window.initClient=function(){},window.addRefresh=function(){};",
        media_type="application/javascript",
    )


# Process-wide mutable configuration, filled by the /config endpoints and by
# profile files under ``profiles_dir``.
RUNTIME_CONFIG: Dict[str, Dict[str, Optional[str]]] = {
    "minio": {
        "endpoint": None,
        "public": None,
        "access": None,
        "secret": None,
        "bucket": None,
        "secure": None,
        "prefix": None,
        "store_final": "true",
        "public_read": "true",
    },
    "db": {
        "webhook_url": None,
        "token": None,
    },
}


def _normalize_export(export: str) -> str:
    """
    @function _normalize_export
    @description Lower-case and validate the export format
    @param export Raw export value
    @return One of markdown/html/json/doctags
    @raise HTTPException 422 for any other value
    """
    normalized = (export or "").strip().lower()
    if normalized not in {"markdown", "html", "json", "doctags"}:
        raise HTTPException(status_code=422, detail="unsupported export")
    return normalized


def _normalize_engine(engine: Optional[str]) -> Optional[str]:
    """
    @function _normalize_engine
    @description Lower-case and validate the conversion engine name
    @param engine Raw engine value or None
    @return None when not given, otherwise one of docling/word2markdown/pandoc/custom
    @raise HTTPException 422 for any other value
    """
    if engine is None:
        return None
    normalized = engine.strip().lower()
    if normalized not in {"docling", "word2markdown", "pandoc", "custom"}:
        raise HTTPException(status_code=422, detail="unsupported engine")
    return normalized


def _fix_garbled_name(name: str) -> str:
    """
    @function _fix_garbled_name
    @description Repair archive member names that were decoded as CP437 although stored in another encoding
    @param name Member name as decoded by the archive library
    @return Re-decoded name, or the input when it is ASCII or cannot be repaired
    """
    try:
        if all(ord(ch) < 128 for ch in name.strip()):
            return name
        # Recover the original bytes assuming CP437 (the zip default when the
        # UTF-8 flag is not set).
        try:
            raw = name.encode("cp437", errors="strict")
        except UnicodeEncodeError:
            return name  # not CP437 mojibake
        # UTF-8 goes first: it is strictly self-validating, whereas UTF-8 bytes
        # frequently also decode "successfully" (but wrongly) as GB18030/GBK.
        candidates = (
            "utf-8", "gb18030", "gbk", "cp936", "big5", "cp950",
            "shift_jis", "cp932", "cp949", "euc-kr", "euc-jp",
        )
        for codec in candidates:
            try:
                fixed = raw.decode(codec)
            except Exception:
                continue
            if fixed:
                return fixed
    except Exception:
        pass
    return name


def _safe_target(base: Path, name: str) -> Optional[Path]:
    """
    @function _safe_target
    @description Map an archive member name to a path that is guaranteed to stay inside ``base``
    @param base Extraction root
    @param name Member name (either slash style, may contain "." / ".." parts)
    @return Resolved target path, or None when it would escape ``base``
    """
    try:
        cleaned = name.replace("\\", "/").lstrip("/")
        parts = [seg for seg in cleaned.split("/") if seg and seg not in {".", ".."}]
        resolved = (base / "/".join(parts)).resolve()
        # resolve() follows symlinks, so re-check containment afterwards.
        resolved.relative_to(base.resolve())
        return resolved
    except Exception:
        return None


def _zip_extract_safely(zf: object, dest: Path) -> None:
    """
    @function _zip_extract_safely
    @description Extract a zip archive below ``dest``, skipping unsafe or unreadable members
    @param zf Open zipfile.ZipFile-like object
    @param dest Extraction root
    """
    try:
        for info in zf.infolist():  # type: ignore
            name = ""
            try:
                name = str(getattr(info, "filename", ""))
                flags = int(getattr(info, "flag_bits", 0))
                # Bit 11 (0x800) marks UTF-8 names; only others need repair.
                use = name if flags & 0x800 else _fix_garbled_name(name)
                target = _safe_target(dest, use)
                if target is None:
                    continue
                if hasattr(info, "is_dir") and info.is_dir():  # type: ignore
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                target.parent.mkdir(parents=True, exist_ok=True)
                with zf.open(info, "r") as src:  # type: ignore
                    data = src.read()
                with open(target, "wb") as out:
                    out.write(data)
            except Exception as exc:
                logging.warning("skipping zip entry %r: %s", name, exc)
    except Exception as exc:
        logging.warning("zip extraction aborted: %s", exc)


def _tar_extract_safely(tf: object, dest: Path) -> None:
    """
    @function _tar_extract_safely
    @description Extract a tar archive below ``dest``, skipping unsafe or unreadable members
    @param tf Open tarfile.TarFile-like object
    @param dest Extraction root
    """
    try:
        for member in tf.getmembers():  # type: ignore
            name = ""
            try:
                name = str(getattr(member, "name", ""))
                target = _safe_target(dest, _fix_garbled_name(name))
                if target is None:
                    continue
                if getattr(member, "isdir", lambda: False)():
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                target.parent.mkdir(parents=True, exist_ok=True)
                handle = tf.extractfile(member)  # type: ignore
                if handle is None:
                    continue
                with handle:
                    data = handle.read()
                with open(target, "wb") as out:
                    out.write(data)
            except Exception as exc:
                logging.warning("skipping tar entry %r: %s", name, exc)
    except Exception as exc:
        logging.warning("tar extraction aborted: %s", exc)


def _minio_head_bucket(client: object, bucket: str) -> bool:
    """
    @function _minio_head_bucket
    @description Check whether a bucket exists, using SDK, low-level HEAD and listing in turn
    @param client MinIO client
    @param bucket Bucket name
    @return True when the bucket is found, False otherwise or on errors
    """
    try:
        if hasattr(client, "bucket_exists"):
            try:
                return bool(client.bucket_exists(bucket))  # type: ignore
            except Exception:
                pass
        try:
            region = client._get_region(bucket)  # type: ignore
        except Exception:
            region = "us-east-1"
        client._url_open(method="HEAD", region=region, bucket_name=bucket)  # type: ignore
        return True
    except Exception:
        try:
            listed = {getattr(b, "name", None) for b in client.list_buckets()}  # type: ignore
            return bucket in {n for n in listed if n}
        except Exception:
            return False


def _minio_create_bucket(client: object, bucket: str) -> bool:
    """
    @function _minio_create_bucket
    @description Create a bucket, preferring SDK methods and falling back to a low-level PUT
    @param client MinIO client
    @param bucket Bucket name
    @return True when the bucket exists afterwards
    @raise Exception The low-level error when every strategy fails
    """
    if hasattr(client, "bucket_exists"):
        try:
            if client.bucket_exists(bucket):  # type: ignore
                return True
        except Exception:
            pass
    if hasattr(client, "make_bucket"):
        try:
            client.make_bucket(bucket)  # type: ignore
            return True
        except Exception:
            # Retry with an explicit region before using the raw request.
            try:
                region = client._get_region(bucket)  # type: ignore
            except Exception:
                region = "us-east-1"
            try:
                client.make_bucket(bucket, location=region)  # type: ignore
                return True
            except Exception:
                pass
    try:
        try:
            region = client._get_region(bucket)  # type: ignore
        except Exception:
            region = "us-east-1"
        client._url_open(method="PUT", region=region, bucket_name=bucket)  # type: ignore
        return True
    except Exception as err:
        if "BucketAlreadyOwnedByYou" in str(err) or "BucketAlreadyExists" in str(err):
            return True
        raise


def _minio_client(endpoint: str, access: str, secret: str, secure: bool):
    """
    @function _minio_client
    @description Build a MinIO client, with short connect/read timeouts when urllib3 is available
    @param endpoint "host[:port]" of the S3 API
    @param access Access key
    @param secret Secret key
    @param secure Use HTTPS when True
    @return Minio client instance
    """
    if urllib3 is not None:
        try:
            http = urllib3.PoolManager(timeout=urllib3.Timeout(connect=3.0, read=20.0))
            return Minio(endpoint=endpoint, access_key=access, secret_key=secret, secure=secure, http_client=http)  # type: ignore
        except Exception:
            pass
    return Minio(endpoint=endpoint, access_key=access, secret_key=secret, secure=secure)  # type: ignore


def _connect_with_tls_fallback(ep_host: str, access: str, secret: str, sec: bool) -> Tuple[object, bool]:
    """
    @function _connect_with_tls_fallback
    @description Create a client and fall back to plain HTTP when the HTTPS handshake fails
    @param ep_host "host[:port]" of the S3 API
    @param access Access key
    @param secret Secret key
    @param sec Requested secure flag
    @return (client, effective secure flag)
    """
    client = _minio_client(endpoint=ep_host, access=access, secret=secret, secure=sec)
    try:
        client.list_buckets()  # type: ignore
    except Exception as err:
        text = str(err)
        if sec and ("SSL" in text or "HTTPSConnectionPool" in text):
            try:
                client = _minio_client(endpoint=ep_host, access=access, secret=secret, secure=False)
                sec = False
            except Exception:
                pass
    return client, sec


def _minio_time_hint(endpoint: str, secure: bool) -> Optional[str]:
    """
    @function _minio_time_hint
    @description Describe the clock difference between this host and the MinIO server
    @param endpoint "host[:port]" to probe
    @param secure Probe over HTTPS when True
    @return Human-readable hint, or None when the server time cannot be determined
    """
    try:
        from email.utils import parsedate_to_datetime
        from datetime import datetime, timezone

        scheme = "https" if secure else "http"
        srv_date = _fetch_http_date(f"{scheme}://{endpoint}")
        if not srv_date:
            return None
        skew = abs((datetime.now(timezone.utc) - parsedate_to_datetime(srv_date)).total_seconds())
        return f"服务器时间与本机相差约 {int(skew)} 秒"
    except Exception:
        return None


def _db_notify(payload: Dict[str, object]):
    """
    @function _db_notify
    @description Best-effort POST of ``payload`` to the configured webhook
    @param payload JSON-serialisable body
    """
    try:
        import requests  # type: ignore
    except Exception:
        return
    db_cfg = RUNTIME_CONFIG.get("db", {})
    url = (db_cfg.get("webhook_url") or "").strip()
    if not url:
        return
    headers = {"Content-Type": "application/json"}
    token = db_cfg.get("token") or ""
    if token:
        headers["Authorization"] = f"Bearer {token}"
    try:
        requests.post(url, json=payload, headers=headers, timeout=5)
    except Exception as exc:
        logging.warning("db webhook notification failed: %s", exc)


@app.post("/config/minio")
async def set_minio_config(
    endpoint: str = Form(...),
    public: Optional[str] = Form(None),
    access: str = Form(...),
    secret: str = Form(...),
    bucket: str = Form(...),
    secure: Optional[str] = Form("false"),
    prefix: Optional[str] = Form(None),
    store_final: Optional[str] = Form("true"),
    public_read: Optional[str] = Form("true"),
):
    """
    @function set_minio_config
    @description Store the MinIO settings in RUNTIME_CONFIG and apply the bucket read policy
    @param endpoint S3 API address (URL or host:port); console addresses are rejected
    @param public Public base URL; console addresses are dropped, a scheme is added when missing
    @param access Access key
    @param secret Secret key
    @param bucket Bucket name
    @param secure Textual flag selecting HTTPS
    @param prefix Object key prefix
    @param store_final Textual flag enabling storage of final outputs
    @param public_read Textual flag enabling anonymous reads
    @return {"ok": true} or {"ok": false, "error": ...}
    """
    ep_raw = (endpoint or "").strip()
    if _is_console_endpoint(ep_raw):
        return {"ok": False, "error": _CONSOLE_ENDPOINT_ERROR}
    RUNTIME_CONFIG["minio"].update({
        "endpoint": _api_endpoint_host(ep_raw),
        "public": _normalize_public_base(public, secure),
        "access": access,
        "secret": secret,
        "bucket": bucket,
        "secure": secure,
        "prefix": prefix,
        "store_final": store_final,
        "public_read": public_read,
    })
    client, bkt, pub, _ = minio_current(RUNTIME_CONFIG)
    if client is None or not bkt or not pub:
        return {"ok": False, "error": "MinIO config invalid"}
    _apply_bucket_read_policy(client, bkt, _flag_enabled(public_read, "true"))
    return {"ok": True}


@app.post("/config/minio/test")
async def test_minio_config(
    endpoint: str = Form(...),
    public: Optional[str] = Form(None),
    access: str = Form(...),
    secret: str = Form(...),
    bucket: str = Form(...),
    secure: Optional[str] = Form("false"),
    create_if_missing: Optional[str] = Form("true"),
    public_read: Optional[str] = Form("false"),
):
    """
    @function test_minio_config
    @description Connect with the given settings, optionally create the bucket and apply the read policy
    @param endpoint S3 API address (URL or host:port)
    @param public Public base URL, only used for the HTTPS hint
    @param access Access key
    @param secret Secret key
    @param bucket Bucket to check
    @param secure Textual flag selecting HTTPS
    @param create_if_missing Textual flag: create the bucket when absent
    @param public_read Textual flag: allow anonymous reads (policy is removed when false)
    @return Status dict with ok/connected/bucket_exists and created/hint or error
    """
    if Minio is None:
        return {"ok": False, "connected": False, "bucket_exists": False, "error": "minio client not available"}
    sec = _flag_enabled(secure)
    ep_raw = (endpoint or "").strip()
    ep_host = _api_endpoint_host(ep_raw)
    try:
        if _is_console_endpoint(ep_raw):
            return {"ok": False, "connected": False, "bucket_exists": False, "error": _CONSOLE_ENDPOINT_ERROR}
        client, sec = _connect_with_tls_fallback(ep_host, access, secret, sec)
        created = False
        exists = _minio_head_bucket(client, bucket)
        if not exists and _flag_enabled(create_if_missing, "true"):
            if _minio_create_bucket(client, bucket):
                exists = True
                created = True
        # Always apply/remove the policy according to public_read, even when
        # the bucket already existed.
        _apply_bucket_read_policy(client, bucket, _flag_enabled(public_read))
        hint = (
            "使用 HTTPS 访问 9000 端口可能失败,请确认启用 HTTPS 与证书配置匹配"
            if sec and (public or "").startswith("http://")
            else None
        )
        return {"ok": True, "connected": True, "bucket_exists": exists, "created": created, "hint": hint}
    except Exception as err:
        hint = _minio_time_hint(ep_host, sec) if "RequestTimeTooSkewed" in str(err) else None
        return {"ok": False, "connected": False, "bucket_exists": False, "error": str(err), "hint": hint}


# Registered under the route name "list_profiles" (its original function name)
# so route/OpenAPI identifiers stay unchanged; the Python name differs only to
# avoid clashing with the /config/profiles handler defined further down.
@app.get("/config/profile/list", name="list_profiles")
async def list_profiles_recursive():
    """
    @function list_profiles_recursive
    @description List profile names found recursively under the configs directory
    @return {"ok": true, "profiles": sorted unique names}
    """
    names: List[str] = []
    try:
        names = [p.stem for p in profiles_dir.rglob("*.json")]
    except Exception:
        pass
    return {"ok": True, "profiles": sorted(set(names))}


@app.post("/config/profile/activate")
async def activate_profile(name: str = Form(...)):
    """
    @function activate_profile
    @description Load a stored profile into RUNTIME_CONFIG and write it to active.json
    @param name Profile name (case-insensitive file stem)
    @return {"ok": true, "active": name}
    @raise HTTPException 404 when the profile does not exist, 400 on read/write errors
    """
    try:
        wanted = (name or "").strip().lower()
        target = next((p for p in profiles_dir.rglob("*.json") if p.stem.lower() == wanted), None)
        if target is None:
            raise HTTPException(status_code=404, detail="profile not found")
        data = json.loads(target.read_text("utf-8"))
        # Apply the profile over the runtime configuration.
        try:
            minio_cfg = data.get("minio", {})
            if isinstance(minio_cfg, dict) and minio_cfg:
                RUNTIME_CONFIG["minio"].update(_sanitize_minio_profile(minio_cfg))
        except Exception as exc:
            logging.warning("profile %s: minio section ignored: %s", target.stem, exc)
        try:
            db_cfg = data.get("db", {})
            if isinstance(db_cfg, dict) and db_cfg:
                RUNTIME_CONFIG["db"].update(db_cfg)
        except Exception as exc:
            logging.warning("profile %s: db section ignored: %s", target.stem, exc)
        # Write active.json so the config watcher notices the change.
        (profiles_dir / "active.json").write_text(json.dumps(data, ensure_ascii=False, indent=2), "utf-8")
        return {"ok": True, "active": target.stem}
    except HTTPException:
        raise
    except Exception as err:
        raise HTTPException(status_code=400, detail=str(err))


@app.get("/system/time/check")
def system_time_check(
    endpoint: Optional[str] = Query(None),
    public: Optional[str] = Query(None),
    secure: Optional[str] = Query(None),
):
    """
    @function system_time_check
    @description Compare the local clock with the Date header served by the MinIO host
    @param endpoint Endpoint override (defaults to the configured endpoint)
    @param public Public URL override, preferred over the endpoint when present
    @param secure Textual secure flag override
    @return Dict with server_time, local_time, diff_sec and hint, or {"ok": false, "error": ...}
    """
    try:
        from email.utils import parsedate_to_datetime
        from datetime import datetime, timezone

        # When this function is called directly from Python without arguments
        # the parameters hold FastAPI ``Query`` objects; treat those as unset.
        endpoint, public, secure = (
            value if isinstance(value, str) else None for value in (endpoint, public, secure)
        )
        rc = RUNTIME_CONFIG.get("minio", {})
        ep_raw = (endpoint or rc.get("endpoint") or "").strip()
        pub_raw = (public or rc.get("public") or "").strip()
        sec_flag = secure if secure is not None else (rc.get("secure") or "false")
        sec = _flag_enabled(sec_flag)
        scheme = "https" if sec else "http"
        # Resolve the host, preferring the public URL over the endpoint.
        base_host = _api_endpoint_host(pub_raw or ep_raw)
        if not base_host:
            now = datetime.now(timezone.utc)
            return {"ok": True, "server_time": None, "local_time": now.isoformat(), "diff_sec": None, "hint": "未配置 MinIO 端点"}
        # Probe the root and the MinIO health endpoints; if the configured
        # scheme yields nothing, retry everything with the other scheme.
        alt_scheme = "http" if scheme == "https" else "https"
        probe_paths = ("", "/minio/health/live", "/minio/health/ready", "/minio/health/version")
        probe_urls = (f"{s}://{base_host}{p}" for s in (scheme, alt_scheme) for p in probe_paths)
        srv_date = next((stamp for stamp in map(_fetch_http_date, probe_urls) if stamp), None)
        now = datetime.now(timezone.utc)
        diff = None
        if srv_date:
            try:
                diff = int(abs((now - parsedate_to_datetime(srv_date)).total_seconds()))
            except Exception:
                diff = None
        hint = _minio_time_hint(base_host, sec)
        return {"ok": True, "server_time": srv_date, "local_time": now.isoformat(), "diff_sec": diff, "hint": hint}
    except Exception as err:
        return {"ok": False, "error": str(err)}


async def _run_time_commands(cmds: List[List[str]], timeout: float, outputs: List[Dict[str, object]]) -> bool:
    """
    @function _run_time_commands
    @description Run commands in order until one exits with status 0, recording every attempt
    @param cmds Argument vectors to execute
    @param timeout Per-command timeout in seconds
    @param outputs List receiving one record (cmd/code/out/err) per attempt
    @return True when a command succeeded
    """
    for cmd in cmds:
        try:
            # Executed in a worker thread so the event loop is not blocked.
            proc = await asyncio.to_thread(subprocess.run, cmd, capture_output=True, text=True, timeout=timeout)
        except Exception as err:
            outputs.append({"cmd": " ".join(cmd), "code": -1, "out": "", "err": str(err)})
            continue
        outputs.append({"cmd": " ".join(cmd), "code": proc.returncode, "out": proc.stdout, "err": proc.stderr})
        if proc.returncode == 0:
            return True
    return False


@app.post("/system/time/sync")
async def system_time_sync(method: Optional[str] = Form("auto"), ntp_server: Optional[str] = Form(None)):
    """
    @function system_time_sync
    @description Try to synchronise the system clock with sntp/ntpdate (elevated via osascript on macOS)
    @param method "auto", "sntp" or "ntpdate"
    @param ntp_server Optional NTP server tried before the built-in ones
    @return {"ok": success, "result": attempts, "check": result of system_time_check}
    @raise HTTPException 400 when ntp_server is not a plain host name or IP literal
    """
    method = method if isinstance(method, str) else "auto"
    ntp_server = ntp_server if isinstance(ntp_server, str) else None
    # The server name ends up in command lines (and, on macOS, inside a shell
    # command run with administrator privileges), so it must be validated.
    if ntp_server and not _NTP_HOST_RE.fullmatch(ntp_server):
        raise HTTPException(status_code=400, detail="invalid ntp_server")

    chosen = method or "auto"
    servers = [s for s in (ntp_server, "time.apple.com", "pool.ntp.org") if s]
    cmds: List[List[str]] = []
    for srv in servers:
        if chosen in {"auto", "sntp"}:
            cmds.append(["sntp", "-sS", srv])
        if chosen in {"auto", "ntpdate"}:
            cmds.append(["ntpdate", "-u", srv])

    outputs: List[Dict[str, object]] = []
    success = await _run_time_commands(cmds, 8, outputs)
    if not success and sys.platform == "darwin":
        elev_cmds: List[List[str]] = []
        for srv in servers:
            elev_cmds.append(["osascript", "-e", f'do shell script "sntp -sS {srv}" with administrator privileges'])
            elev_cmds.append(["osascript", "-e", f'do shell script "ntpdate -u {srv}" with administrator privileges'])
            elev_cmds.append(["osascript", "-e", f'do shell script "/usr/sbin/systemsetup -setnetworktimeserver {srv}" with administrator privileges'])
        elev_cmds.append(["osascript", "-e", 'do shell script "/usr/sbin/systemsetup -setusingnetworktime on" with administrator privileges'])
        success = await _run_time_commands(elev_cmds, 12, outputs)

    # Explicit None arguments: the defaults of system_time_check are Query
    # objects, which are only meaningful when FastAPI performs the call.
    chk = await asyncio.to_thread(system_time_check, None, None, None)
    return {"ok": success, "result": outputs, "check": chk}


@app.get("/api/system/time/check")
def system_time_check_api(
    endpoint: Optional[str] = Query(None),
    public: Optional[str] = Query(None),
    secure: Optional[str] = Query(None),
):
    """
    @function system_time_check_api
    @description Alias of /system/time/check under the /api prefix
    @return Result of system_time_check
    """
    return system_time_check(endpoint=endpoint, public=public, secure=secure)


@app.post("/api/system/time/sync")
async def system_time_sync_api(method: Optional[str] = Form("auto"), ntp_server: Optional[str] = Form(None)):
    """
    @function system_time_sync_api
    @description Alias of /system/time/sync under the /api prefix
    @return Result of system_time_sync
    """
    return await system_time_sync(method=method, ntp_server=ntp_server)


async def _auto_time_calibration():
    """
    @function _auto_time_calibration
    @description One-shot task: after a short delay, trigger a time sync when the clock differs from the server by more than 120 seconds
    """
    try:
        await asyncio.sleep(1.0)
        chk = await asyncio.to_thread(system_time_check, None, None, None)
        try:
            diff = int((chk or {}).get("diff_sec") or 0)
        except Exception:
            diff = 0
        if diff > 120:
            await system_time_sync(method="auto", ntp_server=None)
    except Exception as exc:
        logging.warning("automatic time calibration skipped: %s", exc)


@app.get("/config/minio/buckets")
def list_minio_buckets(
    endpoint: str,
    access: str,
    secret: str,
    secure: Optional[str] = "false",
):
    """
    @function list_minio_buckets
    @description List the buckets visible to the given credentials
    @param endpoint S3 API address (URL or host:port)
    @param access Access key
    @param secret Secret key (NOTE: travels in the query string of this GET route)
    @param secure Textual flag selecting HTTPS
    @return {"ok": true, "buckets": [...]} or {"ok": false, "error": ..., "buckets": []}
    """
    if Minio is None:
        return {"ok": False, "error": "minio client not available", "buckets": []}
    try:
        client = _minio_client(
            endpoint=_api_endpoint_host(endpoint or ""),
            access=access,
            secret=secret,
            secure=_flag_enabled(secure),
        )
        return {"ok": True, "buckets": [b.name for b in client.list_buckets()]}
    except Exception as err:
        return {"ok": False, "error": str(err), "buckets": []}


@app.post("/config/minio/create-bucket")
async def create_minio_bucket(
    endpoint: str = Form(...),
    access: str = Form(...),
    secret: str = Form(...),
    bucket: str = Form(...),
    secure: Optional[str] = Form("false"),
    public_read: Optional[str] = Form("false"),
):
    """
    @function create_minio_bucket
    @description Create a bucket and optionally make it publicly readable
    @param endpoint S3 API address (URL or host:port)
    @param access Access key
    @param secret Secret key
    @param bucket Bucket to create
    @param secure Textual flag selecting HTTPS
    @param public_read Textual flag: allow anonymous reads
    @return {"ok": true, "bucket_exists": true} or {"ok": false, "error": ..., "hint": ...}
    """
    if Minio is None:
        return {"ok": False, "error": "minio client not available"}
    sec = _flag_enabled(secure)
    ep_raw = (endpoint or "").strip()
    ep_host = _api_endpoint_host(ep_raw)
    try:
        if _is_console_endpoint(ep_raw):
            return {"ok": False, "error": _CONSOLE_ENDPOINT_ERROR}
        client, sec = _connect_with_tls_fallback(ep_host, access, secret, sec)
        _minio_create_bucket(client, bucket)
        if _flag_enabled(public_read):
            try:
                client.set_bucket_policy(bucket, _public_read_policy_json(bucket, {"AWS": ["*"]}))  # type: ignore
            except Exception as exc:
                logging.warning("bucket policy update failed for %s: %s", bucket, exc)
        return {"ok": True, "bucket_exists": True}
    except Exception as err:
        hint = _minio_time_hint(ep_host, sec) if "RequestTimeTooSkewed" in str(err) else None
        return {"ok": False, "error": str(err), "hint": hint}


@app.post("/minio/presign", response_model=MinioPresignResponse)
async def minio_presign(
    url: Optional[str] = Form(None),
    object_name: Optional[str] = Form(None),
    bucket: Optional[str] = Form(None),
    expires: Optional[int] = Form(3600),
):
    """
    @function minio_presign
    @description Create a presigned read URL for an object given by name or by its public URL
    @param url Public object URL of the form .../<bucket>/<key> (used when object_name is empty)
    @param object_name Object key
    @param bucket Bucket name (defaults to the configured bucket)
    @param expires Lifetime in seconds (default 3600)
    @return MinioPresignResponse
    @raise HTTPException 400 when MinIO is not configured or bucket/object cannot be determined
    """
    client, cfg_bucket, public_base, _ = minio_current(RUNTIME_CONFIG)
    if client is None:
        raise HTTPException(status_code=400, detail="MinIO 未配置")
    obj = (object_name or "").strip()
    bkt = (bucket or cfg_bucket or "").strip()
    if not obj and url:
        try:
            segments = [seg for seg in (urlsplit(url.strip()).path or "").split("/") if seg]
            if segments:
                # The first path segment is the bucket, the rest the key.
                if not bkt:
                    bkt = segments[0]
                obj = unquote("/".join(segments[1:]))
        except Exception:
            pass
    if not bkt or not obj:
        raise HTTPException(status_code=400, detail="bucket 与 object_name/URL 不能为空")
    exp = int(expires or 3600)
    presigned = presigned_read(client, bkt, obj, exp)
    pub_url = f"{public_base}/{bkt}/{quote(obj, safe='/')}" if public_base else None
    return MinioPresignResponse(
        bucket=bkt,
        object=obj,
        minio_url=pub_url,
        minio_presigned_url=presigned,
        expires=exp,
    )


@app.get("/minio/object")
def minio_object(bucket: Optional[str] = None, object: str = ""):
    """
    @function minio_object
    @description Proxy an object from MinIO to the caller
    @param bucket Bucket name (defaults to the configured bucket)
    @param object Object key; percent-escapes in the value are decoded once more after query parsing
    @return Response with the object bytes and an inline Content-Disposition
    @raise HTTPException 400 for missing configuration/parameters, 403 when the object cannot be read
    """
    client, cfg_bucket, public_base, _ = minio_current(RUNTIME_CONFIG)
    if client is None:
        raise HTTPException(status_code=400, detail="MinIO 未配置")
    bkt = (bucket or cfg_bucket or "").strip()
    obj = unquote((object or "").strip())
    if not bkt or not obj:
        raise HTTPException(status_code=400, detail="bucket 与 object 不能为空")

    ct = None
    try:
        try:
            st = client.stat_object(bucket_name=bkt, object_name=obj)  # type: ignore
        except TypeError:
            st = client.stat_object(bkt, obj)  # type: ignore
        ct = getattr(st, "content_type", None)
    except Exception:
        ct = None

    try:
        try:
            resp = client.get_object(bucket_name=bkt, object_name=obj)  # type: ignore
        except TypeError:
            resp = client.get_object(bkt, obj)  # type: ignore
        try:
            data = resp.read()  # type: ignore
        finally:
            # Close and hand the connection back to the pool.
            for finisher in ("close", "release_conn"):
                try:
                    getattr(resp, finisher)()
                except Exception:
                    pass
    except Exception as err:
        raise HTTPException(status_code=403, detail=str(err))

    media = ct or detect_mime(obj, data)
    headers = {"Content-Disposition": "inline; filename*=UTF-8''" + quote(Path(obj).name)}
    return Response(content=data, media_type=media, headers=headers)


@app.post("/config/db")
async def set_db_config(webhook_url: Optional[str] = Form(None), token: Optional[str] = Form(None)):
    """
    @function set_db_config
    @description Store the webhook URL and token in RUNTIME_CONFIG
    @param webhook_url Webhook receiving notifications
    @param token Bearer token sent with notifications
    @return {"ok": true}
    """
    RUNTIME_CONFIG["db"].update({"webhook_url": webhook_url, "token": token})
    return {"ok": True}


@app.get("/config")
def get_config_snapshot():
    """
    @function get_config_snapshot
    @description Return the runtime configuration with credentials masked
    @return {"minio": {...}, "db": {...}} where non-empty "secret" and "token" values read "***"
    """

    def _masked(section: Dict[str, Optional[str]], hidden: str) -> Dict[str, Optional[str]]:
        return {key: ("***" if key == hidden and value else value) for key, value in section.items()}

    return {
        "minio": _masked(RUNTIME_CONFIG.get("minio", {}), "secret"),
        "db": _masked(RUNTIME_CONFIG.get("db", {}), "token"),
    }


@app.get("/config/profiles")
def list_profiles():
    """
    @function list_profiles
    @description List profile names stored directly in the configs directory
    @return {"ok": true, "profiles": sorted names}
    """
    try:
        names = [p.stem for p in profiles_dir.glob("*.json")]
    except Exception:
        names = []
    return {"ok": True, "profiles": sorted(names)}


@app.post("/config/save_profile")
async def save_profile(name: str = Form(...)):
    """
    @function save_profile
    @description Write the current runtime configuration to <name>.json
    @param name Profile name (sanitised into a file name)
    @return {"ok": true, "name": stored stem}
    @raise HTTPException 400 when the name is blank or the file cannot be written
    """
    if not name.strip():
        raise HTTPException(status_code=400, detail="name required")
    snapshot = {
        "minio": RUNTIME_CONFIG.get("minio", {}),
        "db": RUNTIME_CONFIG.get("db", {}),
    }
    path = profiles_dir / f"{sanitize_filename(name)}.json"
    try:
        path.write_text(json.dumps(snapshot, ensure_ascii=False, indent=2), "utf-8")
    except Exception as err:
        raise HTTPException(status_code=400, detail=str(err))
    return {"ok": True, "name": path.stem}


@app.get("/config/load_profile")
def load_profile(name: str):
    """
    @function load_profile
    @description Load <name>.json into RUNTIME_CONFIG
    @param name Profile name
    @return {"ok": true, "config": loaded data}
    @raise HTTPException 404 when the profile is missing, 400 when it is unreadable or yields an invalid MinIO setup
    """
    path = profiles_dir / f"{sanitize_filename(name)}.json"
    if not path.exists():
        raise HTTPException(status_code=404, detail="profile not found")
    try:
        data = json.loads(path.read_text("utf-8"))
        RUNTIME_CONFIG["minio"].update(data.get("minio", {}))
        RUNTIME_CONFIG["db"].update(data.get("db", {}))
        client, bkt, pub, _ = minio_current(RUNTIME_CONFIG)
        if client is None or not bkt or not pub:
            raise HTTPException(status_code=400, detail="MinIO config invalid")
        return {"ok": True, "config": data}
    except HTTPException:
        raise
    except Exception as err:
        raise HTTPException(status_code=400, detail=str(err))


# ──────────────────────────────────────────────────────────────────────────────
# Auto-load DB config from app/configs without restart or page refresh
# ──────────────────────────────────────────────────────────────────────────────
def _choose_default_config_file() -> Optional[Path]:
    """
    @function _choose_default_config_file
    @description Pick the profile to auto-load: "active", then "default", then "test", else the most recently modified
    @return Path of the chosen profile, or None when there is none or on errors
    """
    try:
        candidates: List[Path] = list(profiles_dir.rglob("*.json"))
        if not candidates:
            return None
        by_name = {item.stem.lower(): item for item in candidates}
        for preferred in ("active", "default", "test"):
            if preferred in by_name:
                return by_name[preferred]
        return max(candidates, key=lambda item: item.stat().st_mtime)
    except Exception:
        return None


def _apply_configs_from_file(path: Path) -> None:
    """
    @function _apply_configs_from_file
    @description Merge a profile file into RUNTIME_CONFIG: "db" overrides, "minio" only fills unset keys
    @param path Profile file to read
    """
    try:
        data = json.loads(path.read_text("utf-8"))
        db_cfg = data.get("db", {})
        if isinstance(db_cfg, dict) and db_cfg:
            RUNTIME_CONFIG["db"].update(db_cfg)
        minio_cfg = data.get("minio", {})
        if isinstance(minio_cfg, dict) and minio_cfg:
            current = RUNTIME_CONFIG["minio"]
            for key, value in _sanitize_minio_profile(minio_cfg).items():
                # Values already set at runtime take precedence over the file.
                if current.get(key) in (None, ""):
                    current[key] = value
    except Exception as exc:
        logging.warning("config file %s ignored: %s", path, exc)


async def _watch_db_config_changes(interval_sec: float = 3.0) -> None:
    """
    @function _watch_db_config_changes
    @description Apply the default profile at startup and re-apply it whenever the chosen file or its mtime changes
    @param interval_sec Polling interval in seconds
    """
    last_path: Optional[Path] = _choose_default_config_file()
    last_mtime: float = last_path.stat().st_mtime if last_path and last_path.exists() else 0.0
    # Apply once at startup.
    if last_path:
        _apply_configs_from_file(last_path)
    while True:
        try:
            current = _choose_default_config_file()
            if current and current.exists():
                mtime = current.stat().st_mtime
                if current != last_path or mtime > last_mtime:
                    _apply_configs_from_file(current)
                    last_path = current
                    last_mtime = mtime
        except Exception:
            pass
        await asyncio.sleep(interval_sec)
# Strong references to fire-and-forget tasks. The event loop only keeps weak
# references, so a task whose result is discarded may be garbage-collected
# before it finishes.
_BACKGROUND_TASKS: set = set()


def _spawn_background_task(coro) -> None:
    """Schedule *coro* on the running loop and keep a reference until it completes."""
    task = asyncio.create_task(coro)
    _BACKGROUND_TASKS.add(task)
    task.add_done_callback(_BACKGROUND_TASKS.discard)


@app.on_event("startup")
async def _startup_autoload_configs():
    """Start the config watcher and the time-calibration task; a failure to start one does not block the other."""
    try:
        _spawn_background_task(_watch_db_config_changes(interval_sec=3.0))
    except Exception:
        logging.exception("startup: could not start config watcher")
    try:
        _spawn_background_task(_auto_time_calibration())
    except Exception:
        logging.exception("startup: could not start time calibration")


_MDCONV_COVER_MAX_BYTES = 2 * 1024 * 1024
_MDCONV_PRESIGN_SECONDS = int(timedelta(hours=12).total_seconds())


def _mdconv_is_http(value: str) -> bool:
    """True when *value* starts with http:// or https:// (case-insensitive)."""
    return value.lower().startswith(("http://", "https://"))


def _mdconv_fetch_remote(src: str) -> str:
    """Download Markdown from *src* and decode it (UTF-8, falling back to latin-1). Raises HTTPException(400) on fetch errors."""
    safe = src if "%" in src else _safe_http_url(src)
    try:
        try:
            with urlopen(safe, timeout=10) as r:
                raw = r.read()
            logging.info(f"md_convert fetched markdown_url len={len(raw)} url={safe}")
        except UnicodeEncodeError:
            # Retry with the whole URL percent-encoded.
            alt = quote(src, safe=':/?&=%#')
            with urlopen(_safe_http_url(alt), timeout=10) as r:
                raw = r.read()
            logging.info(f"md_convert fetched markdown_url(len={len(raw)}) with alt url")
    except HTTPError as err:
        raise HTTPException(status_code=400, detail={"error": "fetch_failed", "status": err.code, "url": getattr(err, 'url', src)})
    except URLError as err:
        raise HTTPException(status_code=400, detail={"error": "fetch_failed", "status": None, "url": src, "reason": str(getattr(err, 'reason', err))})
    try:
        return raw.decode("utf-8")
    except Exception:
        return raw.decode("latin-1", errors="ignore")


async def _mdconv_load_source(
    md_file: Optional[UploadFile],
    markdown_text: Optional[str],
    markdown_url: Optional[str],
    filename: Optional[str],
) -> Tuple[str, str, Path]:
    """Return (markdown content, output base name, directory used to resolve relative assets)."""
    base_dir = Path(".").resolve()
    if md_file is not None:
        content = (await md_file.read()).decode("utf-8", errors="ignore")
        base_dir = Path(md_file.filename or ".").resolve().parent if md_file.filename else Path(".")
        base = sanitize_filename(filename) if filename else sanitize_filename(os.path.splitext(md_file.filename or "document")[0])
        return content, base, base_dir
    if markdown_url:
        src = markdown_url.strip()
        try:
            if _mdconv_is_http(src):
                content = _mdconv_fetch_remote(src)
            else:
                # NOTE(review): a non-HTTP markdown_url is read as a path on the server's
                # own filesystem. Confirm this endpoint is only reachable by trusted callers.
                with open(src, "r", encoding="utf-8", errors="ignore") as f:
                    content = f.read()
                base_dir = Path(src).resolve().parent
        except HTTPException:
            raise
        except Exception as e:
            raise HTTPException(status_code=400, detail={"error": "fetch_failed", "url": src, "message": str(e)})
        base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(src, None))
        return content, base, base_dir
    content = markdown_text or ""
    base = sanitize_filename(filename) if filename else "document"
    return content, base, base_dir


def _mdconv_publish_image(data: bytes, mime: str, rel_object: str, embed: bool) -> Optional[str]:
    """
    Upload image bytes to MinIO (when configured) and return a usable source for it.

    Returns the presigned URL, else the public URL; when *embed* is true the
    result is a base64 data URI instead. Returns None when MinIO is not
    configured and *embed* is false. Upload errors propagate to the caller.
    """
    client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
    obj = join_prefix(prefix, rel_object)
    src: Optional[str] = None
    if client is not None and bucket and public_base:
        client.put_object(bucket_name=bucket, object_name=obj, data=io.BytesIO(data), length=len(data), content_type=mime)  # type: ignore
        try:
            presigned = presigned_read(client, bucket, obj, _MDCONV_PRESIGN_SECONDS)
            src = presigned or f"{public_base}/{bucket}/{quote(obj, safe='/')}"
        except Exception:
            src = f"{public_base}/{bucket}/{obj}"
    # NOTE(review): the original indentation was lost; the data-URI override is assumed
    # to apply whether or not MinIO is configured. Confirm against the real file.
    if embed:
        import base64
        src = f"data:{mime};base64," + base64.b64encode(data).decode("ascii")
    return src


def _mdconv_upload_object(kind: str, filename: Optional[str]) -> str:
    """Object path (without prefix) for an uploaded logo/cover: uploads/<kind>/<ts>-<stem>.<ext>, extension defaulting to png."""
    name = filename or kind
    stem = sanitize_filename(os.path.splitext(name)[0])
    ext = "." + (name.rsplit(".", 1)[-1].lower() if "." in name else "png")
    return f"uploads/{kind}/{int(time.time())}-{stem}{ext}"


def _mdconv_path_object(kind: str, p: Path) -> str:
    """Object path (without prefix) for a logo/cover read from a local path."""
    return f"uploads/{kind}/{int(time.time())}-{sanitize_filename(p.stem)}{p.suffix or '.png'}"


async def _mdconv_logo_src(logo_file: Optional[UploadFile], logo_url: Optional[str], save: Optional[bool]) -> Optional[str]:
    """
    Resolve the logo to a URL / data URI, or None.

    An uploaded file wins over logo_url. HTTP(S) URLs pass through unchanged.
    A local path is read and published; if that fails its file:// URI is
    returned. Any other error yields None.
    """
    embed = not save
    try:
        if logo_file is not None and getattr(logo_file, "filename", None):
            data = await logo_file.read()
            mime = detect_image_mime(logo_file.filename, data)
            return _mdconv_publish_image(data, mime, _mdconv_upload_object("logo", logo_file.filename), embed)
        if logo_url:
            u = logo_url.strip()
            if _mdconv_is_http(u):
                return u
            p = Path(u)
            try:
                data = p.read_bytes()
                mime = detect_image_mime(p.name, data)
                return _mdconv_publish_image(data, mime, _mdconv_path_object("logo", p), embed)
            except Exception:
                return p.resolve().as_uri()
    except Exception:
        return None
    return None


async def _mdconv_cover_src(cover_file: Optional[UploadFile], cover_url: Optional[str], save: Optional[bool]) -> Optional[str]:
    """
    Resolve the cover image to a URL / data URI, or None.

    Raises HTTPException(400) when the image exceeds the 2MB limit; every other
    failure yields None.
    """
    embed = not save
    try:
        if cover_file is not None and getattr(cover_file, "filename", None):
            data = await cover_file.read()
            if len(data) > _MDCONV_COVER_MAX_BYTES:
                raise HTTPException(status_code=400, detail="cover image exceeds 2MB limit")
            mime = detect_image_mime(cover_file.filename, data)
            return _mdconv_publish_image(data, mime, _mdconv_upload_object("cover", cover_file.filename), embed)
        if cover_url:
            cu = cover_url.strip()
            if _mdconv_is_http(cu):
                return cu
            p = Path(cu)
            data = p.read_bytes()
            if len(data) > _MDCONV_COVER_MAX_BYTES:
                raise HTTPException(status_code=400, detail="cover image exceeds 2MB limit")
            mime = detect_image_mime(cu, data)
            return _mdconv_publish_image(data, mime, _mdconv_path_object("cover", p), embed)
    except HTTPException:
        raise
    except Exception:
        return None
    return None


def _mdconv_store_final(data: bytes, out_name: str, media: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Upload the rendered document to <prefix>/converted/<out_name> when MinIO is
    configured and minio.store_final is not disabled.

    Returns (public URL, presigned URL); either may be None.
    """
    minio_url: Optional[str] = None
    presigned: Optional[str] = None
    try:
        client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
        store_final = str(RUNTIME_CONFIG.get("minio", {}).get("store_final") or "true").lower() in {"1", "true", "yes", "on"}
        if client is not None and bucket and public_base and store_final:
            obj = f"{(prefix or '').strip('/')}/converted/{out_name}".lstrip("/")
            client.put_object(bucket_name=bucket, object_name=obj, data=io.BytesIO(data), length=len(data), content_type=media or "application/octet-stream")  # type: ignore
            minio_url = f"{public_base}/{bucket}/{quote(obj, safe='/')}"
            try:
                presigned = presigned_read(client, bucket, obj, _MDCONV_PRESIGN_SECONDS)
            except Exception:
                presigned = None
    except Exception as exc:
        logging.warning("md_convert: storing %s in MinIO failed: %s", out_name, exc)
        minio_url = None
    return minio_url, presigned


@app.post("/md/convert", response_model=ConvertResponse)
async def md_convert(
    md_file: Optional[UploadFile] = File(None),
    markdown_text: Optional[str] = Form(None),
    markdown_url: Optional[str] = Form(None),
    target: str = Form("docx"),
    save: Optional[bool] = Form(False),
    filename: Optional[str] = Form(None),
    css_name: Optional[str] = Form(None),
    css_text: Optional[str] = Form(None),
    toc: Optional[bool] = Form(True),
    header_text: Optional[str] = Form(None),
    footer_text: Optional[str] = Form(None),
    logo_url: Optional[str] = Form(None),
    logo_file: Optional[UploadFile] = File(None),
    cover_url: Optional[str] = Form(None),
    cover_file: Optional[UploadFile] = File(None),
    product_name: Optional[str] = Form(None),
    document_name: Optional[str] = Form(None),
    product_version: Optional[str] = Form(None),
    document_version: Optional[str] = Form(None),
    copyright_text: Optional[str] = Form(None),
):
    """
    @function md_convert
    @description Advanced Markdown conversion endpoint supporting custom styling, logos, and metadata
    @param md_file Uploaded Markdown file (optional)
    @param markdown_text Raw Markdown text (optional)
    @param markdown_url URL or server-side path of a Markdown file (optional)
    @param target Output format (docx/pdf)
    @param save When false, logo/cover images are embedded as data URIs instead of referenced by URL
    @param filename Output filename (without extension)
    @param css_name Predefined CSS profile name (PDF only; "default" when neither css_name nor css_text is given)
    @param css_text Custom CSS content (PDF only)
    @param toc Include Table of Contents
    @param header_text Custom header text
    @param footer_text Custom footer text
    @param logo_url URL or local path for logo image
    @param logo_file Uploaded logo file
    @param cover_url URL or local path for cover image
    @param cover_file Uploaded cover file (max 2MB)
    @param product_name Product name for cover
    @param document_name Document name for cover
    @param product_version Product version for cover
    @param document_version Document version for cover
    @param copyright_text Copyright text
    @return ConvertResponse with the MinIO URLs (None when not stored), output name and media type
    """
    logging.info(f"md_convert start target={target} save={save} filename={filename}")
    provided = sum(1 for given in (md_file is not None, bool(markdown_text), bool(markdown_url)) if given)
    if provided != 1:
        raise HTTPException(status_code=400, detail="provide exactly one of md_file, markdown_text, markdown_url")
    kind = target.lower()
    if kind not in {"docx", "pdf"}:
        raise HTTPException(status_code=400, detail="target must be docx or pdf")

    content, base, base_dir = await _mdconv_load_source(md_file, markdown_text, markdown_url, filename)

    # Rewrite local assets to MinIO URLs if configured
    mappings: List[Dict[str, str]] = []
    client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG)
    if client is not None and bucket and public_base and base_dir:
        try:
            content, mappings = _rewrite_md_assets_to_minio(content, base_dir, client, bucket, public_base, prefix)
        except Exception as exc:
            logging.warning("md_convert: asset rewrite skipped: %s", exc)

    # Prepare common assets (logo, cover) for both DOCX and PDF
    logo_src = await _mdconv_logo_src(logo_file, logo_url, save)
    cover_src = await _mdconv_cover_src(cover_file, cover_url, save)
    logging.info(f"md_convert assets prepared logo_src={bool(logo_src)} cover_src={bool(cover_src)} css_name={css_name} css_text_len={(len(css_text) if css_text else 0)}")

    render_kwargs = dict(
        toc=bool(toc),
        header_text=header_text,
        footer_text=footer_text,
        logo_url=logo_src or logo_url,
        copyright_text=copyright_text,
        filename_text=base,
        cover_src=cover_src,
        product_name=product_name,
        document_name=document_name,
        product_version=product_version,
        document_version=document_version,
    )
    if kind == "docx":
        data = md_to_docx_bytes(content, **render_kwargs)
        media = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        ext = ".docx"
    else:
        use_css_name = css_name if css_name else ("default" if not css_text else None)
        data = md_to_pdf_bytes_with_renderer(content, "weasyprint", css_name=use_css_name, css_text=css_text, **render_kwargs)
        media = "application/pdf"
        ext = ".pdf"

    minio_url, minio_presigned_url = _mdconv_store_final(data, f"{base}{ext}", media)
    logging.info(f"md_convert done {kind} name={base}{ext} size={len(data)}")
    _db_notify({
        "type": "md_convert",
        "base": base,
        "target": kind,
        "local_url": None,
        "minio_url": minio_url,
        "minio_presigned_url": minio_presigned_url,
        "mappings": mappings,
        "time": int(time.time()),
    })
    return ConvertResponse(
        minio_url=minio_url,
        minio_presigned_url=minio_presigned_url,
        name=f"{base}{ext}",
        media_type=media,
    )
@app.get("/config/linkmap") def get_linkmap(): return load_linkmap() @app.post("/config/linkmap") async def set_linkmap(mapping: dict): try: save_linkmap(mapping) return {"ok": True} except Exception as e: raise HTTPException(status_code=400, detail=str(e)) def detect_image_mime(filename: Optional[str], data: bytes) -> str: ext = (os.path.splitext(filename or "")[1] or "").lower() if ext in {".png"}: return "image/png" if ext in {".jpg", ".jpeg"}: return "image/jpeg" if ext in {".svg"}: return "image/svg+xml" if ext in {".webp"}: return "image/webp" if data.startswith(b"\x89PNG\r\n\x1a\n"): return "image/png" if data.startswith(b"\xff\xd8\xff"): return "image/jpeg" if len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP": return "image/webp" try: head = data[:512].decode("utf-8", errors="ignore") if " str: ext = (os.path.splitext(filename or "")[1] or "").lower() if ext in {".png", ".jpg", ".jpeg", ".svg", ".webp"}: return detect_image_mime(filename, data) sig_png = data.startswith(b"\x89PNG\r\n\x1a\n") sig_jpg = data.startswith(b"\xff\xd8\xff") sig_webp = len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP" if sig_png or sig_jpg or sig_webp: return detect_image_mime(filename, data) guessed, _ = mimetypes.guess_type(filename or "") if guessed: return guessed return "application/octet-stream" @app.post("/proxy/download") async def proxy_download(url: str = Form(...)): u = (url or "").strip() if not u: raise HTTPException(status_code=400, detail="url required") try: data: bytes ct: str name: str if u.lower().startswith("http://") or u.lower().startswith("https://"): already_escaped = "%" in u safe = u if already_escaped else _safe_http_url(u) with urlopen(safe, timeout=15) as r: data = r.read() ct = r.headers.get("Content-Type") or detect_mime(None, data) from urllib.parse import urlparse, unquote import os as _os parsed = urlparse(u) path = unquote(parsed.path or "") last = (_os.path.basename(path) or "download").split("?")[0] if "." 
in last: name = last else: import mimetypes as _m ext = _m.guess_extension((ct or "").split(";")[0].strip()) or ".md" name = last + ext else: p = Path(u) if not p.exists() or not p.is_file(): raise HTTPException(status_code=404, detail="local path not found") data = p.read_bytes() ct = detect_mime(p.name, data) name = p.name disp = f"attachment; filename=\"{name}\"; filename*=UTF-8''" + quote(name) headers = {"Content-Disposition": disp} return Response(content=data, media_type=ct, headers=headers) except HTTPError as err: raise HTTPException(status_code=err.code, detail=f"download failed: {err}") except URLError as err: raise HTTPException(status_code=400, detail=f"download failed: {err}") except HTTPException: raise except Exception as e: raise HTTPException(status_code=400, detail=str(e)) def _minio_from_env() -> Tuple[Optional[object], Optional[str], Optional[str], str]: endpoint = os.environ.get("MINIO_ENDPOINT") access = os.environ.get("MINIO_ACCESS_KEY") secret = os.environ.get("MINIO_SECRET_KEY") bucket = os.environ.get("MINIO_BUCKET") secure = str(os.environ.get("MINIO_SECURE", "false")).lower() in {"1","true","yes","on"} public_base = os.environ.get("MINIO_PUBLIC_ENDPOINT") or (f"https://{endpoint}" if secure else f"http://{endpoint}" if endpoint else None) if Minio is None or not endpoint or not access or not secret or not bucket or not public_base: return None, None, None, "" client = Minio(endpoint, access_key=access, secret_key=secret, secure=secure) try: _minio_create_bucket(client, bucket) except Exception: pass return client, bucket, public_base, os.environ.get("MINIO_PREFIX", "") def _export_ext(export: str) -> str: e = (export or "").lower() if e == "markdown": return ".md" if e == "html": return ".html" if e in {"json", "doctags"}: return ".json" return ".txt" def _media_type(export: str) -> str: e = (export or "").lower() if e == "markdown": return "text/markdown; charset=utf-8" if e == "html": return "text/html; charset=utf-8" if e in {"json", 
"doctags"}: return "application/json" return "text/plain; charset=utf-8" def _rewrite_md_assets_to_minio(text: str, base_dir: Path, client: object, bucket: str, public_base: str, prefix: str, search_root: Optional[Path] = None) -> Tuple[str, List[Dict[str, str]]]: mappings: List[Dict[str, str]] = [] def _abs_key(p: Path) -> str: k = p.resolve().as_posix().lstrip("/") return k.replace(":", "") def _upload_data_uri(uri: str) -> Optional[str]: try: import base64, hashlib head, _, b64 = uri.partition(",") if not b64: return None b = base64.b64decode(b64, validate=False) mime = "" try: low = head.lower() pos = low.find("data:") if pos != -1: rest = head[pos+5:] semi = rest.find(";") mime = rest[:semi] if semi != -1 else rest except Exception: mime = "" if not mime: mime = detect_image_mime(None, b) ext = ".png" if mime.lower() in {"image/jpeg", "image/jpg"}: ext = ".jpg" elif mime.lower() == "image/webp": ext = ".webp" elif mime.lower() == "image/svg+xml": ext = ".svg" elif mime.lower() == "image/gif": ext = ".gif" h = hashlib.sha256(b).hexdigest()[:16] obj = join_prefix(prefix, f"embed/{h}{ext}") bio = io.BytesIO(b) client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(b), content_type=mime or detect_image_mime(None, b)) # type: ignore try: from urllib.parse import quote as _quote return f"{public_base}/{bucket}/{_quote(obj, safe='/')}" except Exception: return f"{public_base}/{bucket}/{obj}" except Exception: return None def _upload(path: Path) -> Optional[str]: try: data = path.read_bytes() mime = detect_mime(path.name, data) obj = join_prefix(prefix, f"abs/{_abs_key(path)}") bio = io.BytesIO(data) size = len(data) client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=size, content_type=mime) # type: ignore try: from urllib.parse import quote as _quote return f"{public_base}/{bucket}/{_quote(obj, safe='/')}" except Exception: return f"{public_base}/{bucket}/{obj}" except Exception: return None def _resolve_path(pure: str) -> 
Optional[Path]: q = pure.replace("\\", "/") if q.startswith("/"): try: rel = q.lstrip("/") base = (search_root or base_dir) p0 = (base / rel).resolve() except Exception: p0 = (search_root or base_dir) / q.lstrip("/") if p0.exists(): return p0 try: p = (base_dir / q).resolve() except Exception: p = (base_dir / q) if p.exists(): return p try: name = Path(q).name search = (search_root or base_dir) for hit in search.rglob(name): if hit.exists(): return hit except Exception: pass return None def _replace_md(m: re.Match) -> str: full = m.group(0) urlpart = m.group(1).strip() if urlpart.startswith("data:"): new = _upload_data_uri(urlpart) if new: mappings.append({"from": "data_uri", "to": new, "ok": True, "type": "md_image_data"}) return full.replace(urlpart, new) mappings.append({"from": "data_uri", "to": None, "ok": False, "type": "md_image_data"}) return full if urlpart.startswith("http://") or urlpart.startswith("https://"): return full s = urlpart pure = s tail = "" if s.startswith("<"): gt = s.find(">") if gt != -1: pure = s[1:gt].strip() tail = s[gt+1:] else: dq = s.find('"') sq = s.find("'") qpos = -1 if dq != -1 and sq != -1: qpos = dq if dq < sq else sq elif dq != -1: qpos = dq elif sq != -1: qpos = sq if qpos != -1: pure = s[:qpos].rstrip() tail = s[qpos:] p = _resolve_path(pure) if not p or not p.exists(): mappings.append({"from": pure, "to": None, "ok": False, "type": "md_link"}) return full new = _upload(p) if not new: mappings.append({"from": pure, "to": None, "ok": False, "type": "md_link"}) return full mappings.append({"from": pure, "to": new, "ok": True, "type": "md_link"}) return full.replace(urlpart, f"{new}{tail}") text = re.sub(r"!\[[^\]]*\]\(([^)]+)\)", _replace_md, text) def _replace_mdlink(m: re.Match) -> str: full = m.group(0) urlpart = m.group(1).strip() if urlpart.startswith("http://") or urlpart.startswith("https://") or urlpart.startswith("data:"): return full s = urlpart pure = s tail = "" if s.startswith("<"): gt = s.find(">") if gt != -1: 
pure = s[1:gt].strip() tail = s[gt+1:] else: dq = s.find('"') sq = s.find("'") qpos = -1 if dq != -1 and sq != -1: qpos = dq if dq < sq else sq elif dq != -1: qpos = dq elif sq != -1: qpos = sq if qpos != -1: pure = s[:qpos].rstrip() tail = s[qpos:] p = _resolve_path(pure) if not p or not p.exists(): mappings.append({"from": pure, "to": None, "ok": False, "type": "md_link"}) return full new = _upload(p) if not new: mappings.append({"from": pure, "to": None, "ok": False, "type": "md_link"}) return full mappings.append({"from": pure, "to": new, "ok": True, "type": "md_link"}) return full.replace(urlpart, f"{new}{tail}") text = re.sub(r"(? str: src = m.group(1).strip() if src.startswith("data:"): new = _upload_data_uri(src) if new: mappings.append({"from": "data_uri", "to": new, "ok": True, "type": "html_img_data"}) return m.group(0).replace(src, new) mappings.append({"from": "data_uri", "to": None, "ok": False, "type": "html_img_data"}) return m.group(0) if src.startswith("http://") or src.startswith("https://"): return m.group(0) pure = src p = _resolve_path(pure) if not p or not p.exists(): mappings.append({"from": pure, "to": None, "ok": False, "type": "html_img"}) return m.group(0) new = _upload(p) if not new: mappings.append({"from": pure, "to": None, "ok": False, "type": "html_img"}) return m.group(0) mappings.append({"from": pure, "to": new, "ok": True, "type": "html_img"}) return m.group(0).replace(src, new) text = re.sub(r"]+src=\"([^\"]+)\"", _replace_img, text) text = re.sub(r"]+src='([^']+)'", _replace_img, text) def _replace_href(m: re.Match) -> str: src = m.group(1).strip() if src.startswith("http://") or src.startswith("https://") or src.startswith("data:"): return m.group(0) pure = src p = _resolve_path(pure) if not p or not p.exists(): mappings.append({"from": pure, "to": None, "ok": False, "type": "html_href"}) return m.group(0) new = _upload(p) if not new: mappings.append({"from": pure, "to": None, "ok": False, "type": "html_href"}) return 
m.group(0) mappings.append({"from": pure, "to": new, "ok": True, "type": "html_href"}) return m.group(0).replace(src, new) text = re.sub(r"]+href=\"([^\"]+)\"", _replace_href, text) text = re.sub(r"]+href='([^']+)'", _replace_href, text) def _replace_video(m: re.Match) -> str: src = m.group(1).strip() if src.startswith("http://") or src.startswith("https://") or src.startswith("data:"): return m.group(0) pure = src p = _resolve_path(pure) if not p or not p.exists(): mappings.append({"from": pure, "to": None, "ok": False, "type": "html_video"}) return m.group(0) new = _upload(p) if not new: mappings.append({"from": pure, "to": None, "ok": False, "type": "html_video"}) return m.group(0) mappings.append({"from": pure, "to": new, "ok": True, "type": "html_video"}) return m.group(0).replace(src, new) text = re.sub(r"]+src=\"([^\"]+)\"", _replace_video, text) text = re.sub(r"]+src='([^']+)'", _replace_video, text) def _replace_audio(m: re.Match) -> str: src = m.group(1).strip() if src.startswith("http://") or src.startswith("https://") or src.startswith("data:"): return m.group(0) pure = src p = _resolve_path(pure) if not p or not p.exists(): mappings.append({"from": pure, "to": None, "ok": False, "type": "html_audio"}) return m.group(0) new = _upload(p) if not new: mappings.append({"from": pure, "to": None, "ok": False, "type": "html_audio"}) return m.group(0) mappings.append({"from": pure, "to": new, "ok": True, "type": "html_audio"}) return m.group(0).replace(src, new) text = re.sub(r"]+src=\"([^\"]+)\"", _replace_audio, text) text = re.sub(r"]+src='([^']+)'", _replace_audio, text) def _replace_source(m: re.Match) -> str: src = m.group(1).strip() if src.startswith("http://") or src.startswith("https://") or src.startswith("data:"): return m.group(0) pure = src p = _resolve_path(pure) if not p or not p.exists(): mappings.append({"from": pure, "to": None, "ok": False, "type": "html_source"}) return m.group(0) new = _upload(p) if not new: mappings.append({"from": pure, 
"to": None, "ok": False, "type": "html_source"}) return m.group(0) mappings.append({"from": pure, "to": new, "ok": True, "type": "html_source"}) return m.group(0).replace(src, new) text = re.sub(r"]+src=\"([^\"]+)\"", _replace_source, text) text = re.sub(r"]+src='([^']+)'", _replace_source, text) return text, mappings def _uplift_rel_path(rel: Path, md_dir: Path, root: Optional[Path], mappings: List[Dict[str, str]]) -> Path: try: parts = list(rel.parts) if len(parts) < 2: return rel exts = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg", ".webp"} def _is_asset_dir(name: str) -> bool: n = name.strip().lower() return n in {"image", "images", "img", "imgs", "media", "assets", "pic", "pics", "picture", "pictures", "visio pic", "visio_pic", "visio", "图片", "图像"} def _has_asset_sibling() -> bool: try: for ch in md_dir.iterdir(): if ch.is_dir() and _is_asset_dir(ch.name): for f in ch.rglob("*"): if f.is_file() and f.suffix.lower() in exts: return True for f in md_dir.iterdir(): if f.is_file() and f.suffix.lower() in exts: return True except Exception: pass return False def _mappings_indicate_local_assets() -> bool: try: for m in mappings or []: if isinstance(m.get("from"), str): s = str(m.get("from") or "").strip() if s and not (s.startswith("http://") or s.startswith("https://") or s.startswith("data:") or s.startswith("file://")): return True except Exception: pass return False try: if len(parts) >= 2: new_parts = parts[:-2] + [parts[-1]] return Path("/".join(new_parts)) except Exception: pass return rel except Exception: return rel def _inject_image_urls_for_markers(text: str, urls: List[str]) -> str: if not urls: return text out = [] i = 0 for line in text.splitlines(): if "" in line and i < len(urls): line = line.replace("", f"![image]({urls[i]})") i += 1 out.append(line) return "\n".join(out) def _extract_pdf_images(pdf_path: Path) -> List[Tuple[str, bytes]]: imgs: List[Tuple[str, bytes]] = [] if fitz is None: return imgs try: doc = fitz.open(pdf_path) for page in 
doc: for xref in page.get_images(full=True): try: info = doc.extract_image(xref[0]) ext = info.get("ext", "png") data = info.get("image", b"") if data: imgs.append((ext, data)) except Exception: continue doc.close() except Exception: pass return imgs def _bulk_upload_assets(root: Path, client: object, bucket: str, public_base: str, prefix: str) -> List[str]: urls: List[str] = [] exts = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg", ".webp", ".tif", ".tiff", ".ico", ".jfif", ".heic", ".heif", ".emf", ".wmf", ".eps", ".psd"} for f in root.rglob("*"): try: if not f.is_file(): continue if f.suffix.lower() not in exts: continue data = f.read_bytes() mime = detect_mime(f.name, data) k = f.resolve().as_posix().lstrip("/").replace(":", "") obj = join_prefix(prefix, f"abs/{k}") bio = io.BytesIO(data) client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(data), content_type=mime) # type: ignore urls.append(f"{public_base}/{bucket}/{obj}") except Exception: pass return urls @app.post("/md/convert-folder") async def md_convert_folder(folder_path: str = Form(...), prefix: Optional[str] = Form(None)): p = Path(folder_path).expanduser().resolve() if not p.exists() or not p.is_dir(): raise HTTPException(status_code=400, detail="folder_path must be an existing directory") client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG) if client is None or bucket is None or not public_base: raise HTTPException(status_code=400, detail="MinIO is not configured") use_prefix = (prefix or env_prefix or "").strip() processed: List[Dict[str, str]] = [] try: _bulk_upload_assets(p, client, bucket, public_base, use_prefix) except Exception: pass for md_file in p.rglob("*.md"): rel_md = md_file.relative_to(p) rel_uplift_path = rel_md minio_url: Optional[str] = None minio_presigned_url: Optional[str] = None mappings: List[Dict[str, str]] = [] try: content = md_file.read_text("utf-8", errors="ignore") new_text, mappings = _rewrite_md_assets_to_minio(content, 
md_file.parent, client, bucket, public_base, use_prefix, search_root=p) rel_uplift_path = _uplift_rel_path(rel_md, md_file.parent, p, mappings) # upload rewritten md to MinIO obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift_path.as_posix()}".lstrip("/") raw = new_text.encode("utf-8") bio = io.BytesIO(raw) client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type="text/markdown; charset=utf-8") # type: ignore try: from urllib.parse import quote as _quote minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}" except Exception: minio_url = f"{public_base}/{bucket}/{obj}" minio_url_display = unquote(minio_url) minio_url_display = unquote(minio_url) try: exp = int(timedelta(hours=12).total_seconds()) minio_presigned_url = presigned_read(client, bucket, obj, exp) if client is not None else None except Exception: minio_presigned_url = None except Exception as e: logging.error(str(e)) okc = sum(1 for m in mappings if m.get("ok")) frc = sum(1 for m in mappings if not m.get("ok")) asset_urls = [m.get("to") for m in mappings if m.get("ok") and m.get("to")] processed.append({ "source": rel_uplift_path.as_posix(), "output": None, "minio_url": minio_url, "minio_presigned_url": minio_presigned_url, "mappings": mappings, "asset_ok": okc, "asset_fail": frc, "asset_urls": asset_urls }) return {"ok": True, "count": len(processed), "files": processed} @app.post("/md/upload-folder") async def md_upload_folder(folder_files: List[UploadFile] = File(None), folder_paths: List[str] = Form(None), prefix: Optional[str] = Form(None)): if not folder_files or not folder_paths or len(folder_files) != len(folder_paths): raise HTTPException(status_code=400, detail="folder_files and folder_paths are required and must match in length") client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG) if client is None or bucket is None or not public_base: raise HTTPException(status_code=400, detail="MinIO is not configured") use_prefix = (prefix 
or env_prefix or "").strip() staging = Path(tempfile.mkdtemp(prefix="folder_stage_")) try: for f, rel in zip(folder_files, folder_paths): rel_norm = rel.replace("\\", "/") dest = staging / rel_norm dest.parent.mkdir(parents=True, exist_ok=True) dest.write_bytes(await f.read()) base = staging try: _bulk_upload_assets(base, client, bucket, public_base, use_prefix) except Exception: pass processed: List[Dict[str, str]] = [] for md_file in base.rglob("*.md"): try: content = md_file.read_text("utf-8", errors="ignore") new_text, mappings = _rewrite_md_assets_to_minio(content, md_file.parent, client, bucket, public_base, use_prefix, search_root=base) rel_md = md_file.relative_to(base) rel_uplift = _uplift_rel_path(rel_md, md_file.parent, base, mappings) try: obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/") bio = io.BytesIO(new_text.encode("utf-8")) client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(new_text.encode("utf-8")), content_type="text/markdown; charset=utf-8") # type: ignore try: from urllib.parse import quote as _quote minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}" except Exception: minio_url = f"{public_base}/{bucket}/{obj}" minio_presigned_url = None try: exp = int(timedelta(hours=12).total_seconds()) minio_presigned_url = presigned_read(client, bucket, obj, exp) if client is not None else None except Exception: minio_presigned_url = None except Exception: minio_url = None minio_presigned_url = None okc = sum(1 for m in mappings if m.get("ok")) frc = sum(1 for m in mappings if not m.get("ok")) asset_urls = [m.get("to") for m in mappings if m.get("ok") and m.get("to")] processed.append({ "source": rel_uplift.as_posix(), "output": None, "minio_url": minio_url, "minio_presigned_url": minio_presigned_url, "mappings": mappings, "asset_ok": okc, "asset_fail": frc, "asset_urls": asset_urls }) except Exception as e: logging.error(str(e)) return {"ok": True, "count": len(processed), "files": 
processed} finally: try: shutil.rmtree(staging) except Exception: pass logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") def _is_debug(request: Request) -> bool: try: q = request.query_params.get("debug") if q and str(q).lower() in ("1", "true", "yes", "on"): return True except Exception: pass h = request.headers.get("X-Debug") if h and str(h).lower() in ("1", "true", "yes", "on"): return True env = os.environ.get("APP_DEBUG") if env and str(env).lower() in ("1", "true", "yes", "on"): return True return False @app.middleware("http") async def logging_middleware(request: Request, call_next): start = time.time() try: response = await call_next(request) duration = int((time.time() - start) * 1000) logging.info(f"{request.method} {request.url.path} -> {response.status_code} {duration}ms") return response except Exception as exc: duration = int((time.time() - start) * 1000) tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__)) logging.error(f"{request.method} {request.url.path} FAILED {duration}ms: {exc}\n{tb}") raise @app.exception_handler(HTTPException) async def http_exception_handler(request: Request, exc: HTTPException): tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__)) logging.error(f"HTTP error on {request.method} {request.url.path}: {exc}\n{tb}") debug = _is_debug(request) body = {"error": "http_error", "detail": exc.detail} if debug: body["trace"] = tb return JSONResponse(status_code=exc.status_code, content=body) @app.exception_handler(Exception) async def global_exception_handler(request: Request, exc: Exception): tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__)) logging.error(f"Unhandled error on {request.method} {request.url.path}: {exc}\n{tb}") debug = _is_debug(request) body = {"error": "internal_error", "detail": str(exc)} if debug: body["trace"] = tb return JSONResponse(status_code=500, content=body) def _safe_http_url(u: str) -> str: try: 
parts = urlsplit(u) path = quote(parts.path, safe="/:%") query = quote(parts.query, safe="=&%") frag = quote(parts.fragment, safe="") netloc = parts.netloc try: userinfo = '' hostport = netloc if '@' in netloc: userinfo, hostport = netloc.split('@', 1) userinfo += '@' if hostport.startswith('['): netloc = userinfo + hostport else: port = '' host = hostport if ':' in hostport: host, port = hostport.rsplit(':', 1) if port and not port.isdigit(): host = hostport port = '' try: host_idna = host.encode('idna').decode('ascii') except Exception: host_idna = host netloc = f"{userinfo}{host_idna}{(':' + port) if port else ''}" except Exception: pass return urlunsplit((parts.scheme, netloc, path, query, frag)) except Exception: return u # ────────────────────────────────────────────────────────────────────────────── # API v2 endpoints with standard code/msg/data # ────────────────────────────────────────────────────────────────────────────── _converter_v2 = FormatConverter() def _ok(data: dict, msg: str = "ok"): return JSONResponse({"code": 0, "msg": msg, "data": data}) def _err(msg: str, code: int = 500, detail: object = None): payload = {"code": code, "msg": msg, "data": None} if detail is not None: payload["detail"] = detail return JSONResponse(payload, status_code=200) @app.post("/api/convert") async def api_convert( file: Optional[UploadFile] = File(None), source_url: Optional[str] = Form(None), export: str = Form("markdown"), engine: Optional[str] = Form(None), save: Optional[bool] = Form(False), filename: Optional[str] = Form(None), ): try: if (file is None and not source_url) or (file is not None and source_url): return _err("参数错误:file 与 source_url 二选一") export = _normalize_export(export) engine = _normalize_engine(engine) if source_url: enc, content, artifacts_dir = await asyncio.to_thread(_converter_v2.convert, source_url, export=export, engine=engine) base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(source_url, None)) out_ext = 
_export_ext(export) ct = _media_type(export) mappings: list[dict[str, str]] = [] trace: List[str] = [] trace.append(f"source_url={source_url}") trace.append(f"export={export}") if artifacts_dir: trace.append(f"artifacts_dir={artifacts_dir}") if export.lower() == "markdown": try: client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG) if client is not None and bucket and public_base: trace.append(f"minio bucket={bucket} public={public_base} prefix={(prefix or '').strip('/')}") base_dir = Path(artifacts_dir) if artifacts_dir else Path(tempfile.mkdtemp(prefix="md_assets_")) new_text, ms = _rewrite_md_assets_to_minio(content, base_dir, client, bucket, public_base, prefix, search_root=(Path(artifacts_dir) if artifacts_dir else None)) urls: List[str] = [] if artifacts_dir: try: urls = _bulk_upload_assets(Path(artifacts_dir), client, bucket, public_base, prefix) except Exception: urls = [] trace.append(f"asset_urls={len(urls)}") try: if source_url: src_path: Optional[Path] = None if source_url.startswith('file://') or Path(source_url).exists(): src_path = Path(source_url.replace('file://', '')) elif source_url.startswith('http://') or source_url.startswith('https://'): import tempfile as _tf from urllib.request import urlopen with _tf.NamedTemporaryFile(delete=False, suffix=Path(infer_basename(source_url, None)).suffix or '.bin') as _tmp: try: with urlopen(source_url) as resp: _tmp.write(resp.read()) finally: _tmp.flush(); _tmp.close() src_path = Path(_tmp.name) if src_path and src_path.exists() and str(src_path).lower().endswith('.pdf'): pdf_imgs = _extract_pdf_images(src_path) base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(source_url, None)) extra_urls: List[str] = [] for idx, (img_ext, data) in enumerate(pdf_imgs): obj = join_prefix(prefix, f"converted/{base}_img_{idx}.{img_ext}") bio = io.BytesIO(data) mime = "image/png" if img_ext.lower() == "png" else "image/jpeg" client.put_object(bucket_name=bucket, 
object_name=obj, data=bio, length=len(data), content_type=mime) # type: ignore try: from urllib.parse import quote as _quote obj_enc = _quote(obj, safe="/") extra_urls.append(f"{public_base}/{bucket}/{obj_enc}") except Exception: extra_urls.append(f"{public_base}/{bucket}/{obj}") urls.extend(extra_urls) trace.append(f"pdf_imgs_uploaded={len(extra_urls)}") if source_url.startswith('http://') or source_url.startswith('https://'): try: os.unlink(str(src_path)) except Exception: pass except Exception: pass before = new_text.count("") new_text = _inject_image_urls_for_markers(new_text, urls) after = new_text.count("") trace.append(f"image_placeholders_before={before} after={after}") content = new_text mappings = ms except Exception: pass if not save: resp = _ok({"encoding": enc, "content": content, "name": f"{base}{out_ext}", "media_type": ct, "mappings": mappings, "trace": trace}) try: if artifacts_dir: shutil.rmtree(artifacts_dir, ignore_errors=True) except Exception: pass return resp client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG) if client is None or not bucket or not public_base: return _err("MinIO 未配置,无法保存") out_name = f"{base}{out_ext}" if export.lower() == "markdown" and not out_name.lower().endswith(".md"): out_name = f"{base}.md" obj = join_prefix(prefix, f"converted/{out_name}") bio = io.BytesIO(content.encode("utf-8")) client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(content.encode("utf-8")), content_type=ct) # type: ignore try: from urllib.parse import quote as _quote minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}" except Exception: minio_url = f"{public_base}/{bucket}/{obj}" minio_url_display = unquote(minio_url) try: trace.append(f"save out_name={out_name}") trace.append(f"save obj={obj}") trace.append(f"save minio_url={minio_url}") except Exception: pass exp = int(timedelta(hours=12).total_seconds()) minio_presigned_url = presigned_read(client, bucket, obj, exp) resp = _ok({ "encoding": enc, 
"name": out_name, "media_type": ct, "minio_url": minio_url, "minio_presigned_url": minio_presigned_url, "minio_url_display": minio_url_display, "mappings": mappings, "trace": trace, }) try: if artifacts_dir: shutil.rmtree(artifacts_dir, ignore_errors=True) except Exception: pass return resp assert file is not None suffix = "" if file.filename and "." in file.filename: suffix = "." + file.filename.rsplit(".", 1)[-1] with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp: tmp.write(await file.read()) tmp_path = tmp.name try: enc, content, artifacts_dir = await asyncio.to_thread(_converter_v2.convert, tmp_path, export=export, engine=engine) base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(None, file.filename)) out_ext = _export_ext(export) ct = _media_type(export) mappings: list[dict[str, str]] = [] trace: List[str] = [] trace.append(f"file={file.filename}") trace.append(f"tmp_path={tmp_path}") trace.append(f"export={export}") if artifacts_dir: trace.append(f"artifacts_dir={artifacts_dir}") if export.lower() == "markdown": try: client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG) if client is not None and bucket and public_base: trace.append(f"minio bucket={bucket} public={public_base} prefix={(prefix or '').strip('/')}") base_dir = Path(artifacts_dir) if artifacts_dir else Path(tempfile.mkdtemp(prefix="md_assets_")) new_text, ms = _rewrite_md_assets_to_minio(content, base_dir, client, bucket, public_base, prefix, search_root=(Path(artifacts_dir) if artifacts_dir else None)) urls: List[str] = [] if artifacts_dir: try: urls = _bulk_upload_assets(Path(artifacts_dir), client, bucket, public_base, prefix) except Exception: urls = [] trace.append(f"asset_urls={len(urls)}") try: if tmp_path and tmp_path.exists() and str(tmp_path).lower().endswith('.pdf'): pdf_imgs = _extract_pdf_images(tmp_path) base = sanitize_filename(filename) if filename else sanitize_filename(infer_basename(None, file.filename)) 
extra_urls: List[str] = [] for idx, (img_ext, data) in enumerate(pdf_imgs): obj = join_prefix(prefix, f"converted/{base}_img_{idx}.{img_ext}") bio = io.BytesIO(data) mime = "image/png" if img_ext.lower() == "png" else "image/jpeg" client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(data), content_type=mime) # type: ignore try: from urllib.parse import quote as _quote extra_urls.append(f"{public_base}/{bucket}/{_quote(obj, safe='/')}") except Exception: extra_urls.append(f"{public_base}/{bucket}/{obj}") urls.extend(extra_urls) trace.append(f"pdf_imgs_uploaded={len(extra_urls)}") except Exception: pass before = new_text.count("") new_text = _inject_image_urls_for_markers(new_text, urls) after = new_text.count("") trace.append(f"image_placeholders_before={before} after={after}") content = new_text mappings = ms except Exception: pass if not save: resp = _ok({"encoding": enc, "content": content, "name": f"{base}{out_ext}", "media_type": ct, "mappings": mappings, "trace": trace}) try: if artifacts_dir: shutil.rmtree(artifacts_dir, ignore_errors=True) except Exception: pass return resp client, bucket, public_base, prefix = minio_current(RUNTIME_CONFIG) if client is None or not bucket or not public_base: return _err("MinIO 未配置,无法保存") out_name = f"{base}{out_ext}" if export.lower() == "markdown" and not out_name.lower().endswith(".md"): out_name = f"{base}.md" obj = join_prefix(prefix, f"converted/{out_name}") bio = io.BytesIO(content.encode("utf-8")) client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(content.encode("utf-8")), content_type=ct) # type: ignore try: from urllib.parse import quote as _quote minio_url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}" except Exception: minio_url = f"{public_base}/{bucket}/{obj}" minio_url_display = unquote(minio_url) try: trace.append(f"save out_name={out_name}") trace.append(f"save obj={obj}") trace.append(f"save minio_url={minio_url}") except Exception: pass exp = 
@app.post("/api/pdf/convert")
async def api_pdf_convert(
    file: Optional[UploadFile] = File(None),
    file_path: Optional[str] = Form(None),
    markdown_content: Optional[str] = Form(None),
    toc: bool = Form(False),
    header_text: Optional[str] = Form(None),
    footer_text: Optional[str] = Form(None),
    logo_url: Optional[str] = Form(None),
    copyright_text: Optional[str] = Form(None),
    filename_text: Optional[str] = Form(None),
    cover_src: Optional[str] = Form(None),
    product_name: Optional[str] = Form(None),
    document_name: Optional[str] = Form(None),
    product_version: Optional[str] = Form(None),
    document_version: Optional[str] = Form(None),
    css_name: Optional[str] = Form(None),
    css_text: Optional[str] = Form(None),
    download: bool = Form(True),
):
    """
    Convert Word or Markdown to PDF.

    Supports three input methods, checked in this order:
    1. Upload file (Word .doc/.docx or Markdown .md/.markdown)
    2. Specify file_path (local file path)
    3. Provide markdown_content directly

    Returns the PDF as an attachment when ``download`` is true (the default);
    otherwise returns the ``_ok`` envelope carrying the base64-encoded PDF,
    the output filename and the size in bytes. Every failure is reported
    through ``_err``.
    """
    # Layout options forwarded unchanged to every converter.
    layout = dict(
        toc=toc,
        header_text=header_text,
        footer_text=footer_text,
        logo_url=logo_url,
        copyright_text=copyright_text,
        cover_src=cover_src,
        product_name=product_name,
        document_name=document_name,
        product_version=product_version,
        document_version=document_version,
    )
    # Stylesheet options are only passed to the Markdown converters.
    css = dict(css_name=css_name, css_text=css_text)

    async def _render_file(src: Path, suffix: str, stem: str) -> Optional[bytes]:
        """Render a Word/Markdown file in a worker thread; None = unsupported suffix."""
        title = filename_text or stem
        if suffix in {".doc", ".docx"}:
            return await asyncio.to_thread(word_to_pdf_bytes, src, filename_text=title, **layout)
        if suffix in {".md", ".markdown"}:
            return await asyncio.to_thread(
                markdown_file_to_pdf_bytes, src, filename_text=title, **layout, **css
            )
        return None

    try:
        pdf_bytes: Optional[bytes] = b""
        output_filename: str = "document.pdf"

        if file:
            # Uploaded file: spool it to a temp file the converters can open.
            filename = file.filename or "upload"
            suffix = Path(filename).suffix.lower()
            stem = Path(filename).stem
            # mkstemp creates the file atomically; tempfile.mktemp only returns
            # a name and is open to a race before the file is written.
            fd, tmp_name = tempfile.mkstemp(suffix=suffix)
            os.close(fd)
            tmp_path = Path(tmp_name)
            try:
                tmp_path.write_bytes(await file.read())
                pdf_bytes = await _render_file(tmp_path, suffix, stem)
            finally:
                try:
                    tmp_path.unlink(missing_ok=True)
                except OSError:
                    pass
            if pdf_bytes is None:
                return _err(f"不支持的文件格式: {suffix}。支持的格式: .doc, .docx, .md")
            output_filename = f"{stem}.pdf"
        elif file_path:
            # NOTE(review): file_path is client-supplied and read from the
            # server's filesystem without restriction — confirm this endpoint
            # is only reachable by trusted callers.
            path = Path(file_path).expanduser()
            if not path.exists():
                return _err(f"文件不存在: {file_path}")
            suffix = path.suffix.lower()
            pdf_bytes = await _render_file(path, suffix, path.stem)
            if pdf_bytes is None:
                return _err(f"不支持的文件格式: {suffix}。支持的格式: .doc, .docx, .md")
            output_filename = f"{path.stem}.pdf"
        elif markdown_content:
            # Inline Markdown: filename_text is forwarded as given (no fallback).
            output_filename = f"{filename_text or 'document'}.pdf"
            pdf_bytes = await asyncio.to_thread(
                markdown_to_pdf_bytes,
                markdown_content,
                filename_text=filename_text,
                **layout,
                **css,
            )
        else:
            return _err("必须提供 file、file_path 或 markdown_content 中的一个")

        if not pdf_bytes:
            return _err("PDF 转换失败,未生成内容")

        if download:
            from fastapi.responses import StreamingResponse

            # HTTP header values must be ASCII, so the (possibly non-ASCII)
            # filename is percent-encoded. The plain ``filename`` parameter is
            # kept for older clients; ``filename*`` (RFC 6266 / RFC 5987) lets
            # current clients decode the real UTF-8 name.
            safe_filename = quote(output_filename, safe="")
            disposition = (
                f"attachment; filename={safe_filename}; "
                f"filename*=UTF-8''{safe_filename}"
            )
            return StreamingResponse(
                io.BytesIO(pdf_bytes),
                media_type="application/pdf",
                headers={"Content-Disposition": disposition},
            )

        # JSON mode: return the PDF as base64 inside the standard envelope.
        import base64

        return _ok({
            "pdf_base64": base64.b64encode(pdf_bytes).decode("ascii"),
            "filename": output_filename,
            "size": len(pdf_bytes),
        })
    except Exception as e:
        logging.exception("PDF conversion error")
        return _err(f"PDF 转换失败: {str(e)}")
@app.post("/api/upload-archive")
async def api_upload_archive(file: UploadFile = File(...), prefix: Optional[str] = Form(None)):
    """Extract an uploaded zip/tar archive and publish its documents to MinIO.

    Steps, as performed below:
    1. Spool the upload to a temp file and extract it into a temp directory
       with ``_zip_extract_safely`` / ``_tar_extract_safely``.
    2. Bulk-upload assets with ``_bulk_upload_assets`` (best effort).
    3. For every ``*.md`` file: rewrite asset links via
       ``_rewrite_md_assets_to_minio`` and upload the result under
       ``<prefix>/rewritten/``.
    4. For every ``.html``/``.htm`` file without a sibling ``.md``: rewrite
       links, convert to Markdown with ``_converter_v2``, rewrite again and
       upload the same way.

    @param file Archive upload (.zip, .tar*, .tgz)
    @param prefix Optional object-name prefix; falls back to the configured one
    @return ``_ok`` envelope with ``count``, per-file entries and the import
            tree from ``_build_import_tree``; ``_err`` on failure
    """
    try:
        client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG)
        if client is None or bucket is None or not public_base:
            return _err("MinIO 未配置")
        use_prefix = (prefix or env_prefix or "").strip()

        # Only the final path component of the client-supplied name is used as
        # the temp-file suffix: a name containing "/" or "\" would otherwise
        # make temp-file creation fail.
        suffix = Path((file.filename or "").replace("\\", "/")).name.lower()

        def _failed(source: str) -> Dict[str, object]:
            """Entry reported for a document that could not be processed."""
            return {"source": source, "minio_url": None, "minio_presigned_url": None,
                    "mappings": [], "object_name": None, "size": 0}

        def _publish(rel_uplift: Path, raw: bytes, mappings: list) -> Dict[str, object]:
            """Upload rewritten Markdown bytes and build the per-file entry."""
            obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
            client.put_object(bucket_name=bucket, object_name=obj, data=io.BytesIO(raw),
                              length=len(raw), content_type="text/markdown; charset=utf-8")  # type: ignore
            url = f"{public_base}/{bucket}/{quote(obj, safe='/')}"
            exp = int(timedelta(hours=12).total_seconds())
            return {
                "source": rel_uplift.as_posix(),
                "minio_url": url,
                "minio_presigned_url": presigned_read(client, bucket, obj, exp),
                # Computed per object; previously the HTML branch reused a
                # variable that was only assigned in the Markdown branch.
                "minio_url_display": unquote(url),
                "mappings": mappings,
                "object_name": obj,
                "size": len(raw),
            }

        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        root: Optional[Path] = None
        try:
            with tmp:
                tmp.write(await file.read())
            root = Path(tempfile.mkdtemp(prefix="extract_"))

            if suffix.endswith(".zip"):
                import zipfile
                with zipfile.ZipFile(tmp.name, "r") as zf:
                    _zip_extract_safely(zf, root)
            elif ".tar" in suffix or suffix.endswith(".tgz"):
                import tarfile
                with tarfile.open(tmp.name, "r:*") as tf:
                    _tar_extract_safely(tf, root)
            else:
                return _err("不支持的压缩格式")

            try:
                _bulk_upload_assets(root, client, bucket, public_base, use_prefix)
            except Exception:
                # Best effort: individual assets are uploaded again on demand
                # while links are rewritten below.
                logging.exception("bulk asset upload failed")

            files: List[Dict[str, object]] = []

            # Markdown files are rewritten and published as-is.
            for md in root.rglob("*.md"):
                rel_md = md.relative_to(root)
                try:
                    text = md.read_text("utf-8", errors="ignore")
                    new_text, mappings = _rewrite_md_assets_to_minio(
                        text, md.parent, client, bucket, public_base, use_prefix, search_root=root)
                    rel_uplift = _uplift_rel_path(rel_md, md.parent, root, mappings)
                    files.append(_publish(rel_uplift, new_text.encode("utf-8"), mappings))
                except Exception:
                    logging.exception("failed to process %s", rel_md)
                    files.append(_failed(rel_md.as_posix()))

            # HTML files are converted to Markdown, then handled the same way.
            html_files = [p for p in root.rglob("*")
                          if p.is_file() and p.suffix.lower() in {".html", ".htm"}]
            for html in html_files:
                rel_html = html.relative_to(root)
                md_target_rel = rel_html.with_suffix(".md")
                # Skip when a Markdown file with the same base name exists.
                if (root / md_target_rel).exists():
                    continue
                tmpd: Optional[Path] = None
                try:
                    html_src = html.read_text("utf-8", errors="ignore")
                    html_rew, mappings = _rewrite_md_assets_to_minio(
                        html_src, html.parent, client, bucket, public_base, use_prefix, search_root=root)
                    tmpd = Path(tempfile.mkdtemp(prefix="rew_html_"))
                    tmpf = tmpd / html.name
                    tmpf.write_text(html_rew, "utf-8")
                    # Run the converter off the event loop, as /api/convert does.
                    enc, md_text, _art = await asyncio.to_thread(
                        _converter_v2.convert, str(tmpf), export="markdown")
                    md_text2, mappings2 = _rewrite_md_assets_to_minio(
                        md_text, html.parent, client, bucket, public_base, use_prefix, search_root=root)
                    if _art:
                        # Same artifacts cleanup /api/convert performs.
                        shutil.rmtree(_art, ignore_errors=True)
                    mappings = (mappings or []) + (mappings2 or [])
                    rel_uplift = _uplift_rel_path(md_target_rel, html.parent, root, mappings)
                    files.append(_publish(rel_uplift, md_text2.encode(enc or "utf-8"), mappings))
                except Exception:
                    logging.exception("failed to process %s", rel_html)
                    files.append(_failed(rel_html.as_posix()))
                finally:
                    if tmpd is not None:
                        shutil.rmtree(tmpd, ignore_errors=True)

            imp = _build_import_tree(files, 1001)
            return _ok({"count": len(files), "files": files, "import": imp})
        finally:
            try:
                os.unlink(tmp.name)
            except OSError:
                pass
            if root is not None:
                shutil.rmtree(root, ignore_errors=True)
    except Exception as e:
        return _err(str(e))
@app.post("/api/archive/stage")
async def api_archive_stage(file: UploadFile = File(...), prefix: Optional[str] = Form(None)):
    """Store an uploaded archive in a temp file and register it for later processing.

    The temp file's path and the requested prefix are recorded in
    ``STAGED_ARCHIVES`` under a freshly generated id.

    @param file Archive upload
    @param prefix Optional object-name prefix remembered with the staged file
    @return ``_ok`` envelope with the staging ``id``, the original ``name`` and
            the ``size`` in bytes; ``_err`` on failure
    """
    try:
        # Keep only the final path component of the client-supplied name, so a
        # name containing "/" or "\" cannot break temp-file creation. The
        # lower-cased name stays in the suffix, so the archive extension is
        # still visible in the temp file's name.
        suffix = Path((file.filename or "").replace("\\", "/")).name.lower()
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        try:
            with tmp:
                data = await file.read()
                tmp.write(data)
        except Exception:
            # Do not leave a half-written, unregistered temp file behind.
            try:
                os.unlink(tmp.name)
            except OSError:
                pass
            raise
        sid = uuid.uuid4().hex
        STAGED_ARCHIVES[sid] = {"path": tmp.name, "prefix": (prefix or "")}
        return _ok({"id": sid, "name": file.filename, "size": len(data)})
    except Exception as e:
        return _err(str(e))
zipfile.ZipFile(str(tmp_path), "r") as zf: _zip_extract_safely(zf, root) elif ".tar" in sfx or sfx.endswith(".tgz") or sfx.endswith(".tar.gz") or sfx.endswith(".tar.bz2") or sfx.endswith(".tar.xz"): import tarfile with tarfile.open(str(tmp_path), "r:*") as tf: _tar_extract_safely(tf, root) else: return _err("不支持的压缩格式") try: _bulk_upload_assets(root, client, bucket, public_base, use_prefix) except Exception: pass processed: List[Dict[str, object]] = [] # Process existing Markdown files for md in root.rglob("*.md"): try: text = md.read_text("utf-8", errors="ignore") new_text, mappings = _rewrite_md_assets_to_minio(text, md.parent, client, bucket, public_base, use_prefix, search_root=root) rel_md = md.relative_to(root) rel_uplift = _uplift_rel_path(rel_md, md.parent, root, mappings) obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/") raw = new_text.encode("utf-8") bio = io.BytesIO(raw) client.put_object(bucket_name=bucket, object_name=obj, data=bio, length=len(raw), content_type="text/markdown; charset=utf-8") # type: ignore try: from urllib.parse import quote as _quote url = f"{public_base}/{bucket}/{_quote(obj, safe='/')}" except Exception: url = f"{public_base}/{bucket}/{obj}" exp = int(timedelta(hours=12).total_seconds()) ps = presigned_read(client, bucket, obj, exp) if client is not None else None processed.append({"source": rel_uplift.as_posix(), "minio_url": url, "minio_presigned_url": ps, "mappings": mappings, "object_name": obj, "size": len(raw)}) except Exception: processed.append({"source": (md.relative_to(root).as_posix()), "minio_url": None, "minio_presigned_url": None, "mappings": [], "object_name": None, "size": 0}) # Convert HTML files to Markdown and process for html in [p for p in root.rglob("*") if p.is_file() and p.suffix.lower() in {".html", ".htm"}]: try: rel_html = html.relative_to(root) md_target_rel = rel_html.with_suffix(".md") md_sibling = (root / md_target_rel).exists() if md_sibling: continue html_src = 
@app.post("/api/upload-list")
async def api_upload_list(list_file: UploadFile = File(...), prefix: Optional[str] = Form(None), versionId: Optional[int] = Form(1001)):
    """Publish the local files named in an uploaded list file to MinIO.

    The list is read as UTF-8 text, one path per line; blank lines and lines
    starting with ``#`` are ignored. ``http://`` / ``https://`` entries are
    skipped. Every remaining entry that exists as a regular file is read, has
    its asset links rewritten via ``_rewrite_md_assets_to_minio`` and is
    uploaded under ``<prefix>/rewritten/``.

    @param list_file Text file with one path per line
    @param prefix Optional object-name prefix; falls back to the configured one
    @param versionId Version id passed to ``_build_import_tree`` (default 1001)
    @return ``_ok`` envelope with ``count``, per-file entries and the import
            tree; ``_err`` on failure
    """
    try:
        client, bucket, public_base, env_prefix = minio_current(RUNTIME_CONFIG)
        if client is None or bucket is None or not public_base:
            return _err("MinIO 未配置")
        use_prefix = (prefix or env_prefix or "").strip()

        raw = await list_file.read()
        text = raw.decode("utf-8", errors="ignore")
        entries = [line.strip() for line in text.splitlines()]
        paths: List[str] = [e for e in entries if e and not e.startswith("#")]

        # NOTE(review): these paths are client-supplied and are read from the
        # server's filesystem, then uploaded — confirm this endpoint is only
        # reachable by trusted callers.
        local_files: List[Path] = []
        for entry in paths:
            if entry.startswith(("http://", "https://")):
                continue  # remote entries are ignored by this endpoint
            candidate = Path(entry).expanduser()
            if candidate.exists() and candidate.is_file():
                local_files.append(candidate.resolve())

        base_root: Optional[Path] = None
        if local_files:
            try:
                base_root = Path(os.path.commonpath([str(x) for x in local_files]))
                # With a single file (or the same file listed repeatedly) the
                # common path is the file itself, which would make the relative
                # path "."; use its directory as the root instead.
                if base_root.is_file():
                    base_root = base_root.parent
            except ValueError:
                # commonpath rejects inputs that have no common path.
                base_root = None

        processed: List[Dict[str, object]] = []
        for src in local_files:
            try:
                content = src.read_text("utf-8", errors="ignore")
                new_text, mappings = _rewrite_md_assets_to_minio(
                    content, src.parent, client, bucket, public_base, use_prefix, search_root=base_root)
                rel0 = src.relative_to(base_root) if base_root else Path(src.name)
                rel_uplift = _uplift_rel_path(rel0, src.parent, base_root, mappings)
                obj = f"{use_prefix.strip('/')}/rewritten/{rel_uplift.as_posix()}".lstrip("/")
                raw_md = new_text.encode("utf-8")
                client.put_object(bucket_name=bucket, object_name=obj, data=io.BytesIO(raw_md),
                                  length=len(raw_md), content_type="text/markdown; charset=utf-8")  # type: ignore
                url = f"{public_base}/{bucket}/{quote(obj, safe='/')}"
                exp = int(timedelta(hours=12).total_seconds())
                ps = presigned_read(client, bucket, obj, exp)
                processed.append({"source": rel_uplift.as_posix(), "minio_url": url,
                                  "minio_presigned_url": ps, "mappings": mappings,
                                  "object_name": obj, "size": len(raw_md)})
            except Exception:
                # Report the file as failed but keep processing the rest.
                logging.exception("failed to process %s", src)
                processed.append({"source": src.name, "minio_url": None,
                                  "minio_presigned_url": None, "mappings": [],
                                  "object_name": None, "size": 0})

        imp = _build_import_tree(processed, int(versionId or 1001))
        return _ok({"count": len(processed), "files": processed, "import": imp})
    except Exception as e:
        return _err(str(e))
{"1","true","yes","on"}: policy = { "Version": "2012-10-17", "Statement": [ {"Effect": "Allow", "Principal": "*", "Action": ["s3:GetBucketLocation", "s3:ListBucket"], "Resource": [f"arn:aws:s3:::{bkt}"]}, {"Effect": "Allow", "Principal": "*", "Action": ["s3:GetObject"], "Resource": [f"arn:aws:s3:::{bkt}/*"]}, ], } try: client.set_bucket_policy(bucket_name=bkt, policy=_json.dumps(policy)) # type: ignore return {"ok": True, "bucket": bkt, "applied": True} except Exception: try: try: region = client._get_region(bkt) # type: ignore except Exception: region = "us-east-1" raw = _json.dumps(policy).encode("utf-8") client._url_open(method="PUT", region=region, bucket_name=bkt, query_params={"policy": ""}, body=raw) # type: ignore return {"ok": True, "bucket": bkt, "applied": True} except Exception as e2: return {"ok": False, "bucket": bkt, "error": str(e2)} try: client.delete_bucket_policy(bkt) # type: ignore except Exception: pass return {"ok": True, "bucket": bkt, "applied": False} except Exception as e: return {"ok": False, "bucket": bkt, "error": str(e)}