Files
FunMD_Convert/docling/app/services/minio_utils.py

191 lines
7.3 KiB
Python
Raw Normal View History

2026-01-07 17:18:26 +08:00
from typing import Optional, Tuple, Dict
import os
import logging
from urllib.request import urlopen
try:
from minio import Minio # type: ignore
import urllib3 # type: ignore
except Exception:
Minio = None
urllib3 = None # type: ignore
def minio_head_bucket(client: object, bucket: str) -> bool:
    """Report whether *bucket* can be seen through *client*.

    Three probes are tried in order; the first one that yields an answer wins:

    1. ``client.bucket_exists(bucket)`` when the client has that method.
    2. A ``HEAD`` request issued through the client's ``_url_open`` using the
       region from ``_get_region`` (``"us-east-1"`` when that lookup fails).
       NOTE(review): both are underscore-prefixed client internals -- confirm
       they exist in the installed client version.
    3. A name match against ``client.list_buckets()``.

    Args:
        client: Storage client object; only duck-typed access is used.
        bucket: Bucket name to look for.

    Returns:
        ``True`` if a probe confirms the bucket, ``False`` if ``bucket_exists``
        says it is absent or if every probe fails.
    """
    try:
        if hasattr(client, "bucket_exists"):
            try:
                found = bool(client.bucket_exists(bucket))  # type: ignore
            except Exception:
                # Probe failed; move on to the HEAD request below.
                pass
            else:
                return found
        try:
            zone = client._get_region(bucket)  # type: ignore
        except Exception:
            zone = "us-east-1"
        client._url_open(method="HEAD", region=zone, bucket_name=bucket)  # type: ignore
        return True
    except Exception:
        # HEAD probe failed as well; last resort is listing all buckets.
        pass

    try:
        visible = {
            label
            for label in (getattr(entry, "name", None) for entry in client.list_buckets())  # type: ignore
            if label
        }
        return bucket in visible
    except Exception:
        return False
def minio_create_bucket(client: object, bucket: str) -> bool:
    """Make sure *bucket* exists, creating it when necessary.

    Attempts, in order:

    1. ``client.bucket_exists(bucket)`` -- done if it reports ``True``.
    2. ``client.make_bucket(bucket)``.
    3. ``client.make_bucket(bucket, location=<region>)``.
    4. A raw ``PUT`` through ``client._url_open``.
       NOTE(review): ``_get_region`` / ``_url_open`` are underscore-prefixed
       client internals -- confirm they exist in the installed client version.

    An error whose text contains ``BucketAlreadyOwnedByYou`` or
    ``BucketAlreadyExists`` counts as success at every creation step, so a
    concurrent creator does not turn into a failure.

    Args:
        client: Storage client object; only duck-typed access is used.
        bucket: Bucket name to create.

    Returns:
        ``True`` when the bucket exists or was created.

    Raises:
        Exception: Whatever the final ``PUT`` attempt raised. When an earlier
            ``make_bucket`` call failed, that error is attached as
            ``__cause__`` so the original reason is not lost.
    """

    def already_there(err: Exception) -> bool:
        text = str(err)
        return "BucketAlreadyOwnedByYou" in text or "BucketAlreadyExists" in text

    def region_of() -> str:
        try:
            return client._get_region(bucket)  # type: ignore
        except Exception:
            return "us-east-1"

    if hasattr(client, "bucket_exists"):
        try:
            if client.bucket_exists(bucket):  # type: ignore
                return True
        except Exception:
            # The existence probe is advisory; the creation attempts decide.
            pass

    first_error = None
    if hasattr(client, "make_bucket"):
        try:
            client.make_bucket(bucket)  # type: ignore
            return True
        except Exception as err:
            if already_there(err):
                return True
            first_error = err
        # Retry with an explicit location in case the default one was rejected.
        try:
            client.make_bucket(bucket, location=region_of())  # type: ignore
            return True
        except Exception as err:
            if already_there(err):
                return True

    try:
        client._url_open(method="PUT", region=region_of(), bucket_name=bucket)  # type: ignore
        return True
    except Exception as err:
        if already_there(err):
            return True
        if first_error is not None:
            raise err from first_error
        raise
def minio_client(endpoint: str, access: str, secret: str, secure: bool):
    """Build a ``Minio`` client for *endpoint*.

    When ``urllib3`` was importable, the client is given a ``PoolManager``
    with a 3 s connect / 20 s read timeout. If building that pool or passing
    it to ``Minio`` fails, a client without a custom HTTP pool is returned
    instead.

    Args:
        endpoint: Passed to ``Minio`` as ``endpoint``.
        access: Passed to ``Minio`` as ``access_key``.
        secret: Passed to ``Minio`` as ``secret_key``.
        secure: Passed to ``Minio`` as ``secure``.

    Returns:
        The constructed ``Minio`` instance.
    """
    base_kwargs = dict(endpoint=endpoint, access_key=access, secret_key=secret, secure=secure)
    if urllib3 is None:
        return Minio(**base_kwargs)  # type: ignore
    try:
        pool = urllib3.PoolManager(timeout=urllib3.Timeout(connect=3.0, read=20.0))
        return Minio(http_client=pool, **base_kwargs)  # type: ignore
    except Exception:
        # Custom pool could not be used; fall back to the client's default.
        return Minio(**base_kwargs)  # type: ignore
def minio_time_hint(endpoint: str, secure: bool) -> Optional[str]:
    """Describe the clock difference between the server and this machine.

    Fetches ``http(s)://<endpoint>`` with a 3 second timeout, reads the
    ``Date`` response header and compares it with the local UTC time.

    Args:
        endpoint: ``host[:port]`` of the server to query.
        secure: Use ``https`` when true, ``http`` otherwise.

    Returns:
        A Chinese-language message ("server time differs from local time by
        about N", N being whole seconds), or ``None`` when the server cannot
        be reached, sends no ``Date`` header, or the header cannot be parsed.
    """
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime
    from urllib.error import HTTPError

    scheme = "https" if secure else "http"
    try:
        try:
            response = urlopen(f"{scheme}://{endpoint}", timeout=3)
        except HTTPError as err:
            # 4xx/5xx replies are raised as HTTPError, which is itself a
            # response object; its headers still carry the server's Date.
            response = err
        try:
            srv_date = response.headers.get("Date")
        finally:
            response.close()
        if not srv_date:
            return None
        server_time = parsedate_to_datetime(srv_date)
        if server_time.tzinfo is None:
            # A naive result cannot be subtracted from an aware "now";
            # HTTP dates are expressed in GMT, so treat it as UTC.
            server_time = server_time.replace(tzinfo=timezone.utc)
        diff = abs((datetime.now(timezone.utc) - server_time).total_seconds())
        return f"服务器时间与本机相差约 {int(diff)}"
    except Exception:
        return None
def join_prefix(prefix: str, rel: str) -> str:
    """Join an object-key *prefix* and a relative key with a single slash.

    Surrounding slashes on *prefix* and leading slashes on *rel* are removed.
    If *rel* already starts with ``"<prefix>/"`` it is returned unchanged, so
    applying the function twice does not duplicate the prefix. ``None`` is
    treated like an empty string for either argument.

    Args:
        prefix: Key prefix; may be empty or ``None``.
        rel: Key relative to the prefix; may be empty or ``None``.

    Returns:
        The combined key, without a leading slash.
    """
    pre = (prefix or "").strip("/")
    key = (rel or "").lstrip("/")
    if not pre or key.startswith(pre + "/"):
        return key
    return f"{pre}/{key}"
def presigned_read(client: object, bucket: str, obj: str, expires_seconds: int) -> Optional[str]:
    """Return a presigned GET URL for *obj* in *bucket*, or ``None``.

    ``client.get_presigned_url("GET", ...)`` is tried first; if it raises,
    ``client.presigned_get_object(...)`` is tried instead.

    Args:
        client: Storage client object; only duck-typed access is used.
        bucket: Bucket holding the object.
        obj: Object key.
        expires_seconds: URL lifetime in seconds; coerced with ``int()`` when
            possible.

    Returns:
        The URL produced by the client, or ``None`` if the lifetime cannot be
        turned into a ``timedelta`` or both client calls fail.
    """
    from datetime import timedelta

    try:
        seconds = int(expires_seconds)
    except Exception:
        # Leave the value as given; timedelta decides whether it is usable.
        seconds = expires_seconds
    try:
        lifetime = timedelta(seconds=seconds)
    except Exception:
        return None

    try:
        return client.get_presigned_url("GET", bucket, obj, expires=lifetime)  # type: ignore
    except Exception:
        pass
    try:
        return client.presigned_get_object(bucket, obj, expires=lifetime)  # type: ignore
    except Exception:
        return None
def _minio_strip_scheme(value: str) -> str:
    """Return *value* without a leading ``http://`` or ``https://``."""
    head, sep, rest = value.partition("://")
    if sep and head.lower() in ("http", "https"):
        return rest
    return value


def _minio_clean(value: object) -> Optional[str]:
    """Return *value* as a stripped string, or ``None`` when it is falsy."""
    return str(value).strip() if value else None


def minio_current(runtime_cfg: Dict[str, Dict[str, Optional[str]]]) -> Tuple[Optional[object], Optional[str], Optional[str], str]:
    """Resolve the MinIO configuration and return a ready-to-use client.

    Each setting is read from ``runtime_cfg["minio"]`` first and from the
    environment second:

    ============  =========================
    config key    environment variable
    ============  =========================
    ``endpoint``  ``MINIO_ENDPOINT``
    ``access``    ``MINIO_ACCESS_KEY``
    ``secret``    ``MINIO_SECRET_KEY``
    ``bucket``    ``MINIO_BUCKET``
    ``secure``    ``MINIO_SECURE``
    ``public``    ``MINIO_PUBLIC_ENDPOINT``
    ``prefix``    ``MINIO_PREFIX``
    ============  =========================

    Normalisation applied:

    * A leading ``http://`` / ``https://`` is removed from the endpoint; the
      value handed to the client is ``host[:port]``.
    * An endpoint mentioning port ``9001`` is rewritten to port ``9000``.
      NOTE(review): presumably 9001 is the web-console port and 9000 the API
      port of the deployment -- confirm.
    * A public base that mentions ``:9001``, ``/browser`` or ``/minio`` is
      rewritten to ``<scheme>://<host>:9000``. The markers are looked for
      after the scheme has been removed, so a value that already carries
      ``http(s)://`` is handled and a host merely named ``minio...`` is not
      mistaken for a console path.
    * Without a public base, ``<scheme>://<endpoint>`` is used.

    After building the client, ``list_buckets`` is called once; if that fails
    with an SSL-looking error while ``secure`` is on, an insecure client is
    used instead. The bucket is then created if it is not found. Both steps
    are best-effort and never raise.

    Args:
        runtime_cfg: Mapping whose ``"minio"`` entry holds the settings above.
            A missing or ``None`` entry means "environment only".

    Returns:
        ``(client, bucket, public_base, prefix)``, or
        ``(None, None, None, "")`` when the ``Minio`` class is unavailable or
        any of endpoint / access / secret / bucket / public base is missing.
    """
    rc = (runtime_cfg or {}).get("minio") or {}

    def setting(key: str, env_name: str, default: Optional[str] = None):
        return rc.get(key) or os.environ.get(env_name, default)

    secure_flag = setting("secure", "MINIO_SECURE", "false")
    secure = str(secure_flag or "false").lower() in {"1", "true", "yes", "on"}
    scheme = "https" if secure else "http"

    endpoint = _minio_clean(setting("endpoint", "MINIO_ENDPOINT"))
    if endpoint:
        endpoint = _minio_strip_scheme(endpoint)
        if ":9001" in endpoint:
            authority = endpoint.split("/")[0]
            host, colon, _port = authority.partition(":")
            endpoint = f"{host}:9000" if colon else authority

    access = _minio_clean(setting("access", "MINIO_ACCESS_KEY"))
    secret = _minio_clean(setting("secret", "MINIO_SECRET_KEY"))
    bucket = _minio_clean(setting("bucket", "MINIO_BUCKET"))

    public_base = _minio_clean(setting("public", "MINIO_PUBLIC_ENDPOINT"))
    if public_base:
        bare = _minio_strip_scheme(public_base)
        if ":9001" in bare or "/browser" in bare or "/minio" in bare:
            host = bare.split("/")[0].partition(":")[0]
            public_base = f"{scheme}://{host}:9000"
    if not public_base and endpoint:
        public_base = f"{scheme}://{endpoint}"

    required = (
        ("client", Minio is not None),
        ("endpoint", endpoint),
        ("access", access),
        ("secret", secret),
        ("bucket", bucket),
        ("public", public_base),
    )
    missing = [label for label, present in required if not present]
    if missing:
        logging.error("minio config invalid: missing=%s", missing)
        return None, None, None, ""

    client = minio_client(endpoint=endpoint, access=access, secret=secret, secure=secure)

    # Connectivity probe: only an SSL-looking failure triggers the downgrade.
    try:
        client.list_buckets()  # type: ignore
    except Exception as err:
        text = str(err)
        if secure and ("SSL" in text or "HTTPSConnectionPool" in text):
            try:
                client = minio_client(endpoint=endpoint, access=access, secret=secret, secure=False)
            except Exception:
                # Keep the original client if the insecure one cannot be built.
                pass

    # Best-effort bucket bootstrap; callers get the client either way.
    try:
        if not minio_head_bucket(client, bucket):
            minio_create_bucket(client, bucket)
    except Exception:
        pass

    prefix = setting("prefix", "MINIO_PREFIX", "")
    return client, bucket, public_base, prefix