提取操作使用千问plus大模型

This commit is contained in:
wh
2026-04-17 01:22:05 +08:00
parent d34f703523
commit 3a60d8cb33
8 changed files with 196 additions and 22 deletions

View File

@@ -6,7 +6,7 @@ import docx
from app.clients.llm.base import LLMClient
from app.clients.storage.base import StorageClient
from app.core.config import get_config
-from app.core.exceptions import UnsupportedFileTypeError
+from app.core.exceptions import StorageError, UnsupportedFileTypeError
from app.core.json_utils import extract_json
from app.core.logging import get_logger
from app.models.text_models import (
@@ -61,7 +61,21 @@ async def extract_triples(
bucket = cfg["storage"]["buckets"]["source_data"]
model = req.model or cfg["models"]["default_text"]
-data = await storage.download_bytes(bucket, req.file_path)
+try:
+    data = await storage.download_bytes(bucket, req.file_path)
+    logger.info("文件下载成功", extra={
+        "file_name": req.file_name,
+        "size_bytes": len(data)
+    })
+except Exception as e:
+    logger.error("文件下载失败", extra={
+        "file_name": req.file_name,
+        "file_path": req.file_path,
+        "bucket": bucket,
+        "error_type": type(e).__name__,
+        "error_message": str(e)
+    }, exc_info=True)
+    raise StorageError(f"下载文件失败: {str(e)}") from e
if ext == ".txt":
text = _parse_txt(data)