From 68567b98b38ebf6e9d6aa8b20c05719fe073574e Mon Sep 17 00:00:00 2001 From: wh Date: Wed, 15 Apr 2026 11:12:50 +0800 Subject: [PATCH] =?UTF-8?q?=E5=81=9C=E6=AD=A2=E8=B7=9F=E8=B8=AA=E6=96=87?= =?UTF-8?q?=E6=A1=A3=E5=92=8C=E5=8D=95=E5=85=83=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CLAUDE.md | 30 - .../plans/2026-04-10-ai-service-impl.md | 3004 ----------------- .../specs/2026-04-10-ai-service-design.md | 835 ----- pytest.ini | 3 - .../checklists/requirements.md | 38 - .../contracts/api.md | 333 -- .../001-ai-service-requirements/data-model.md | 167 - specs/001-ai-service-requirements/plan.md | 120 - .../001-ai-service-requirements/quickstart.md | 109 - specs/001-ai-service-requirements/research.md | 76 - specs/001-ai-service-requirements/spec.md | 258 -- specs/001-ai-service-requirements/tasks.md | 318 -- tests/__init__.py | 0 tests/conftest.py | 39 - tests/test_config.py | 40 - tests/test_finetune_router.py | 112 - tests/test_finetune_service.py | 151 - tests/test_health.py | 8 - tests/test_image_router.py | 63 - tests/test_image_service.py | 102 - tests/test_llm_client.py | 81 - tests/test_qa_router.py | 121 - tests/test_qa_service.py | 236 -- tests/test_storage_client.py | 62 - tests/test_text_router.py | 63 - tests/test_text_service.py | 122 - tests/test_video_router.py | 71 - tests/test_video_service.py | 195 -- 28 files changed, 6757 deletions(-) delete mode 100644 CLAUDE.md delete mode 100644 docs/superpowers/plans/2026-04-10-ai-service-impl.md delete mode 100644 docs/superpowers/specs/2026-04-10-ai-service-design.md delete mode 100644 pytest.ini delete mode 100644 specs/001-ai-service-requirements/checklists/requirements.md delete mode 100644 specs/001-ai-service-requirements/contracts/api.md delete mode 100644 specs/001-ai-service-requirements/data-model.md delete mode 100644 specs/001-ai-service-requirements/plan.md delete mode 100644 specs/001-ai-service-requirements/quickstart.md delete mode 100644 specs/001-ai-service-requirements/research.md delete mode 100644 specs/001-ai-service-requirements/spec.md delete mode 100644 specs/001-ai-service-requirements/tasks.md delete mode 100644 tests/__init__.py delete mode 100644 tests/conftest.py delete mode 100644 tests/test_config.py delete mode 100644 tests/test_finetune_router.py delete mode 100644 tests/test_finetune_service.py delete mode 100644 tests/test_health.py delete mode 100644 tests/test_image_router.py delete mode 100644 tests/test_image_service.py delete mode 100644 tests/test_llm_client.py delete mode 100644 tests/test_qa_router.py delete mode 100644 tests/test_qa_service.py delete mode 100644 tests/test_storage_client.py delete mode 100644 tests/test_text_router.py delete mode 100644 tests/test_text_service.py delete mode 100644 tests/test_video_router.py delete mode 100644 tests/test_video_service.py diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index c635081..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,30 +0,0 @@ -# label_ai_service Development Guidelines - -Auto-generated from all feature plans. Last updated: 2026-04-10 - -## Active Technologies - -- Python 3.12.13(conda `label` 环境) + FastAPI ≥0.111, uvicorn[standard] ≥0.29, pydantic ≥2.7, zhipuai ≥2.1, boto3 ≥1.34, pdfplumber ≥0.11, python-docx ≥1.1, opencv-python-headless ≥4.9, numpy ≥1.26, httpx ≥0.27, python-dotenv ≥1.0, pyyaml ≥6.0 (001-ai-service-requirements) - -## Project Structure - -```text -backend/ -frontend/ -tests/ -``` - -## Commands - -cd src; pytest; ruff check . - -## Code Style - -Python 3.12.13(conda `label` 环境): Follow standard conventions - -## Recent Changes - -- 001-ai-service-requirements: Added Python 3.12.13(conda `label` 环境) + FastAPI ≥0.111, uvicorn[standard] ≥0.29, pydantic ≥2.7, zhipuai ≥2.1, boto3 ≥1.34, pdfplumber ≥0.11, python-docx ≥1.1, opencv-python-headless ≥4.9, numpy ≥1.26, httpx ≥0.27, python-dotenv ≥1.0, pyyaml ≥6.0 - - - diff --git a/docs/superpowers/plans/2026-04-10-ai-service-impl.md b/docs/superpowers/plans/2026-04-10-ai-service-impl.md deleted file mode 100644 index eaff9ee..0000000 --- a/docs/superpowers/plans/2026-04-10-ai-service-impl.md +++ /dev/null @@ -1,3004 +0,0 @@ -# AI Service Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** 实现 label_ai_service,一个 Python FastAPI 服务,为知识图谱标注平台提供文本三元组提取、图像四元组提取、视频处理、问答对生成和 GLM 微调管理能力。 - -**Architecture:** 分层架构:routers(HTTP 入口)→ services(业务逻辑)→ clients(外部适配层)。LLMClient 和 StorageClient 均为 ABC,当前分别实现 ZhipuAIClient 和 RustFSClient,通过 FastAPI Depends 注入,services 层不感知具体实现。视频任务用 FastAPI BackgroundTasks 异步执行,完成后回调 Java 后端。 - -**Tech Stack:** Python 3.12(conda `label` 环境),FastAPI,ZhipuAI SDK,boto3(S3),OpenCV,pdfplumber,python-docx,httpx,pytest - ---- - -## Task 1: 项目脚手架 - -**Files:** -- Create: `app/__init__.py` -- Create: `app/core/__init__.py` -- Create: `app/clients/__init__.py` -- Create: `app/clients/llm/__init__.py` -- Create: `app/clients/storage/__init__.py` -- Create: `app/services/__init__.py` -- Create: `app/routers/__init__.py` -- Create: `app/models/__init__.py` -- Create: `tests/__init__.py` -- Create: `tests/conftest.py` -- Create: `config.yaml` -- Create: `.env` -- Create: `requirements.txt` - -- [ ] **Step 1: 创建包目录结构** - -```bash -mkdir -p app/core app/clients/llm app/clients/storage app/services app/routers app/models tests -touch app/__init__.py app/core/__init__.py -touch app/clients/__init__.py app/clients/llm/__init__.py app/clients/storage/__init__.py -touch app/services/__init__.py app/routers/__init__.py app/models/__init__.py -touch tests/__init__.py -``` - -- [ ] **Step 2: 创建 `config.yaml`** - -```yaml -server: - port: 8000 - log_level: INFO - -storage: - buckets: - source_data: "source-data" - finetune_export: "finetune-export" - -backend: {} - -video: - frame_sample_count: 8 - max_file_size_mb: 200 - -models: - default_text: "glm-4-flash" - default_vision: "glm-4v-flash" -``` - -- [ ] **Step 3: 创建 `.env`** - -```ini -ZHIPUAI_API_KEY=your-zhipuai-api-key -STORAGE_ACCESS_KEY=minioadmin -STORAGE_SECRET_KEY=minioadmin -STORAGE_ENDPOINT=http://rustfs:9000 -BACKEND_CALLBACK_URL=http://backend:8080/internal/video-job/callback -# MAX_VIDEO_SIZE_MB=200 # 可选,覆盖 config.yaml 中的视频大小上限 -``` - -- [ ] **Step 4: 创建 `requirements.txt`** - -``` -fastapi>=0.111 -uvicorn[standard]>=0.29 -pydantic>=2.7 -python-dotenv>=1.0 -pyyaml>=6.0 -zhipuai>=2.1 -boto3>=1.34 -pdfplumber>=0.11 -python-docx>=1.1 -opencv-python-headless>=4.9 -numpy>=1.26 -httpx>=0.27 -pytest>=8.0 -pytest-asyncio>=0.23 -``` - -- [ ] **Step 5: 创建 `tests/conftest.py`** - -```python -import pytest -from unittest.mock import AsyncMock, MagicMock -from app.clients.llm.base import LLMClient -from app.clients.storage.base import StorageClient - - -@pytest.fixture -def mock_llm(): - client = MagicMock(spec=LLMClient) - client.chat = AsyncMock() - client.chat_vision = AsyncMock() - return client - - -@pytest.fixture -def mock_storage(): - client = MagicMock(spec=StorageClient) - client.download_bytes = AsyncMock() - client.upload_bytes = AsyncMock() - client.get_presigned_url = MagicMock(return_value="https://example.com/presigned/crop.jpg") - client.get_object_size = AsyncMock(return_value=10 * 1024 * 1024) # 默认 10MB,小于限制 - return client -``` - -- [ ] **Step 6: 安装依赖** - -```bash -conda run -n label pip install -r requirements.txt -``` - -Expected: 所有包安装成功,无错误 - -- [ ] **Step 7: Commit** - -```bash -git add app/ tests/ config.yaml .env requirements.txt -git commit -m "feat: project scaffold - directory structure and config files" -``` - ---- - -## Task 2: Core Config 模块 - -**Files:** -- Create: `app/core/config.py` -- Create: `tests/test_config.py` - -- [ ] **Step 1: 编写失败测试** - -`tests/test_config.py`: - -```python -import pytest -from unittest.mock import patch, mock_open -from app.core.config import get_config - -MOCK_YAML = """ -server: - port: 8000 - log_level: INFO -storage: - buckets: - source_data: "source-data" - finetune_export: "finetune-export" -backend: {} -video: - frame_sample_count: 8 -models: - default_text: "glm-4-flash" - default_vision: "glm-4v-flash" -""" - - -def _fresh_config(monkeypatch, extra_env: dict = None): - """每次测试前清除 lru_cache,设置环境变量。""" - get_config.cache_clear() - base_env = { - "ZHIPUAI_API_KEY": "test-key", - "STORAGE_ACCESS_KEY": "test-access", - "STORAGE_SECRET_KEY": "test-secret", - "STORAGE_ENDPOINT": "http://localhost:9000", - "BACKEND_CALLBACK_URL": "http://localhost:8080/callback", - } - if extra_env: - base_env.update(extra_env) - for k, v in base_env.items(): - monkeypatch.setenv(k, v) - - -def test_env_overrides_yaml(monkeypatch): - _fresh_config(monkeypatch) - with patch("builtins.open", mock_open(read_data=MOCK_YAML)): - with patch("app.core.config.load_dotenv"): - cfg = get_config() - assert cfg["zhipuai"]["api_key"] == "test-key" - assert cfg["storage"]["access_key"] == "test-access" - assert cfg["storage"]["endpoint"] == "http://localhost:9000" - assert cfg["backend"]["callback_url"] == "http://localhost:8080/callback" - get_config.cache_clear() - - -def test_yaml_values_preserved(monkeypatch): - _fresh_config(monkeypatch) - with patch("builtins.open", mock_open(read_data=MOCK_YAML)): - with patch("app.core.config.load_dotenv"): - cfg = get_config() - assert cfg["models"]["default_text"] == "glm-4-flash" - assert cfg["video"]["frame_sample_count"] == 8 - assert cfg["storage"]["buckets"]["source_data"] == "source-data" - get_config.cache_clear() - - -def test_missing_api_key_raises(monkeypatch): - get_config.cache_clear() - monkeypatch.delenv("ZHIPUAI_API_KEY", raising=False) - monkeypatch.setenv("STORAGE_ACCESS_KEY", "a") - monkeypatch.setenv("STORAGE_SECRET_KEY", "b") - with patch("builtins.open", mock_open(read_data=MOCK_YAML)): - with patch("app.core.config.load_dotenv"): - with pytest.raises(RuntimeError, match="ZHIPUAI_API_KEY"): - get_config() - get_config.cache_clear() - - -def test_missing_storage_key_raises(monkeypatch): - get_config.cache_clear() - monkeypatch.setenv("ZHIPUAI_API_KEY", "key") - monkeypatch.delenv("STORAGE_ACCESS_KEY", raising=False) - monkeypatch.setenv("STORAGE_SECRET_KEY", "b") - with patch("builtins.open", mock_open(read_data=MOCK_YAML)): - with patch("app.core.config.load_dotenv"): - with pytest.raises(RuntimeError, match="STORAGE_ACCESS_KEY"): - get_config() - get_config.cache_clear() -``` - -- [ ] **Step 2: 运行,确认失败** - -```bash -conda run -n label pytest tests/test_config.py -v -``` - -Expected: `ImportError: cannot import name 'get_config'` - -- [ ] **Step 3: 实现 `app/core/config.py`** - -```python -import os -import yaml -from functools import lru_cache -from pathlib import Path -from dotenv import load_dotenv - -_ROOT = Path(__file__).parent.parent.parent - -_ENV_OVERRIDES = { - "ZHIPUAI_API_KEY": ["zhipuai", "api_key"], - "STORAGE_ACCESS_KEY": ["storage", "access_key"], - "STORAGE_SECRET_KEY": ["storage", "secret_key"], - "STORAGE_ENDPOINT": ["storage", "endpoint"], - "BACKEND_CALLBACK_URL": ["backend", "callback_url"], - "LOG_LEVEL": ["server", "log_level"], - "MAX_VIDEO_SIZE_MB": ["video", "max_file_size_mb"], -} - - -def _set_nested(d: dict, keys: list[str], value: str) -> None: - for k in keys[:-1]: - d = d.setdefault(k, {}) - d[keys[-1]] = value - - -@lru_cache(maxsize=1) -def get_config() -> dict: - load_dotenv(_ROOT / ".env") - with open(_ROOT / "config.yaml", encoding="utf-8") as f: - cfg = yaml.safe_load(f) - for env_key, yaml_path in _ENV_OVERRIDES.items(): - val = os.environ.get(env_key) - if val: - _set_nested(cfg, yaml_path, val) - _validate(cfg) - return cfg - - -def _validate(cfg: dict) -> None: - checks = [ - (["zhipuai", "api_key"], "ZHIPUAI_API_KEY"), - (["storage", "access_key"], "STORAGE_ACCESS_KEY"), - (["storage", "secret_key"], "STORAGE_SECRET_KEY"), - ] - for path, name in checks: - val = cfg - for k in path: - val = (val or {}).get(k, "") - if not val: - raise RuntimeError(f"缺少必要配置项:{name}") -``` - -- [ ] **Step 4: 运行,确认通过** - -```bash -conda run -n label pytest tests/test_config.py -v -``` - -Expected: `4 passed` - -- [ ] **Step 5: Commit** - -```bash -git add app/core/config.py tests/test_config.py -git commit -m "feat: core config module with YAML + env layered loading" -``` - ---- - -## Task 3: Core Logging、Exceptions、JSON Utils - -**Files:** -- Create: `app/core/logging.py` -- Create: `app/core/exceptions.py` -- Create: `app/core/json_utils.py` - -- [ ] **Step 1: 实现 `app/core/logging.py`** - -```python -import json -import logging -import time -from typing import Callable - -from fastapi import Request, Response - - -class _JSONFormatter(logging.Formatter): - def format(self, record: logging.LogRecord) -> str: - entry: dict = { - "time": self.formatTime(record), - "level": record.levelname, - "logger": record.name, - "message": record.getMessage(), - } - if record.exc_info: - entry["exception"] = self.formatException(record.exc_info) - return json.dumps(entry, ensure_ascii=False) - - -def setup_logging(log_level: str = "INFO") -> None: - handler = logging.StreamHandler() - handler.setFormatter(_JSONFormatter()) - root = logging.getLogger() - root.handlers.clear() - root.addHandler(handler) - root.setLevel(getattr(logging, log_level.upper(), logging.INFO)) - - -async def request_logging_middleware(request: Request, call_next: Callable) -> Response: - start = time.monotonic() - response = await call_next(request) - duration_ms = round((time.monotonic() - start) * 1000, 2) - logging.getLogger("api").info( - f"method={request.method} path={request.url.path} " - f"status={response.status_code} duration_ms={duration_ms}" - ) - return response -``` - -- [ ] **Step 2: 实现 `app/core/exceptions.py`** - -```python -import logging -from fastapi import Request -from fastapi.responses import JSONResponse - - -class UnsupportedFileTypeError(Exception): - def __init__(self, ext: str): - super().__init__(f"不支持的文件类型:{ext}") - - -class StorageDownloadError(Exception): - pass - - -class LLMResponseParseError(Exception): - pass - - -class LLMCallError(Exception): - pass - - -async def unsupported_file_type_handler(request: Request, exc: UnsupportedFileTypeError): - return JSONResponse( - status_code=400, - content={"code": "UNSUPPORTED_FILE_TYPE", "message": str(exc)}, - ) - - -async def storage_download_handler(request: Request, exc: StorageDownloadError): - return JSONResponse( - status_code=502, - content={"code": "STORAGE_ERROR", "message": str(exc)}, - ) - - -async def llm_parse_handler(request: Request, exc: LLMResponseParseError): - return JSONResponse( - status_code=502, - content={"code": "LLM_PARSE_ERROR", "message": str(exc)}, - ) - - -async def llm_call_handler(request: Request, exc: LLMCallError): - return JSONResponse( - status_code=503, - content={"code": "LLM_CALL_ERROR", "message": str(exc)}, - ) - - -async def generic_error_handler(request: Request, exc: Exception): - logging.getLogger("error").exception("未捕获异常") - return JSONResponse( - status_code=500, - content={"code": "INTERNAL_ERROR", "message": "服务器内部错误"}, - ) -``` - -- [ ] **Step 3: 实现 `app/core/json_utils.py`** - -```python -import json -from app.core.exceptions import LLMResponseParseError - - -def parse_json_response(raw: str) -> list | dict: - """从 GLM 响应中解析 JSON,兼容 markdown 代码块包裹格式。""" - content = raw.strip() - if "```json" in content: - content = content.split("```json")[1].split("```")[0] - elif "```" in content: - content = content.split("```")[1].split("```")[0] - content = content.strip() - try: - return json.loads(content) - except json.JSONDecodeError as e: - raise LLMResponseParseError( - f"GLM 返回内容无法解析为 JSON: {raw[:200]}" - ) from e -``` - -- [ ] **Step 4: Commit** - -```bash -git add app/core/logging.py app/core/exceptions.py app/core/json_utils.py -git commit -m "feat: core logging, exceptions, json utils" -``` - ---- - -## Task 4: LLM 适配层 - -**Files:** -- Create: `app/clients/llm/base.py` -- Create: `app/clients/llm/zhipuai_client.py` -- Create: `tests/test_llm_client.py` - -- [ ] **Step 1: 编写失败测试** - -`tests/test_llm_client.py`: - -```python -import asyncio -import pytest -from unittest.mock import MagicMock, patch -from app.clients.llm.zhipuai_client import ZhipuAIClient - - -@pytest.fixture -def zhipuai_client(): - with patch("app.clients.llm.zhipuai_client.ZhipuAI") as MockZhipuAI: - mock_sdk = MagicMock() - MockZhipuAI.return_value = mock_sdk - client = ZhipuAIClient(api_key="test-key") - client._mock_sdk = mock_sdk - yield client - - -def test_chat_returns_content(zhipuai_client): - mock_resp = MagicMock() - mock_resp.choices[0].message.content = "三元组提取结果" - zhipuai_client._mock_sdk.chat.completions.create.return_value = mock_resp - - result = asyncio.run( - zhipuai_client.chat( - messages=[{"role": "user", "content": "提取三元组"}], - model="glm-4-flash", - ) - ) - assert result == "三元组提取结果" - zhipuai_client._mock_sdk.chat.completions.create.assert_called_once() - - -def test_chat_vision_calls_same_endpoint(zhipuai_client): - mock_resp = MagicMock() - mock_resp.choices[0].message.content = "图像分析结果" - zhipuai_client._mock_sdk.chat.completions.create.return_value = mock_resp - - result = asyncio.run( - zhipuai_client.chat_vision( - messages=[{"role": "user", "content": [{"type": "text", "text": "分析"}]}], - model="glm-4v-flash", - ) - ) - assert result == "图像分析结果" -``` - -- [ ] **Step 2: 运行,确认失败** - -```bash -conda run -n label pytest tests/test_llm_client.py -v -``` - -Expected: `ImportError` - -- [ ] **Step 3: 实现 `app/clients/llm/base.py`** - -```python -from abc import ABC, abstractmethod - - -class LLMClient(ABC): - @abstractmethod - async def chat(self, messages: list[dict], model: str, **kwargs) -> str: - """纯文本对话,返回模型输出文本。""" - - @abstractmethod - async def chat_vision(self, messages: list[dict], model: str, **kwargs) -> str: - """多模态对话(图文混合输入),返回模型输出文本。""" -``` - -- [ ] **Step 4: 实现 `app/clients/llm/zhipuai_client.py`** - -```python -import asyncio -from zhipuai import ZhipuAI -from app.clients.llm.base import LLMClient - - -class ZhipuAIClient(LLMClient): - def __init__(self, api_key: str): - self._client = ZhipuAI(api_key=api_key) - - async def chat(self, messages: list[dict], model: str, **kwargs) -> str: - loop = asyncio.get_event_loop() - resp = await loop.run_in_executor( - None, - lambda: self._client.chat.completions.create( - model=model, messages=messages, **kwargs - ), - ) - return resp.choices[0].message.content - - async def chat_vision(self, messages: list[dict], model: str, **kwargs) -> str: - # GLM-4V 与文本接口相同,通过 image_url type 区分图文消息 - return await self.chat(messages, model, **kwargs) -``` - -- [ ] **Step 5: 运行,确认通过** - -```bash -conda run -n label pytest tests/test_llm_client.py -v -``` - -Expected: `2 passed` - -- [ ] **Step 6: Commit** - -```bash -git add app/clients/llm/ tests/test_llm_client.py -git commit -m "feat: LLMClient ABC and ZhipuAI implementation" -``` - ---- - -## Task 5: Storage 适配层 - -**Files:** -- Create: `app/clients/storage/base.py` -- Create: `app/clients/storage/rustfs_client.py` -- Create: `tests/test_storage_client.py` - -- [ ] **Step 1: 编写失败测试** - -`tests/test_storage_client.py`: - -```python -import asyncio -import pytest -from unittest.mock import MagicMock, patch -from app.clients.storage.rustfs_client import RustFSClient - - -@pytest.fixture -def rustfs_client(): - with patch("app.clients.storage.rustfs_client.boto3") as mock_boto3: - mock_s3 = MagicMock() - mock_boto3.client.return_value = mock_s3 - client = RustFSClient( - endpoint="http://localhost:9000", - access_key="minioadmin", - secret_key="minioadmin", - ) - client._mock_s3 = mock_s3 - yield client - - -def test_download_bytes(rustfs_client): - mock_body = MagicMock() - mock_body.read.return_value = b"file content" - rustfs_client._mock_s3.get_object.return_value = {"Body": mock_body} - - result = asyncio.run( - rustfs_client.download_bytes("source-data", "text/202404/1.txt") - ) - assert result == b"file content" - rustfs_client._mock_s3.get_object.assert_called_once_with( - Bucket="source-data", Key="text/202404/1.txt" - ) - - -def test_upload_bytes(rustfs_client): - asyncio.run( - rustfs_client.upload_bytes("source-data", "crops/1/0.jpg", b"img", "image/jpeg") - ) - rustfs_client._mock_s3.put_object.assert_called_once_with( - Bucket="source-data", Key="crops/1/0.jpg", Body=b"img", ContentType="image/jpeg" - ) - - -def test_get_presigned_url(rustfs_client): - rustfs_client._mock_s3.generate_presigned_url.return_value = "https://example.com/signed" - url = rustfs_client.get_presigned_url("source-data", "crops/1/0.jpg", expires=3600) - assert url == "https://example.com/signed" - rustfs_client._mock_s3.generate_presigned_url.assert_called_once_with( - "get_object", - Params={"Bucket": "source-data", "Key": "crops/1/0.jpg"}, - ExpiresIn=3600, - ) - - -def test_get_object_size(rustfs_client): - rustfs_client._mock_s3.head_object.return_value = {"ContentLength": 1024 * 1024 * 50} - size = asyncio.run(rustfs_client.get_object_size("source-data", "video/1.mp4")) - assert size == 1024 * 1024 * 50 - rustfs_client._mock_s3.head_object.assert_called_once_with( - Bucket="source-data", Key="video/1.mp4" - ) -``` - -- [ ] **Step 2: 运行,确认失败** - -```bash -conda run -n label pytest tests/test_storage_client.py -v -``` - -Expected: `ImportError` - -- [ ] **Step 3: 实现 `app/clients/storage/base.py`** - -```python -from abc import ABC, abstractmethod - - -class StorageClient(ABC): - @abstractmethod - async def download_bytes(self, bucket: str, path: str) -> bytes: - """从对象存储下载文件,返回字节内容。""" - - @abstractmethod - async def upload_bytes( - self, - bucket: str, - path: str, - data: bytes, - content_type: str = "application/octet-stream", - ) -> None: - """上传字节内容到对象存储。""" - - @abstractmethod - def get_presigned_url(self, bucket: str, path: str, expires: int = 3600) -> str: - """生成预签名访问 URL。""" - - @abstractmethod - async def get_object_size(self, bucket: str, path: str) -> int: - """返回对象字节大小,用于在下载前进行大小校验。""" -``` - -- [ ] **Step 4: 实现 `app/clients/storage/rustfs_client.py`** - -```python -import asyncio -import boto3 -from app.clients.storage.base import StorageClient - - -class RustFSClient(StorageClient): - def __init__(self, endpoint: str, access_key: str, secret_key: str): - self._s3 = boto3.client( - "s3", - endpoint_url=endpoint, - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - ) - - async def download_bytes(self, bucket: str, path: str) -> bytes: - loop = asyncio.get_event_loop() - resp = await loop.run_in_executor( - None, lambda: self._s3.get_object(Bucket=bucket, Key=path) - ) - return resp["Body"].read() - - async def upload_bytes( - self, - bucket: str, - path: str, - data: bytes, - content_type: str = "application/octet-stream", - ) -> None: - loop = asyncio.get_event_loop() - await loop.run_in_executor( - None, - lambda: self._s3.put_object( - Bucket=bucket, Key=path, Body=data, ContentType=content_type - ), - ) - - def get_presigned_url(self, bucket: str, path: str, expires: int = 3600) -> str: - return self._s3.generate_presigned_url( - "get_object", - Params={"Bucket": bucket, "Key": path}, - ExpiresIn=expires, - ) - - async def get_object_size(self, bucket: str, path: str) -> int: - loop = asyncio.get_event_loop() - resp = await loop.run_in_executor( - None, lambda: self._s3.head_object(Bucket=bucket, Key=path) - ) - return resp["ContentLength"] -``` - -- [ ] **Step 5: 运行,确认通过** - -```bash -conda run -n label pytest tests/test_storage_client.py -v -``` - -Expected: `4 passed` - -- [ ] **Step 6: Commit** - -```bash -git add app/clients/storage/ tests/test_storage_client.py -git commit -m "feat: StorageClient ABC and RustFS S3 implementation" -``` - ---- - -## Task 6: 依赖注入 + FastAPI 应用入口 - -**Files:** -- Create: `app/core/dependencies.py` -- Create: `app/main.py` - -- [ ] **Step 1: 实现 `app/core/dependencies.py`** - -```python -from app.clients.llm.base import LLMClient -from app.clients.storage.base import StorageClient - -_llm_client: LLMClient | None = None -_storage_client: StorageClient | None = None - - -def set_clients(llm: LLMClient, storage: StorageClient) -> None: - global _llm_client, _storage_client - _llm_client, _storage_client = llm, storage - - -def get_llm_client() -> LLMClient: - return _llm_client - - -def get_storage_client() -> StorageClient: - return _storage_client -``` - -- [ ] **Step 2: 实现 `app/main.py`** - -注意:routers 在后续任务中创建,先注释掉 include_router,待各路由实现后逐步取消注释。 - -```python -import logging -from contextlib import asynccontextmanager - -from fastapi import FastAPI - -from app.core.config import get_config -from app.core.dependencies import set_clients -from app.core.exceptions import ( - LLMCallError, - LLMResponseParseError, - StorageDownloadError, - UnsupportedFileTypeError, - generic_error_handler, - llm_call_handler, - llm_parse_handler, - storage_download_handler, - unsupported_file_type_handler, -) -from app.core.logging import request_logging_middleware, setup_logging -from app.clients.llm.zhipuai_client import ZhipuAIClient -from app.clients.storage.rustfs_client import RustFSClient - - -@asynccontextmanager -async def lifespan(app: FastAPI): - cfg = get_config() - setup_logging(cfg["server"]["log_level"]) - set_clients( - llm=ZhipuAIClient(api_key=cfg["zhipuai"]["api_key"]), - storage=RustFSClient( - endpoint=cfg["storage"]["endpoint"], - access_key=cfg["storage"]["access_key"], - secret_key=cfg["storage"]["secret_key"], - ), - ) - logging.getLogger("startup").info("AI 服务启动完成") - yield - logging.getLogger("startup").info("AI 服务关闭") - - -app = FastAPI(title="Label AI Service", version="1.0.0", lifespan=lifespan) - -app.middleware("http")(request_logging_middleware) - - -@app.get("/health", tags=["Health"]) -async def health(): - return {"status": "ok"} - -app.add_exception_handler(UnsupportedFileTypeError, unsupported_file_type_handler) -app.add_exception_handler(StorageDownloadError, storage_download_handler) -app.add_exception_handler(LLMResponseParseError, llm_parse_handler) -app.add_exception_handler(LLMCallError, llm_call_handler) -app.add_exception_handler(Exception, generic_error_handler) - -# Routers registered after each task: -# from app.routers import text, image, video, qa, finetune -# app.include_router(text.router, prefix="/api/v1") -# app.include_router(image.router, prefix="/api/v1") -# app.include_router(video.router, prefix="/api/v1") -# app.include_router(qa.router, prefix="/api/v1") -# app.include_router(finetune.router, prefix="/api/v1") -``` - -- [ ] **Step 3: 验证 /health 端点** - -```bash -conda run -n label python -c " -from fastapi.testclient import TestClient -from app.main import app -client = TestClient(app) -r = client.get('/health') -assert r.status_code == 200 and r.json() == {'status': 'ok'}, r.json() -print('health check OK') -" -``` - -Expected: `health check OK` - -- [ ] **Step 4: Commit** - -```bash -git add app/core/dependencies.py app/main.py -git commit -m "feat: DI dependencies, FastAPI app entry with lifespan and /health endpoint" -``` - ---- - -## Task 7: Text Pydantic Models - -**Files:** -- Create: `app/models/text_models.py` - -- [ ] **Step 1: 实现 `app/models/text_models.py`** - -```python -from pydantic import BaseModel - - -class SourceOffset(BaseModel): - start: int - end: int - - -class TripleItem(BaseModel): - subject: str - predicate: str - object: str - source_snippet: str - source_offset: SourceOffset - - -class TextExtractRequest(BaseModel): - file_path: str - file_name: str - model: str | None = None - prompt_template: str | None = None - - -class TextExtractResponse(BaseModel): - items: list[TripleItem] -``` - -- [ ] **Step 2: 快速验证 schema** - -```bash -conda run -n label python -c " -from app.models.text_models import TextExtractRequest, TextExtractResponse, TripleItem, SourceOffset -req = TextExtractRequest(file_path='text/1.txt', file_name='1.txt') -item = TripleItem(subject='A', predicate='B', object='C', source_snippet='ABC', source_offset=SourceOffset(start=0, end=3)) -resp = TextExtractResponse(items=[item]) -print(resp.model_dump()) -" -``` - -Expected: 打印出完整字典,无报错 - -- [ ] **Step 3: Commit** - -```bash -git add app/models/text_models.py -git commit -m "feat: text Pydantic models" -``` - ---- - -## Task 8: Text Service - -**Files:** -- Create: `app/services/text_service.py` -- Create: `tests/test_text_service.py` - -- [ ] **Step 1: 编写失败测试** - -`tests/test_text_service.py`: - -```python -import pytest -from app.services.text_service import extract_triples, _extract_text_from_bytes -from app.core.exceptions import UnsupportedFileTypeError, LLMResponseParseError, StorageDownloadError - -TRIPLE_JSON = '[{"subject":"变压器","predicate":"额定电压","object":"110kV","source_snippet":"额定电压为110kV","source_offset":{"start":0,"end":10}}]' - - -@pytest.mark.asyncio -async def test_extract_triples_txt(mock_llm, mock_storage): - mock_storage.download_bytes.return_value = b"变压器额定电压为110kV" - mock_llm.chat.return_value = TRIPLE_JSON - - result = await extract_triples( - file_path="text/1.txt", - file_name="test.txt", - model="glm-4-flash", - prompt_template="提取三元组:", - llm=mock_llm, - storage=mock_storage, - ) - assert len(result) == 1 - assert result[0].subject == "变压器" - assert result[0].predicate == "额定电压" - assert result[0].object == "110kV" - assert result[0].source_offset.start == 0 - - -@pytest.mark.asyncio -async def test_extract_triples_markdown_wrapped_json(mock_llm, mock_storage): - mock_storage.download_bytes.return_value = b"some text" - mock_llm.chat.return_value = f"```json\n{TRIPLE_JSON}\n```" - - result = await extract_triples( - file_path="text/1.txt", - file_name="test.txt", - model="glm-4-flash", - prompt_template="", - llm=mock_llm, - storage=mock_storage, - ) - assert len(result) == 1 - - -@pytest.mark.asyncio -async def test_extract_triples_storage_error(mock_llm, mock_storage): - mock_storage.download_bytes.side_effect = Exception("connection refused") - - with pytest.raises(StorageDownloadError): - await extract_triples( - file_path="text/1.txt", - file_name="test.txt", - model="glm-4-flash", - prompt_template="", - llm=mock_llm, - storage=mock_storage, - ) - - -@pytest.mark.asyncio -async def test_extract_triples_llm_parse_error(mock_llm, mock_storage): - mock_storage.download_bytes.return_value = b"some text" - mock_llm.chat.return_value = "这不是JSON" - - with pytest.raises(LLMResponseParseError): - await extract_triples( - file_path="text/1.txt", - file_name="test.txt", - model="glm-4-flash", - prompt_template="", - llm=mock_llm, - storage=mock_storage, - ) - - -def test_unsupported_file_type_raises(): - with pytest.raises(UnsupportedFileTypeError): - _extract_text_from_bytes(b"content", "doc.xlsx") - - -def test_parse_txt_bytes(): - result = _extract_text_from_bytes("你好世界".encode("utf-8"), "file.txt") - assert result == "你好世界" -``` - -- [ ] **Step 2: 运行,确认失败** - -```bash -conda run -n label pytest tests/test_text_service.py -v -``` - -Expected: `ImportError` - -- [ ] **Step 3: 实现 `app/services/text_service.py`** - -```python -import logging -from pathlib import Path - -from app.clients.llm.base import LLMClient -from app.clients.storage.base import StorageClient -from app.core.exceptions import LLMCallError, LLMResponseParseError, StorageDownloadError, UnsupportedFileTypeError -from app.core.json_utils import parse_json_response -from app.models.text_models import SourceOffset, TripleItem - -logger = logging.getLogger(__name__) - -DEFAULT_PROMPT = """请从以下文本中提取知识三元组。 -对每个三元组提供: -- subject:主语实体 -- predicate:谓语关系 -- object:宾语实体 -- source_snippet:原文中的证据片段(直接引用原文) -- source_offset:证据片段字符偏移 {"start": N, "end": M} - -以 JSON 数组格式返回,例如: -[{"subject":"...","predicate":"...","object":"...","source_snippet":"...","source_offset":{"start":0,"end":50}}] - -文本内容: -""" - - -def _parse_txt(data: bytes) -> str: - return data.decode("utf-8") - - -def _parse_pdf(data: bytes) -> str: - import io - import pdfplumber - with pdfplumber.open(io.BytesIO(data)) as pdf: - return "\n".join(page.extract_text() or "" for page in pdf.pages) - - -def _parse_docx(data: bytes) -> str: - import io - import docx - doc = docx.Document(io.BytesIO(data)) - return "\n".join(p.text for p in doc.paragraphs if p.text.strip()) - - -_PARSERS = { - ".txt": _parse_txt, - ".pdf": _parse_pdf, - ".docx": _parse_docx, -} - - -def _extract_text_from_bytes(data: bytes, filename: str) -> str: - ext = Path(filename).suffix.lower() - parser = _PARSERS.get(ext) - if parser is None: - raise UnsupportedFileTypeError(ext) - return parser(data) - - -async def extract_triples( - file_path: str, - file_name: str, - model: str, - prompt_template: str, - llm: LLMClient, - storage: StorageClient, - bucket: str = "source-data", -) -> list[TripleItem]: - try: - data = await storage.download_bytes(bucket, file_path) - except Exception as e: - raise StorageDownloadError(f"下载文件失败 {file_path}: {e}") from e - - text = _extract_text_from_bytes(data, file_name) - prompt = prompt_template or DEFAULT_PROMPT - - messages = [ - {"role": "system", "content": "你是专业的知识图谱构建助手,擅长从文本中提取结构化知识三元组。"}, - {"role": "user", "content": prompt + text}, - ] - - try: - raw = await llm.chat(messages, model) - except Exception as e: - raise LLMCallError(f"GLM 调用失败: {e}") from e - - logger.info(f"text_extract file={file_path} model={model}") - - items_raw = parse_json_response(raw) - - result = [] - for item in items_raw: - try: - offset = item.get("source_offset", {}) - result.append(TripleItem( - subject=item["subject"], - predicate=item["predicate"], - object=item["object"], - source_snippet=item.get("source_snippet", ""), - source_offset=SourceOffset( - start=offset.get("start", 0), - end=offset.get("end", 0), - ), - )) - except (KeyError, TypeError) as e: - logger.warning(f"跳过不完整三元组: {item}, error: {e}") - - return result -``` - -- [ ] **Step 4: 运行,确认通过** - -```bash -conda run -n label pytest tests/test_text_service.py -v -``` - -Expected: `6 passed` - -- [ ] **Step 5: Commit** - -```bash -git add app/services/text_service.py tests/test_text_service.py -git commit -m "feat: text service with txt/pdf/docx parsing and triple extraction" -``` - ---- - -## Task 9: Text Router - -**Files:** -- Create: `app/routers/text.py` -- Create: `tests/test_text_router.py` - -- [ ] **Step 1: 编写失败测试** - -`tests/test_text_router.py`: - -```python -import pytest -from fastapi.testclient import TestClient -from unittest.mock import AsyncMock, patch -from app.main import app -from app.core.dependencies import set_clients -from app.models.text_models import TripleItem, SourceOffset - - -@pytest.fixture -def client(mock_llm, mock_storage): - set_clients(mock_llm, mock_storage) - return TestClient(app) - - -def test_text_extract_success(client, mock_llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=b"变压器额定电压110kV") - mock_llm.chat = AsyncMock(return_value='[{"subject":"变压器","predicate":"额定电压","object":"110kV","source_snippet":"额定电压110kV","source_offset":{"start":3,"end":10}}]') - - resp = client.post("/api/v1/text/extract", json={ - "file_path": "text/202404/1.txt", - "file_name": "规范.txt", - }) - assert resp.status_code == 200 - data = resp.json() - assert len(data["items"]) == 1 - assert data["items"][0]["subject"] == "变压器" - - -def test_text_extract_unsupported_file(client, mock_llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=b"content") - resp = client.post("/api/v1/text/extract", json={ - "file_path": "text/202404/1.xlsx", - "file_name": "file.xlsx", - }) - assert resp.status_code == 400 - assert resp.json()["code"] == "UNSUPPORTED_FILE_TYPE" -``` - -- [ ] **Step 2: 实现 `app/routers/text.py`** - -```python -from fastapi import APIRouter, Depends - -from app.clients.llm.base import LLMClient -from app.clients.storage.base import StorageClient -from app.core.config import get_config -from app.core.dependencies import get_llm_client, get_storage_client -from app.models.text_models import TextExtractRequest, TextExtractResponse -from app.services import text_service - -router = APIRouter(tags=["Text"]) - - -@router.post("/text/extract", response_model=TextExtractResponse) -async def extract_text( - req: TextExtractRequest, - llm: LLMClient = Depends(get_llm_client), - storage: StorageClient = Depends(get_storage_client), -): - cfg = get_config() - model = req.model or cfg["models"]["default_text"] - prompt = req.prompt_template or text_service.DEFAULT_PROMPT - - items = await text_service.extract_triples( - file_path=req.file_path, - file_name=req.file_name, - model=model, - prompt_template=prompt, - llm=llm, - storage=storage, - bucket=cfg["storage"]["buckets"]["source_data"], - ) - return TextExtractResponse(items=items) -``` - -- [ ] **Step 3: 在 `app/main.py` 注册路由** - -取消注释以下两行: - -```python -from app.routers import text -app.include_router(text.router, prefix="/api/v1") -``` - -- [ ] **Step 4: 运行测试** - -```bash -conda run -n label pytest tests/test_text_router.py -v -``` - -Expected: `2 passed` - -- [ ] **Step 5: Commit** - -```bash -git add app/routers/text.py tests/test_text_router.py app/main.py -git commit -m "feat: text router POST /api/v1/text/extract" -``` - ---- - -## Task 10: Image Models + Service - -**Files:** -- Create: `app/models/image_models.py` -- Create: `app/services/image_service.py` -- Create: `tests/test_image_service.py` - -- [ ] **Step 1: 实现 `app/models/image_models.py`** - -```python -from pydantic import BaseModel - - -class BBox(BaseModel): - x: int - y: int - w: int - h: int - - -class QuadrupleItem(BaseModel): - subject: str - predicate: str - object: str - qualifier: str - bbox: BBox - cropped_image_path: str - - -class ImageExtractRequest(BaseModel): - file_path: str - task_id: int - model: str | None = None - prompt_template: str | None = None - - -class ImageExtractResponse(BaseModel): - items: list[QuadrupleItem] -``` - -- [ ] **Step 2: 编写失败测试** - -`tests/test_image_service.py`: - -```python -import pytest -import numpy as np -import cv2 -from app.services.image_service import extract_quadruples, _crop_image -from app.models.image_models import BBox -from app.core.exceptions import LLMResponseParseError, StorageDownloadError - -QUAD_JSON = '[{"subject":"电缆接头","predicate":"位于","object":"配电箱左侧","qualifier":"2024年","bbox":{"x":10,"y":20,"w":50,"h":40}}]' - - -def _make_test_image_bytes(width=200, height=200) -> bytes: - img = np.zeros((height, width, 3), dtype=np.uint8) - img[:] = (100, 150, 200) - _, buf = cv2.imencode(".jpg", img) - return buf.tobytes() - - -def test_crop_image(): - img_bytes = _make_test_image_bytes(200, 200) - bbox = BBox(x=10, y=20, w=50, h=40) - result = _crop_image(img_bytes, bbox) - assert isinstance(result, bytes) - arr = np.frombuffer(result, dtype=np.uint8) - img = cv2.imdecode(arr, cv2.IMREAD_COLOR) - assert img.shape[0] == 40 # height - assert img.shape[1] == 50 # width - - -@pytest.mark.asyncio -async def test_extract_quadruples_success(mock_llm, mock_storage): - mock_storage.download_bytes.return_value = _make_test_image_bytes() - mock_llm.chat_vision.return_value = QUAD_JSON - mock_storage.upload_bytes.return_value = None - - result = await extract_quadruples( - file_path="image/202404/1.jpg", - task_id=789, - model="glm-4v-flash", - prompt_template="提取四元组", - llm=mock_llm, - storage=mock_storage, - ) - assert len(result) == 1 - assert result[0].subject == "电缆接头" - assert result[0].cropped_image_path == "crops/789/0.jpg" - mock_storage.upload_bytes.assert_called_once() - - -@pytest.mark.asyncio -async def test_extract_quadruples_storage_error(mock_llm, mock_storage): - mock_storage.download_bytes.side_effect = Exception("timeout") - with pytest.raises(StorageDownloadError): - await extract_quadruples( - file_path="image/1.jpg", - task_id=1, - model="glm-4v-flash", - prompt_template="", - llm=mock_llm, - storage=mock_storage, - ) - - -@pytest.mark.asyncio -async def test_extract_quadruples_parse_error(mock_llm, mock_storage): - mock_storage.download_bytes.return_value = _make_test_image_bytes() - mock_llm.chat_vision.return_value = "不是JSON" - with pytest.raises(LLMResponseParseError): - await extract_quadruples( - file_path="image/1.jpg", - task_id=1, - model="glm-4v-flash", - prompt_template="", - llm=mock_llm, - storage=mock_storage, - ) -``` - -- [ ] **Step 3: 运行,确认失败** - -```bash -conda run -n label pytest tests/test_image_service.py -v -``` - -Expected: `ImportError` - -- [ ] **Step 4: 实现 `app/services/image_service.py`** - -```python -import base64 -import logging -from pathlib import Path - -import cv2 -import numpy as np - -from app.clients.llm.base import LLMClient -from app.clients.storage.base import StorageClient -from app.core.exceptions import LLMCallError, LLMResponseParseError, StorageDownloadError -from app.core.json_utils import parse_json_response -from app.models.image_models import BBox, QuadrupleItem - -logger = logging.getLogger(__name__) - -DEFAULT_PROMPT = """请分析这张图片,提取知识四元组。 -对每个四元组提供: -- subject:主体实体 -- predicate:关系/属性 -- object:客体实体 -- qualifier:修饰信息(时间、条件、场景,无则填空字符串) -- bbox:边界框 {"x": N, "y": N, "w": N, "h": N}(像素坐标,相对原图) - -以 JSON 数组格式返回: -[{"subject":"...","predicate":"...","object":"...","qualifier":"...","bbox":{"x":0,"y":0,"w":100,"h":100}}] -""" - - -def _crop_image(image_bytes: bytes, bbox: BBox) -> bytes: - arr = np.frombuffer(image_bytes, dtype=np.uint8) - img = cv2.imdecode(arr, cv2.IMREAD_COLOR) - h, w = img.shape[:2] - x = max(0, bbox.x) - y = max(0, bbox.y) - x2 = min(w, bbox.x + bbox.w) - y2 = min(h, bbox.y + bbox.h) - cropped = img[y:y2, x:x2] - _, buf = cv2.imencode(".jpg", cropped, [cv2.IMWRITE_JPEG_QUALITY, 90]) - return buf.tobytes() - - -async def extract_quadruples( - file_path: str, - task_id: int, - model: str, - prompt_template: str, - llm: LLMClient, - storage: StorageClient, - source_bucket: str = "source-data", -) -> list[QuadrupleItem]: - try: - data = await storage.download_bytes(source_bucket, file_path) - except Exception as e: - raise StorageDownloadError(f"下载图片失败 {file_path}: {e}") from e - - ext = Path(file_path).suffix.lstrip(".") or "jpeg" - b64 = base64.b64encode(data).decode() - - messages = [ - {"role": "system", "content": "你是专业的视觉分析助手,擅长从图像中提取结构化知识四元组。"}, - {"role": "user", "content": [ - {"type": "image_url", "image_url": {"url": f"data:image/{ext};base64,{b64}"}}, - {"type": "text", "text": prompt_template or DEFAULT_PROMPT}, - ]}, - ] - - try: - raw = await llm.chat_vision(messages, model) - except Exception as e: - raise LLMCallError(f"GLM-4V 调用失败: {e}") from e - - logger.info(f"image_extract file={file_path} model={model}") - items_raw = parse_json_response(raw) - - result = [] - for i, item in enumerate(items_raw): - try: - bbox = BBox(**item["bbox"]) - cropped = _crop_image(data, bbox) - crop_path = f"crops/{task_id}/{i}.jpg" - await storage.upload_bytes(source_bucket, crop_path, cropped, "image/jpeg") - result.append(QuadrupleItem( - subject=item["subject"], - predicate=item["predicate"], - object=item["object"], - qualifier=item.get("qualifier", ""), - bbox=bbox, - cropped_image_path=crop_path, - )) - except Exception as e: - logger.warning(f"跳过不完整四元组 index={i}: {e}") - - return result -``` - -- [ ] **Step 5: 运行,确认通过** - -```bash -conda run -n label pytest tests/test_image_service.py -v -``` - -Expected: `4 passed` - -- [ ] **Step 6: Commit** - -```bash -git add app/models/image_models.py app/services/image_service.py tests/test_image_service.py -git commit -m "feat: image models, service with bbox crop and quadruple extraction" -``` - ---- - -## Task 11: Image Router - -**Files:** -- Create: `app/routers/image.py` -- Create: `tests/test_image_router.py` - -- [ ] **Step 1: 编写失败测试** - -`tests/test_image_router.py`: - -```python -import numpy as np -import cv2 -import pytest -from fastapi.testclient import TestClient -from unittest.mock import AsyncMock -from app.main import app -from app.core.dependencies import set_clients - - -def _make_image_bytes() -> bytes: - img = np.zeros((100, 100, 3), dtype=np.uint8) - _, buf = cv2.imencode(".jpg", img) - return buf.tobytes() - - -@pytest.fixture -def client(mock_llm, mock_storage): - set_clients(mock_llm, mock_storage) - return TestClient(app) - - -def test_image_extract_success(client, mock_llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=_make_image_bytes()) - mock_storage.upload_bytes = AsyncMock(return_value=None) - mock_llm.chat_vision = AsyncMock(return_value='[{"subject":"A","predicate":"B","object":"C","qualifier":"","bbox":{"x":0,"y":0,"w":10,"h":10}}]') - - resp = client.post("/api/v1/image/extract", json={ - "file_path": "image/202404/1.jpg", - "task_id": 42, - }) - assert resp.status_code == 200 - data = resp.json() - assert len(data["items"]) == 1 - assert data["items"][0]["cropped_image_path"] == "crops/42/0.jpg" -``` - -- [ ] **Step 2: 实现 `app/routers/image.py`** - -```python -from fastapi import APIRouter, Depends - -from app.clients.llm.base import LLMClient -from app.clients.storage.base import StorageClient -from app.core.config import get_config -from app.core.dependencies import get_llm_client, get_storage_client -from app.models.image_models import ImageExtractRequest, ImageExtractResponse -from app.services import image_service - -router = APIRouter(tags=["Image"]) - - -@router.post("/image/extract", response_model=ImageExtractResponse) -async def extract_image( - req: ImageExtractRequest, - llm: LLMClient = Depends(get_llm_client), - storage: StorageClient = Depends(get_storage_client), -): - cfg = get_config() - model = req.model or cfg["models"]["default_vision"] - prompt = req.prompt_template or image_service.DEFAULT_PROMPT - - items = await image_service.extract_quadruples( - file_path=req.file_path, - task_id=req.task_id, - model=model, - prompt_template=prompt, - llm=llm, - storage=storage, - source_bucket=cfg["storage"]["buckets"]["source_data"], - ) - return ImageExtractResponse(items=items) -``` - -- [ ] **Step 3: 在 `app/main.py` 注册路由** - -```python -from app.routers import text, image -app.include_router(image.router, prefix="/api/v1") -``` - -- [ ] **Step 4: 运行测试** - -```bash -conda run -n label pytest tests/test_image_router.py -v -``` - -Expected: `1 passed` - -- [ ] **Step 5: Commit** - -```bash -git add app/routers/image.py tests/test_image_router.py app/main.py -git commit -m "feat: image router POST /api/v1/image/extract" -``` - ---- - -## Task 12: Video Models + Service - -**Files:** -- Create: `app/models/video_models.py` -- Create: `app/services/video_service.py` -- Create: `tests/test_video_service.py` - -- [ ] **Step 1: 实现 `app/models/video_models.py`** - -```python -from pydantic import BaseModel - - -class ExtractFramesRequest(BaseModel): - file_path: str - source_id: int - job_id: int - mode: str = "interval" # interval | keyframe - frame_interval: int = 30 - - -class ExtractFramesResponse(BaseModel): - message: str - job_id: int - - -class FrameInfo(BaseModel): - frame_index: int - time_sec: float - frame_path: str - - -class VideoToTextRequest(BaseModel): - file_path: str - source_id: int - job_id: int - start_sec: float = 0.0 - end_sec: float - model: str | None = None - prompt_template: str | None = None - - -class VideoToTextResponse(BaseModel): - message: str - job_id: int - - -class VideoJobCallback(BaseModel): - job_id: int - status: str # SUCCESS | FAILED - frames: list[FrameInfo] | None = None - output_path: str | None = None - error_message: str | None = None -``` - -- [ ] **Step 2: 编写失败测试** - -`tests/test_video_service.py`: - -```python -import numpy as np -import pytest -from unittest.mock import AsyncMock, patch, MagicMock -from app.services.video_service import _is_scene_change, extract_frames_background - - -def test_is_scene_change_different_frames(): - prev = np.zeros((100, 100), dtype=np.uint8) - curr = np.full((100, 100), 200, dtype=np.uint8) - assert _is_scene_change(prev, curr, threshold=30.0) is True - - -def test_is_scene_change_similar_frames(): - prev = np.full((100, 100), 100, dtype=np.uint8) - curr = np.full((100, 100), 101, dtype=np.uint8) - assert _is_scene_change(prev, curr, threshold=30.0) is False - - -@pytest.mark.asyncio -async def test_extract_frames_background_calls_callback_on_success(mock_storage): - import cv2 - import tempfile, os - - # 创建一个有效的真实测试视频(5帧,10x10) - with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f: - tmp_path = f.name - - out = cv2.VideoWriter(tmp_path, cv2.VideoWriter_fourcc(*"mp4v"), 10, (10, 10)) - for _ in range(5): - out.write(np.zeros((10, 10, 3), dtype=np.uint8)) - out.release() - - with open(tmp_path, "rb") as f: - video_bytes = f.read() - os.unlink(tmp_path) - - mock_storage.download_bytes.return_value = video_bytes - mock_storage.upload_bytes = AsyncMock(return_value=None) - - with patch("app.services.video_service.httpx") as mock_httpx: - mock_client = AsyncMock() - mock_httpx.AsyncClient.return_value.__aenter__ = AsyncMock(return_value=mock_client) - mock_httpx.AsyncClient.return_value.__aexit__ = AsyncMock(return_value=False) - mock_client.post = AsyncMock() - - await extract_frames_background( - file_path="video/1.mp4", - source_id=10, - job_id=42, - mode="interval", - frame_interval=1, - storage=mock_storage, - callback_url="http://backend/callback", - ) - - mock_client.post.assert_called_once() - call_kwargs = mock_client.post.call_args - payload = call_kwargs.kwargs.get("json") or call_kwargs.args[1] if len(call_kwargs.args) > 1 else call_kwargs.kwargs["json"] - assert payload["job_id"] == 42 - assert payload["status"] == "SUCCESS" - - -@pytest.mark.asyncio -async def test_extract_frames_background_calls_callback_on_failure(mock_storage): - mock_storage.download_bytes.side_effect = Exception("storage error") - - with patch("app.services.video_service.httpx") as mock_httpx: - mock_client = AsyncMock() - mock_httpx.AsyncClient.return_value.__aenter__ = AsyncMock(return_value=mock_client) - mock_httpx.AsyncClient.return_value.__aexit__ = AsyncMock(return_value=False) - mock_client.post = AsyncMock() - - await extract_frames_background( - file_path="video/1.mp4", - source_id=10, - job_id=99, - mode="interval", - frame_interval=30, - storage=mock_storage, - callback_url="http://backend/callback", - ) - - mock_client.post.assert_called_once() - call_kwargs = mock_client.post.call_args - payload = call_kwargs.kwargs.get("json") or (call_kwargs.args[1] if len(call_kwargs.args) > 1 else {}) - assert payload["status"] == "FAILED" - assert payload["job_id"] == 99 -``` - -- [ ] **Step 3: 运行,确认失败** - -```bash -conda run -n label pytest tests/test_video_service.py -v -``` - -Expected: `ImportError` - -- [ ] **Step 4: 实现 `app/services/video_service.py`** - -```python -import base64 -import logging -import tempfile -import time -from pathlib import Path - -import cv2 -import httpx -import numpy as np - -from app.clients.llm.base import LLMClient -from app.clients.storage.base import StorageClient -from app.core.exceptions import LLMCallError -from app.models.video_models import FrameInfo, VideoJobCallback - -logger = logging.getLogger(__name__) - -DEFAULT_VIDEO_TO_TEXT_PROMPT = """请分析这段视频的帧序列,用中文详细描述: -1. 视频中出现的主要对象、设备、人物 -2. 发生的主要动作、操作步骤 -3. 场景的整体情况 - -请输出结构化的文字描述,适合作为知识图谱构建的文本素材。""" - - -def _is_scene_change(prev: np.ndarray, curr: np.ndarray, threshold: float = 30.0) -> bool: - """通过帧差分均值判断是否发生场景切换。""" - diff = cv2.absdiff(prev, curr) - return float(diff.mean()) > threshold - - -def _extract_frames( - video_path: str, mode: str, frame_interval: int -) -> list[tuple[int, float, bytes]]: - cap = cv2.VideoCapture(video_path) - fps = cap.get(cv2.CAP_PROP_FPS) or 25.0 - results = [] - prev_gray = None - idx = 0 - - while True: - ret, frame = cap.read() - if not ret: - break - time_sec = idx / fps - if mode == "interval": - if idx % frame_interval == 0: - _, buf = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 90]) - results.append((idx, time_sec, buf.tobytes())) - else: - gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - if prev_gray is None or _is_scene_change(prev_gray, gray): - _, buf = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 90]) - results.append((idx, time_sec, buf.tobytes())) - prev_gray = gray - idx += 1 - - cap.release() - return results - - -def _sample_frames_as_base64( - video_path: str, start_sec: float, end_sec: float, count: int -) -> list[str]: - cap = cv2.VideoCapture(video_path) - fps = cap.get(cv2.CAP_PROP_FPS) or 25.0 - start_frame = int(start_sec * fps) - end_frame = int(end_sec * fps) - total = max(1, end_frame - start_frame) - step = max(1, total // count) - results = [] - for i in range(count): - frame_pos = start_frame + i * step - cap.set(cv2.CAP_PROP_POS_FRAMES, frame_pos) - ret, frame = cap.read() - if ret: - _, buf = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 85]) - results.append(base64.b64encode(buf.tobytes()).decode()) - cap.release() - return results - - -async def _send_callback(url: str, payload: VideoJobCallback) -> None: - async with httpx.AsyncClient(timeout=10) as client: - try: - await client.post(url, json=payload.model_dump()) - except Exception as e: - logger.warning(f"回调失败 url={url}: {e}") - - -async def extract_frames_background( - file_path: str, - source_id: int, - job_id: int, - mode: str, - frame_interval: int, - storage: StorageClient, - callback_url: str, - bucket: str = "source-data", -) -> None: - try: - data = await storage.download_bytes(bucket, file_path) - except Exception as e: - await _send_callback(callback_url, VideoJobCallback( - job_id=job_id, status="FAILED", error_message=str(e) - )) - return - - suffix = Path(file_path).suffix or ".mp4" - with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp: - tmp.write(data) - tmp_path = tmp.name - - try: - frames = _extract_frames(tmp_path, mode, frame_interval) - frame_infos = [] - for i, (frame_idx, time_sec, frame_data) in enumerate(frames): - frame_path = f"frames/{source_id}/{i}.jpg" - await storage.upload_bytes(bucket, frame_path, frame_data, "image/jpeg") - frame_infos.append(FrameInfo( - frame_index=frame_idx, - time_sec=round(time_sec, 3), - frame_path=frame_path, - )) - await _send_callback(callback_url, VideoJobCallback( - job_id=job_id, status="SUCCESS", frames=frame_infos - )) - logger.info(f"extract_frames job_id={job_id} frames={len(frame_infos)}") - except Exception as e: - logger.exception(f"extract_frames failed job_id={job_id}") - await _send_callback(callback_url, VideoJobCallback( - job_id=job_id, status="FAILED", error_message=str(e) - )) - finally: - Path(tmp_path).unlink(missing_ok=True) - - -async def video_to_text_background( - file_path: str, - source_id: int, - job_id: int, - start_sec: float, - end_sec: float, - model: str, - prompt_template: str, - frame_sample_count: int, - llm: LLMClient, - storage: StorageClient, - callback_url: str, - bucket: str = "source-data", -) -> None: - try: - data = await storage.download_bytes(bucket, file_path) - except Exception as e: - await _send_callback(callback_url, VideoJobCallback( - job_id=job_id, status="FAILED", error_message=str(e) - )) - return - - suffix = Path(file_path).suffix or ".mp4" - with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp: - tmp.write(data) - tmp_path = tmp.name - - try: - frames_b64 = _sample_frames_as_base64(tmp_path, start_sec, end_sec, frame_sample_count) - content: list = [] - for b64 in frames_b64: - content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}}) - content.append({ - "type": "text", - "text": f"以上是视频第{start_sec}秒至{end_sec}秒的均匀采样帧。\n{prompt_template}", - }) - - messages = [ - {"role": "system", "content": "你是专业的视频内容分析助手。"}, - {"role": "user", "content": content}, - ] - - try: - description = await llm.chat_vision(messages, model) - except Exception as e: - raise LLMCallError(f"GLM-4V 调用失败: {e}") from e - - timestamp = int(time.time()) - output_path = f"video-text/{source_id}/{timestamp}.txt" - await storage.upload_bytes(bucket, output_path, description.encode("utf-8"), "text/plain") - - await _send_callback(callback_url, VideoJobCallback( - job_id=job_id, status="SUCCESS", output_path=output_path - )) - logger.info(f"video_to_text job_id={job_id} output={output_path}") - except Exception as e: - logger.exception(f"video_to_text failed job_id={job_id}") - await _send_callback(callback_url, VideoJobCallback( - job_id=job_id, status="FAILED", error_message=str(e) - )) - finally: - Path(tmp_path).unlink(missing_ok=True) -``` - -- [ ] **Step 5: 运行,确认通过** - -```bash -conda run -n label pytest tests/test_video_service.py -v -``` - -Expected: `4 passed` - -- [ ] **Step 6: Commit** - -```bash -git add app/models/video_models.py app/services/video_service.py tests/test_video_service.py -git commit -m "feat: video models and service with frame extraction and video-to-text" -``` - ---- - -## Task 13: Video Router - -**Files:** -- Create: `app/routers/video.py` -- Create: `tests/test_video_router.py` - -- [ ] **Step 1: 编写失败测试** - -`tests/test_video_router.py`: - -```python -import pytest -from fastapi.testclient import TestClient -from app.main import app -from app.core.dependencies import set_clients - - -@pytest.fixture -def client(mock_llm, mock_storage): - set_clients(mock_llm, mock_storage) - return TestClient(app) - - -def test_extract_frames_returns_202(client, mock_storage): - mock_storage.get_object_size = AsyncMock(return_value=10 * 1024 * 1024) # 10MB - resp = client.post("/api/v1/video/extract-frames", json={ - "file_path": "video/202404/1.mp4", - "source_id": 10, - "job_id": 42, - "mode": "interval", - "frame_interval": 30, - }) - assert resp.status_code == 202 - assert resp.json()["job_id"] == 42 - assert "后台处理中" in resp.json()["message"] - - -def test_video_to_text_returns_202(client, mock_storage): - mock_storage.get_object_size = AsyncMock(return_value=10 * 1024 * 1024) # 10MB - resp = client.post("/api/v1/video/to-text", json={ - "file_path": "video/202404/1.mp4", - "source_id": 10, - "job_id": 43, - "start_sec": 0, - "end_sec": 60, - }) - assert resp.status_code == 202 - assert resp.json()["job_id"] == 43 - - -def test_extract_frames_rejects_oversized_video(client, mock_storage): - mock_storage.get_object_size = AsyncMock(return_value=300 * 1024 * 1024) # 300MB > 200MB limit - resp = client.post("/api/v1/video/extract-frames", json={ - "file_path": "video/202404/big.mp4", - "source_id": 10, - "job_id": 99, - "mode": "interval", - "frame_interval": 30, - }) - assert resp.status_code == 400 - assert "大小" in resp.json()["detail"] -``` - -- [ ] **Step 2: 实现 `app/routers/video.py`** - -```python -from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException - -from app.clients.llm.base import LLMClient -from app.clients.storage.base import StorageClient -from app.core.config import get_config -from app.core.dependencies import get_llm_client, get_storage_client -from app.models.video_models import ( - ExtractFramesRequest, - ExtractFramesResponse, - VideoToTextRequest, - VideoToTextResponse, -) -from app.services import video_service - -router = APIRouter(tags=["Video"]) - - -async def _check_video_size(storage: StorageClient, bucket: str, file_path: str, max_mb: int) -> None: - """在触发后台任务前校验视频文件大小,超限时抛出 HTTP 400。""" - size_bytes = await storage.get_object_size(bucket, file_path) - if size_bytes > max_mb * 1024 * 1024: - raise HTTPException( - status_code=400, - detail=f"视频文件大小超出限制(最大 {max_mb}MB,当前 {size_bytes // 1024 // 1024}MB)", - ) - - -@router.post("/video/extract-frames", response_model=ExtractFramesResponse, status_code=202) -async def extract_frames( - req: ExtractFramesRequest, - background_tasks: BackgroundTasks, - storage: StorageClient = Depends(get_storage_client), -): - cfg = get_config() - bucket = cfg["storage"]["buckets"]["source_data"] - await _check_video_size(storage, bucket, req.file_path, cfg["video"]["max_file_size_mb"]) - background_tasks.add_task( - video_service.extract_frames_background, - file_path=req.file_path, - source_id=req.source_id, - job_id=req.job_id, - mode=req.mode, - frame_interval=req.frame_interval, - storage=storage, - callback_url=cfg["backend"]["callback_url"], - bucket=bucket, - ) - return ExtractFramesResponse(message="任务已接受,后台处理中", job_id=req.job_id) - - -@router.post("/video/to-text", response_model=VideoToTextResponse, status_code=202) -async def video_to_text( - req: VideoToTextRequest, - background_tasks: BackgroundTasks, - llm: LLMClient = Depends(get_llm_client), - storage: StorageClient = Depends(get_storage_client), -): - cfg = get_config() - bucket = cfg["storage"]["buckets"]["source_data"] - await _check_video_size(storage, bucket, req.file_path, cfg["video"]["max_file_size_mb"]) - model = req.model or cfg["models"]["default_vision"] - prompt = req.prompt_template or video_service.DEFAULT_VIDEO_TO_TEXT_PROMPT - background_tasks.add_task( - video_service.video_to_text_background, - file_path=req.file_path, - source_id=req.source_id, - job_id=req.job_id, - start_sec=req.start_sec, - end_sec=req.end_sec, - model=model, - prompt_template=prompt, - frame_sample_count=cfg["video"]["frame_sample_count"], - llm=llm, - storage=storage, - callback_url=cfg["backend"]["callback_url"], - bucket=bucket, - ) - return VideoToTextResponse(message="任务已接受,后台处理中", job_id=req.job_id) -``` - -- [ ] **Step 3: 在 `app/main.py` 注册路由** - -```python -from app.routers import text, image, video -app.include_router(video.router, prefix="/api/v1") -``` - -- [ ] **Step 4: 运行测试** - -```bash -conda run -n label pytest tests/test_video_router.py -v -``` - -Expected: `3 passed` - -- [ ] **Step 5: Commit** - -```bash -git add app/routers/video.py tests/test_video_router.py app/main.py -git commit -m "feat: video router POST /api/v1/video/extract-frames and /to-text" -``` - ---- - -## Task 14: QA Models + Service - -**Files:** -- Create: `app/models/qa_models.py` -- Create: `app/services/qa_service.py` -- Create: `tests/test_qa_service.py` - -- [ ] **Step 1: 实现 `app/models/qa_models.py`** - -```python -from pydantic import BaseModel - - -class TextTripleForQA(BaseModel): - subject: str - predicate: str - object: str - source_snippet: str - - -class TextQARequest(BaseModel): - items: list[TextTripleForQA] - model: str | None = None - prompt_template: str | None = None - - -class QAPair(BaseModel): - question: str - answer: str - - -class TextQAResponse(BaseModel): - pairs: list[QAPair] - - -class ImageQuadrupleForQA(BaseModel): - subject: str - predicate: str - object: str - qualifier: str - cropped_image_path: str - - -class ImageQARequest(BaseModel): - items: list[ImageQuadrupleForQA] - model: str | None = None - prompt_template: str | None = None - - -class ImageQAPair(BaseModel): - question: str - answer: str - image_path: str - - -class ImageQAResponse(BaseModel): - pairs: list[ImageQAPair] -``` - -- [ ] **Step 2: 编写失败测试** - -`tests/test_qa_service.py`: - -```python -import pytest -from app.services.qa_service import gen_text_qa, gen_image_qa, _parse_qa_pairs -from app.models.qa_models import TextTripleForQA, ImageQuadrupleForQA -from app.core.exceptions import LLMResponseParseError, LLMCallError - -QA_JSON = '[{"question":"变压器额定电压是多少?","answer":"110kV"}]' - - -def test_parse_qa_pairs_plain_json(): - result = _parse_qa_pairs(QA_JSON) - assert len(result) == 1 - assert result[0].question == "变压器额定电压是多少?" - - -def test_parse_qa_pairs_markdown_wrapped(): - result = _parse_qa_pairs(f"```json\n{QA_JSON}\n```") - assert len(result) == 1 - - -def test_parse_qa_pairs_invalid_raises(): - with pytest.raises(LLMResponseParseError): - _parse_qa_pairs("这不是JSON") - - -@pytest.mark.asyncio -async def test_gen_text_qa(mock_llm): - mock_llm.chat.return_value = QA_JSON - items = [TextTripleForQA(subject="变压器", predicate="额定电压", object="110kV", source_snippet="额定电压为110kV")] - - result = await gen_text_qa(items=items, model="glm-4-flash", prompt_template="", llm=mock_llm) - assert len(result) == 1 - assert result[0].answer == "110kV" - - -@pytest.mark.asyncio -async def test_gen_text_qa_llm_error(mock_llm): - mock_llm.chat.side_effect = Exception("network error") - items = [TextTripleForQA(subject="A", predicate="B", object="C", source_snippet="ABC")] - - with pytest.raises(LLMCallError): - await gen_text_qa(items=items, model="glm-4-flash", prompt_template="", llm=mock_llm) - - -@pytest.mark.asyncio -async def test_gen_image_qa(mock_llm, mock_storage): - mock_llm.chat_vision.return_value = '[{"question":"图中是什么?","answer":"电缆接头"}]' - mock_storage.download_bytes.return_value = b"fake-image-bytes" - items = [ImageQuadrupleForQA( - subject="电缆接头", predicate="位于", object="配电箱", qualifier="", cropped_image_path="crops/1/0.jpg" - )] - - result = await gen_image_qa(items=items, model="glm-4v-flash", prompt_template="", llm=mock_llm, storage=mock_storage) - assert len(result) == 1 - assert result[0].image_path == "crops/1/0.jpg" - # 验证使用 download_bytes(base64),而非 presigned URL - mock_storage.download_bytes.assert_called_once_with("source-data", "crops/1/0.jpg") - # 验证发送给 GLM-4V 的消息包含 base64 data URL - call_messages = mock_llm.chat_vision.call_args[0][0] - image_content = call_messages[1]["content"][0] - assert image_content["image_url"]["url"].startswith("data:image/jpeg;base64,") -``` - -- [ ] **Step 3: 运行,确认失败** - -```bash -conda run -n label pytest tests/test_qa_service.py -v -``` - -Expected: `ImportError` - -- [ ] **Step 4: 实现 `app/services/qa_service.py`** - -```python -import base64 -import json -import logging - -from app.clients.llm.base import LLMClient -from app.clients.storage.base import StorageClient -from app.core.exceptions import LLMCallError, LLMResponseParseError, StorageDownloadError -from app.core.json_utils import parse_json_response -from app.models.qa_models import ( - ImageQAPair, - ImageQuadrupleForQA, - QAPair, - TextTripleForQA, -) - -logger = logging.getLogger(__name__) - -DEFAULT_TEXT_QA_PROMPT = """基于以下知识三元组和原文证据片段,生成高质量问答对。 -要求: -1. 问题自然、具体,不能过于宽泛 -2. 答案基于原文片段,语言流畅 -3. 每个三元组生成1-2个问答对 - -以 JSON 数组格式返回:[{"question":"...","answer":"..."}] - -三元组数据: -""" - -DEFAULT_IMAGE_QA_PROMPT = """基于图片内容和以下四元组信息,生成高质量图文问答对。 -要求: -1. 问题需要结合图片才能回答 -2. 答案基于图片中的实际内容 -3. 每个四元组生成1个问答对 - -以 JSON 数组格式返回:[{"question":"...","answer":"..."}] - -四元组信息: -""" - - -def _parse_qa_pairs(raw: str) -> list[QAPair]: - items_raw = parse_json_response(raw) - result = [] - for item in items_raw: - try: - result.append(QAPair(question=item["question"], answer=item["answer"])) - except KeyError as e: - logger.warning(f"跳过不完整问答对: {item}, error: {e}") - return result - - -async def gen_text_qa( - items: list[TextTripleForQA], - model: str, - prompt_template: str, - llm: LLMClient, -) -> list[QAPair]: - triples_text = json.dumps([i.model_dump() for i in items], ensure_ascii=False, indent=2) - messages = [ - {"role": "system", "content": "你是专业的知识问答对生成助手。"}, - {"role": "user", "content": (prompt_template or DEFAULT_TEXT_QA_PROMPT) + triples_text}, - ] - try: - raw = await llm.chat(messages, model) - except Exception as e: - raise LLMCallError(f"GLM 调用失败: {e}") from e - logger.info(f"gen_text_qa model={model} items={len(items)}") - return _parse_qa_pairs(raw) - - -async def gen_image_qa( - items: list[ImageQuadrupleForQA], - model: str, - prompt_template: str, - llm: LLMClient, - storage: StorageClient, - bucket: str = "source-data", -) -> list[ImageQAPair]: - result = [] - prompt = prompt_template or DEFAULT_IMAGE_QA_PROMPT - for item in items: - # 下载裁剪图并 base64 编码:RustFS 为内网部署,presigned URL 无法被云端 GLM-4V 访问 - try: - image_bytes = await storage.download_bytes(bucket, item.cropped_image_path) - except Exception as e: - raise StorageDownloadError(f"下载裁剪图失败 {item.cropped_image_path}: {e}") from e - b64 = base64.b64encode(image_bytes).decode() - quad_text = json.dumps( - {k: v for k, v in item.model_dump().items() if k != "cropped_image_path"}, - ensure_ascii=False, - ) - messages = [ - {"role": "system", "content": "你是专业的视觉问答对生成助手。"}, - {"role": "user", "content": [ - {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}}, - {"type": "text", "text": prompt + quad_text}, - ]}, - ] - try: - raw = await llm.chat_vision(messages, model) - except Exception as e: - raise LLMCallError(f"GLM-4V 调用失败: {e}") from e - for pair in _parse_qa_pairs(raw): - result.append(ImageQAPair(question=pair.question, answer=pair.answer, image_path=item.cropped_image_path)) - logger.info(f"gen_image_qa model={model} items={len(items)} pairs={len(result)}") - return result -``` - -- [ ] **Step 5: 运行,确认通过** - -```bash -conda run -n label pytest tests/test_qa_service.py -v -``` - -Expected: `6 passed` - -- [ ] **Step 6: Commit** - -```bash -git add app/models/qa_models.py app/services/qa_service.py tests/test_qa_service.py -git commit -m "feat: QA models and service for text and image QA generation" -``` - ---- - -## Task 15: QA Router - -**Files:** -- Create: `app/routers/qa.py` -- Create: `tests/test_qa_router.py` - -- [ ] **Step 1: 编写失败测试** - -`tests/test_qa_router.py`: - -```python -import pytest -from fastapi.testclient import TestClient -from unittest.mock import AsyncMock -from app.main import app -from app.core.dependencies import set_clients - - -@pytest.fixture -def client(mock_llm, mock_storage): - set_clients(mock_llm, mock_storage) - return TestClient(app) - - -def test_gen_text_qa_success(client, mock_llm): - mock_llm.chat = AsyncMock(return_value='[{"question":"额定电压?","answer":"110kV"}]') - resp = client.post("/api/v1/qa/gen-text", json={ - "items": [{"subject": "变压器", "predicate": "额定电压", "object": "110kV", "source_snippet": "额定电压为110kV"}], - }) - assert resp.status_code == 200 - assert resp.json()["pairs"][0]["question"] == "额定电压?" - - -def test_gen_image_qa_success(client, mock_llm, mock_storage): - mock_llm.chat_vision = AsyncMock(return_value='[{"question":"图中是什么?","answer":"接头"}]') - mock_storage.get_presigned_url.return_value = "https://example.com/crop.jpg" - resp = client.post("/api/v1/qa/gen-image", json={ - "items": [{"subject": "A", "predicate": "B", "object": "C", "qualifier": "", "cropped_image_path": "crops/1/0.jpg"}], - }) - assert resp.status_code == 200 - data = resp.json() - assert data["pairs"][0]["image_path"] == "crops/1/0.jpg" -``` - -- [ ] **Step 2: 实现 `app/routers/qa.py`** - -```python -from fastapi import APIRouter, Depends - -from app.clients.llm.base import LLMClient -from app.clients.storage.base import StorageClient -from app.core.config import get_config -from app.core.dependencies import get_llm_client, get_storage_client -from app.models.qa_models import ImageQARequest, ImageQAResponse, TextQARequest, TextQAResponse -from app.services import qa_service - -router = APIRouter(tags=["QA"]) - - -@router.post("/qa/gen-text", response_model=TextQAResponse) -async def gen_text_qa( - req: TextQARequest, - llm: LLMClient = Depends(get_llm_client), -): - cfg = get_config() - pairs = await qa_service.gen_text_qa( - items=req.items, - model=req.model or cfg["models"]["default_text"], - prompt_template=req.prompt_template or qa_service.DEFAULT_TEXT_QA_PROMPT, - llm=llm, - ) - return TextQAResponse(pairs=pairs) - - -@router.post("/qa/gen-image", response_model=ImageQAResponse) -async def gen_image_qa( - req: ImageQARequest, - llm: LLMClient = Depends(get_llm_client), - storage: StorageClient = Depends(get_storage_client), -): - cfg = get_config() - pairs = await qa_service.gen_image_qa( - items=req.items, - model=req.model or cfg["models"]["default_vision"], - prompt_template=req.prompt_template or qa_service.DEFAULT_IMAGE_QA_PROMPT, - llm=llm, - storage=storage, - bucket=cfg["storage"]["buckets"]["source_data"], - ) - return ImageQAResponse(pairs=pairs) -``` - -- [ ] **Step 3: 在 `app/main.py` 注册路由** - -```python -from app.routers import text, image, video, qa -app.include_router(qa.router, prefix="/api/v1") -``` - -- [ ] **Step 4: 运行测试** - -```bash -conda run -n label pytest tests/test_qa_router.py -v -``` - -Expected: `2 passed` - -- [ ] **Step 5: Commit** - -```bash -git add app/routers/qa.py tests/test_qa_router.py app/main.py -git commit -m "feat: QA router POST /api/v1/qa/gen-text and /gen-image" -``` - ---- - -## Task 16: Finetune Models + Service + Router - -**Files:** -- Create: `app/models/finetune_models.py` -- Create: `app/services/finetune_service.py` -- Create: `app/routers/finetune.py` -- Create: `tests/test_finetune_service.py` -- Create: `tests/test_finetune_router.py` - -- [ ] **Step 1: 实现 `app/models/finetune_models.py`** - -```python -from pydantic import BaseModel - - -class FinetuneHyperparams(BaseModel): - learning_rate: float = 1e-4 - epochs: int = 3 - - -class FinetuneStartRequest(BaseModel): - jsonl_url: str - base_model: str - hyperparams: FinetuneHyperparams = FinetuneHyperparams() - - -class FinetuneStartResponse(BaseModel): - job_id: str - - -class FinetuneStatusResponse(BaseModel): - job_id: str - status: str # RUNNING | SUCCESS | FAILED - progress: int | None = None - error_message: str | None = None -``` - -- [ ] **Step 2: 编写失败测试** - -`tests/test_finetune_service.py`: - -```python -import pytest -from unittest.mock import MagicMock -from app.services.finetune_service import start_finetune, get_finetune_status -from app.models.finetune_models import FinetuneHyperparams - - -@pytest.mark.asyncio -async def test_start_finetune(): - mock_job = MagicMock() - mock_job.id = "glm-ft-abc123" - mock_zhipuai = MagicMock() - mock_zhipuai.fine_tuning.jobs.create.return_value = mock_job - - result = await start_finetune( - jsonl_url="https://example.com/export.jsonl", - base_model="glm-4-flash", - hyperparams=FinetuneHyperparams(learning_rate=1e-4, epochs=3), - client=mock_zhipuai, - ) - assert result == "glm-ft-abc123" - mock_zhipuai.fine_tuning.jobs.create.assert_called_once() - - -@pytest.mark.asyncio -async def test_get_finetune_status_running(): - mock_job = MagicMock() - mock_job.status = "running" - mock_job.progress = 50 - mock_job.error = None - mock_zhipuai = MagicMock() - mock_zhipuai.fine_tuning.jobs.retrieve.return_value = mock_job - - result = await get_finetune_status("glm-ft-abc123", mock_zhipuai) - assert result.status == "RUNNING" - assert result.progress == 50 - assert result.job_id == "glm-ft-abc123" - - -@pytest.mark.asyncio -async def test_get_finetune_status_success(): - mock_job = MagicMock() - mock_job.status = "succeeded" - mock_job.progress = 100 - mock_job.error = None - mock_zhipuai = MagicMock() - mock_zhipuai.fine_tuning.jobs.retrieve.return_value = mock_job - - result = await get_finetune_status("glm-ft-abc123", mock_zhipuai) - assert result.status == "SUCCESS" -``` - -- [ ] **Step 3: 运行,确认失败** - -```bash -conda run -n label pytest tests/test_finetune_service.py -v -``` - -Expected: `ImportError` - -- [ ] **Step 4: 实现 `app/services/finetune_service.py`** - -```python -import logging - -from app.models.finetune_models import FinetuneHyperparams, FinetuneStatusResponse - -logger = logging.getLogger(__name__) - -_STATUS_MAP = { - "running": "RUNNING", - "succeeded": "SUCCESS", - "failed": "FAILED", -} - - -async def start_finetune( - jsonl_url: str, - base_model: str, - hyperparams: FinetuneHyperparams, - client, # ZhipuAI SDK client instance -) -> str: - job = client.fine_tuning.jobs.create( - training_file=jsonl_url, - model=base_model, - hyperparameters={ - "learning_rate_multiplier": hyperparams.learning_rate, - "n_epochs": hyperparams.epochs, - }, - ) - logger.info(f"finetune_start job_id={job.id} model={base_model}") - return job.id - - -async def get_finetune_status(job_id: str, client) -> FinetuneStatusResponse: - job = client.fine_tuning.jobs.retrieve(job_id) - status = _STATUS_MAP.get(job.status, "RUNNING") - return FinetuneStatusResponse( - job_id=job_id, - status=status, - progress=getattr(job, "progress", None), - error_message=getattr(job, "error", None), - ) -``` - -- [ ] **Step 5: 运行,确认通过** - -```bash -conda run -n label pytest tests/test_finetune_service.py -v -``` - -Expected: `3 passed` - -- [ ] **Step 6: 实现 `app/routers/finetune.py`** - -```python -from fastapi import APIRouter, Depends - -from app.clients.llm.base import LLMClient -from app.clients.llm.zhipuai_client import ZhipuAIClient -from app.core.dependencies import get_llm_client -from app.models.finetune_models import ( - FinetuneStartRequest, - FinetuneStartResponse, - FinetuneStatusResponse, -) -from app.services import finetune_service - -router = APIRouter(tags=["Finetune"]) - - -def _get_zhipuai(llm: LLMClient = Depends(get_llm_client)) -> ZhipuAIClient: - if not isinstance(llm, ZhipuAIClient): - raise RuntimeError("微调功能仅支持 ZhipuAI 后端") - return llm - - -@router.post("/finetune/start", response_model=FinetuneStartResponse) -async def start_finetune( - req: FinetuneStartRequest, - llm: ZhipuAIClient = Depends(_get_zhipuai), -): - job_id = await finetune_service.start_finetune( - jsonl_url=req.jsonl_url, - base_model=req.base_model, - hyperparams=req.hyperparams, - client=llm._client, - ) - return FinetuneStartResponse(job_id=job_id) - - -@router.get("/finetune/status/{job_id}", response_model=FinetuneStatusResponse) -async def get_finetune_status( - job_id: str, - llm: ZhipuAIClient = Depends(_get_zhipuai), -): - return await finetune_service.get_finetune_status(job_id, llm._client) -``` - -- [ ] **Step 7: 编写路由测试** - -`tests/test_finetune_router.py`: - -```python -import pytest -from fastapi.testclient import TestClient -from unittest.mock import MagicMock, patch -from app.main import app -from app.core.dependencies import set_clients -from app.clients.llm.zhipuai_client import ZhipuAIClient -from app.clients.storage.base import StorageClient - - -@pytest.fixture -def client(mock_storage): - with patch("app.clients.llm.zhipuai_client.ZhipuAI") as MockZhipuAI: - mock_sdk = MagicMock() - MockZhipuAI.return_value = mock_sdk - llm = ZhipuAIClient(api_key="test-key") - llm._mock_sdk = mock_sdk - set_clients(llm, mock_storage) - yield TestClient(app), mock_sdk - - -def test_start_finetune(client): - test_client, mock_sdk = client - mock_job = MagicMock() - mock_job.id = "glm-ft-xyz" - mock_sdk.fine_tuning.jobs.create.return_value = mock_job - - resp = test_client.post("/api/v1/finetune/start", json={ - "jsonl_url": "https://example.com/export.jsonl", - "base_model": "glm-4-flash", - "hyperparams": {"learning_rate": 1e-4, "epochs": 3}, - }) - assert resp.status_code == 200 - assert resp.json()["job_id"] == "glm-ft-xyz" - - -def test_get_finetune_status(client): - test_client, mock_sdk = client - mock_job = MagicMock() - mock_job.status = "running" - mock_job.progress = 30 - mock_job.error = None - mock_sdk.fine_tuning.jobs.retrieve.return_value = mock_job - - resp = test_client.get("/api/v1/finetune/status/glm-ft-xyz") - assert resp.status_code == 200 - data = resp.json() - assert data["status"] == "RUNNING" - assert data["progress"] == 30 -``` - -- [ ] **Step 8: 在 `app/main.py` 注册路由(最终状态)** - -```python -from app.routers import text, image, video, qa, finetune - -app.include_router(text.router, prefix="/api/v1") -app.include_router(image.router, prefix="/api/v1") -app.include_router(video.router, prefix="/api/v1") -app.include_router(qa.router, prefix="/api/v1") -app.include_router(finetune.router, prefix="/api/v1") -``` - -- [ ] **Step 9: 运行全部测试** - -```bash -conda run -n label pytest tests/ -v -``` - -Expected: 所有测试通过,无失败 - -- [ ] **Step 10: Commit** - -```bash -git add app/models/finetune_models.py app/services/finetune_service.py app/routers/finetune.py tests/test_finetune_service.py tests/test_finetune_router.py app/main.py -git commit -m "feat: finetune models, service, and router - complete all endpoints" -``` - ---- - -## Task 17: 部署文件 - -**Files:** -- Create: `Dockerfile` -- Create: `docker-compose.yml` - -- [ ] **Step 1: 创建 `Dockerfile`** - -```dockerfile -FROM python:3.12-slim - -WORKDIR /app - -# OpenCV 系统依赖 -RUN apt-get update && apt-get install -y \ - libgl1 \ - libglib2.0-0 \ - && rm -rf /var/lib/apt/lists/* - -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - -COPY app/ ./app/ -COPY config.yaml . -COPY .env . - -EXPOSE 8000 - -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] -``` - -- [ ] **Step 2: 创建 `docker-compose.yml`** - -```yaml -version: "3.9" - -services: - ai-service: - build: . - ports: - - "8000:8000" - env_file: - - .env - depends_on: - - rustfs - networks: - - label-net - restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/health"] - interval: 30s - timeout: 5s - retries: 3 - start_period: 10s - - rustfs: - image: minio/minio:latest - command: server /data --console-address ":9001" - ports: - - "9000:9000" - - "9001:9001" - environment: - MINIO_ROOT_USER: minioadmin - MINIO_ROOT_PASSWORD: minioadmin - volumes: - - rustfs-data:/data - networks: - - label-net - -volumes: - rustfs-data: - -networks: - label-net: - driver: bridge -``` - -- [ ] **Step 3: 验证 Docker 构建** - -```bash -docker build -t label-ai-service:dev . -``` - -Expected: 镜像构建成功,无错误 - -- [ ] **Step 4: 运行全量测试,最终确认** - -```bash -conda run -n label pytest tests/ -v --tb=short -``` - -Expected: 所有测试通过 - -- [ ] **Step 5: Commit** - -```bash -git add Dockerfile docker-compose.yml -git commit -m "feat: Dockerfile and docker-compose for containerized deployment" -``` - ---- - -## 自审检查结果 - -**Spec coverage:** -- ✅ 文本三元组提取(TXT/PDF/DOCX)— Task 8-9 -- ✅ 图像四元组提取 + bbox 裁剪 — Task 10-11 -- ✅ 视频帧提取(interval/keyframe)— Task 12-13 -- ✅ 视频转文本(BackgroundTask)— Task 12-13 -- ✅ 文本问答对生成 — Task 14-15 -- ✅ 图像问答对生成 — Task 14-15 -- ✅ 微调任务提交与状态查询 — Task 16 -- ✅ LLMClient / StorageClient ABC 适配层 — Task 4-5 -- ✅ config.yaml + .env 分层配置 — Task 2 -- ✅ 结构化日志 + 请求日志 — Task 3 -- ✅ 全局异常处理 — Task 3 -- ✅ Swagger 文档(FastAPI 自动生成) — Task 6 -- ✅ Dockerfile + docker-compose — Task 17 -- ✅ pytest 测试覆盖全部 service 和 router — 各 Task - -**类型一致性:** `TripleItem.source_offset` 在 Task 7 定义,Task 8 使用;`VideoJobCallback` 在 Task 12 定义,Task 12 service 使用 — 一致。 - -**占位符:** 无 TBD / TODO,所有步骤均含完整代码。 - ---- - -## GSTACK REVIEW REPORT - -| Review | Trigger | Why | Runs | Status | Findings | -|--------|---------|-----|------|--------|----------| -| Eng Review | `/plan-eng-review` | Architecture & tests (required) | 1 | ✅ DONE | 4 architecture issues found and resolved (see below) | -| CEO Review | `/plan-ceo-review` | Scope & strategy | 0 | — | — | -| Codex Review | `/codex review` | Independent 2nd opinion | 0 | — | — | -| Design Review | `/plan-design-review` | UI/UX gaps | 0 | — | N/A(纯后端服务) | - -**VERDICT:** ENG REVIEW COMPLETE - -### Eng Review 发现和处理结果 - -| # | 问题 | 决策 | 影响文件 | -|---|------|------|---------| -| 1 | `asyncio.get_event_loop()` 在 Python 3.10+ async 上下文中已弃用 | **保持不变**(用户明确选择) | Task 4, Task 5 | -| 2 | 图像 QA 使用 presigned URL,但 RustFS 内网地址云端 GLM-4V 不可达 | **改为 base64**:download_bytes → base64 encode | Task 14 qa_service, test | -| 3 | 缺少 `/health` 健康检查端点 | **已添加**:`GET /health` + Docker healthcheck | Task 6 main.py, Task 17 docker-compose | -| 4 | 视频全量下载到内存,大文件 OOM 风险 | **加文件大小限制**:`get_object_size()` 路由层校验,可通过 `MAX_VIDEO_SIZE_MB` env 配置 | Task 5 StorageClient, Task 13 router | - -### 代码质量修复 - -| 问题 | 修复 | -|------|------| -| `image_service.py` 中 `except (KeyError, TypeError, Exception)` 冗余 | 改为 `except Exception` | -| `test_qa_service.py` 断言 `get_presigned_url` 被调用 | 改为断言 `download_bytes` 调用 + 验证 base64 data URL | diff --git a/docs/superpowers/specs/2026-04-10-ai-service-design.md b/docs/superpowers/specs/2026-04-10-ai-service-design.md deleted file mode 100644 index 55b01bb..0000000 --- a/docs/superpowers/specs/2026-04-10-ai-service-design.md +++ /dev/null @@ -1,835 +0,0 @@ -# 知识图谱智能标注平台 — AI 服务设计文档 - -> 版本:v1.0 | 日期:2026-04-10 -> 运行时:Python 3.12.13(conda `label` 环境)| 框架:FastAPI -> 上游系统:label-backend(Java Spring Boot)| 模型:ZhipuAI GLM 系列 - ---- - -## 一、项目定位 - -AI 服务(`label_ai_service`)是标注平台的智能计算层,独立部署为 Python FastAPI 服务,接收 Java 后端调用,完成以下核心任务: - -| 能力 | 说明 | -|------|------| -| 文本三元组提取 | 从 TXT / PDF / DOCX 文档中提取 subject / predicate / object + 原文定位信息 | -| 图像四元组提取 | 调用 GLM-4V 分析图片,提取四元组 + bbox 坐标,自动裁剪区域图 | -| 视频帧提取 | OpenCV 按间隔或关键帧模式抽帧,帧图上传 RustFS | -| 视频转文本 | GLM-4V 理解视频片段,输出结构化文字描述,降维为文本标注流程 | -| 问答对生成 | 基于三元组/四元组 + 原文/图像证据,生成 GLM 微调格式候选问答对 | -| 微调任务管理 | 向 ZhipuAI 提交微调任务、查询状态 | - -系统只有两条标注流水线(文本线、图片线),视频是两种预处理入口,不构成第三条流水线。 - ---- - -## 二、整体架构 - -### 2.1 在平台中的位置 - -``` - ┌─────────────┐ - │ Nginx 反代 │ - └──────┬──────┘ - ┌─────────────┼─────────────┐ - ▼ ▼ ▼ - ┌─────────┐ ┌──────────┐ ┌──────────┐ - │ Vue3 前端│ │ Spring │ │ FastAPI │ - │ (静态) │ │ Boot 后端 │ │ AI 服务 │◄── 本文档范围 - └─────────┘ └────┬─────┘ └────┬─────┘ - │ │ - ┌───────────┼──────────────┤ - ▼ ▼ ▼ - ┌──────────┐ ┌────────┐ ┌────────────┐ - │PostgreSQL│ │ Redis │ │ RustFS │ - └──────────┘ └────────┘ └────────────┘ -``` - -AI 服务**不直接访问数据库**,只通过: -- **RustFS S3 API**:读取原始文件、写入处理结果 -- **ZhipuAI API**:调用 GLM 系列模型 -- **Java 后端回调接口**:视频异步任务完成后回传结果 - -### 2.2 目录结构 - -``` -label_ai_service/ -├── app/ -│ ├── main.py # FastAPI 应用入口,注册路由、lifespan -│ ├── core/ -│ │ ├── config.py # YAML + .env 分层配置,lru_cache 单例 -│ │ ├── logging.py # 统一结构化日志配置 -│ │ ├── exceptions.py # 自定义异常类 + 全局异常处理器 -│ │ └── dependencies.py # FastAPI Depends 工厂函数 -│ ├── clients/ -│ │ ├── llm/ -│ │ │ ├── base.py # LLMClient ABC(抽象接口) -│ │ │ └── zhipuai_client.py # ZhipuAI 实现 -│ │ └── storage/ -│ │ ├── base.py # StorageClient ABC(抽象接口) -│ │ └── rustfs_client.py # RustFS S3 兼容实现(boto3) -│ ├── services/ -│ │ ├── text_service.py # 文档解析 + 三元组提取 -│ │ ├── image_service.py # 图像四元组提取 + bbox 裁剪 -│ │ ├── video_service.py # OpenCV 抽帧 + 视频转文本 -│ │ ├── qa_service.py # 文本/图像问答对生成 -│ │ └── finetune_service.py # 微调任务提交与状态查询 -│ ├── routers/ -│ │ ├── text.py # POST /api/v1/text/extract -│ │ ├── image.py # POST /api/v1/image/extract -│ │ ├── video.py # POST /api/v1/video/extract-frames -│ │ │ # POST /api/v1/video/to-text -│ │ ├── qa.py # POST /api/v1/qa/gen-text -│ │ │ # POST /api/v1/qa/gen-image -│ │ └── finetune.py # POST /api/v1/finetune/start -│ │ # GET /api/v1/finetune/status/{jobId} -│ └── models/ -│ ├── text_models.py # 三元组请求/响应 schema -│ ├── image_models.py # 四元组请求/响应 schema -│ ├── video_models.py # 视频处理请求/响应 schema -│ ├── qa_models.py # 问答对请求/响应 schema -│ └── finetune_models.py # 微调请求/响应 schema -├── config.yaml # 非敏感配置(提交 git) -├── .env # 密钥与环境差异项(提交 git) -├── requirements.txt -├── Dockerfile -└── docker-compose.yml -``` - ---- - -## 三、配置设计 - -### 3.1 分层配置原则 - -| 文件 | 职责 | 提交 git | -|------|------|----------| -| `config.yaml` | 稳定配置:端口、路径规范、模型名、桶名、视频参数 | ✅ | -| `.env` | 环境差异项:密钥、服务地址 | ✅ | - -环境变量优先级高于 `config.yaml`,Docker Compose 通过 `env_file` 加载 `.env`,本地开发由 `python-dotenv` 加载。 - -### 3.2 `config.yaml` - -```yaml -server: - port: 8000 - log_level: INFO - -storage: - buckets: - source_data: "source-data" - finetune_export: "finetune-export" - -backend: {} # callback_url 由 .env 注入 - -video: - frame_sample_count: 8 # 视频转文本时均匀抽取的代表帧数 - max_file_size_mb: 200 # 视频文件大小上限(超过则拒绝,防止 OOM) - -models: - default_text: "glm-4-flash" - default_vision: "glm-4v-flash" -``` - -### 3.3 `.env` - -```ini -ZHIPUAI_API_KEY=your-zhipuai-api-key -STORAGE_ACCESS_KEY=minioadmin -STORAGE_SECRET_KEY=minioadmin -STORAGE_ENDPOINT=http://rustfs:9000 -BACKEND_CALLBACK_URL=http://backend:8080/internal/video-job/callback -# MAX_VIDEO_SIZE_MB=200 # 可选,覆盖 config.yaml 中的视频大小上限 -``` - -### 3.4 config 模块实现 - -```python -# core/config.py -import os, yaml -from functools import lru_cache -from pathlib import Path -from dotenv import load_dotenv - -_ROOT = Path(__file__).parent.parent.parent - -# 环境变量 → YAML 路径映射 -_ENV_OVERRIDES = { - "ZHIPUAI_API_KEY": ["zhipuai", "api_key"], - "STORAGE_ACCESS_KEY": ["storage", "access_key"], - "STORAGE_SECRET_KEY": ["storage", "secret_key"], - "STORAGE_ENDPOINT": ["storage", "endpoint"], - "BACKEND_CALLBACK_URL": ["backend", "callback_url"], - "LOG_LEVEL": ["server", "log_level"], - "MAX_VIDEO_SIZE_MB": ["video", "max_file_size_mb"], -} - -def _set_nested(d: dict, keys: list[str], value: str): - for k in keys[:-1]: - d = d.setdefault(k, {}) - d[keys[-1]] = value - -@lru_cache(maxsize=1) -def get_config() -> dict: - load_dotenv(_ROOT / ".env") # 1. 加载 .env - with open(_ROOT / "config.yaml", encoding="utf-8") as f: - cfg = yaml.safe_load(f) # 2. 读取 YAML - for env_key, yaml_path in _ENV_OVERRIDES.items(): # 3. 环境变量覆盖 - val = os.environ.get(env_key) - if val: - _set_nested(cfg, yaml_path, val) - _validate(cfg) - return cfg - -def _validate(cfg: dict): - checks = [ - (["zhipuai", "api_key"], "ZHIPUAI_API_KEY"), - (["storage", "access_key"], "STORAGE_ACCESS_KEY"), - (["storage", "secret_key"], "STORAGE_SECRET_KEY"), - ] - for path, name in checks: - val = cfg - for k in path: - val = (val or {}).get(k, "") - if not val: - raise RuntimeError(f"缺少必要配置项:{name}") -``` - ---- - -## 四、适配层设计 - -### 4.1 LLM 适配层 - -```python -# clients/llm/base.py -from abc import ABC, abstractmethod - -class LLMClient(ABC): - @abstractmethod - async def chat(self, messages: list[dict], model: str, **kwargs) -> str: - """纯文本对话,返回模型输出文本""" - - @abstractmethod - async def chat_vision(self, messages: list[dict], model: str, **kwargs) -> str: - """多模态对话(图文混合输入),返回模型输出文本""" -``` - -```python -# clients/llm/zhipuai_client.py -import asyncio -from zhipuai import ZhipuAI -from .base import LLMClient - -class ZhipuAIClient(LLMClient): - def __init__(self, api_key: str): - self._client = ZhipuAI(api_key=api_key) - - async def chat(self, messages: list[dict], model: str, **kwargs) -> str: - loop = asyncio.get_event_loop() - resp = await loop.run_in_executor( - None, - lambda: self._client.chat.completions.create( - model=model, messages=messages, **kwargs - ), - ) - return resp.choices[0].message.content - - async def chat_vision(self, messages: list[dict], model: str, **kwargs) -> str: - # GLM-4V 与文本接口相同,通过 image_url type 区分图文消息 - return await self.chat(messages, model, **kwargs) -``` - -**扩展**:替换 GLM 只需新增 `class OpenAIClient(LLMClient)` 并在 `lifespan` 中注入,services 层零修改。 - -### 4.2 Storage 适配层 - -```python -# clients/storage/base.py -from abc import ABC, abstractmethod - -class StorageClient(ABC): - @abstractmethod - async def download_bytes(self, bucket: str, path: str) -> bytes: ... - - @abstractmethod - async def upload_bytes( - self, bucket: str, path: str, data: bytes, - content_type: str = "application/octet-stream" - ) -> None: ... - - @abstractmethod - def get_presigned_url(self, bucket: str, path: str, expires: int = 3600) -> str: ... -``` - -```python -# clients/storage/rustfs_client.py -import asyncio -import boto3 -from .base import StorageClient - -class RustFSClient(StorageClient): - def __init__(self, endpoint: str, access_key: str, secret_key: str): - self._s3 = boto3.client( - "s3", - endpoint_url=endpoint, - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - ) - - async def download_bytes(self, bucket: str, path: str) -> bytes: - loop = asyncio.get_event_loop() - resp = await loop.run_in_executor( - None, lambda: self._s3.get_object(Bucket=bucket, Key=path) - ) - return resp["Body"].read() - - async def upload_bytes(self, bucket, path, data, content_type="application/octet-stream"): - loop = asyncio.get_event_loop() - await loop.run_in_executor( - None, - lambda: self._s3.put_object( - Bucket=bucket, Key=path, Body=data, ContentType=content_type - ), - ) - - def get_presigned_url(self, bucket: str, path: str, expires: int = 3600) -> str: - return self._s3.generate_presigned_url( - "get_object", - Params={"Bucket": bucket, "Key": path}, - ExpiresIn=expires, - ) -``` - -### 4.3 依赖注入 - -```python -# core/dependencies.py -from app.clients.llm.base import LLMClient -from app.clients.storage.base import StorageClient - -_llm_client: LLMClient | None = None -_storage_client: StorageClient | None = None - -def set_clients(llm: LLMClient, storage: StorageClient): - global _llm_client, _storage_client - _llm_client, _storage_client = llm, storage - -def get_llm_client() -> LLMClient: - return _llm_client - -def get_storage_client() -> StorageClient: - return _storage_client -``` - -```python -# main.py(lifespan 初始化) -from contextlib import asynccontextmanager -from fastapi import FastAPI -from app.core.config import get_config -from app.core.dependencies import set_clients -from app.clients.llm.zhipuai_client import ZhipuAIClient -from app.clients.storage.rustfs_client import RustFSClient - -@asynccontextmanager -async def lifespan(app: FastAPI): - cfg = get_config() - set_clients( - llm=ZhipuAIClient(api_key=cfg["zhipuai"]["api_key"]), - storage=RustFSClient( - endpoint=cfg["storage"]["endpoint"], - access_key=cfg["storage"]["access_key"], - secret_key=cfg["storage"]["secret_key"], - ), - ) - yield - -app = FastAPI(title="Label AI Service", lifespan=lifespan) -``` - ---- - -## 五、API 接口设计 - -统一前缀:`/api/v1`。FastAPI 自动生成 Swagger 文档(`/docs`)。 - -### 5.0 健康检查 - -**`GET /health`** - -```json -// 响应(200 OK) -{"status": "ok"} -``` - -用于 Docker healthcheck、Nginx 上游探测、运维监控。无需认证,不访问外部依赖。 - -### 5.1 文本三元组提取 - -**`POST /api/v1/text/extract`** - -```json -// 请求 -{ - "file_path": "text/202404/123.txt", - "file_name": "设备规范.txt", - "model": "glm-4-flash", - "prompt_template": "..." // 可选,不传使用 config 默认 -} - -// 响应 -{ - "items": [ - { - "subject": "变压器", - "predicate": "额定电压", - "object": "110kV", - "source_snippet": "该变压器额定电压为110kV,...", - "source_offset": {"start": 120, "end": 280} - } - ] -} -``` - -### 5.2 图像四元组提取 - -**`POST /api/v1/image/extract`** - -```json -// 请求 -{ - "file_path": "image/202404/456.jpg", - "task_id": 789, - "model": "glm-4v-flash", - "prompt_template": "..." -} - -// 响应 -{ - "items": [ - { - "subject": "电缆接头", - "predicate": "位于", - "object": "配电箱左侧", - "qualifier": "2024年检修现场", - "bbox": {"x": 10, "y": 20, "w": 100, "h": 80}, - "cropped_image_path": "crops/789/0.jpg" - } - ] -} -``` - -裁剪图由 AI 服务自动完成并上传 RustFS,`cropped_image_path` 直接写入响应。 - -### 5.3 视频帧提取(异步) - -**`POST /api/v1/video/extract-frames`** - -```json -// 请求 -{ - "file_path": "video/202404/001.mp4", - "source_id": 10, - "job_id": 42, - "mode": "interval", // interval | keyframe - "frame_interval": 30 // interval 模式专用,单位:帧数 -} - -// 立即响应(202 Accepted) -{ - "message": "任务已接受,后台处理中", - "job_id": 42 -} -``` - -后台完成后,AI 服务调用 Java 后端回调接口: - -```json -POST {BACKEND_CALLBACK_URL} -{ - "job_id": 42, - "status": "SUCCESS", - "frames": [ - {"frame_index": 0, "time_sec": 0.0, "frame_path": "frames/10/0.jpg"}, - {"frame_index": 30, "time_sec": 1.0, "frame_path": "frames/10/1.jpg"} - ], - "error_message": null -} -``` - -### 5.4 视频转文本(异步) - -**`POST /api/v1/video/to-text`** - -```json -// 请求 -{ - "file_path": "video/202404/001.mp4", - "source_id": 10, - "job_id": 43, - "start_sec": 0, - "end_sec": 120, - "model": "glm-4v-flash", - "prompt_template": "..." -} - -// 立即响应(202 Accepted) -{ - "message": "任务已接受,后台处理中", - "job_id": 43 -} -``` - -后台完成后回调: - -```json -POST {BACKEND_CALLBACK_URL} -{ - "job_id": 43, - "status": "SUCCESS", - "output_path": "video-text/10/1712800000.txt", - "error_message": null -} -``` - -### 5.5 文本问答对生成 - -**`POST /api/v1/qa/gen-text`** - -```json -// 请求 -{ - "items": [ - { - "subject": "变压器", - "predicate": "额定电压", - "object": "110kV", - "source_snippet": "该变压器额定电压为110kV,..." - } - ], - "model": "glm-4-flash", - "prompt_template": "..." -} - -// 响应 -{ - "pairs": [ - { - "question": "变压器的额定电压是多少?", - "answer": "该变压器额定电压为110kV。" - } - ] -} -``` - -### 5.6 图像问答对生成 - -**`POST /api/v1/qa/gen-image`** - -```json -// 请求 -{ - "items": [ - { - "subject": "电缆接头", - "predicate": "位于", - "object": "配电箱左侧", - "qualifier": "2024年检修现场", - "cropped_image_path": "crops/789/0.jpg" - } - ], - "model": "glm-4v-flash", - "prompt_template": "..." -} - -// 响应 -{ - "pairs": [ - { - "question": "图中电缆接头位于何处?", - "answer": "图中电缆接头位于配电箱左侧。", - "image_path": "crops/789/0.jpg" - } - ] -} -``` - -图像 QA 生成时,AI 服务通过 `storage.download_bytes` 重新下载裁剪图,base64 编码后直接嵌入多模态消息,避免 RustFS 内网 presigned URL 无法被云端 GLM-4V 访问的问题。 - -### 5.7 提交微调任务 - -**`POST /api/v1/finetune/start`** - -```json -// 请求 -{ - "jsonl_url": "https://rustfs.example.com/finetune-export/export/xxx.jsonl", - "base_model": "glm-4-flash", - "hyperparams": { - "learning_rate": 1e-4, - "epochs": 3 - } -} - -// 响应 -{ - "job_id": "glm-ft-xxxxxx" -} -``` - -### 5.8 查询微调状态 - -**`GET /api/v1/finetune/status/{jobId}`** - -```json -// 响应 -{ - "job_id": "glm-ft-xxxxxx", - "status": "RUNNING", // RUNNING | SUCCESS | FAILED - "progress": 45, - "error_message": null -} -``` - ---- - -## 六、Service 层设计 - -### 6.1 text_service — 文档解析 + 三元组提取 - -``` -1. storage.download_bytes("source-data", file_path) → bytes -2. 按扩展名路由解析器: - .txt → decode("utf-8") - .pdf → pdfplumber.open() 提取全文 - .docx → python-docx 遍历段落 -3. 拼装 Prompt(系统模板 + 文档正文) -4. llm.chat(messages, model) → JSON 字符串 -5. 解析 JSON → 校验字段完整性 → 返回 TripleList -``` - -解析器注册表(消除 if-else): - -```python -PARSERS: dict[str, Callable[[bytes], str]] = { - ".txt": parse_txt, - ".pdf": parse_pdf, - ".docx": parse_docx, -} -def extract_text(data: bytes, filename: str) -> str: - ext = Path(filename).suffix.lower() - if ext not in PARSERS: - raise UnsupportedFileTypeError(ext) - return PARSERS[ext](data) -``` - -### 6.2 image_service — 四元组提取 + bbox 裁剪 - -``` -1. storage.download_bytes("source-data", file_path) → bytes -2. 图片 bytes 转 base64,构造 GLM-4V image_url 消息 -3. llm.chat_vision(messages, model) → JSON 字符串 -4. 解析四元组(含 bbox) -5. 按 bbox 裁剪: - numpy 解码 bytes → cv2 裁剪区域 → cv2.imencode(".jpg") → bytes -6. storage.upload_bytes("source-data", f"crops/{task_id}/{i}.jpg", ...) -7. 返回 QuadrupleList(含 cropped_image_path) -``` - -### 6.3 video_service — OpenCV 抽帧 + 视频转文本 - -**抽帧(BackgroundTask)**: - -``` -0. storage.get_object_size(bucket, file_path) → 字节数 - 超过 video.max_file_size_mb 限制 → 回调 FAILED(路由层提前校验,返回 400) -1. storage.download_bytes → bytes → 写入 tempfile -2. cv2.VideoCapture 打开临时文件 -3. interval 模式:按 frame_interval 步进读帧 - keyframe 模式:逐帧计算与前帧的像素差均值,差值超过阈值则判定为场景切换关键帧 - (OpenCV 无原生 I 帧检测,用帧差分近似实现) -4. 每帧 cv2.imencode(".jpg") → upload_bytes("source-data", f"frames/{source_id}/{i}.jpg") -5. 清理临时文件 -6. httpx.post(BACKEND_CALLBACK_URL, json={job_id, status="SUCCESS", frames=[...]}) -异常:回调 status="FAILED", error_message=str(e) -``` - -**视频转文本(BackgroundTask)**: - -``` -1. download_bytes → tempfile -2. cv2.VideoCapture 在 start_sec~end_sec 区间均匀抽 frame_sample_count 帧 -3. 每帧转 base64,构造多图 GLM-4V 消息(含时序说明) -4. llm.chat_vision → 文字描述 -5. 描述文本 upload_bytes("source-data", f"video-text/{source_id}/{timestamp}.txt") -6. 回调 Java 后端:output_path + status="SUCCESS" -``` - -### 6.4 qa_service — 问答对生成 - -``` -文本 QA: - 批量拼入三元组 + source_snippet 到 Prompt - llm.chat(messages, model) → 解析问答对 JSON → QAPairList - -图像 QA: - 遍历四元组列表 - storage.download_bytes(bucket, cropped_image_path) → bytes → base64 编码 - 构造多模态消息(data:image/jpeg;base64,... + 问题指令) - llm.chat_vision → 解析 → 含 image_path 的 QAPairList - (注:不使用 presigned URL,因 RustFS 为内网部署,云端 GLM-4V 无法访问内网地址) -``` - -### 6.5 finetune_service — GLM 微调对接 - -微调 API 属 ZhipuAI 专有能力,无需抽象为通用接口。`finetune_service` 直接依赖 `ZhipuAIClient`(通过依赖注入获取后强转类型),不走 `LLMClient` ABC。 - -``` -提交: - zhipuai_client._client.fine_tuning.jobs.create( - training_file=jsonl_url, - model=base_model, - hyperparameters=hyperparams - ) → job_id - -查询: - zhipuai_client._client.fine_tuning.jobs.retrieve(job_id) - → 映射 status 枚举 RUNNING / SUCCESS / FAILED -``` - ---- - -## 七、日志设计 - -- 使用标准库 `logging`,JSON 格式输出,与 uvicorn 集成 -- 每个请求记录:`method / path / status_code / duration_ms` -- 每次 GLM 调用记录:`model / prompt_tokens / completion_tokens / duration_ms` -- BackgroundTask 记录:`job_id / stage / status / error` -- **不记录文件内容原文**(防止敏感数据泄露) - ---- - -## 八、异常处理 - -| 异常类 | HTTP 状态码 | 场景 | -|--------|------------|------| -| `UnsupportedFileTypeError` | 400 | 文件格式不支持 | -| `StorageDownloadError` | 502 | RustFS 不可达或文件不存在 | -| `LLMResponseParseError` | 502 | GLM 返回非合法 JSON | -| `LLMCallError` | 503 | GLM API 限流 / 超时 | -| 未捕获异常 | 500 | 记录完整 traceback | - -所有错误响应统一格式: - -```json -{"code": "ERROR_CODE", "message": "具体描述"} -``` - ---- - -## 九、RustFS 存储路径规范 - -| 资源类型 | 存储桶 | 路径格式 | -|----------|--------|----------| -| 上传文本文件 | `source-data` | `text/{年月}/{source_id}.txt` | -| 上传图片 | `source-data` | `image/{年月}/{source_id}.jpg` | -| 上传视频 | `source-data` | `video/{年月}/{source_id}.mp4` | -| 视频帧模式抽取的帧图 | `source-data` | `frames/{source_id}/{frame_index}.jpg` | -| 视频片段转译输出的文本 | `source-data` | `video-text/{source_id}/{timestamp}.txt` | -| 图像/帧 bbox 裁剪图 | `source-data` | `crops/{task_id}/{item_index}.jpg` | -| 导出 JSONL 文件 | `finetune-export` | `export/{batchUuid}.jsonl` | - ---- - -## 十、部署设计 - -### 10.1 Dockerfile - -```dockerfile -FROM python:3.12-slim - -WORKDIR /app - -# OpenCV 系统依赖 -RUN apt-get update && apt-get install -y \ - libgl1 libglib2.0-0 \ - && rm -rf /var/lib/apt/lists/* - -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - -COPY app/ ./app/ -COPY config.yaml . -COPY .env . - -EXPOSE 8000 -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] -``` - -### 10.2 docker-compose.yml(ai-service 片段) - -```yaml -ai-service: - build: ./label_ai_service - ports: - - "8000:8000" - env_file: - - ./label_ai_service/.env - depends_on: - - rustfs - - backend - networks: - - label-net - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/health"] - interval: 30s - timeout: 5s - retries: 3 - start_period: 10s -``` - -### 10.3 requirements.txt - -``` -fastapi>=0.111 -uvicorn[standard]>=0.29 -pydantic>=2.7 -python-dotenv>=1.0 -pyyaml>=6.0 -zhipuai>=2.1 -boto3>=1.34 -pdfplumber>=0.11 -python-docx>=1.1 -opencv-python-headless>=4.9 -numpy>=1.26 -httpx>=0.27 -``` - ---- - -## 十一、关键设计决策 - -### 11.1 为何 LLMClient / StorageClient 使用 ABC - -当前只实现 ZhipuAI 和 RustFS,但模型选型和对象存储可能随项目演进变化。ABC 约束接口契约,保证替换实现时 services 层零修改。注入点集中在 `lifespan`,一处修改全局生效。 - -### 11.2 为何 ZhipuAI 同步 SDK 在线程池中调用 - -ZhipuAI 官方 SDK 是同步阻塞调用,直接 `await` 不生效。通过 `loop.run_in_executor(None, ...)` 在线程池中运行,不阻塞 FastAPI 的 asyncio 事件循环,保持并发处理能力。 - -### 11.3 为何视频任务使用 BackgroundTasks 而非 Celery - -项目规模适中,视频处理任务由 ADMIN 手动触发,并发量可控。FastAPI `BackgroundTasks` 无需额外中间件(Redis 队列、Celery Worker),部署简单,任务状态通过回调接口传递给 Java 后端管理,符合整体架构风格。 - -### 11.4 为何图像 QA 生成用 base64 而非 presigned URL - -RustFS 部署在 Docker 内网(`http://rustfs:9000`),presigned URL 指向内网地址,云端 GLM-4V API 无法访问,会导致所有图像 QA 请求失败。因此将裁剪图重新下载为 bytes,base64 编码后直接嵌入多模态消息体,与 `image_service` 处理原图的方式保持一致,无需 RustFS 有公网地址。 - -### 11.5 config.yaml + .env 分层配置的原因 - -`config.yaml` 存结构化、稳定的非敏感配置,可读性好,适合 git 追踪变更历史;`.env` 存密钥和环境差异项,格式简单,Docker `env_file` 原生支持,本地开发和容器启动行为一致,无需维护两套配置文件。 - ---- - -*文档版本:v1.0 | 生成日期:2026-04-10* diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 78c5011..0000000 --- a/pytest.ini +++ /dev/null @@ -1,3 +0,0 @@ -[pytest] -asyncio_mode = auto -testpaths = tests diff --git a/specs/001-ai-service-requirements/checklists/requirements.md b/specs/001-ai-service-requirements/checklists/requirements.md deleted file mode 100644 index bc0c9a6..0000000 --- a/specs/001-ai-service-requirements/checklists/requirements.md +++ /dev/null @@ -1,38 +0,0 @@ -# Specification Quality Checklist: AI 服务需求文档 - -**Purpose**: Validate specification completeness and quality before proceeding to planning -**Created**: 2026-04-10 -**Feature**: [../spec.md](../spec.md) - -## Content Quality - -- [x] No implementation details (languages, frameworks, APIs) — 注:Technical Environment 节单独列出,明确标注为已确认技术约束,不影响需求层表述 -- [x] Focused on user value and business needs -- [x] Written for non-technical stakeholders(业务场景均以 ADMIN/标注员/系统为视角描述) -- [x] All mandatory sections completed - -## Requirement Completeness - -- [x] No [NEEDS CLARIFICATION] markers remain -- [x] Requirements are testable and unambiguous(每条 FR 含明确行为和可验证条件) -- [x] Success criteria are measurable(SC 含具体时间、像素精度等量化指标) -- [x] Success criteria are technology-agnostic (no implementation details) -- [x] All acceptance scenarios are defined(8 个 User Story 均含 Acceptance Scenarios) -- [x] Edge cases are identified(6 条边界情况,覆盖文件损坏、空结果、并发等) -- [x] Scope is clearly bounded(明确:不处理上传逻辑,不管理训练资源,不对外暴露) -- [x] Dependencies and assumptions identified(9 条假设,含内外网访问、ZhipuAI 托管等) - -## Feature Readiness - -- [x] All functional requirements have clear acceptance criteria(FR-001~FR-018 逐一可追溯到 User Story 验收场景) -- [x] User scenarios cover primary flows(P1: 文本/图像提取;P2: 视频/QA;P3: 微调/健康检查) -- [x] Feature meets measurable outcomes defined in Success Criteria -- [x] No implementation details leak into specification(Technical Environment 节独立,不混入 FR/SC) - -## Notes - -- Technical Environment 节超出传统需求文档范围,但用户明确要求包含环境约束(Python 3.12.13、FastAPI、conda label 环境),已单独成节并说明其性质。 -- SC-009(测试覆盖)为工程质量指标,非用户感知需求,但对服务可靠性有实质影响,保留。 -- 所有 [NEEDS CLARIFICATION] 均已通过合理默认值或设计文档确认,无待用户回答的开放问题。 - -**VERDICT**: ✅ 规格就绪,可进行 `/speckit.clarify` 或 `/speckit.plan` diff --git a/specs/001-ai-service-requirements/contracts/api.md b/specs/001-ai-service-requirements/contracts/api.md deleted file mode 100644 index 3a93151..0000000 --- a/specs/001-ai-service-requirements/contracts/api.md +++ /dev/null @@ -1,333 +0,0 @@ -# API Contract: AI 服务接口定义 - -**Branch**: `001-ai-service-requirements` | **Date**: 2026-04-10 -**Base URL**: `http://ai-service:8000` -**API Prefix**: `/api/v1` -**Swagger**: `/docs`(FastAPI 自动生成) - ---- - -## 通用约定 - -### 请求格式 -- 所有请求体:`Content-Type: application/json` -- 无认证机制(内网服务,仅 Java 后端调用) - -### 响应格式 -- 成功:HTTP 2xx,JSON 响应体 -- 错误:HTTP 4xx/5xx,统一错误格式: - ```json - {"code": "ERROR_CODE", "message": "具体描述"} - ``` - -### 错误码 - -| HTTP 状态码 | code | 触发条件 | -|------------|------|---------| -| 400 | UNSUPPORTED_FILE_TYPE | 文件格式不支持(如 .xlsx) | -| 400 | VIDEO_TOO_LARGE | 视频文件超过大小上限 | -| 502 | STORAGE_ERROR | RustFS 不可达或文件不存在 | -| 502 | LLM_PARSE_ERROR | GLM 返回非合法 JSON | -| 503 | LLM_CALL_ERROR | GLM API 限流 / 超时 | -| 500 | INTERNAL_ERROR | 未捕获异常 | - ---- - -## 端点一览 - -| 端点 | 方法 | 功能 | 响应码 | -|------|------|------|--------| -| `/health` | GET | 健康检查 | 200 | -| `/api/v1/text/extract` | POST | 文档三元组提取 | 200 | -| `/api/v1/image/extract` | POST | 图像四元组提取 | 200 | -| `/api/v1/video/extract-frames` | POST | 视频帧提取(异步) | 202 | -| `/api/v1/video/to-text` | POST | 视频转文本(异步) | 202 | -| `/api/v1/qa/gen-text` | POST | 文本问答对生成 | 200 | -| `/api/v1/qa/gen-image` | POST | 图像问答对生成 | 200 | -| `/api/v1/finetune/start` | POST | 提交微调任务 | 200 | -| `/api/v1/finetune/status/{jobId}` | GET | 查询微调状态 | 200 | - ---- - -## 端点详情 - -### GET /health - -健康检查端点,无需认证,无请求体。 - -**响应(200 OK)**: -```json -{"status": "ok"} -``` - ---- - -### POST /api/v1/text/extract - -从存储中指定路径的文档提取文本三元组。 - -**请求体**: -```json -{ - "file_path": "text/202404/123.txt", - "file_name": "设备规范.txt", - "model": "glm-4-flash", - "prompt_template": "..." -} -``` - -| 字段 | 类型 | 必填 | 说明 | -|------|------|------|------| -| file_path | string | 是 | RustFS 中的文件路径 | -| file_name | string | 是 | 带扩展名的文件名(用于判断格式) | -| model | string | 否 | 模型名,默认使用 config 中的 default_text | -| prompt_template | string | 否 | 自定义提示词,不传使用内置模板 | - -**支持格式**: `.txt`, `.pdf`, `.docx` - -**响应(200 OK)**: -```json -{ - "items": [ - { - "subject": "变压器", - "predicate": "额定电压", - "object": "110kV", - "source_snippet": "该变压器额定电压为110kV", - "source_offset": {"start": 120, "end": 150} - } - ] -} -``` - ---- - -### POST /api/v1/image/extract - -从存储中指定路径的图片提取知识四元组,并自动裁剪 bbox 区域。 - -**请求体**: -```json -{ - "file_path": "image/202404/456.jpg", - "task_id": 789, - "model": "glm-4v-flash", - "prompt_template": "..." -} -``` - -| 字段 | 类型 | 必填 | 说明 | -|------|------|------|------| -| file_path | string | 是 | RustFS 中的图片路径 | -| task_id | int | 是 | 标注任务 ID(用于构造裁剪图存储路径) | -| model | string | 否 | 默认使用 config 中的 default_vision | -| prompt_template | string | 否 | 自定义提示词 | - -**响应(200 OK)**: -```json -{ - "items": [ - { - "subject": "电缆接头", - "predicate": "位于", - "object": "配电箱左侧", - "qualifier": "2024年检修现场", - "bbox": {"x": 10, "y": 20, "w": 100, "h": 80}, - "cropped_image_path": "crops/789/0.jpg" - } - ] -} -``` - ---- - -### POST /api/v1/video/extract-frames - -触发视频帧提取后台任务,立即返回。 - -**请求体**: -```json -{ - "file_path": "video/202404/001.mp4", - "source_id": 10, - "job_id": 42, - "mode": "interval", - "frame_interval": 30 -} -``` - -| 字段 | 类型 | 必填 | 说明 | -|------|------|------|------| -| file_path | string | 是 | RustFS 中的视频路径 | -| source_id | int | 是 | 原始资料 ID(用于构造帧存储路径) | -| job_id | int | 是 | 由 Java 后端分配的任务 ID | -| mode | string | 否 | `interval`(默认)或 `keyframe` | -| frame_interval | int | 否 | interval 模式专用,按帧数步进,默认 30 | - -**响应(202 Accepted)**: -```json -{"message": "任务已接受,后台处理中", "job_id": 42} -``` - -**完成后回调 Java 后端**(POST `{BACKEND_CALLBACK_URL}`): -```json -{ - "job_id": 42, - "status": "SUCCESS", - "frames": [ - {"frame_index": 0, "time_sec": 0.0, "frame_path": "frames/10/0.jpg"} - ], - "error_message": null -} -``` - ---- - -### POST /api/v1/video/to-text - -触发视频片段转文字后台任务,立即返回。 - -**请求体**: -```json -{ - "file_path": "video/202404/001.mp4", - "source_id": 10, - "job_id": 43, - "start_sec": 0, - "end_sec": 120, - "model": "glm-4v-flash", - "prompt_template": "..." -} -``` - -| 字段 | 类型 | 必填 | 说明 | -|------|------|------|------| -| file_path | string | 是 | RustFS 中的视频路径 | -| source_id | int | 是 | 原始资料 ID | -| job_id | int | 是 | 由 Java 后端分配的任务 ID | -| start_sec | float | 是 | 分析起始时间(秒) | -| end_sec | float | 是 | 分析结束时间(秒) | -| model | string | 否 | 默认使用 config 中的 default_vision | -| prompt_template | string | 否 | 自定义提示词 | - -**响应(202 Accepted)**: -```json -{"message": "任务已接受,后台处理中", "job_id": 43} -``` - -**完成后回调 Java 后端**(POST `{BACKEND_CALLBACK_URL}`): -```json -{ - "job_id": 43, - "status": "SUCCESS", - "output_path": "video-text/10/1712800000.txt", - "error_message": null -} -``` - ---- - -### POST /api/v1/qa/gen-text - -基于文本三元组批量生成候选问答对。 - -**请求体**: -```json -{ - "items": [ - { - "subject": "变压器", - "predicate": "额定电压", - "object": "110kV", - "source_snippet": "该变压器额定电压为110kV" - } - ], - "model": "glm-4-flash", - "prompt_template": "..." -} -``` - -**响应(200 OK)**: -```json -{ - "pairs": [ - {"question": "变压器的额定电压是多少?", "answer": "该变压器额定电压为110kV。"} - ] -} -``` - ---- - -### POST /api/v1/qa/gen-image - -基于图像四元组生成候选图文问答对。图片由 AI 服务从存储自动获取,调用方只需提供路径。 - -**请求体**: -```json -{ - "items": [ - { - "subject": "电缆接头", - "predicate": "位于", - "object": "配电箱左侧", - "qualifier": "2024年检修现场", - "cropped_image_path": "crops/789/0.jpg" - } - ], - "model": "glm-4v-flash", - "prompt_template": "..." -} -``` - -**响应(200 OK)**: -```json -{ - "pairs": [ - { - "question": "图中电缆接头位于何处?", - "answer": "图中电缆接头位于配电箱左侧。", - "image_path": "crops/789/0.jpg" - } - ] -} -``` - ---- - -### POST /api/v1/finetune/start - -向 ZhipuAI 提交微调任务。 - -**请求体**: -```json -{ - "jsonl_url": "https://rustfs.example.com/finetune-export/export/xxx.jsonl", - "base_model": "glm-4-flash", - "hyperparams": {"learning_rate": 1e-4, "epochs": 3} -} -``` - -**响应(200 OK)**: -```json -{"job_id": "glm-ft-xxxxxx"} -``` - ---- - -### GET /api/v1/finetune/status/{jobId} - -查询微调任务状态。 - -**路径参数**: `jobId` — 微调任务 ID(由 `/finetune/start` 返回) - -**响应(200 OK)**: -```json -{ - "job_id": "glm-ft-xxxxxx", - "status": "RUNNING", - "progress": 45, - "error_message": null -} -``` - -`status` 取值: `RUNNING` | `SUCCESS` | `FAILED` diff --git a/specs/001-ai-service-requirements/data-model.md b/specs/001-ai-service-requirements/data-model.md deleted file mode 100644 index 5ed2438..0000000 --- a/specs/001-ai-service-requirements/data-model.md +++ /dev/null @@ -1,167 +0,0 @@ -# Data Model: AI 服务 - -**Branch**: `001-ai-service-requirements` | **Date**: 2026-04-10 - ---- - -## 实体定义 - -### TripleItem(文本三元组) - -从文档中提取的一条知识关系。 - -| 字段 | 类型 | 约束 | 说明 | -|------|------|------|------| -| subject | string | 非空 | 主语实体 | -| predicate | string | 非空 | 谓语/关系 | -| object | string | 非空 | 宾语实体 | -| source_snippet | string | 非空 | 原文中的证据片段(直接引用) | -| source_offset.start | int | ≥0 | 证据片段在全文中的起始字符偏移 | -| source_offset.end | int | >start | 证据片段在全文中的结束字符偏移 | - -**状态转换**: 无(只读输出) - ---- - -### QuadrupleItem(图像四元组) - -从图像中提取的一条知识关系,带图像位置信息。 - -| 字段 | 类型 | 约束 | 说明 | -|------|------|------|------| -| subject | string | 非空 | 主体实体 | -| predicate | string | 非空 | 关系/属性 | -| object | string | 非空 | 客体实体 | -| qualifier | string | 可为空 | 修饰信息(时间、条件、场景) | -| bbox.x | int | ≥0 | 边界框左上角 x 像素坐标 | -| bbox.y | int | ≥0 | 边界框左上角 y 像素坐标 | -| bbox.w | int | >0 | 边界框宽度(像素) | -| bbox.h | int | >0 | 边界框高度(像素) | -| cropped_image_path | string | 非空 | 裁剪图在 RustFS 中的存储路径 | - -**派生规则**: `cropped_image_path = "crops/{task_id}/{item_index}.jpg"`,由 image_service 自动生成并上传 - ---- - -### QAPair(文本问答对) - -由文本三元组生成的训练候选问答对。 - -| 字段 | 类型 | 约束 | 说明 | -|------|------|------|------| -| question | string | 非空 | 问题文本 | -| answer | string | 非空 | 答案文本 | - ---- - -### ImageQAPair(图像问答对) - -由图像四元组生成的训练候选图文问答对。 - -| 字段 | 类型 | 约束 | 说明 | -|------|------|------|------| -| question | string | 非空 | 问题文本 | -| answer | string | 非空 | 答案文本 | -| image_path | string | 非空 | 对应裁剪图的存储路径(来源于 QuadrupleItem.cropped_image_path) | - ---- - -### FrameInfo(视频帧信息) - -视频帧提取任务中单帧的元数据。 - -| 字段 | 类型 | 约束 | 说明 | -|------|------|------|------| -| frame_index | int | ≥0 | 帧在视频中的原始帧序号 | -| time_sec | float | ≥0.0 | 帧对应的时间点(秒) | -| frame_path | string | 非空 | 帧图在 RustFS 中的存储路径 | - -**派生规则**: `frame_path = "frames/{source_id}/{upload_index}.jpg"` - ---- - -### VideoJobCallback(视频任务回调) - -异步视频任务完成后发送给 Java 后端的通知载荷。 - -| 字段 | 类型 | 约束 | 说明 | -|------|------|------|------| -| job_id | int | 非空 | 由 Java 后端分配的任务 ID | -| status | string | SUCCESS \| FAILED | 任务最终状态 | -| frames | FrameInfo[] \| null | 仅帧提取时非 null | 提取的帧列表(可为空列表) | -| output_path | string \| null | 仅视频转文本时非 null | 输出文字描述的存储路径 | -| error_message | string \| null | 仅 FAILED 时非 null | 错误描述 | - ---- - -### FinetuneJob(微调任务) - -微调任务的状态快照。 - -| 字段 | 类型 | 约束 | 说明 | -|------|------|------|------| -| job_id | string | 非空 | 由 ZhipuAI 平台分配的任务 ID(如 "glm-ft-xxxxxx") | -| status | string | RUNNING \| SUCCESS \| FAILED | 当前状态 | -| progress | int \| null | 0-100 \| null | 完成百分比(ZhipuAI 支持时) | -| error_message | string \| null | 仅 FAILED 时非 null | 错误描述 | - -**状态映射**: -``` -ZhipuAI "running" → RUNNING -ZhipuAI "succeeded" → SUCCESS -ZhipuAI "failed" → FAILED -其他 → RUNNING(保守处理) -``` - ---- - -## RustFS 存储路径规范 - -| 资源类型 | 存储桶 | 路径格式 | -|----------|--------|----------| -| 上传文本文件 | `source-data` | `text/{年月}/{source_id}.txt` | -| 上传图片 | `source-data` | `image/{年月}/{source_id}.jpg` | -| 上传视频 | `source-data` | `video/{年月}/{source_id}.mp4` | -| 视频帧图 | `source-data` | `frames/{source_id}/{upload_index}.jpg` | -| 视频转译文本 | `source-data` | `video-text/{source_id}/{timestamp}.txt` | -| 图像/帧 bbox 裁剪图 | `source-data` | `crops/{task_id}/{item_index}.jpg` | -| 导出 JSONL 文件 | `finetune-export` | `export/{batchUuid}.jsonl` | - ---- - -## 配置模型 - -### config.yaml(非敏感,提交 git) - -```yaml -server: - port: 8000 - log_level: INFO - -storage: - buckets: - source_data: "source-data" - finetune_export: "finetune-export" - -backend: {} # callback_url 由 .env 注入 - -video: - frame_sample_count: 8 # 视频转文本时均匀采样帧数 - max_file_size_mb: 200 # 视频大小上限(可通过 MAX_VIDEO_SIZE_MB 覆盖) - -models: - default_text: "glm-4-flash" - default_vision: "glm-4v-flash" -``` - -### 环境变量覆盖映射 - -| 环境变量 | YAML 路径 | 说明 | -|----------|-----------|------| -| ZHIPUAI_API_KEY | zhipuai.api_key | 必填 | -| STORAGE_ACCESS_KEY | storage.access_key | 必填 | -| STORAGE_SECRET_KEY | storage.secret_key | 必填 | -| STORAGE_ENDPOINT | storage.endpoint | RustFS 地址 | -| BACKEND_CALLBACK_URL | backend.callback_url | Java 后端回调接口 | -| LOG_LEVEL | server.log_level | 日志级别 | -| MAX_VIDEO_SIZE_MB | video.max_file_size_mb | 视频大小上限 | diff --git a/specs/001-ai-service-requirements/plan.md b/specs/001-ai-service-requirements/plan.md deleted file mode 100644 index 900cffb..0000000 --- a/specs/001-ai-service-requirements/plan.md +++ /dev/null @@ -1,120 +0,0 @@ -# Implementation Plan: AI 服务需求文档 - -**Branch**: `001-ai-service-requirements` | **Date**: 2026-04-10 | **Spec**: [spec.md](spec.md) -**Input**: Feature specification from `/specs/001-ai-service-requirements/spec.md` - -> **参考实现计划(主计划)**: `docs/superpowers/plans/2026-04-10-ai-service-impl.md` -> 本文件为 speckit 规划框架文档,详细 TDD 任务(17 个步骤含完整代码)见上述主计划。 - -## Summary - -实现一个独立部署的 Python FastAPI AI 服务,为知识图谱标注平台提供文本三元组提取、图像四元组提取、视频帧处理、问答对生成和 GLM 微调管理能力。服务通过 RustFS S3 API 读写文件,通过 ZhipuAI GLM API 调用大模型,通过回调接口通知 Java 后端异步任务结果。采用 ABC 适配层(LLMClient / StorageClient)保证可扩展性,FastAPI BackgroundTasks 处理视频长任务,全量 TDD 开发。 - -## Technical Context - -**Language/Version**: Python 3.12.13(conda `label` 环境) -**Primary Dependencies**: FastAPI ≥0.111, uvicorn[standard] ≥0.29, pydantic ≥2.7, zhipuai ≥2.1, boto3 ≥1.34, pdfplumber ≥0.11, python-docx ≥1.1, opencv-python-headless ≥4.9, numpy ≥1.26, httpx ≥0.27, python-dotenv ≥1.0, pyyaml ≥6.0 -**Storage**: RustFS(S3 兼容协议,boto3 访问) -**Testing**: pytest ≥8.0 + pytest-asyncio ≥0.23,所有 service 和 router 均有单元测试 -**Target Platform**: Linux 容器(Docker + Docker Compose) -**Project Type**: web-service -**Performance Goals**: 文本提取 <60s;图像提取 <30s;视频任务接受 <1s;健康检查 <1s;QA 生成(≤10条)<90s -**Constraints**: 视频文件大小上限默认 200MB(可通过 MAX_VIDEO_SIZE_MB 环境变量配置);不访问数据库;GLM 为云端 API,图片须以 base64 传输;ZhipuAI SDK 同步阻塞,须在线程池中执行 -**Scale/Scope**: 低并发(ADMIN 手动触发),同时不超过 5 个视频任务 - -## Constitution Check - -*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.* - -> 项目 constitution 为未填充的模板,无项目特定约束规则。以下采用通用工程原则进行评估。 - -| 原则 | 状态 | 说明 | -|------|------|------| -| 测试优先(TDD) | ✅ 通过 | 实现计划采用红绿重构循环,所有模块先写测试 | -| 简单性(YAGNI) | ✅ 通过 | BackgroundTasks 而非 Celery;无数据库;适配层仅当前实现 | -| 可观测性 | ✅ 通过 | JSON 结构化日志,含请求/GLM/视频任务维度 | -| 错误分类 | ✅ 通过 | 4 种异常类(400/502/503/500),结构化响应 | -| 可扩展性 | ✅ 通过 | LLMClient / StorageClient ABC 适配层 | -| 配置分层 | ✅ 通过 | config.yaml + .env + 环境变量覆盖 | - -**GATE RESULT**: ✅ 无违规,可进入 Phase 0。 - -## Project Structure - -### Documentation (this feature) - -```text -specs/001-ai-service-requirements/ -├── plan.md # 本文件 (/speckit.plan 输出) -├── research.md # Phase 0 输出 -├── data-model.md # Phase 1 输出 -├── quickstart.md # Phase 1 输出 -├── contracts/ # Phase 1 输出 -│ └── api.md -└── tasks.md # Phase 2 输出 (/speckit.tasks - 未由本命令创建) -``` - -### Source Code (repository root) - -```text -label_ai_service/ -├── app/ -│ ├── main.py # FastAPI 应用入口,lifespan,/health 端点 -│ ├── core/ -│ │ ├── config.py # YAML + .env 分层配置,lru_cache 单例 -│ │ ├── logging.py # JSON 结构化日志,请求日志中间件 -│ │ ├── exceptions.py # 自定义异常 + 全局处理器 -│ │ ├── json_utils.py # GLM 响应 JSON 解析(兼容 Markdown 代码块) -│ │ └── dependencies.py # FastAPI Depends 工厂函数 -│ ├── clients/ -│ │ ├── llm/ -│ │ │ ├── base.py # LLMClient ABC(chat / chat_vision) -│ │ │ └── zhipuai_client.py # ZhipuAI 实现(线程池包装同步 SDK) -│ │ └── storage/ -│ │ ├── base.py # StorageClient ABC(download/upload/presigned/size) -│ │ └── rustfs_client.py # RustFS S3 兼容实现 -│ ├── services/ -│ │ ├── text_service.py # TXT/PDF/DOCX 解析 + 三元组提取 -│ │ ├── image_service.py # 四元组提取 + bbox 裁剪 -│ │ ├── video_service.py # 帧提取 + 视频转文本(BackgroundTask) -│ │ ├── qa_service.py # 文本/图像问答对生成(图像用 base64) -│ │ └── finetune_service.py # 微调任务提交与查询 -│ ├── routers/ -│ │ ├── text.py # POST /api/v1/text/extract -│ │ ├── image.py # POST /api/v1/image/extract -│ │ ├── video.py # POST /api/v1/video/extract-frames, /to-text -│ │ ├── qa.py # POST /api/v1/qa/gen-text, /gen-image -│ │ └── finetune.py # POST /api/v1/finetune/start, GET /status/{id} -│ └── models/ -│ ├── text_models.py -│ ├── image_models.py -│ ├── video_models.py -│ ├── qa_models.py -│ └── finetune_models.py -├── tests/ -│ ├── conftest.py # mock_llm, mock_storage fixtures -│ ├── test_config.py -│ ├── test_llm_client.py -│ ├── test_storage_client.py -│ ├── test_text_service.py -│ ├── test_text_router.py -│ ├── test_image_service.py -│ ├── test_image_router.py -│ ├── test_video_service.py -│ ├── test_video_router.py -│ ├── test_qa_service.py -│ ├── test_qa_router.py -│ ├── test_finetune_service.py -│ └── test_finetune_router.py -├── config.yaml -├── .env -├── requirements.txt -├── Dockerfile -└── docker-compose.yml -``` - -**Structure Decision**: 单项目结构(Option 1),分层为 routers → services → clients,测试与源码并列。 - -## Complexity Tracking - -> Constitution 无违规,此节无需填写。 diff --git a/specs/001-ai-service-requirements/quickstart.md b/specs/001-ai-service-requirements/quickstart.md deleted file mode 100644 index 53b6133..0000000 --- a/specs/001-ai-service-requirements/quickstart.md +++ /dev/null @@ -1,109 +0,0 @@ -# Quickstart: AI 服务开发指南 - -**Branch**: `001-ai-service-requirements` | **Date**: 2026-04-10 - ---- - -## 环境准备 - -```bash -# 激活 conda 环境 -conda activate label - -# 安装依赖(在 label_ai_service 目录下) -pip install -r requirements.txt -``` - ---- - -## 本地开发启动 - -```bash -# 1. 复制并配置 .env(已提交模板) -# 编辑 .env 填写真实的 ZHIPUAI_API_KEY 和 STORAGE_ENDPOINT - -# 2. 启动开发服务器 -conda run -n label uvicorn app.main:app --reload --host 0.0.0.0 --port 8000 - -# 3. 访问 Swagger 文档 -# http://localhost:8000/docs -``` - ---- - -## 运行测试 - -```bash -# 运行全部测试 -conda run -n label pytest tests/ -v - -# 运行指定模块测试 -conda run -n label pytest tests/test_text_service.py -v - -# 运行带覆盖率报告 -conda run -n label pytest tests/ --cov=app --cov-report=term-missing -``` - ---- - -## Docker 部署 - -```bash -# 构建镜像 -docker build -t label-ai-service:dev . - -# 使用 docker-compose 启动(含 RustFS) -docker-compose up -d - -# 查看日志 -docker-compose logs -f ai-service - -# 健康检查 -curl http://localhost:8000/health -``` - ---- - -## 关键配置说明 - -### 视频大小上限调整 - -无需重建镜像,在 `.env` 中添加: -```ini -MAX_VIDEO_SIZE_MB=500 -``` - -### 切换大模型 - -修改 `config.yaml`: -```yaml -models: - default_text: "glm-4-flash" # 文本模型 - default_vision: "glm-4v-flash" # 视觉模型 -``` - ---- - -## 开发流程(TDD) - -详细的 17 个任务步骤(含完整代码)见主实现计划: -`docs/superpowers/plans/2026-04-10-ai-service-impl.md` - -每个任务的开发步骤: -1. 编写失败测试(`pytest ... -v` 验证失败) -2. 实现最小代码使测试通过(`pytest ... -v` 验证通过) -3. Commit - ---- - -## 目录结构速查 - -``` -app/ -├── main.py # 入口,/health 端点,路由注册 -├── core/ # 配置、日志、异常、工具 -├── clients/ # LLM 和 Storage 适配层(ABC + 实现) -├── services/ # 业务逻辑(text/image/video/qa/finetune) -├── routers/ # HTTP 路由处理 -└── models/ # Pydantic 请求/响应 Schema -``` diff --git a/specs/001-ai-service-requirements/research.md b/specs/001-ai-service-requirements/research.md deleted file mode 100644 index b703aa8..0000000 --- a/specs/001-ai-service-requirements/research.md +++ /dev/null @@ -1,76 +0,0 @@ -# Research: AI 服务实现方案 - -**Branch**: `001-ai-service-requirements` | **Date**: 2026-04-10 -**Status**: 完成(所有决策已在设计阶段确定,无待研究项) - ---- - -## 决策记录 - -### D-001: 异步框架选型 - -**Decision**: FastAPI + uvicorn -**Rationale**: 原生 async/await 支持、Pydantic 自动校验、自动生成 Swagger 文档、Python 生态系中性能和开发效率的最优权衡。 -**Alternatives considered**: Django(过重)、Flask(无原生异步)、aiohttp(无自动文档和类型校验) - ---- - -### D-002: ZhipuAI SDK 调用方式 - -**Decision**: 同步 SDK 通过 `asyncio.get_event_loop().run_in_executor(None, ...)` 在线程池中调用 -**Rationale**: ZhipuAI 官方 SDK 为同步阻塞设计,直接在 async 函数中调用会阻塞事件循环。`run_in_executor` 将阻塞调用卸载到线程池,保持 FastAPI 事件循环响应能力。 -**Alternatives considered**: 使用 `asyncio.to_thread()`(Python 3.9+ 语法糖,等效实现,选择 run_in_executor 保持向后兼容性);使用 httpx 直接调用 ZhipuAI HTTP API(绕过 SDK 但增加维护负担) - ---- - -### D-003: 图像 QA 生成的图片传输方式 - -**Decision**: base64 编码嵌入消息体(`data:image/jpeg;base64,...`) -**Rationale**: RustFS 部署在 Docker 内网(endpoint: `http://rustfs:9000`),presigned URL 指向内网地址,云端 GLM-4V 无法访问。base64 编码将图片内容直接内联到 API 请求,不依赖网络可达性。 -**Alternatives considered**: presigned URL(不可行,内网地址云端不可达);公网 RustFS 暴露(增加安全风险) - ---- - -### D-004: 视频长任务处理机制 - -**Decision**: FastAPI BackgroundTasks + HTTP 回调通知 Java 后端 -**Rationale**: 视频处理耗时不可控(几秒到几分钟),同步等待会超时。BackgroundTasks 无需额外中间件(Redis/Celery),部署简单,任务状态通过回调接口由 Java 后端管理,符合整体架构风格。并发量有限(≤5个同时任务),BackgroundTasks 完全够用。 -**Alternatives considered**: Celery(需 Redis broker,引入额外运维负担);asyncio.create_task(进程重启会丢失任务) - ---- - -### D-005: 分层配置方案 - -**Decision**: config.yaml(稳定非敏感配置)+ .env(密钥和环境差异项),环境变量优先级高于 YAML -**Rationale**: YAML 提供结构化可读性,适合 git 追踪非敏感配置变更;.env 格式为 Docker `env_file` 原生支持;环境变量覆盖机制使容器部署时无需重建镜像即可切换配置。 -**Alternatives considered**: 纯 .env 文件(缺乏结构化,复杂配置难维护);数据库存储配置(过重) - ---- - -### D-006: 视频大文件 OOM 防护 - -**Decision**: 在视频路由层(接受请求后、启动后台任务前)通过 `storage.get_object_size()` 查询文件大小,超限返回 HTTP 400 -**Rationale**: 在下载前拒绝,避免实际 OOM;大小限制通过 config.yaml + MAX_VIDEO_SIZE_MB 环境变量运行时可配置,无需重建镜像;实现简单,无需引入流式下载的新抽象。 -**Alternatives considered**: 流式下载(Completeness: 9/10,但 YAGNI,当前规模不需要);不限制(Completeness: 4/10,有 OOM 风险) - ---- - -### D-007: 视频关键帧检测算法 - -**Decision**: 帧差分(frame difference)近似检测:计算当前帧与前帧灰度图的像素差均值,差值超过阈值(默认 30.0)判定为场景切换 -**Rationale**: OpenCV 无原生 I 帧检测 API(`CAP_PROP_POS_FRAMES` 是帧定位,非 I 帧标识)。帧差分简单有效,对场景切换检测准确,且无需视频解码器底层支持。 -**Alternatives considered**: 基于编码信息的 I 帧检测(需 FFmpeg 支持,引入额外依赖);固定间隔(不够智能,不适合关键帧模式) - ---- - -### D-008: 测试策略 - -**Decision**: pytest + pytest-asyncio,Service 层和 Router 层分别测试,使用 AsyncMock 模拟外部依赖 -**Rationale**: Service 层测试业务逻辑,不依赖 HTTP;Router 层使用 TestClient 测试完整请求流程。视频 service 测试使用真实小视频文件(OpenCV VideoWriter 生成),验证帧提取逻辑正确性。 -**Alternatives considered**: 仅集成测试(需要真实 RustFS 和 ZhipuAI,CI 成本高);全部单元测试(无法覆盖路由和异常处理器集成) - ---- - -## 无待解决项 - -所有 NEEDS CLARIFICATION 均已在设计阶段通过用户确认或合理默认值解决。本 research.md 仅作决策存档。 diff --git a/specs/001-ai-service-requirements/spec.md b/specs/001-ai-service-requirements/spec.md deleted file mode 100644 index fbc8915..0000000 --- a/specs/001-ai-service-requirements/spec.md +++ /dev/null @@ -1,258 +0,0 @@ -# Feature Specification: AI 服务需求文档 - -**Feature Branch**: `001-ai-service-requirements` -**Created**: 2026-04-10 -**Status**: Draft -**Input**: User description: "@docs/superpowers/specs/2026-04-10-ai-service-design.md 根据设计文档完成需求文档" - ---- - -## 概述 - -知识图谱智能标注平台需要一个独立的 AI 计算服务,接收 Java 后端的调用,完成文档结构化提取、图像分析、视频预处理、训练数据生成和模型微调管理等智能化任务,将大模型能力嵌入标注工作流,大幅降低人工标注成本。 - ---- - -## User Scenarios & Testing *(mandatory)* - -### User Story 1 - ADMIN 从文档中提取知识三元组 (Priority: P1) - -ADMIN 在标注平台上选择一份已上传的文本文件(TXT、PDF 或 Word 文档),触发 AI 辅助提取。AI 服务从存储系统中读取该文档,分析内容,识别其中的主谓宾知识关系(三元组),并为每个三元组标注原文出处片段和字符偏移位置,返回结构化结果供标注员审核确认。 - -**Why this priority**: 文本三元组提取是平台文本标注流水线的核心入口,所有文本类标注任务都依赖此能力。无此功能,平台的主要价值无法实现。 - -**Independent Test**: 向 AI 服务发送一个包含已知知识点的测试文档路径,验证返回结果包含正确的主语/谓语/宾语和对应的原文位置信息,即可独立验证此功能完整运行。 - -**Acceptance Scenarios**: - -1. **Given** 存储系统中存有一份 TXT 格式文档,**When** AI 服务收到该文档路径和提取请求,**Then** 返回包含至少一条三元组的结果,每条含 subject、predicate、object、原文片段和字符偏移。 -2. **Given** 存储系统中存有一份 PDF 格式文档,**When** AI 服务收到提取请求,**Then** 正确解析 PDF 内容并返回三元组结果。 -3. **Given** 存储系统中存有一份 Word(.docx)格式文档,**When** AI 服务收到提取请求,**Then** 正确解析文档内容并返回三元组结果。 -4. **Given** 请求包含不支持的文件格式(如 .xlsx),**When** AI 服务收到请求,**Then** 返回明确的格式不支持错误,不崩溃。 -5. **Given** 存储系统不可达,**When** AI 服务尝试下载文件,**Then** 返回存储故障错误,而非通用服务器错误。 - ---- - -### User Story 2 - ADMIN 从图片中提取知识四元组并自动裁剪 (Priority: P1) - -ADMIN 在标注平台选择一张已上传的图片,触发 AI 辅助提取。AI 服务读取该图片,通过多模态大模型分析图像内容,识别图中的知识实体关系(四元组:主体、关系、客体、修饰信息),同时给出每个知识点在图像中的位置框(bbox 坐标),并自动将对应区域裁剪保存,供标注员对照审核。 - -**Why this priority**: 图像四元组提取是图片标注流水线的核心入口,与文本三元组提取并列为平台两大主流水线的起点。 - -**Independent Test**: 向 AI 服务发送一张包含可识别对象关系的测试图片路径,验证返回结果包含四元组信息和裁剪图的存储路径,即可独立验证此功能完整运行。 - -**Acceptance Scenarios**: - -1. **Given** 存储系统中存有一张图片,**When** AI 服务收到该图片路径和提取请求,**Then** 返回包含至少一条四元组的结果,每条含 subject、predicate、object、qualifier 和 bbox 坐标。 -2. **Given** AI 服务成功提取四元组,**When** 处理完成,**Then** 每个四元组对应的图像区域已自动裁剪并上传至存储,响应中包含裁剪图的存储路径。 -3. **Given** bbox 坐标超出图像边界,**When** 裁剪时,**Then** 自动截断至图像有效区域,不报错。 -4. **Given** 大模型返回格式异常(非 JSON),**When** 解析响应,**Then** 返回解析失败错误,不返回部分结果。 - ---- - -### User Story 3 - ADMIN 对视频进行帧提取(帧模式预处理) (Priority: P2) - -ADMIN 在标注平台选择一段已上传的视频,选择"帧提取"模式(按固定间隔或关键帧),触发 AI 服务处理。AI 服务在后台异步完成帧提取,将每一帧图片上传至存储,处理完成后主动通知 Java 后端,后端随即为每一帧创建图片标注任务,进入图片标注流程。 - -**Why this priority**: 视频帧提取是视频进入图片标注流水线的预处理步骤,依赖图片提取流水线(P1)已就绪。 - -**Independent Test**: 向 AI 服务发送一个测试视频的存储路径和 job_id,服务立即返回 202 Accepted,稍后验证回调接口收到含帧路径列表的成功通知,即可独立验证。 - -**Acceptance Scenarios**: - -1. **Given** 存储系统中存有一段视频(大小在限制内),**When** AI 服务收到帧提取请求(interval 模式),**Then** 立即返回 202 Accepted 和 job_id,不等待处理完成。 -2. **Given** 帧提取任务在后台成功完成,**When** 处理完成,**Then** AI 服务向 Java 后端发送回调,包含 job_id、status=SUCCESS 和帧图存储路径列表。 -3. **Given** keyframe 模式,**When** AI 服务处理视频,**Then** 仅提取画面发生显著变化的帧,而非固定间隔。 -4. **Given** 视频文件大小超过系统上限(默认 200MB,可配置),**When** 收到请求,**Then** 立即返回 400 错误,不启动后台任务。 -5. **Given** 帧提取过程中发生错误,**When** 任务失败,**Then** AI 服务仍向 Java 后端发送回调,status=FAILED,包含错误描述。 - ---- - -### User Story 4 - ADMIN 将视频片段转换为文字描述(片段模式预处理) (Priority: P2) - -ADMIN 在标注平台选择一段已上传视频的时间段,触发"视频转文本"预处理。AI 服务在后台均匀采样该时间段的视频帧,用多模态大模型理解视频内容,生成结构化文字描述,将描述文本上传存储,完成后通知 Java 后端,后端将其创建为新的文本类原始资料,进入文本标注流程。 - -**Why this priority**: 视频转文本预处理使视频内容能够通过文本标注流水线处理,扩展了平台的数据来源范围。 - -**Independent Test**: 向 AI 服务发送测试视频路径、时间段和 job_id,验证回调收到 output_path 指向一个可读的文字描述文件,即可独立验证。 - -**Acceptance Scenarios**: - -1. **Given** 存储系统中存有一段视频(大小在限制内),**When** AI 服务收到视频转文本请求,**Then** 立即返回 202 Accepted 和 job_id。 -2. **Given** 视频转文本任务在后台成功完成,**When** 处理完成,**Then** AI 服务向 Java 后端发送回调,包含 job_id、status=SUCCESS 和文字描述的存储路径。 -3. **Given** 请求指定了起止时间段(start_sec、end_sec),**When** 处理视频,**Then** 仅分析该时间段内的内容,不处理其他片段。 -4. **Given** 视频文件大小超过上限,**When** 收到请求,**Then** 立即返回 400 错误。 -5. **Given** 大模型调用失败,**When** 任务异常,**Then** 回调 status=FAILED,包含错误描述。 - ---- - -### User Story 5 - 系统自动为已审批三元组生成候选问答对 (Priority: P2) - -标注员提交的文本三元组经审批员审批通过后,系统自动调用 AI 服务,将三元组列表和对应原文片段批量输入大模型,生成符合微调格式的候选问答对,作为后续训练数据的来源。 - -**Why this priority**: 问答对生成是平台训练数据产出流程的关键环节,依赖三元组提取(P1)已完成并通过审批。 - -**Independent Test**: 向 AI 服务发送一组测试三元组(含原文片段),验证返回包含可读、合理的问答对列表,即可独立验证。 - -**Acceptance Scenarios**: - -1. **Given** 一组已审批的文本三元组,**When** AI 服务收到文本 QA 生成请求,**Then** 返回包含 question 和 answer 的问答对列表,每个三元组至少对应一个问答对。 -2. **Given** 大模型返回合法 JSON,**When** 解析响应,**Then** 正确提取每对问答并返回。 -3. **Given** 大模型返回格式异常,**When** 解析响应,**Then** 返回解析失败错误。 -4. **Given** 大模型服务不可用,**When** 调用失败,**Then** 返回明确的服务不可用错误。 - ---- - -### User Story 6 - 系统自动为已审批四元组生成候选图文问答对 (Priority: P2) - -图像四元组经审批通过后,系统自动调用 AI 服务,将四元组信息与对应裁剪图一起输入多模态大模型,生成图文问答对,用于后续图像类训练数据集。 - -**Why this priority**: 图像 QA 生成是图片标注流水线产出训练数据的最终步骤,优先级与文本 QA 生成(P2)相同。 - -**Independent Test**: 向 AI 服务发送一组四元组(含裁剪图存储路径),验证返回的问答对引用了图片路径,即可独立验证。 - -**Acceptance Scenarios**: - -1. **Given** 一组已审批的图像四元组(含裁剪图路径),**When** AI 服务收到图像 QA 生成请求,**Then** 返回包含 question、answer 和 image_path 的问答对列表。 -2. **Given** 裁剪图存储路径有效,**When** AI 服务处理,**Then** 自动获取图片内容并结合四元组信息生成问答,无需调用方额外传输图片数据。 -3. **Given** 裁剪图无法从存储获取,**When** 处理请求,**Then** 返回存储错误,不返回空结果。 - ---- - -### User Story 7 - ADMIN 提交微调任务并查询进度 (Priority: P3) - -ADMIN 在标注平台完成训练数据集导出后,选择提交大模型微调任务。平台调用 AI 服务提交微调请求(包含训练数据文件地址、基础模型和超参数),获取微调任务 ID。此后,ADMIN 可随时查询该任务的运行状态(进行中/成功/失败)和完成进度。 - -**Why this priority**: 微调任务管理是平台最终目标(产出定制化模型)的关键步骤,但需要前置数据准备流程全部完成,故列为 P3。 - -**Independent Test**: 向 AI 服务发送微调请求,获取 job_id,再调用状态查询接口,验证能正确返回当前状态,即可独立验证。 - -**Acceptance Scenarios**: - -1. **Given** 训练数据 JSONL 文件已在存储中准备就绪,**When** AI 服务收到微调提交请求(含文件地址、基础模型、超参数),**Then** 返回微调任务 ID。 -2. **Given** 微调任务已提交,**When** 查询任务状态,**Then** 返回 job_id、当前状态(RUNNING/SUCCESS/FAILED)和进度百分比。 -3. **Given** 任务处于运行中,**When** 多次查询状态,**Then** 每次均返回最新状态,不缓存旧状态。 -4. **Given** 传入不存在的 job_id 查询状态,**When** 处理请求,**Then** 返回明确错误,不崩溃。 - ---- - -### User Story 8 - 运维监控服务健康状态 (Priority: P3) - -运维人员或监控系统定期探测 AI 服务的健康状态,判断服务是否正常运行,以便在异常时及时告警或自动重启。 - -**Why this priority**: 健康检查是服务稳定运行的基础保障,但不属于业务功能,列为 P3。 - -**Independent Test**: 对健康检查接口发起 HTTP GET 请求,验证收到表示正常的响应,即可独立验证。 - -**Acceptance Scenarios**: - -1. **Given** AI 服务正常运行,**When** 任何系统对健康检查接口发起请求,**Then** 立即返回服务正常的响应,响应时间不超过 1 秒。 -2. **Given** 容器运行中,**When** 容器编排系统定期发起健康探测,**Then** 通过探测的容器才被标记为可用状态并接收流量。 - ---- - -### Edge Cases - -- 文件存在于存储系统但内容损坏(如 PDF 页面为空)时,如何处理?→ 返回解析结果为空,不报错,日志记录警告。 -- 视频帧提取结果为零帧(如视频文件损坏或间隔过大)时,如何处理?→ 回调 SUCCESS,返回空帧列表,Java 后端决定是否重试。 -- 大模型返回的三元组/四元组超过合理数量(如数百条)时,如何处理?→ 全量返回,由 Java 后端或标注员筛选,AI 服务不做截断。 -- 多个视频任务并发执行时,是否会互相影响?→ 每个任务独立使用临时文件,处理完成后清理,互不干扰。 -- 视频文件大小恰好等于上限时,如何处理?→ 视为超限,拒绝处理,避免边界情况下的内存压力。 -- 大模型以 Markdown 代码块格式(\`\`\`json ... \`\`\`)返回 JSON 时,如何处理?→ 自动提取代码块内的 JSON 内容,兼容此格式。 - ---- - -## Requirements *(mandatory)* - -### Functional Requirements - -**文本处理** - -- **FR-001**: 系统 MUST 支持从 TXT、PDF、DOCX 三种格式的文档中提取知识三元组(subject / predicate / object),并为每条三元组提供原文出处片段和字符偏移位置。 -- **FR-002**: 系统 MUST 在文件格式不受支持时,返回明确的格式不支持错误(HTTP 400),拒绝处理请求。 - -**图像处理** - -- **FR-003**: 系统 MUST 支持从图片中提取知识四元组(subject / predicate / object / qualifier),并提供每个知识点在图像中的位置框(bbox:x, y, w, h 像素坐标)。 -- **FR-004**: 系统 MUST 在返回四元组结果时,自动将每个知识点对应的图像区域裁剪并保存至存储,响应中包含裁剪图的存储路径。 - -**视频处理** - -- **FR-005**: 系统 MUST 支持视频帧提取,提供两种模式:固定间隔模式(按帧数间隔)和关键帧模式(场景切换时提取)。 -- **FR-006**: 系统 MUST 以异步方式处理视频任务,接受请求后立即返回接受确认(HTTP 202),在后台完成处理后主动通知调用方。 -- **FR-007**: 系统 MUST 支持视频片段转文字描述,输入起止时间段,输出视频内容的结构化文字描述,并将描述文本保存至存储。 -- **FR-008**: 系统 MUST 在视频文件大小超过上限时,拒绝处理并返回明确错误;大小上限 MUST 支持运行时配置(默认 200MB),不需要重新构建服务即可调整。 - -**问答对生成** - -- **FR-009**: 系统 MUST 支持基于文本三元组(含原文片段)批量生成候选问答对,每条三元组至少生成一个问答对。 -- **FR-010**: 系统 MUST 支持基于图像四元组(含裁剪图存储路径)生成图文候选问答对,图片内容由系统自动从存储获取,调用方只需提供存储路径。 - -**微调管理** - -- **FR-011**: 系统 MUST 支持向大模型服务提交微调任务,输入训练数据文件地址、基础模型名称和超参数,返回微调任务 ID。 -- **FR-012**: 系统 MUST 支持通过任务 ID 查询微调任务当前状态(RUNNING / SUCCESS / FAILED)和完成进度。 - -**服务运维** - -- **FR-013**: 系统 MUST 提供轻量健康检查接口,可被容器编排系统、反向代理和监控工具调用,无需认证,响应时间不超过 1 秒。 -- **FR-014**: 系统 MUST 对每次请求记录结构化日志,包含请求路径、响应状态和耗时;对每次大模型调用记录模型名称和耗时;对视频后台任务记录任务 ID、阶段和结果;日志 MUST NOT 包含文件原文内容。 -- **FR-015**: 系统 MUST 在大模型返回非法格式时(HTTP 502)、存储不可达时(HTTP 502)、大模型服务不可用时(HTTP 503),分别返回不同的结构化错误响应,便于调用方判断根因。 -- **FR-016**: 系统 MUST 提供 Swagger/OpenAPI 自动文档,描述所有接口的请求和响应格式。 - -**可扩展性** - -- **FR-017**: 系统 MUST 将大模型调用和存储访问封装为可替换的适配层,当前实现 ZhipuAI GLM 系列和 RustFS,替换实现时业务逻辑层无需修改。 -- **FR-018**: 系统 MUST 通过配置文件和环境变量管理所有可变参数(模型名称、存储地址、密钥、视频大小上限等),支持不重建服务镜像的情况下切换环境配置。 - -### Key Entities - -- **三元组(Triple)**: 从文本中提取的知识关系,由主语(subject)、谓语(predicate)、宾语(object)、原文片段(source_snippet)和字符偏移(source_offset: start/end)组成。 -- **四元组(Quadruple)**: 从图像中提取的知识关系,在三元组基础上增加修饰信息(qualifier)和图像位置框(bbox: x/y/w/h),并关联裁剪图存储路径(cropped_image_path)。 -- **问答对(QA Pair)**: 由 question 和 answer 组成,文本类关联三元组上下文,图像类额外携带图片存储路径(image_path)。 -- **视频任务回调(Video Job Callback)**: 异步任务完成通知,包含 job_id、status(SUCCESS/FAILED)、结果数据(帧路径列表或文字描述路径)和错误信息。 -- **微调任务(Finetune Job)**: 包含任务 ID、当前状态(RUNNING/SUCCESS/FAILED)和进度百分比。 - ---- - -## Success Criteria *(mandatory)* - -### Measurable Outcomes - -- **SC-001**: 对于长度在 10,000 字以内的文档,三元组提取请求在 60 秒内完成并返回结果,满足标注员实时等待的体验预期。 -- **SC-002**: 对于分辨率在 4K 以内的图片,四元组提取和裁剪图上传在 30 秒内完成,裁剪图区域与 bbox 坐标对应准确(误差 ≤2 像素)。 -- **SC-003**: 视频帧提取和视频转文本任务提交后,接受响应在 1 秒内返回;后台处理完成后回调通知在 10 分钟内送达(针对 200MB 以内的视频)。 -- **SC-004**: 视频大小超限的请求,拒绝响应在 3 秒内返回(含存储查询耗时),不启动任何后台处理。 -- **SC-005**: 问答对生成请求(≤10 条三元组/四元组),在 90 秒内完成并返回全部问答对。 -- **SC-006**: 健康检查接口在服务正常运行时,响应时间不超过 1 秒,容器编排系统依此判断服务可用状态。 -- **SC-007**: 所有错误响应均返回结构化错误信息(含错误类型和描述),不返回通用服务器错误,便于调用方在不查看日志的情况下判断根因。 -- **SC-008**: 替换大模型服务商或存储实现时,业务逻辑层代码零修改,仅需变更配置和适配层实现。 -- **SC-009**: 所有业务接口通过自动化单元测试覆盖,包括正常路径、存储错误、大模型错误、格式解析错误等场景。 - ---- - -## Technical Environment *(mandatory)* - -> 注:本节记录项目已确定的技术约束,这些决定已由团队确认,不作为需求变更点。 - -- **运行时**: Python 3.12.13 -- **Web 框架**: FastAPI(含 uvicorn 服务器) -- **运行环境**: conda 虚拟环境,环境名称 `label` -- **大模型**: ZhipuAI GLM 系列(文本:glm-4-flash,视觉:glm-4v-flash),通过官方 SDK 调用 -- **对象存储**: RustFS,通过 S3 兼容 API(boto3)访问 -- **文档解析**: TXT(UTF-8 解码)、PDF(pdfplumber)、DOCX(python-docx) -- **视频处理**: OpenCV(帧提取 + 帧差分关键帧检测) -- **容器化**: Docker + Docker Compose,提供 Dockerfile 和 docker-compose.yml - ---- - -## Assumptions - -- Java 后端(label-backend)是 AI 服务的唯一调用方,AI 服务不对外直接暴露,无需用户认证机制。 -- 大模型服务部署在公网(ZhipuAI 云端 API),RustFS 部署在 Docker 内网;因此图片内容必须以 base64 方式传递给大模型,不能依赖 RustFS 内网地址被云端服务访问。 -- 文档、图片、视频等原始文件由 Java 后端负责上传至存储,AI 服务仅通过存储路径读取,不处理文件上传逻辑。 -- 微调任务提交后的训练过程由 ZhipuAI 平台托管,AI 服务仅负责提交和查询,不管理训练算力资源。 -- 视频任务为低频操作(由 ADMIN 手动触发),并发量有限(预计同时不超过 5 个视频任务),当前无需专用任务队列。 -- 日志仅输出到标准输出(stdout),由容器运行时或日志收集系统负责落盘和归档;不记录文件原文内容,防止敏感信息泄露。 -- ZhipuAI SDK 为同步阻塞调用;为保持服务并发能力,SDK 调用将在线程池中执行,不阻塞主事件循环。 -- 视频大小上限默认 200MB,可通过环境变量(MAX_VIDEO_SIZE_MB)在容器运行时覆盖,无需重建镜像。 diff --git a/specs/001-ai-service-requirements/tasks.md b/specs/001-ai-service-requirements/tasks.md deleted file mode 100644 index 2f26606..0000000 --- a/specs/001-ai-service-requirements/tasks.md +++ /dev/null @@ -1,318 +0,0 @@ -# Tasks: AI 服务(知识图谱标注平台 AI 计算服务) - -**Input**: Design documents from `/specs/001-ai-service-requirements/` -**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, data-model.md ✅, contracts/api.md ✅ -**Tests**: Included — spec and plan explicitly mandate TDD(全量 TDD 开发) - -**Organization**: Tasks grouped by user story. Each phase is independently implementable and testable. - -## Format: `[ID] [P?] [Story?] Description` - -- **[P]**: Can run in parallel (different files, no shared dependencies) -- **[Story]**: Which user story this task belongs to (US1–US8) -- All paths are relative to project root `label_ai_service/` - ---- - -## Phase 1: Setup(项目初始化) - -**Purpose**: Create project skeleton and configuration files before any code is written. - -- [ ] T001 Create directory structure: `app/core/`, `app/clients/llm/`, `app/clients/storage/`, `app/services/`, `app/routers/`, `app/models/`, `tests/` -- [ ] T002 Create `requirements.txt` with pinned dependencies: fastapi≥0.111, uvicorn[standard]≥0.29, pydantic≥2.7, zhipuai≥2.1, boto3≥1.34, pdfplumber≥0.11, python-docx≥1.1, opencv-python-headless≥4.9, numpy≥1.26, httpx≥0.27, python-dotenv≥1.0, pyyaml≥6.0, pytest≥8.0, pytest-asyncio≥0.23 -- [ ] T003 [P] Create `config.yaml` with default server/storage/video/models configuration (port 8000, buckets, max_file_size_mb 200, glm-4-flash / glm-4v-flash) -- [ ] T004 [P] Create `.env` template with required env var keys (ZHIPUAI_API_KEY, STORAGE_ACCESS_KEY, STORAGE_SECRET_KEY, STORAGE_ENDPOINT, BACKEND_CALLBACK_URL, LOG_LEVEL, MAX_VIDEO_SIZE_MB) -- [ ] T005 [P] Create `Dockerfile` (python:3.12-slim base, install requirements, expose 8000, CMD uvicorn) -- [ ] T006 [P] Create `docker-compose.yml` with ai-service and rustfs services, env_file, healthcheck (curl /health every 30s) - ---- - -## Phase 2: Foundational(核心基础设施) - -**Purpose**: Core infrastructure that MUST be complete before ANY user story can be implemented. - -**⚠️ CRITICAL**: No user story work can begin until this phase is complete. - -### Config & Core Utilities - -- [ ] T007 Implement `app/core/config.py`: load `config.yaml` with PyYAML + override via `_ENV_OVERRIDES` dict mapping env vars to nested YAML paths (including `MAX_VIDEO_SIZE_MB → video.max_file_size_mb`), expose `get_config()` with `@lru_cache` -- [ ] T008 [P] Implement `app/core/logging.py`: JSON structured logging via `logging` module, `RequestLoggingMiddleware` that logs path/status/latency, helper `get_logger(name)` -- [ ] T009 [P] Implement `app/core/exceptions.py`: custom exception classes `UnsupportedFileTypeError(400)`, `VideoTooLargeError(400)`, `StorageError(502)`, `LLMParseError(502)`, `LLMCallError(503)`, plus global exception handler that returns `{"code": ..., "message": ...}` JSON -- [ ] T010 [P] Implement `app/core/json_utils.py`: `extract_json(text) -> dict` that strips Markdown code fences (` ```json ... ``` `) before `json.loads`, raises `LLMParseError` on invalid JSON -- [ ] T011 Write `tests/test_config.py`: verify YAML defaults load correctly; verify `MAX_VIDEO_SIZE_MB=500` env var overrides `video.max_file_size_mb`; verify missing required env vars surface clear errors - -### LLM Client(大模型适配层) - -- [ ] T012 [P] Implement `app/clients/llm/base.py`: `LLMClient` ABC with abstract methods `chat(model, messages) -> str` and `chat_vision(model, messages) -> str` -- [ ] T013 Implement `app/clients/llm/zhipuai_client.py`: `ZhipuAIClient(LLMClient)` that wraps synchronous ZhipuAI SDK calls via `asyncio.get_event_loop().run_in_executor(None, ...)` in a thread pool; raise `LLMCallError` on SDK exceptions -- [ ] T014 [P] Write `tests/test_llm_client.py`: mock ZhipuAI SDK to verify `chat()` and `chat_vision()` call the SDK correctly; verify `LLMCallError` is raised on SDK exception; verify thread-pool wrapping does not block the event loop - -### Storage Client(存储适配层) - -- [ ] T015 [P] Implement `app/clients/storage/base.py`: `StorageClient` ABC with abstract methods `download_bytes(bucket, path) -> bytes`, `upload_bytes(bucket, path, data, content_type) -> None`, `get_presigned_url(bucket, path, expires) -> str`, `get_object_size(bucket, path) -> int` -- [ ] T016 Implement `app/clients/storage/rustfs_client.py`: `RustFSClient(StorageClient)` using boto3 S3 client; all calls wrapped via `run_in_executor`; `get_object_size` uses `head_object`; raise `StorageError` on `ClientError` -- [ ] T017 [P] Write `tests/test_storage_client.py`: mock boto3 S3 client; verify `download_bytes` returns correct bytes; verify `get_object_size` calls `head_object` and returns `ContentLength`; verify `StorageError` raised on S3 exception - -### FastAPI Application Entry - -- [ ] T018 Implement `app/main.py`: create FastAPI app with lifespan, register `RequestLoggingMiddleware`, register global exception handlers from `exceptions.py`, mount all routers (empty stubs initially), expose `GET /health → {"status": "ok"}` -- [ ] T019 [P] Implement `app/core/dependencies.py`: `get_llm_client() -> LLMClient` and `get_storage_client() -> StorageClient` as `@lru_cache` singletons, instantiated from `get_config()` values -- [ ] T020 Write `tests/conftest.py`: `mock_llm` fixture (AsyncMock implementing LLMClient), `mock_storage` fixture (AsyncMock implementing StorageClient with `get_object_size` returning 10MB), `test_app` fixture overriding Depends, `client` fixture using `TestClient` - -**Checkpoint**: Foundation complete — all user story phases can now begin in parallel. - ---- - -## Phase 3: User Story 1 — ADMIN 从文档中提取知识三元组 (Priority: P1) 🎯 MVP - -**Goal**: `POST /api/v1/text/extract` reads a TXT/PDF/DOCX file from RustFS, calls GLM, returns structured triples with source offsets. - -**Independent Test**: Send `{"file_path": "text/test.txt", "file_name": "test.txt"}` to the endpoint; verify response contains `items` with `subject`, `predicate`, `object`, `source_snippet`, `source_offset.start/end`. - -### Tests for User Story 1 ⚠️ Write FIRST — verify FAIL before implementing - -- [ ] T021 [P] [US1] Write `tests/test_text_service.py`: test TXT parsing returns triples; test PDF parsing (mock pdfplumber); test DOCX parsing (mock python-docx); test unsupported format raises `UnsupportedFileTypeError`; test storage failure raises `StorageError`; test LLM parse error raises `LLMParseError` - -### Implementation for User Story 1 - -- [ ] T022 [P] [US1] Create `app/models/text_models.py`: `SourceOffset(start: int, end: int)`, `TripleItem(subject, predicate, object, source_snippet, source_offset)`, `TextExtractRequest(file_path, file_name, model?, prompt_template?)`, `TextExtractResponse(items: list[TripleItem])` -- [ ] T023 [US1] Implement `app/services/text_service.py`: `extract_triples(req, llm, storage) -> TextExtractResponse`; dispatch to `_parse_txt / _parse_pdf / _parse_docx` by file extension; build prompt from content + optional `prompt_template`; call `llm.chat()`; parse JSON response via `extract_json()`; validate triple fields; raise typed exceptions -- [ ] T024 [US1] Write `tests/test_text_router.py`: POST `/api/v1/text/extract` returns 200 with items; unsupported format returns 400 with `UNSUPPORTED_FILE_TYPE`; storage error returns 502 with `STORAGE_ERROR`; LLM parse error returns 502 with `LLM_PARSE_ERROR` -- [ ] T025 [US1] Implement `app/routers/text.py`: `APIRouter(prefix="/api/v1")` with `POST /text/extract` handler that injects `storage` and `llm` via Depends, calls `text_service.extract_triples()`; register router in `app/main.py` - -**Checkpoint**: `POST /api/v1/text/extract` fully functional. Run `pytest tests/test_text_service.py tests/test_text_router.py -v` — all green. - ---- - -## Phase 4: User Story 2 — ADMIN 从图片中提取知识四元组并自动裁剪 (Priority: P1) - -**Goal**: `POST /api/v1/image/extract` downloads an image from RustFS, calls GLM-4V, crops bbox regions, uploads crops, returns quads with cropped_image_path. - -**Independent Test**: Send `{"file_path": "image/test.jpg", "task_id": 1}` to the endpoint; verify response contains `items` each with `bbox`, `qualifier`, and `cropped_image_path` matching pattern `crops/1/{n}.jpg`. - -### Tests for User Story 2 ⚠️ Write FIRST — verify FAIL before implementing - -- [ ] T026 [P] [US2] Write `tests/test_image_service.py`: test full quad extraction pipeline with mock LLM returning valid JSON; test bbox crop uses correct pixel coordinates; test out-of-bounds bbox is clamped to image dimensions; test crop upload path follows `crops/{task_id}/{index}.jpg` convention; test LLM parse error raises `LLMParseError` - -### Implementation for User Story 2 - -- [ ] T027 [P] [US2] Create `app/models/image_models.py`: `BBox(x, y, w, h: int)`, `QuadrupleItem(subject, predicate, object, qualifier?, bbox, cropped_image_path)`, `ImageExtractRequest(file_path, task_id, model?, prompt_template?)`, `ImageExtractResponse(items: list[QuadrupleItem])` -- [ ] T028 [US2] Implement `app/services/image_service.py`: `extract_quads(req, llm, storage) -> ImageExtractResponse`; download image bytes → decode with OpenCV (`cv2.imdecode`); base64 encode image for GLM-4V multimodal message; call `llm.chat_vision()`; parse JSON via `extract_json()`; for each quad, clamp bbox to image dimensions, crop with numpy slicing, encode as JPEG, upload to `crops/{task_id}/{index}.jpg`; return quads with paths -- [ ] T029 [US2] Write `tests/test_image_router.py`: POST `/api/v1/image/extract` returns 200 with items; LLM parse error returns 502; storage download failure returns 502 -- [ ] T030 [US2] Implement `app/routers/image.py`: `POST /image/extract` handler; register in `app/main.py` - -**Checkpoint**: `POST /api/v1/image/extract` fully functional. Run `pytest tests/test_image_service.py tests/test_image_router.py -v` — all green. - ---- - -## Phase 5: User Stories 3 & 4 — 视频帧提取 + 视频转文本 (Priority: P2) - -**Goal**: `POST /api/v1/video/extract-frames` and `POST /api/v1/video/to-text` immediately return 202, process video in background via FastAPI BackgroundTasks, then POST callback to Java backend with results. - -**Independent Test (US3)**: Send extract-frames request; verify immediate 202 with job_id; mock storage and callback URL; verify callback received with `status=SUCCESS` and non-empty `frames` list. - -**Independent Test (US4)**: Send to-text request with `start_sec=0, end_sec=10`; verify immediate 202; verify callback received with `status=SUCCESS` and `output_path` pointing to an uploaded text file. - -### Tests for User Stories 3 & 4 ⚠️ Write FIRST — verify FAIL before implementing - -- [ ] T031 [P] [US3] Write `tests/test_video_service.py` (frame extraction tests): generate small test video via `cv2.VideoWriter`; test interval mode extracts correct frame indices; test keyframe mode only extracts frames exceeding difference threshold; test each extracted frame is uploaded to `frames/{source_id}/{index}.jpg`; test failed extraction triggers FAILED callback with error_message -- [ ] T032 [P] [US4] Append to `tests/test_video_service.py` (to-text tests): test uniform sampling selects `frame_sample_count` frames from `[start_sec, end_sec]` window; test sampled frames are passed as base64 to `llm.chat_vision()`; test output text is uploaded to `video-text/{source_id}/{timestamp}.txt`; test LLM failure triggers FAILED callback - -### Implementation for User Stories 3 & 4 - -- [ ] T033 [US3] Create `app/models/video_models.py`: `ExtractFramesRequest(file_path, source_id, job_id, mode="interval", frame_interval=30)`, `VideoToTextRequest(file_path, source_id, job_id, start_sec, end_sec, model?, prompt_template?)`, `FrameInfo(frame_index, time_sec, frame_path)`, `VideoJobCallback(job_id, status, frames?, output_path?, error_message?)`, `VideoAcceptedResponse(message, job_id)` -- [ ] T034 [US3] Implement frame extraction in `app/services/video_service.py`: `extract_frames_task(req, llm, storage, callback_url)` background function; download video to temp file; open with `cv2.VideoCapture`; interval mode: step by `frame_interval`; keyframe mode: compute grayscale frame diff, extract when diff > threshold (default 30.0); upload each frame JPEG; POST callback with `FrameInfo` list; clean up temp file; catch all exceptions and POST FAILED callback -- [ ] T035 [US4] Implement to-text in `app/services/video_service.py`: `video_to_text_task(req, llm, storage, callback_url)` background function; download video to temp file; sample `frame_sample_count` frames uniformly within `[start_sec, end_sec]`; base64 encode frames; call `llm.chat_vision()` with all frames in one multimodal message; upload text result to `video-text/{source_id}/{timestamp}.txt`; POST callback with `output_path`; clean up temp file -- [ ] T036 [US3] Write `tests/test_video_router.py`: POST `/api/v1/video/extract-frames` returns 202 immediately; video exceeding `max_file_size_mb` returns 400 with `VIDEO_TOO_LARGE`; background task is registered (mock BackgroundTasks) -- [ ] T037 [US4] Append to `tests/test_video_router.py`: POST `/api/v1/video/to-text` returns 202; size limit applies equally -- [ ] T038 [US3] Implement `app/routers/video.py`: `_check_video_size(storage, bucket, file_path, max_mb)` helper that calls `storage.get_object_size()` and raises `VideoTooLargeError`; `POST /video/extract-frames` and `POST /video/to-text` handlers check size then enqueue background task; register router in `app/main.py` - -**Checkpoint**: Both video endpoints fully functional. Run `pytest tests/test_video_service.py tests/test_video_router.py -v` — all green. - ---- - -## Phase 6: User Stories 5 & 6 — 文本QA生成 + 图像QA生成 (Priority: P2) - -**Goal**: `POST /api/v1/qa/gen-text` generates QA pairs from text triples; `POST /api/v1/qa/gen-image` generates multimodal QA pairs from image quads (images fetched and base64-encoded internally). - -**Independent Test (US5)**: Send `{"items": [{"subject":"变压器","predicate":"额定电压","object":"110kV","source_snippet":"..."}]}` to gen-text; verify response contains `pairs` with non-empty `question` and `answer`. - -**Independent Test (US6)**: Send `{"items": [{"subject":"...","cropped_image_path":"crops/1/0.jpg",...}]}` to gen-image; verify response contains `pairs` with `image_path` matching `crops/1/0.jpg`. - -### Tests for User Stories 5 & 6 ⚠️ Write FIRST — verify FAIL before implementing - -- [ ] T039 [P] [US5] Write `tests/test_qa_service.py` (text QA tests): test triples are formatted into prompt correctly; test LLM response JSON is parsed into `QAPair` list; test `LLMParseError` on malformed LLM response; test `LLMCallError` propagates correctly -- [ ] T040 [P] [US6] Append to `tests/test_qa_service.py` (image QA tests): test storage downloads cropped image and encodes as base64 before LLM call; test multimodal message includes both text (quad info) and inline image data URI; test `StorageError` on failed image download - -### Implementation for User Stories 5 & 6 - -- [ ] T041 [P] [US5] Create `app/models/qa_models.py`: `TextQAItem(subject, predicate, object, source_snippet)`, `GenTextQARequest(items, model?, prompt_template?)`, `QAPair(question, answer)`, `ImageQAItem(subject, predicate, object, qualifier?, cropped_image_path)`, `GenImageQARequest(items, model?, prompt_template?)`, `ImageQAPair(question, answer, image_path)`, `TextQAResponse(pairs)`, `ImageQAResponse(pairs)` -- [ ] T042 [US5] Implement `gen_text_qa(req, llm) -> TextQAResponse` in `app/services/qa_service.py`: format all triples + source snippets into a single batch prompt; call `llm.chat()`; parse JSON array via `extract_json()`; return `QAPair` list -- [ ] T043 [US6] Implement `gen_image_qa(req, llm, storage) -> ImageQAResponse` in `app/services/qa_service.py`: for each `ImageQAItem`, download `cropped_image_path` bytes from `source-data` bucket; base64 encode; build multimodal message with quad text + `data:image/jpeg;base64,...` inline URL; call `llm.chat_vision()`; parse JSON; return `ImageQAPair` with `image_path = item.cropped_image_path` -- [ ] T044 [US5] Write `tests/test_qa_router.py`: POST `/api/v1/qa/gen-text` returns 200 with pairs; POST `/api/v1/qa/gen-image` returns 200 with pairs including image_path; LLM errors return 502/503 -- [ ] T045 [US5] Implement `app/routers/qa.py`: `POST /qa/gen-text` and `POST /qa/gen-image` handlers; register router in `app/main.py` - -**Checkpoint**: Both QA endpoints fully functional. Run `pytest tests/test_qa_service.py tests/test_qa_router.py -v` — all green. - ---- - -## Phase 7: User Stories 7 & 8 — 微调任务管理 + 健康检查 (Priority: P3) - -**Goal**: `POST /api/v1/finetune/start` submits a ZhipuAI fine-tune job; `GET /api/v1/finetune/status/{jobId}` queries its state; `GET /health` returns service liveness. - -**Independent Test (US7)**: Call `POST /finetune/start` with mock LLM returning a job ID; then call `GET /finetune/status/{jobId}`; verify `status` is one of `RUNNING/SUCCESS/FAILED` and `progress` is an integer. - -**Independent Test (US8)**: `GET /health` returns `{"status": "ok"}` with HTTP 200 in under 1 second. - -### Tests for User Stories 7 & 8 ⚠️ Write FIRST — verify FAIL before implementing - -- [ ] T046 [P] [US7] Write `tests/test_finetune_service.py`: test `submit_finetune()` calls ZhipuAI finetune API with correct params and returns job_id; test `get_status()` maps ZhipuAI `"running"→RUNNING`, `"succeeded"→SUCCESS`, `"failed"→FAILED`, unknown status→RUNNING (conservative); test `LLMCallError` on SDK failure -- [ ] T047 [P] [US8] Write health check test in `tests/test_finetune_router.py` (or new `tests/test_health.py`): `GET /health` returns 200 with `{"status": "ok"}` - -### Implementation for User Stories 7 & 8 - -- [ ] T048 [P] [US7] Create `app/models/finetune_models.py`: `FinetuneStartRequest(jsonl_url, base_model, hyperparams?)`, `FinetuneStartResponse(job_id)`, `FinetuneStatusResponse(job_id, status, progress?, error_message?)` -- [ ] T049 [US7] Implement `app/services/finetune_service.py`: `submit_finetune(req, llm) -> FinetuneStartResponse` calls ZhipuAI fine-tune create API via `run_in_executor`; `get_finetune_status(job_id, llm) -> FinetuneStatusResponse` calls ZhipuAI fine-tune retrieve API and maps status strings; raise `LLMCallError` on failure -- [ ] T050 [US7] Write `tests/test_finetune_router.py`: `POST /api/v1/finetune/start` returns 200 with job_id; `GET /api/v1/finetune/status/{jobId}` returns 200 with status fields; unknown job_id propagates error response -- [ ] T051 [US7] Implement `app/routers/finetune.py`: `POST /finetune/start` and `GET /finetune/status/{job_id}` handlers; register router in `app/main.py` - -**Checkpoint**: All 8 user stories complete. Run `pytest tests/ -v` — all green. - ---- - -## Phase 8: Polish & Cross-Cutting Concerns - -**Purpose**: Final integration, documentation verification, and deployment readiness. - -- [ ] T052 [P] Create `.gitignore` for Python project (`.env`, `__pycache__/`, `*.pyc`, `.pytest_cache/`, `tmp/` for video temp files) -- [ ] T053 Run full test suite `conda run -n label pytest tests/ -v --cov=app --cov-report=term-missing` and fix any remaining failures or coverage gaps -- [ ] T054 [P] Verify Swagger/OpenAPI docs at `http://localhost:8000/docs` show all 9 endpoints with correct request/response schemas -- [ ] T055 Validate quickstart.md end-to-end: `conda activate label && pip install -r requirements.txt && conda run -n label uvicorn app.main:app --reload` starts cleanly; `GET /health` returns 200; `docker-compose up -d` builds and healthcheck passes - ---- - -## Dependencies & Execution Order - -### Phase Dependencies - -``` -Phase 1 (Setup) - └─→ Phase 2 (Foundational) ← BLOCKS everything - ├─→ Phase 3 (US1, P1) ─┐ - ├─→ Phase 4 (US2, P1) ─┤ Can run in parallel after Phase 2 - ├─→ Phase 5 (US3+4, P2)─┤ - ├─→ Phase 6 (US5+6, P2)─┤ - └─→ Phase 7 (US7+8, P3)─┘ - └─→ Phase 8 (Polish) -``` - -### User Story Dependencies - -| Story | Priority | Depends On | Blocking | -|-------|----------|-----------|---------| -| US1 (文本三元组) | P1 | Phase 2 only | Nothing | -| US2 (图像四元组) | P1 | Phase 2 only | US6 (shares image downloading pattern) | -| US3 (视频帧提取) | P2 | Phase 2 only | Nothing | -| US4 (视频转文本) | P2 | Phase 2, US3 (shares video_service.py) | Nothing | -| US5 (文本QA) | P2 | Phase 2 only | Nothing | -| US6 (图像QA) | P2 | Phase 2 only | Nothing | -| US7 (微调管理) | P3 | Phase 2 only | Nothing | -| US8 (健康检查) | P3 | T018 (main.py) | Nothing | - -### Within Each User Story - -1. Tests MUST be written first and verified to **FAIL** before implementation -2. Models → Services → Routers (in dependency order) -3. Register router in `main.py` after router file is complete -4. Run story-specific tests before marking story done - -### Parallel Opportunities - -All tasks marked `[P]` within a phase can run concurrently (different files): -- **Phase 2**: T008, T009, T010 (core utilities) + T012, T014 (LLM) + T015, T017 (Storage) + T019 (dependencies) -- **Phase 3**: T021 (tests) and T022 (models) can start together -- **Phase 4**: T026 (tests) and T027 (models) can start together -- **Phase 5**: T031 (US3 tests) and T032 (US4 tests) can start together -- **Phase 6**: T039 (US5 tests) and T040, T041 (US6 tests + models) can start together -- **Phase 7**: T046, T047, T048 can start together - ---- - -## Parallel Example: Phase 2 Foundational - -```bash -# Kick off these in parallel (all different files): -[T008] app/core/logging.py -[T009] app/core/exceptions.py -[T010] app/core/json_utils.py -[T012] app/clients/llm/base.py -[T014] tests/test_llm_client.py -[T015] app/clients/storage/base.py -[T017] tests/test_storage_client.py -[T019] app/core/dependencies.py - -# Then in sequence (each depends on previous): -[T007] app/core/config.py → [T011] tests/test_config.py -[T013] app/clients/llm/zhipuai_client.py (needs T012) -[T016] app/clients/storage/rustfs_client.py (needs T015) -[T018] app/main.py (needs T009, T008) -[T020] tests/conftest.py (needs T018, T013, T016) -``` - ---- - -## Implementation Strategy - -### MVP First (US1 + US2 — P1 Stories Only) - -1. Complete Phase 1: Setup -2. Complete Phase 2: Foundational (CRITICAL — blocks all stories) -3. Complete Phase 3: US1 (文本三元组提取) → validate independently -4. Complete Phase 4: US2 (图像四元组提取) → validate independently -5. **STOP and DEMO**: Core extraction pipeline is production-ready - -### Incremental Delivery - -``` -Phase 1+2 complete → Foundation ready (commit) -Phase 3 complete → Text extraction works (commit, demo) -Phase 4 complete → Image extraction works (commit, demo) -Phase 5 complete → Video processing works (commit, demo) -Phase 6 complete → QA generation works (commit, demo) -Phase 7 complete → Fine-tune management (commit, demo) -Phase 8 complete → Production-ready (tag release) -``` - -### Parallel Team Strategy - -With two developers after Phase 2 completes: -- **Dev A**: US1 (text) → US5 (text QA) → US7 (finetune) -- **Dev B**: US2 (image) → US6 (image QA) → US3+US4 (video) - ---- - -## Summary - -| Phase | Tasks | User Story | Priority | -|-------|-------|-----------|---------| -| Phase 1: Setup | T001–T006 (6) | — | — | -| Phase 2: Foundational | T007–T020 (14) | — | — | -| Phase 3 | T021–T025 (5) | US1 文本三元组 | P1 🎯 MVP | -| Phase 4 | T026–T030 (5) | US2 图像四元组 | P1 | -| Phase 5 | T031–T038 (8) | US3+US4 视频处理 | P2 | -| Phase 6 | T039–T045 (7) | US5+US6 QA生成 | P2 | -| Phase 7 | T046–T051 (6) | US7+US8 微调+健康检查 | P3 | -| Phase 8: Polish | T052–T055 (4) | — | — | -| **Total** | **55 tasks** | **8 user stories** | | - ---- - -## Notes - -- `[P]` tasks = different files, no shared dependencies within the same phase -- `[US?]` label maps each task to its user story for traceability -- Tests in `tests/conftest.py` (T020) use `AsyncMock` — no real ZhipuAI or RustFS calls in unit tests -- Video tasks use a real small video file generated by `cv2.VideoWriter` in tests — no external media needed -- All config is loaded via `get_config()` — never hardcode model names or bucket names in services -- Commit after each phase checkpoint at minimum; commit after each task for clean git history -- Stop at any checkpoint to validate the story independently before proceeding diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index ae81f4d..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,39 +0,0 @@ -import pytest -from unittest.mock import AsyncMock, MagicMock -from fastapi.testclient import TestClient - -from app.clients.llm.base import LLMClient -from app.clients.storage.base import StorageClient -from app.core.dependencies import get_llm_client, get_storage_client - - -@pytest.fixture -def mock_llm() -> LLMClient: - client = MagicMock(spec=LLMClient) - client.chat = AsyncMock(return_value='[]') - client.chat_vision = AsyncMock(return_value='[]') - return client - - -@pytest.fixture -def mock_storage() -> StorageClient: - client = MagicMock(spec=StorageClient) - client.download_bytes = AsyncMock(return_value=b"") - client.upload_bytes = AsyncMock(return_value=None) - client.get_presigned_url = AsyncMock(return_value="http://example.com/presigned") - client.get_object_size = AsyncMock(return_value=10 * 1024 * 1024) # 10 MB default - return client - - -@pytest.fixture -def test_app(mock_llm, mock_storage): - from app.main import app - app.dependency_overrides[get_llm_client] = lambda: mock_llm - app.dependency_overrides[get_storage_client] = lambda: mock_storage - yield app - app.dependency_overrides.clear() - - -@pytest.fixture -def client(test_app): - return TestClient(test_app) diff --git a/tests/test_config.py b/tests/test_config.py deleted file mode 100644 index aa8f464..0000000 --- a/tests/test_config.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -import pytest - - -def test_yaml_defaults_load(monkeypatch): - # Clear lru_cache so each test gets a fresh load - from app.core import config as cfg_module - cfg_module.get_config.cache_clear() - - # Remove env overrides that might bleed from shell environment - for var in ["MAX_VIDEO_SIZE_MB", "LOG_LEVEL", "STORAGE_ENDPOINT"]: - monkeypatch.delenv(var, raising=False) - - cfg = cfg_module.get_config() - - assert cfg["server"]["port"] == 8000 - assert cfg["video"]["max_file_size_mb"] == 200 - assert cfg["models"]["default_text"] == "glm-4-flash" - assert cfg["models"]["default_vision"] == "glm-4v-flash" - assert cfg["storage"]["buckets"]["source_data"] == "source-data" - - -def test_max_video_size_env_override(monkeypatch): - from app.core import config as cfg_module - cfg_module.get_config.cache_clear() - - monkeypatch.setenv("MAX_VIDEO_SIZE_MB", "500") - cfg = cfg_module.get_config() - - assert cfg["video"]["max_file_size_mb"] == 500 - - -def test_log_level_env_override(monkeypatch): - from app.core import config as cfg_module - cfg_module.get_config.cache_clear() - - monkeypatch.setenv("LOG_LEVEL", "DEBUG") - cfg = cfg_module.get_config() - - assert cfg["server"]["log_level"] == "DEBUG" diff --git a/tests/test_finetune_router.py b/tests/test_finetune_router.py deleted file mode 100644 index 6678195..0000000 --- a/tests/test_finetune_router.py +++ /dev/null @@ -1,112 +0,0 @@ -"""T050: Integration tests for finetune router endpoints.""" -import pytest -from unittest.mock import MagicMock, patch - -from app.core.exceptions import LLMCallError -from app.models.finetune_models import FinetuneStartResponse, FinetuneStatusResponse - - -# --------------------------------------------------------------------------- -# POST /api/v1/finetune/start -# --------------------------------------------------------------------------- - -def test_finetune_start_returns_200_with_job_id(client): - start_resp = FinetuneStartResponse(job_id="glm-ft-router-test") - - with patch("app.routers.finetune.finetune_service.submit_finetune") as mock_submit: - mock_submit.return_value = start_resp - - resp = client.post( - "/api/v1/finetune/start", - json={ - "jsonl_url": "s3://bucket/train.jsonl", - "base_model": "glm-4", - "hyperparams": {"n_epochs": 3}, - }, - ) - - assert resp.status_code == 200 - data = resp.json() - assert data["job_id"] == "glm-ft-router-test" - - -def test_finetune_start_without_hyperparams(client): - start_resp = FinetuneStartResponse(job_id="glm-ft-nohp") - - with patch("app.routers.finetune.finetune_service.submit_finetune") as mock_submit: - mock_submit.return_value = start_resp - - resp = client.post( - "/api/v1/finetune/start", - json={ - "jsonl_url": "s3://bucket/train.jsonl", - "base_model": "glm-4", - }, - ) - - assert resp.status_code == 200 - assert resp.json()["job_id"] == "glm-ft-nohp" - - -def test_finetune_start_llm_call_error_returns_503(client): - with patch("app.routers.finetune.finetune_service.submit_finetune") as mock_submit: - mock_submit.side_effect = LLMCallError("SDK failed") - - resp = client.post( - "/api/v1/finetune/start", - json={ - "jsonl_url": "s3://bucket/train.jsonl", - "base_model": "glm-4", - }, - ) - - assert resp.status_code == 503 - assert resp.json()["code"] == "LLM_CALL_ERROR" - - -# --------------------------------------------------------------------------- -# GET /api/v1/finetune/status/{job_id} -# --------------------------------------------------------------------------- - -def test_finetune_status_returns_200_with_fields(client): - status_resp = FinetuneStatusResponse( - job_id="glm-ft-router-test", - status="RUNNING", - progress=30, - ) - - with patch("app.routers.finetune.finetune_service.get_finetune_status") as mock_status: - mock_status.return_value = status_resp - - resp = client.get("/api/v1/finetune/status/glm-ft-router-test") - - assert resp.status_code == 200 - data = resp.json() - assert data["job_id"] == "glm-ft-router-test" - assert data["status"] == "RUNNING" - assert data["progress"] == 30 - - -def test_finetune_status_succeeded(client): - status_resp = FinetuneStatusResponse( - job_id="glm-ft-done", - status="SUCCESS", - ) - - with patch("app.routers.finetune.finetune_service.get_finetune_status") as mock_status: - mock_status.return_value = status_resp - - resp = client.get("/api/v1/finetune/status/glm-ft-done") - - assert resp.status_code == 200 - assert resp.json()["status"] == "SUCCESS" - - -def test_finetune_status_llm_call_error_returns_503(client): - with patch("app.routers.finetune.finetune_service.get_finetune_status") as mock_status: - mock_status.side_effect = LLMCallError("SDK failed") - - resp = client.get("/api/v1/finetune/status/glm-ft-bad") - - assert resp.status_code == 503 - assert resp.json()["code"] == "LLM_CALL_ERROR" diff --git a/tests/test_finetune_service.py b/tests/test_finetune_service.py deleted file mode 100644 index 51d93dd..0000000 --- a/tests/test_finetune_service.py +++ /dev/null @@ -1,151 +0,0 @@ -"""Tests for finetune_service — uses LLMClient interface (no internal SDK access).""" -import pytest -from unittest.mock import MagicMock, AsyncMock - -from app.clients.llm.base import LLMClient -from app.core.exceptions import LLMCallError -from app.models.finetune_models import ( - FinetuneStartRequest, - FinetuneStartResponse, - FinetuneStatusResponse, -) - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def _make_llm(job_id: str = "glm-ft-test", status: str = "running", progress: int | None = None): - """Return a MagicMock(spec=LLMClient) with submit_finetune and get_finetune_status as AsyncMocks.""" - llm = MagicMock(spec=LLMClient) - llm.submit_finetune = AsyncMock(return_value=job_id) - llm.get_finetune_status = AsyncMock(return_value={ - "job_id": job_id, - "status": status, - "progress": progress, - "error_message": None, - }) - return llm - - -# --------------------------------------------------------------------------- -# submit_finetune -# --------------------------------------------------------------------------- - -@pytest.mark.asyncio -async def test_submit_finetune_returns_job_id(): - from app.services.finetune_service import submit_finetune - - llm = _make_llm(job_id="glm-ft-abc123") - req = FinetuneStartRequest( - jsonl_url="s3://bucket/train.jsonl", - base_model="glm-4", - hyperparams={"n_epochs": 3}, - ) - - result = await submit_finetune(req, llm) - - assert isinstance(result, FinetuneStartResponse) - assert result.job_id == "glm-ft-abc123" - - -@pytest.mark.asyncio -async def test_submit_finetune_calls_interface_with_correct_params(): - from app.services.finetune_service import submit_finetune - - llm = _make_llm(job_id="glm-ft-xyz") - req = FinetuneStartRequest( - jsonl_url="s3://bucket/train.jsonl", - base_model="glm-4", - hyperparams={"n_epochs": 5}, - ) - - await submit_finetune(req, llm) - - llm.submit_finetune.assert_awaited_once_with( - "s3://bucket/train.jsonl", - "glm-4", - {"n_epochs": 5}, - ) - - -@pytest.mark.asyncio -async def test_submit_finetune_none_hyperparams_passes_empty_dict(): - """hyperparams=None should be passed as {} to the interface.""" - from app.services.finetune_service import submit_finetune - - llm = _make_llm(job_id="glm-ft-nohp") - req = FinetuneStartRequest( - jsonl_url="s3://bucket/train.jsonl", - base_model="glm-4", - ) - - await submit_finetune(req, llm) - - llm.submit_finetune.assert_awaited_once_with( - "s3://bucket/train.jsonl", - "glm-4", - {}, - ) - - -@pytest.mark.asyncio -async def test_submit_finetune_raises_llm_call_error_on_failure(): - from app.services.finetune_service import submit_finetune - - llm = MagicMock(spec=LLMClient) - llm.submit_finetune = AsyncMock(side_effect=LLMCallError("微调任务提交失败: SDK exploded")) - - req = FinetuneStartRequest( - jsonl_url="s3://bucket/train.jsonl", - base_model="glm-4", - ) - - with pytest.raises(LLMCallError): - await submit_finetune(req, llm) - - -# --------------------------------------------------------------------------- -# get_finetune_status — status mapping -# --------------------------------------------------------------------------- - -@pytest.mark.asyncio -@pytest.mark.parametrize("sdk_status,expected", [ - ("running", "RUNNING"), - ("succeeded", "SUCCESS"), - ("failed", "FAILED"), - ("pending", "RUNNING"), # unknown → conservative RUNNING - ("queued", "RUNNING"), # unknown → conservative RUNNING - ("cancelled", "RUNNING"), # unknown → conservative RUNNING -]) -async def test_get_finetune_status_maps_status(sdk_status, expected): - from app.services.finetune_service import get_finetune_status - - llm = _make_llm(status=sdk_status) - - result = await get_finetune_status("glm-ft-test", llm) - - assert isinstance(result, FinetuneStatusResponse) - assert result.status == expected - assert result.job_id == "glm-ft-test" - - -@pytest.mark.asyncio -async def test_get_finetune_status_includes_progress(): - from app.services.finetune_service import get_finetune_status - - llm = _make_llm(status="running", progress=42) - result = await get_finetune_status("glm-ft-test", llm) - - assert result.progress == 42 - - -@pytest.mark.asyncio -async def test_get_finetune_status_raises_llm_call_error_on_failure(): - from app.services.finetune_service import get_finetune_status - - llm = MagicMock(spec=LLMClient) - llm.get_finetune_status = AsyncMock(side_effect=LLMCallError("查询微调任务失败: SDK exploded")) - - with pytest.raises(LLMCallError): - await get_finetune_status("glm-ft-bad", llm) diff --git a/tests/test_health.py b/tests/test_health.py deleted file mode 100644 index 0f2b3e4..0000000 --- a/tests/test_health.py +++ /dev/null @@ -1,8 +0,0 @@ -"""T047: Health check endpoint test — GET /health → 200 {"status": "ok"}""" -from fastapi.testclient import TestClient - - -def test_health_returns_ok(client: TestClient): - response = client.get("/health") - assert response.status_code == 200 - assert response.json() == {"status": "ok"} diff --git a/tests/test_image_router.py b/tests/test_image_router.py deleted file mode 100644 index e98ce31..0000000 --- a/tests/test_image_router.py +++ /dev/null @@ -1,63 +0,0 @@ -import json -import numpy as np -import cv2 -import pytest -from unittest.mock import AsyncMock - -from app.core.exceptions import StorageError - - -def _make_test_image_bytes() -> bytes: - img = np.zeros((80, 100, 3), dtype=np.uint8) - _, buf = cv2.imencode(".jpg", img) - return buf.tobytes() - - -SAMPLE_QUADS_JSON = json.dumps([ - { - "subject": "电缆接头", - "predicate": "位于", - "object": "配电箱左侧", - "qualifier": "2024年检修", - "bbox": {"x": 5, "y": 5, "w": 20, "h": 15}, - } -]) - - -def test_image_extract_returns_200(client, mock_llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=_make_test_image_bytes()) - mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QUADS_JSON) - mock_storage.upload_bytes = AsyncMock(return_value=None) - - resp = client.post( - "/api/v1/image/extract", - json={"file_path": "image/test.jpg", "task_id": 1}, - ) - assert resp.status_code == 200 - data = resp.json() - assert "items" in data - assert data["items"][0]["subject"] == "电缆接头" - assert data["items"][0]["cropped_image_path"] == "crops/1/0.jpg" - - -def test_image_extract_llm_parse_error_returns_502(client, mock_llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=_make_test_image_bytes()) - mock_llm.chat_vision = AsyncMock(return_value="not json {{") - - resp = client.post( - "/api/v1/image/extract", - json={"file_path": "image/test.jpg", "task_id": 1}, - ) - assert resp.status_code == 502 - assert resp.json()["code"] == "LLM_PARSE_ERROR" - - -def test_image_extract_storage_error_returns_502(client, mock_storage): - mock_storage.download_bytes = AsyncMock(side_effect=StorageError("RustFS down")) - - resp = client.post( - "/api/v1/image/extract", - json={"file_path": "image/test.jpg", "task_id": 1}, - ) - assert resp.status_code == 502 - assert resp.json()["code"] == "STORAGE_ERROR" diff --git a/tests/test_image_service.py b/tests/test_image_service.py deleted file mode 100644 index ee6e8ae..0000000 --- a/tests/test_image_service.py +++ /dev/null @@ -1,102 +0,0 @@ -import io -import json -import pytest -import numpy as np -import cv2 -from unittest.mock import AsyncMock - -from app.core.exceptions import LLMParseError -from app.models.image_models import ImageExtractRequest - - -def _make_test_image_bytes(width=100, height=80) -> bytes: - img = np.zeros((height, width, 3), dtype=np.uint8) - img[10:50, 10:60] = (255, 0, 0) # blue rectangle - _, buf = cv2.imencode(".jpg", img) - return buf.tobytes() - - -SAMPLE_QUADS_JSON = json.dumps([ - { - "subject": "电缆接头", - "predicate": "位于", - "object": "配电箱左侧", - "qualifier": "2024年检修", - "bbox": {"x": 10, "y": 10, "w": 40, "h": 30}, - } -]) - - -@pytest.fixture -def image_bytes(): - return _make_test_image_bytes() - - -@pytest.fixture -def req(): - return ImageExtractRequest(file_path="image/test.jpg", task_id=1) - - -@pytest.mark.asyncio -async def test_extract_quads_returns_items(mock_llm, mock_storage, image_bytes, req): - mock_storage.download_bytes = AsyncMock(return_value=image_bytes) - mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QUADS_JSON) - mock_storage.upload_bytes = AsyncMock(return_value=None) - - from app.services.image_service import extract_quads - result = await extract_quads(req, mock_llm, mock_storage) - - assert len(result.items) == 1 - item = result.items[0] - assert item.subject == "电缆接头" - assert item.predicate == "位于" - assert item.bbox.x == 10 - assert item.bbox.y == 10 - assert item.cropped_image_path == "crops/1/0.jpg" - - -@pytest.mark.asyncio -async def test_crop_is_uploaded(mock_llm, mock_storage, image_bytes, req): - mock_storage.download_bytes = AsyncMock(return_value=image_bytes) - mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QUADS_JSON) - mock_storage.upload_bytes = AsyncMock(return_value=None) - - from app.services.image_service import extract_quads - await extract_quads(req, mock_llm, mock_storage) - - # upload_bytes called once for the crop - mock_storage.upload_bytes.assert_called_once() - call_args = mock_storage.upload_bytes.call_args - assert call_args.args[1] == "crops/1/0.jpg" - - -@pytest.mark.asyncio -async def test_out_of_bounds_bbox_is_clamped(mock_llm, mock_storage, req): - img = _make_test_image_bytes(width=50, height=40) - mock_storage.download_bytes = AsyncMock(return_value=img) - - # bbox goes outside image boundary - oob_json = json.dumps([{ - "subject": "test", - "predicate": "rel", - "object": "obj", - "qualifier": None, - "bbox": {"x": 30, "y": 20, "w": 100, "h": 100}, # extends beyond 50x40 - }]) - mock_llm.chat_vision = AsyncMock(return_value=oob_json) - mock_storage.upload_bytes = AsyncMock(return_value=None) - - from app.services.image_service import extract_quads - # Should not raise; bbox is clamped - result = await extract_quads(req, mock_llm, mock_storage) - assert len(result.items) == 1 - - -@pytest.mark.asyncio -async def test_llm_parse_error_raised(mock_llm, mock_storage, image_bytes, req): - mock_storage.download_bytes = AsyncMock(return_value=image_bytes) - mock_llm.chat_vision = AsyncMock(return_value="bad json {{") - - from app.services.image_service import extract_quads - with pytest.raises(LLMParseError): - await extract_quads(req, mock_llm, mock_storage) diff --git a/tests/test_llm_client.py b/tests/test_llm_client.py deleted file mode 100644 index e5d0734..0000000 --- a/tests/test_llm_client.py +++ /dev/null @@ -1,81 +0,0 @@ -import pytest -from unittest.mock import MagicMock, patch - -from app.clients.llm.zhipuai_client import ZhipuAIClient -from app.core.exceptions import LLMCallError - - -@pytest.fixture -def mock_sdk_response(): - resp = MagicMock() - resp.choices[0].message.content = '{"result": "ok"}' - return resp - - -@pytest.fixture -def client(): - with patch("app.clients.llm.zhipuai_client.ZhipuAI"): - c = ZhipuAIClient(api_key="test-key") - return c - - -@pytest.mark.asyncio -async def test_chat_returns_content(client, mock_sdk_response): - client._client.chat.completions.create.return_value = mock_sdk_response - result = await client.chat("glm-4-flash", [{"role": "user", "content": "hello"}]) - assert result == '{"result": "ok"}' - - -@pytest.mark.asyncio -async def test_chat_vision_returns_content(client, mock_sdk_response): - client._client.chat.completions.create.return_value = mock_sdk_response - result = await client.chat_vision("glm-4v-flash", [{"role": "user", "content": []}]) - assert result == '{"result": "ok"}' - - -@pytest.mark.asyncio -async def test_llm_call_error_on_sdk_exception(client): - client._client.chat.completions.create.side_effect = RuntimeError("quota exceeded") - with pytest.raises(LLMCallError, match="大模型调用失败"): - await client.chat("glm-4-flash", [{"role": "user", "content": "hi"}]) - - -@pytest.mark.asyncio -async def test_submit_finetune_returns_job_id(client): - """submit_finetune should call the SDK and return the job id.""" - resp = MagicMock() - resp.id = "glm-ft-newjob" - client._client.fine_tuning.jobs.create.return_value = resp - - job_id = await client.submit_finetune( - jsonl_url="s3://bucket/train.jsonl", - base_model="glm-4", - hyperparams={"n_epochs": 2}, - ) - - assert job_id == "glm-ft-newjob" - client._client.fine_tuning.jobs.create.assert_called_once_with( - training_file="s3://bucket/train.jsonl", - model="glm-4", - hyperparameters={"n_epochs": 2}, - ) - - -@pytest.mark.asyncio -async def test_get_finetune_status_returns_correct_dict(client): - """get_finetune_status should return a normalized dict with progress coerced to int.""" - resp = MagicMock() - resp.id = "glm-ft-abc" - resp.status = "running" - resp.progress = "75" # SDK may return string; should be coerced to int - resp.error_message = None - client._client.fine_tuning.jobs.retrieve.return_value = resp - - result = await client.get_finetune_status("glm-ft-abc") - - assert result == { - "job_id": "glm-ft-abc", - "status": "running", - "progress": 75, - "error_message": None, - } diff --git a/tests/test_qa_router.py b/tests/test_qa_router.py deleted file mode 100644 index 8f82575..0000000 --- a/tests/test_qa_router.py +++ /dev/null @@ -1,121 +0,0 @@ -"""Tests for QA router: /api/v1/qa/gen-text and /api/v1/qa/gen-image.""" -import json -import pytest -from unittest.mock import AsyncMock - -from app.core.exceptions import LLMCallError, LLMParseError, StorageError - - -SAMPLE_QA_JSON = json.dumps([ - {"question": "电缆接头位于哪里?", "answer": "配电箱左侧"}, -]) - -FAKE_IMAGE_BYTES = b"\xff\xd8\xff\xe0fake_jpeg_content" - -TEXT_QA_PAYLOAD = { - "items": [ - { - "subject": "电缆接头", - "predicate": "位于", - "object": "配电箱左侧", - "source_snippet": "电缆接头位于配电箱左侧", - } - ] -} - -IMAGE_QA_PAYLOAD = { - "items": [ - { - "subject": "电缆接头", - "predicate": "位于", - "object": "配电箱左侧", - "cropped_image_path": "crops/1/0.jpg", - } - ] -} - - -# --------------------------------------------------------------------------- -# POST /api/v1/qa/gen-text -# --------------------------------------------------------------------------- - - -def test_gen_text_qa_returns_200(client, mock_llm): - mock_llm.chat = AsyncMock(return_value=SAMPLE_QA_JSON) - - resp = client.post("/api/v1/qa/gen-text", json=TEXT_QA_PAYLOAD) - - assert resp.status_code == 200 - data = resp.json() - assert "pairs" in data - assert len(data["pairs"]) == 1 - assert data["pairs"][0]["question"] == "电缆接头位于哪里?" - assert data["pairs"][0]["answer"] == "配电箱左侧" - - -def test_gen_text_qa_llm_parse_error_returns_502(client, mock_llm): - mock_llm.chat = AsyncMock(return_value="not valid json {{") - - resp = client.post("/api/v1/qa/gen-text", json=TEXT_QA_PAYLOAD) - - assert resp.status_code == 502 - assert resp.json()["code"] == "LLM_PARSE_ERROR" - - -def test_gen_text_qa_llm_call_error_returns_503(client, mock_llm): - mock_llm.chat = AsyncMock(side_effect=LLMCallError("GLM timeout")) - - resp = client.post("/api/v1/qa/gen-text", json=TEXT_QA_PAYLOAD) - - assert resp.status_code == 503 - assert resp.json()["code"] == "LLM_CALL_ERROR" - - -# --------------------------------------------------------------------------- -# POST /api/v1/qa/gen-image -# --------------------------------------------------------------------------- - - -def test_gen_image_qa_returns_200(client, mock_llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=FAKE_IMAGE_BYTES) - mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QA_JSON) - - resp = client.post("/api/v1/qa/gen-image", json=IMAGE_QA_PAYLOAD) - - assert resp.status_code == 200 - data = resp.json() - assert "pairs" in data - assert len(data["pairs"]) == 1 - pair = data["pairs"][0] - assert pair["question"] == "电缆接头位于哪里?" - assert pair["answer"] == "配电箱左侧" - assert pair["image_path"] == "crops/1/0.jpg" - - -def test_gen_image_qa_llm_parse_error_returns_502(client, mock_llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=FAKE_IMAGE_BYTES) - mock_llm.chat_vision = AsyncMock(return_value="bad json {{") - - resp = client.post("/api/v1/qa/gen-image", json=IMAGE_QA_PAYLOAD) - - assert resp.status_code == 502 - assert resp.json()["code"] == "LLM_PARSE_ERROR" - - -def test_gen_image_qa_llm_call_error_returns_503(client, mock_llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=FAKE_IMAGE_BYTES) - mock_llm.chat_vision = AsyncMock(side_effect=LLMCallError("GLM vision timeout")) - - resp = client.post("/api/v1/qa/gen-image", json=IMAGE_QA_PAYLOAD) - - assert resp.status_code == 503 - assert resp.json()["code"] == "LLM_CALL_ERROR" - - -def test_gen_image_qa_storage_error_returns_502(client, mock_storage): - mock_storage.download_bytes = AsyncMock(side_effect=StorageError("RustFS down")) - - resp = client.post("/api/v1/qa/gen-image", json=IMAGE_QA_PAYLOAD) - - assert resp.status_code == 502 - assert resp.json()["code"] == "STORAGE_ERROR" diff --git a/tests/test_qa_service.py b/tests/test_qa_service.py deleted file mode 100644 index 7a6e258..0000000 --- a/tests/test_qa_service.py +++ /dev/null @@ -1,236 +0,0 @@ -"""Tests for qa_service: text QA (US5) and image QA (US6).""" -import base64 -import json -import pytest -from unittest.mock import AsyncMock - -from app.core.exceptions import LLMCallError, LLMParseError, StorageError - - -# --------------------------------------------------------------------------- -# Shared fixtures / helpers -# --------------------------------------------------------------------------- - -SAMPLE_QA_JSON = json.dumps([ - {"question": "电缆接头位于哪里?", "answer": "配电箱左侧"}, -]) - - -# --------------------------------------------------------------------------- -# T039 — Text QA service tests (US5) -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_gen_text_qa_prompt_contains_triples(mock_llm): - """Triple fields and source_snippet must appear in the message sent to LLM.""" - from app.models.qa_models import GenTextQARequest, TextQAItem - from app.services.qa_service import gen_text_qa - - mock_llm.chat = AsyncMock(return_value=SAMPLE_QA_JSON) - - req = GenTextQARequest(items=[ - TextQAItem( - subject="电缆接头", - predicate="位于", - object="配电箱左侧", - source_snippet="电缆接头位于配电箱左侧", - ) - ]) - - await gen_text_qa(req, mock_llm) - - assert mock_llm.chat.called - call_args = mock_llm.chat.call_args - messages = call_args.args[1] if call_args.args else call_args.kwargs["messages"] - prompt_text = messages[0]["content"] - assert "电缆接头" in prompt_text - assert "位于" in prompt_text - assert "配电箱左侧" in prompt_text - assert "电缆接头位于配电箱左侧" in prompt_text - - -@pytest.mark.asyncio -async def test_gen_text_qa_returns_qa_pair_list(mock_llm): - """Parsed JSON must be returned as QAPair list.""" - from app.models.qa_models import GenTextQARequest, QAPair, TextQAItem - from app.services.qa_service import gen_text_qa - - mock_llm.chat = AsyncMock(return_value=SAMPLE_QA_JSON) - - req = GenTextQARequest(items=[ - TextQAItem( - subject="电缆接头", - predicate="位于", - object="配电箱左侧", - source_snippet="电缆接头位于配电箱左侧", - ) - ]) - - result = await gen_text_qa(req, mock_llm) - - assert len(result.pairs) == 1 - pair = result.pairs[0] - assert isinstance(pair, QAPair) - assert pair.question == "电缆接头位于哪里?" - assert pair.answer == "配电箱左侧" - - -@pytest.mark.asyncio -async def test_gen_text_qa_llm_parse_error_on_malformed_response(mock_llm): - """LLMParseError must be raised when LLM returns non-JSON.""" - from app.models.qa_models import GenTextQARequest, TextQAItem - from app.services.qa_service import gen_text_qa - - mock_llm.chat = AsyncMock(return_value="this is not json {{") - - req = GenTextQARequest(items=[ - TextQAItem(subject="s", predicate="p", object="o", source_snippet="snip") - ]) - - with pytest.raises(LLMParseError): - await gen_text_qa(req, mock_llm) - - -@pytest.mark.asyncio -async def test_gen_text_qa_llm_call_error_propagates(mock_llm): - """LLMCallError from LLM client must propagate unchanged.""" - from app.models.qa_models import GenTextQARequest, TextQAItem - from app.services.qa_service import gen_text_qa - - mock_llm.chat = AsyncMock(side_effect=LLMCallError("GLM timeout")) - - req = GenTextQARequest(items=[ - TextQAItem(subject="s", predicate="p", object="o", source_snippet="snip") - ]) - - with pytest.raises(LLMCallError): - await gen_text_qa(req, mock_llm) - - -# --------------------------------------------------------------------------- -# T040 — Image QA service tests (US6) -# --------------------------------------------------------------------------- - -FAKE_IMAGE_BYTES = b"\xff\xd8\xff\xe0fake_jpeg_content" - - -@pytest.mark.asyncio -async def test_gen_image_qa_downloads_image_and_encodes_base64(mock_llm, mock_storage): - """Storage.download_bytes must be called, result base64-encoded in LLM message.""" - from app.models.qa_models import GenImageQARequest, ImageQAItem - from app.services.qa_service import gen_image_qa - - mock_storage.download_bytes = AsyncMock(return_value=FAKE_IMAGE_BYTES) - mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QA_JSON) - - req = GenImageQARequest(items=[ - ImageQAItem( - subject="电缆接头", - predicate="位于", - object="配电箱左侧", - cropped_image_path="crops/1/0.jpg", - ) - ]) - - await gen_image_qa(req, mock_llm, mock_storage) - - # Storage download must have been called with the correct path - mock_storage.download_bytes.assert_called_once() - call_args = mock_storage.download_bytes.call_args - path_arg = call_args.args[1] if len(call_args.args) > 1 else call_args.kwargs.get("path", call_args.kwargs.get("key")) - assert path_arg == "crops/1/0.jpg" - - -@pytest.mark.asyncio -async def test_gen_image_qa_multimodal_message_format(mock_llm, mock_storage): - """Multimodal message must contain inline base64 image_url and text.""" - from app.models.qa_models import GenImageQARequest, ImageQAItem - from app.services.qa_service import gen_image_qa - - mock_storage.download_bytes = AsyncMock(return_value=FAKE_IMAGE_BYTES) - mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QA_JSON) - - req = GenImageQARequest(items=[ - ImageQAItem( - subject="电缆接头", - predicate="位于", - object="配电箱左侧", - qualifier="2024检修", - cropped_image_path="crops/1/0.jpg", - ) - ]) - - await gen_image_qa(req, mock_llm, mock_storage) - - assert mock_llm.chat_vision.called - call_args = mock_llm.chat_vision.call_args - messages = call_args.args[1] if call_args.args else call_args.kwargs["messages"] - - # Find the content list in messages - content = messages[0]["content"] - assert isinstance(content, list) - - # Must have an image_url part with inline base64 data URI - image_parts = [p for p in content if p.get("type") == "image_url"] - assert len(image_parts) >= 1 - url = image_parts[0]["image_url"]["url"] - expected_b64 = base64.b64encode(FAKE_IMAGE_BYTES).decode() - assert url == f"data:image/jpeg;base64,{expected_b64}" - - # Must have a text part containing quad info - text_parts = [p for p in content if p.get("type") == "text"] - assert len(text_parts) >= 1 - text = text_parts[0]["text"] - assert "电缆接头" in text - assert "位于" in text - assert "配电箱左侧" in text - - -@pytest.mark.asyncio -async def test_gen_image_qa_returns_image_qa_pair_with_image_path(mock_llm, mock_storage): - """Result ImageQAPair must include image_path from the item.""" - from app.models.qa_models import GenImageQARequest, ImageQAItem, ImageQAPair - from app.services.qa_service import gen_image_qa - - mock_storage.download_bytes = AsyncMock(return_value=FAKE_IMAGE_BYTES) - mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QA_JSON) - - req = GenImageQARequest(items=[ - ImageQAItem( - subject="电缆接头", - predicate="位于", - object="配电箱左侧", - cropped_image_path="crops/1/0.jpg", - ) - ]) - - result = await gen_image_qa(req, mock_llm, mock_storage) - - assert len(result.pairs) == 1 - pair = result.pairs[0] - assert isinstance(pair, ImageQAPair) - assert pair.question == "电缆接头位于哪里?" - assert pair.answer == "配电箱左侧" - assert pair.image_path == "crops/1/0.jpg" - - -@pytest.mark.asyncio -async def test_gen_image_qa_storage_error_propagates(mock_llm, mock_storage): - """StorageError from download must propagate unchanged.""" - from app.models.qa_models import GenImageQARequest, ImageQAItem - from app.services.qa_service import gen_image_qa - - mock_storage.download_bytes = AsyncMock(side_effect=StorageError("RustFS down")) - - req = GenImageQARequest(items=[ - ImageQAItem( - subject="s", - predicate="p", - object="o", - cropped_image_path="crops/1/0.jpg", - ) - ]) - - with pytest.raises(StorageError): - await gen_image_qa(req, mock_llm, mock_storage) diff --git a/tests/test_storage_client.py b/tests/test_storage_client.py deleted file mode 100644 index d124563..0000000 --- a/tests/test_storage_client.py +++ /dev/null @@ -1,62 +0,0 @@ -import pytest -from unittest.mock import MagicMock, patch -from botocore.exceptions import ClientError - -from app.clients.storage.rustfs_client import RustFSClient -from app.core.exceptions import StorageError - - -@pytest.fixture -def client(): - with patch("app.clients.storage.rustfs_client.boto3") as mock_boto3: - c = RustFSClient( - endpoint="http://rustfs:9000", - access_key="key", - secret_key="secret", - ) - c._s3 = MagicMock() - return c - - -@pytest.mark.asyncio -async def test_download_bytes_returns_bytes(client): - client._s3.get_object.return_value = {"Body": MagicMock(read=lambda: b"hello")} - result = await client.download_bytes("source-data", "text/test.txt") - assert result == b"hello" - client._s3.get_object.assert_called_once_with(Bucket="source-data", Key="text/test.txt") - - -@pytest.mark.asyncio -async def test_download_bytes_raises_storage_error(client): - client._s3.get_object.side_effect = ClientError( - {"Error": {"Code": "NoSuchKey", "Message": "Not Found"}}, "GetObject" - ) - with pytest.raises(StorageError, match="存储下载失败"): - await client.download_bytes("source-data", "missing.txt") - - -@pytest.mark.asyncio -async def test_get_object_size_returns_content_length(client): - client._s3.head_object.return_value = {"ContentLength": 1024} - size = await client.get_object_size("source-data", "video/test.mp4") - assert size == 1024 - client._s3.head_object.assert_called_once_with(Bucket="source-data", Key="video/test.mp4") - - -@pytest.mark.asyncio -async def test_get_object_size_raises_storage_error(client): - client._s3.head_object.side_effect = ClientError( - {"Error": {"Code": "NoSuchKey", "Message": "Not Found"}}, "HeadObject" - ) - with pytest.raises(StorageError, match="获取文件大小失败"): - await client.get_object_size("source-data", "video/missing.mp4") - - -@pytest.mark.asyncio -async def test_upload_bytes_calls_put_object(client): - client._s3.put_object.return_value = {} - await client.upload_bytes("source-data", "frames/1/0.jpg", b"jpeg-data", "image/jpeg") - client._s3.put_object.assert_called_once() - call_kwargs = client._s3.put_object.call_args - assert call_kwargs.kwargs["Bucket"] == "source-data" - assert call_kwargs.kwargs["Key"] == "frames/1/0.jpg" diff --git a/tests/test_text_router.py b/tests/test_text_router.py deleted file mode 100644 index cbee082..0000000 --- a/tests/test_text_router.py +++ /dev/null @@ -1,63 +0,0 @@ -import pytest -from unittest.mock import AsyncMock - - -SAMPLE_TRIPLES_JSON = '''[ - { - "subject": "变压器", - "predicate": "额定电压", - "object": "110kV", - "source_snippet": "该变压器额定电压为110kV", - "source_offset": {"start": 0, "end": 12} - } -]''' - - -def test_text_extract_returns_200(client, mock_llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=b"some text content") - mock_llm.chat = AsyncMock(return_value=SAMPLE_TRIPLES_JSON) - - resp = client.post( - "/api/v1/text/extract", - json={"file_path": "text/test.txt", "file_name": "test.txt"}, - ) - assert resp.status_code == 200 - data = resp.json() - assert "items" in data - assert data["items"][0]["subject"] == "变压器" - assert data["items"][0]["source_offset"]["start"] == 0 - - -def test_text_extract_unsupported_format_returns_400(client, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=b"data") - - resp = client.post( - "/api/v1/text/extract", - json={"file_path": "text/test.xlsx", "file_name": "data.xlsx"}, - ) - assert resp.status_code == 400 - assert resp.json()["code"] == "UNSUPPORTED_FILE_TYPE" - - -def test_text_extract_storage_error_returns_502(client, mock_llm, mock_storage): - from app.core.exceptions import StorageError - mock_storage.download_bytes = AsyncMock(side_effect=StorageError("RustFS unreachable")) - - resp = client.post( - "/api/v1/text/extract", - json={"file_path": "text/test.txt", "file_name": "test.txt"}, - ) - assert resp.status_code == 502 - assert resp.json()["code"] == "STORAGE_ERROR" - - -def test_text_extract_llm_parse_error_returns_502(client, mock_llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=b"content") - mock_llm.chat = AsyncMock(return_value="not json {{{{") - - resp = client.post( - "/api/v1/text/extract", - json={"file_path": "text/test.txt", "file_name": "test.txt"}, - ) - assert resp.status_code == 502 - assert resp.json()["code"] == "LLM_PARSE_ERROR" diff --git a/tests/test_text_service.py b/tests/test_text_service.py deleted file mode 100644 index 4f85e45..0000000 --- a/tests/test_text_service.py +++ /dev/null @@ -1,122 +0,0 @@ -import pytest -from unittest.mock import AsyncMock, MagicMock - -from app.core.exceptions import LLMParseError, StorageError, UnsupportedFileTypeError -from app.models.text_models import TextExtractRequest - - -SAMPLE_TRIPLES_JSON = '''[ - { - "subject": "变压器", - "predicate": "额定电压", - "object": "110kV", - "source_snippet": "该变压器额定电压为110kV", - "source_offset": {"start": 0, "end": 12} - } -]''' - - -@pytest.fixture -def req_txt(): - return TextExtractRequest(file_path="text/test.txt", file_name="test.txt") - - -@pytest.fixture -def req_pdf(): - return TextExtractRequest(file_path="text/test.pdf", file_name="report.pdf") - - -@pytest.fixture -def req_docx(): - return TextExtractRequest(file_path="text/test.docx", file_name="doc.docx") - - -@pytest.fixture -def llm(mock_llm): - mock_llm.chat = AsyncMock(return_value=SAMPLE_TRIPLES_JSON) - return mock_llm - - -@pytest.mark.asyncio -async def test_txt_extraction_returns_triples(llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=b"test content") - from app.services.text_service import extract_triples - req = TextExtractRequest(file_path="text/test.txt", file_name="test.txt") - result = await extract_triples(req, llm, mock_storage) - assert len(result.items) == 1 - assert result.items[0].subject == "变压器" - assert result.items[0].predicate == "额定电压" - assert result.items[0].object == "110kV" - assert result.items[0].source_offset.start == 0 - assert result.items[0].source_offset.end == 12 - - -@pytest.mark.asyncio -async def test_pdf_extraction(llm, mock_storage, tmp_path): - import pdfplumber, io - # We mock download_bytes to return a minimal PDF-like response - # and mock pdfplumber.open to return pages with text - mock_storage.download_bytes = AsyncMock(return_value=b"%PDF fake") - - with pytest.MonkeyPatch().context() as mp: - mock_page = MagicMock() - mock_page.extract_text.return_value = "PDF content here" - mock_pdf = MagicMock() - mock_pdf.__enter__ = lambda s: s - mock_pdf.__exit__ = MagicMock(return_value=False) - mock_pdf.pages = [mock_page] - mp.setattr("pdfplumber.open", lambda f: mock_pdf) - - from app.services import text_service - import importlib - importlib.reload(text_service) - req = TextExtractRequest(file_path="text/test.pdf", file_name="doc.pdf") - result = await text_service.extract_triples(req, llm, mock_storage) - assert len(result.items) == 1 - - -@pytest.mark.asyncio -async def test_docx_extraction(llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=b"PK fake docx bytes") - - with pytest.MonkeyPatch().context() as mp: - mock_para = MagicMock() - mock_para.text = "Word paragraph content" - mock_doc = MagicMock() - mock_doc.paragraphs = [mock_para] - mp.setattr("docx.Document", lambda f: mock_doc) - - from app.services import text_service - import importlib - importlib.reload(text_service) - req = TextExtractRequest(file_path="text/test.docx", file_name="doc.docx") - result = await text_service.extract_triples(req, llm, mock_storage) - assert len(result.items) == 1 - - -@pytest.mark.asyncio -async def test_unsupported_format_raises_error(llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=b"data") - from app.services.text_service import extract_triples - req = TextExtractRequest(file_path="text/test.xlsx", file_name="data.xlsx") - with pytest.raises(UnsupportedFileTypeError): - await extract_triples(req, llm, mock_storage) - - -@pytest.mark.asyncio -async def test_storage_error_propagates(llm, mock_storage): - mock_storage.download_bytes = AsyncMock(side_effect=StorageError("not found")) - from app.services.text_service import extract_triples - req = TextExtractRequest(file_path="text/test.txt", file_name="test.txt") - with pytest.raises(StorageError): - await extract_triples(req, llm, mock_storage) - - -@pytest.mark.asyncio -async def test_llm_parse_error_propagates(mock_llm, mock_storage): - mock_storage.download_bytes = AsyncMock(return_value=b"content") - mock_llm.chat = AsyncMock(return_value="not json {{") - from app.services.text_service import extract_triples - req = TextExtractRequest(file_path="text/test.txt", file_name="test.txt") - with pytest.raises(LLMParseError): - await extract_triples(req, mock_llm, mock_storage) diff --git a/tests/test_video_router.py b/tests/test_video_router.py deleted file mode 100644 index 703dc40..0000000 --- a/tests/test_video_router.py +++ /dev/null @@ -1,71 +0,0 @@ -import pytest -from unittest.mock import AsyncMock, patch - -from app.core.exceptions import VideoTooLargeError - - -def test_extract_frames_returns_202(client, mock_storage): - mock_storage.get_object_size = AsyncMock(return_value=10 * 1024 * 1024) # 10 MB - - with patch("app.routers.video.BackgroundTasks.add_task"): - resp = client.post( - "/api/v1/video/extract-frames", - json={ - "file_path": "video/test.mp4", - "source_id": 10, - "job_id": 42, - }, - ) - assert resp.status_code == 202 - data = resp.json() - assert data["job_id"] == 42 - - -def test_extract_frames_video_too_large_returns_400(client, mock_storage): - mock_storage.get_object_size = AsyncMock(return_value=300 * 1024 * 1024) # 300 MB > 200 MB - - resp = client.post( - "/api/v1/video/extract-frames", - json={ - "file_path": "video/big.mp4", - "source_id": 10, - "job_id": 99, - }, - ) - assert resp.status_code == 400 - assert resp.json()["code"] == "VIDEO_TOO_LARGE" - - -def test_video_to_text_returns_202(client, mock_storage): - mock_storage.get_object_size = AsyncMock(return_value=10 * 1024 * 1024) - - with patch("app.routers.video.BackgroundTasks.add_task"): - resp = client.post( - "/api/v1/video/to-text", - json={ - "file_path": "video/test.mp4", - "source_id": 10, - "job_id": 43, - "start_sec": 0, - "end_sec": 60, - }, - ) - assert resp.status_code == 202 - assert resp.json()["job_id"] == 43 - - -def test_video_to_text_too_large_returns_400(client, mock_storage): - mock_storage.get_object_size = AsyncMock(return_value=300 * 1024 * 1024) - - resp = client.post( - "/api/v1/video/to-text", - json={ - "file_path": "video/big.mp4", - "source_id": 10, - "job_id": 99, - "start_sec": 0, - "end_sec": 60, - }, - ) - assert resp.status_code == 400 - assert resp.json()["code"] == "VIDEO_TOO_LARGE" diff --git a/tests/test_video_service.py b/tests/test_video_service.py deleted file mode 100644 index 3e33483..0000000 --- a/tests/test_video_service.py +++ /dev/null @@ -1,195 +0,0 @@ -import io -import json -import os -import tempfile -import pytest -import numpy as np -import cv2 -from unittest.mock import AsyncMock, MagicMock, patch - -from app.models.video_models import ExtractFramesRequest, VideoToTextRequest - - -def _make_test_video(path: str, num_frames: int = 10, fps: float = 10.0, width=64, height=64): - """Write a small test video to `path` using cv2.VideoWriter.""" - fourcc = cv2.VideoWriter_fourcc(*"mp4v") - out = cv2.VideoWriter(path, fourcc, fps, (width, height)) - for i in range(num_frames): - frame = np.full((height, width, 3), (i * 20) % 256, dtype=np.uint8) - out.write(frame) - out.release() - - -# ── US3: Frame Extraction ────────────────────────────────────────────────────── - -@pytest.fixture -def frames_req(): - return ExtractFramesRequest( - file_path="video/test.mp4", - source_id=10, - job_id=42, - mode="interval", - frame_interval=3, - ) - - -@pytest.mark.asyncio -async def test_interval_mode_extracts_correct_frames(mock_storage, frames_req, tmp_path): - video_path = str(tmp_path / "test.mp4") - _make_test_video(video_path, num_frames=10, fps=10.0) - - with open(video_path, "rb") as f: - video_bytes = f.read() - - mock_storage.download_bytes = AsyncMock(return_value=video_bytes) - mock_storage.upload_bytes = AsyncMock(return_value=None) - - callback_payloads = [] - - async def fake_callback(url, payload): - callback_payloads.append(payload) - - with patch("app.services.video_service._post_callback", new=fake_callback): - from app.services.video_service import extract_frames_task - await extract_frames_task(frames_req, mock_storage, "http://backend/callback") - - assert len(callback_payloads) == 1 - cb = callback_payloads[0] - assert cb["status"] == "SUCCESS" - assert cb["job_id"] == 42 - # With 10 frames and interval=3, we expect frames at indices 0, 3, 6, 9 → 4 frames - assert len(cb["frames"]) == 4 - - -@pytest.mark.asyncio -async def test_keyframe_mode_extracts_scene_changes(mock_storage, tmp_path): - video_path = str(tmp_path / "kf.mp4") - # Create video with 2 distinct scenes separated by sudden color change - fourcc = cv2.VideoWriter_fourcc(*"mp4v") - out = cv2.VideoWriter(video_path, fourcc, 10.0, (64, 64)) - for _ in range(5): - out.write(np.zeros((64, 64, 3), dtype=np.uint8)) # black frames - for _ in range(5): - out.write(np.full((64, 64, 3), 200, dtype=np.uint8)) # bright frames - out.release() - - with open(video_path, "rb") as f: - video_bytes = f.read() - - mock_storage.download_bytes = AsyncMock(return_value=video_bytes) - mock_storage.upload_bytes = AsyncMock(return_value=None) - - callback_payloads = [] - - async def fake_callback(url, payload): - callback_payloads.append(payload) - - req = ExtractFramesRequest( - file_path="video/kf.mp4", - source_id=10, - job_id=43, - mode="keyframe", - ) - with patch("app.services.video_service._post_callback", new=fake_callback): - from app.services.video_service import extract_frames_task - await extract_frames_task(req, mock_storage, "http://backend/callback") - - cb = callback_payloads[0] - assert cb["status"] == "SUCCESS" - # Should capture at least the scene-change frame - assert len(cb["frames"]) >= 1 - - -@pytest.mark.asyncio -async def test_frame_upload_path_convention(mock_storage, frames_req, tmp_path): - video_path = str(tmp_path / "test.mp4") - _make_test_video(video_path, num_frames=3, fps=10.0) - with open(video_path, "rb") as f: - mock_storage.download_bytes = AsyncMock(return_value=f.read()) - mock_storage.upload_bytes = AsyncMock(return_value=None) - - callback_payloads = [] - async def fake_callback(url, payload): - callback_payloads.append(payload) - - req = ExtractFramesRequest( - file_path="video/test.mp4", source_id=10, job_id=99, mode="interval", frame_interval=1 - ) - with patch("app.services.video_service._post_callback", new=fake_callback): - from app.services.video_service import extract_frames_task - await extract_frames_task(req, mock_storage, "http://backend/callback") - - uploaded_paths = [call.args[1] for call in mock_storage.upload_bytes.call_args_list] - for i, path in enumerate(uploaded_paths): - assert path == f"frames/10/{i}.jpg" - - -@pytest.mark.asyncio -async def test_failed_extraction_sends_failed_callback(mock_storage, frames_req): - mock_storage.download_bytes = AsyncMock(side_effect=Exception("storage failure")) - - callback_payloads = [] - async def fake_callback(url, payload): - callback_payloads.append(payload) - - with patch("app.services.video_service._post_callback", new=fake_callback): - from app.services.video_service import extract_frames_task - await extract_frames_task(frames_req, mock_storage, "http://backend/callback") - - assert callback_payloads[0]["status"] == "FAILED" - assert callback_payloads[0]["error_message"] is not None - - -# ── US4: Video To Text ───────────────────────────────────────────────────────── - -@pytest.fixture -def totext_req(): - return VideoToTextRequest( - file_path="video/test.mp4", - source_id=10, - job_id=44, - start_sec=0.0, - end_sec=1.0, - ) - - -@pytest.mark.asyncio -async def test_video_to_text_samples_frames_and_calls_llm(mock_llm, mock_storage, totext_req, tmp_path): - video_path = str(tmp_path / "totext.mp4") - _make_test_video(video_path, num_frames=20, fps=10.0) - with open(video_path, "rb") as f: - mock_storage.download_bytes = AsyncMock(return_value=f.read()) - mock_llm.chat_vision = AsyncMock(return_value="视频描述内容") - mock_storage.upload_bytes = AsyncMock(return_value=None) - - callback_payloads = [] - async def fake_callback(url, payload): - callback_payloads.append(payload) - - with patch("app.services.video_service._post_callback", new=fake_callback): - from app.services.video_service import video_to_text_task - await video_to_text_task(totext_req, mock_llm, mock_storage, "http://backend/callback") - - assert callback_payloads[0]["status"] == "SUCCESS" - assert "output_path" in callback_payloads[0] - assert callback_payloads[0]["output_path"].startswith("video-text/10/") - mock_llm.chat_vision.assert_called_once() - - -@pytest.mark.asyncio -async def test_video_to_text_llm_failure_sends_failed_callback(mock_llm, mock_storage, totext_req, tmp_path): - video_path = str(tmp_path / "fail.mp4") - _make_test_video(video_path, num_frames=5, fps=10.0) - with open(video_path, "rb") as f: - mock_storage.download_bytes = AsyncMock(return_value=f.read()) - mock_llm.chat_vision = AsyncMock(side_effect=Exception("LLM unavailable")) - - callback_payloads = [] - async def fake_callback(url, payload): - callback_payloads.append(payload) - - with patch("app.services.video_service._post_callback", new=fake_callback): - from app.services.video_service import video_to_text_task - await video_to_text_task(totext_req, mock_llm, mock_storage, "http://backend/callback") - - assert callback_payloads[0]["status"] == "FAILED"