From 68567b98b38ebf6e9d6aa8b20c05719fe073574e Mon Sep 17 00:00:00 2001
From: wh <wanghao@qq.com>
Date: Wed, 15 Apr 2026 11:12:50 +0800
Subject: [PATCH] =?UTF-8?q?=E5=81=9C=E6=AD=A2=E8=B7=9F=E8=B8=AA=E6=96=87?=
 =?UTF-8?q?=E6=A1=A3=E5=92=8C=E5=8D=95=E5=85=83=E6=B5=8B=E8=AF=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CLAUDE.md                                     |   30 -
 .../plans/2026-04-10-ai-service-impl.md       | 3004 -----------------
 .../specs/2026-04-10-ai-service-design.md     |  835 -----
 pytest.ini                                    |    3 -
 .../checklists/requirements.md                |   38 -
 .../contracts/api.md                          |  333 --
 .../001-ai-service-requirements/data-model.md |  167 -
 specs/001-ai-service-requirements/plan.md     |  120 -
 .../001-ai-service-requirements/quickstart.md |  109 -
 specs/001-ai-service-requirements/research.md |   76 -
 specs/001-ai-service-requirements/spec.md     |  258 --
 specs/001-ai-service-requirements/tasks.md    |  318 --
 tests/__init__.py                             |    0
 tests/conftest.py                             |   39 -
 tests/test_config.py                          |   40 -
 tests/test_finetune_router.py                 |  112 -
 tests/test_finetune_service.py                |  151 -
 tests/test_health.py                          |    8 -
 tests/test_image_router.py                    |   63 -
 tests/test_image_service.py                   |  102 -
 tests/test_llm_client.py                      |   81 -
 tests/test_qa_router.py                       |  121 -
 tests/test_qa_service.py                      |  236 --
 tests/test_storage_client.py                  |   62 -
 tests/test_text_router.py                     |   63 -
 tests/test_text_service.py                    |  122 -
 tests/test_video_router.py                    |   71 -
 tests/test_video_service.py                   |  195 --
 28 files changed, 6757 deletions(-)
 delete mode 100644 CLAUDE.md
 delete mode 100644 docs/superpowers/plans/2026-04-10-ai-service-impl.md
 delete mode 100644 docs/superpowers/specs/2026-04-10-ai-service-design.md
 delete mode 100644 pytest.ini
 delete mode 100644 specs/001-ai-service-requirements/checklists/requirements.md
 delete mode 100644 specs/001-ai-service-requirements/contracts/api.md
 delete mode 100644 specs/001-ai-service-requirements/data-model.md
 delete mode 100644 specs/001-ai-service-requirements/plan.md
 delete mode 100644 specs/001-ai-service-requirements/quickstart.md
 delete mode 100644 specs/001-ai-service-requirements/research.md
 delete mode 100644 specs/001-ai-service-requirements/spec.md
 delete mode 100644 specs/001-ai-service-requirements/tasks.md
 delete mode 100644 tests/__init__.py
 delete mode 100644 tests/conftest.py
 delete mode 100644 tests/test_config.py
 delete mode 100644 tests/test_finetune_router.py
 delete mode 100644 tests/test_finetune_service.py
 delete mode 100644 tests/test_health.py
 delete mode 100644 tests/test_image_router.py
 delete mode 100644 tests/test_image_service.py
 delete mode 100644 tests/test_llm_client.py
 delete mode 100644 tests/test_qa_router.py
 delete mode 100644 tests/test_qa_service.py
 delete mode 100644 tests/test_storage_client.py
 delete mode 100644 tests/test_text_router.py
 delete mode 100644 tests/test_text_service.py
 delete mode 100644 tests/test_video_router.py
 delete mode 100644 tests/test_video_service.py

diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 100644
index c635081..0000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1,30 +0,0 @@
-﻿# label_ai_service Development Guidelines
-
-Auto-generated from all feature plans. Last updated: 2026-04-10
-
-## Active Technologies
-
-- Python 3.12.13（conda `label` 环境） + FastAPI ≥0.111, uvicorn[standard] ≥0.29, pydantic ≥2.7, zhipuai ≥2.1, boto3 ≥1.34, pdfplumber ≥0.11, python-docx ≥1.1, opencv-python-headless ≥4.9, numpy ≥1.26, httpx ≥0.27, python-dotenv ≥1.0, pyyaml ≥6.0 (001-ai-service-requirements)
-
-## Project Structure
-
-```text
-backend/
-frontend/
-tests/
-```
-
-## Commands
-
-cd src; pytest; ruff check .
-
-## Code Style
-
-Python 3.12.13（conda `label` 环境）: Follow standard conventions
-
-## Recent Changes
-
-- 001-ai-service-requirements: Added Python 3.12.13（conda `label` 环境） + FastAPI ≥0.111, uvicorn[standard] ≥0.29, pydantic ≥2.7, zhipuai ≥2.1, boto3 ≥1.34, pdfplumber ≥0.11, python-docx ≥1.1, opencv-python-headless ≥4.9, numpy ≥1.26, httpx ≥0.27, python-dotenv ≥1.0, pyyaml ≥6.0
-
-<!-- MANUAL ADDITIONS START -->
-<!-- MANUAL ADDITIONS END -->
diff --git a/docs/superpowers/plans/2026-04-10-ai-service-impl.md b/docs/superpowers/plans/2026-04-10-ai-service-impl.md
deleted file mode 100644
index eaff9ee..0000000
--- a/docs/superpowers/plans/2026-04-10-ai-service-impl.md
+++ /dev/null
@@ -1,3004 +0,0 @@
-# AI Service Implementation Plan
-
-> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
-
-**Goal:** 实现 label_ai_service，一个 Python FastAPI 服务，为知识图谱标注平台提供文本三元组提取、图像四元组提取、视频处理、问答对生成和 GLM 微调管理能力。
-
-**Architecture:** 分层架构：routers（HTTP 入口）→ services（业务逻辑）→ clients（外部适配层）。LLMClient 和 StorageClient 均为 ABC，当前分别实现 ZhipuAIClient 和 RustFSClient，通过 FastAPI Depends 注入，services 层不感知具体实现。视频任务用 FastAPI BackgroundTasks 异步执行，完成后回调 Java 后端。
-
-**Tech Stack:** Python 3.12（conda `label` 环境），FastAPI，ZhipuAI SDK，boto3（S3），OpenCV，pdfplumber，python-docx，httpx，pytest
-
----
-
-## Task 1: 项目脚手架
-
-**Files:**
-- Create: `app/__init__.py`
-- Create: `app/core/__init__.py`
-- Create: `app/clients/__init__.py`
-- Create: `app/clients/llm/__init__.py`
-- Create: `app/clients/storage/__init__.py`
-- Create: `app/services/__init__.py`
-- Create: `app/routers/__init__.py`
-- Create: `app/models/__init__.py`
-- Create: `tests/__init__.py`
-- Create: `tests/conftest.py`
-- Create: `config.yaml`
-- Create: `.env`
-- Create: `requirements.txt`
-
-- [ ] **Step 1: 创建包目录结构**
-
-```bash
-mkdir -p app/core app/clients/llm app/clients/storage app/services app/routers app/models tests
-touch app/__init__.py app/core/__init__.py
-touch app/clients/__init__.py app/clients/llm/__init__.py app/clients/storage/__init__.py
-touch app/services/__init__.py app/routers/__init__.py app/models/__init__.py
-touch tests/__init__.py
-```
-
-- [ ] **Step 2: 创建 `config.yaml`**
-
-```yaml
-server:
-  port: 8000
-  log_level: INFO
-
-storage:
-  buckets:
-    source_data: "source-data"
-    finetune_export: "finetune-export"
-
-backend: {}
-
-video:
-  frame_sample_count: 8
-  max_file_size_mb: 200
-
-models:
-  default_text: "glm-4-flash"
-  default_vision: "glm-4v-flash"
-```
-
-- [ ] **Step 3: 创建 `.env`**
-
-```ini
-ZHIPUAI_API_KEY=your-zhipuai-api-key
-STORAGE_ACCESS_KEY=minioadmin
-STORAGE_SECRET_KEY=minioadmin
-STORAGE_ENDPOINT=http://rustfs:9000
-BACKEND_CALLBACK_URL=http://backend:8080/internal/video-job/callback
-# MAX_VIDEO_SIZE_MB=200   # 可选，覆盖 config.yaml 中的视频大小上限
-```
-
-- [ ] **Step 4: 创建 `requirements.txt`**
-
-```
-fastapi>=0.111
-uvicorn[standard]>=0.29
-pydantic>=2.7
-python-dotenv>=1.0
-pyyaml>=6.0
-zhipuai>=2.1
-boto3>=1.34
-pdfplumber>=0.11
-python-docx>=1.1
-opencv-python-headless>=4.9
-numpy>=1.26
-httpx>=0.27
-pytest>=8.0
-pytest-asyncio>=0.23
-```
-
-- [ ] **Step 5: 创建 `tests/conftest.py`**
-
-```python
-import pytest
-from unittest.mock import AsyncMock, MagicMock
-from app.clients.llm.base import LLMClient
-from app.clients.storage.base import StorageClient
-
-
-@pytest.fixture
-def mock_llm():
-    client = MagicMock(spec=LLMClient)
-    client.chat = AsyncMock()
-    client.chat_vision = AsyncMock()
-    return client
-
-
-@pytest.fixture
-def mock_storage():
-    client = MagicMock(spec=StorageClient)
-    client.download_bytes = AsyncMock()
-    client.upload_bytes = AsyncMock()
-    client.get_presigned_url = MagicMock(return_value="https://example.com/presigned/crop.jpg")
-    client.get_object_size = AsyncMock(return_value=10 * 1024 * 1024)  # 默认 10MB，小于限制
-    return client
-```
-
-- [ ] **Step 6: 安装依赖**
-
-```bash
-conda run -n label pip install -r requirements.txt
-```
-
-Expected: 所有包安装成功，无错误
-
-- [ ] **Step 7: Commit**
-
-```bash
-git add app/ tests/ config.yaml .env requirements.txt
-git commit -m "feat: project scaffold - directory structure and config files"
-```
-
----
-
-## Task 2: Core Config 模块
-
-**Files:**
-- Create: `app/core/config.py`
-- Create: `tests/test_config.py`
-
-- [ ] **Step 1: 编写失败测试**
-
-`tests/test_config.py`:
-
-```python
-import pytest
-from unittest.mock import patch, mock_open
-from app.core.config import get_config
-
-MOCK_YAML = """
-server:
-  port: 8000
-  log_level: INFO
-storage:
-  buckets:
-    source_data: "source-data"
-    finetune_export: "finetune-export"
-backend: {}
-video:
-  frame_sample_count: 8
-models:
-  default_text: "glm-4-flash"
-  default_vision: "glm-4v-flash"
-"""
-
-
-def _fresh_config(monkeypatch, extra_env: dict = None):
-    """每次测试前清除 lru_cache，设置环境变量。"""
-    get_config.cache_clear()
-    base_env = {
-        "ZHIPUAI_API_KEY": "test-key",
-        "STORAGE_ACCESS_KEY": "test-access",
-        "STORAGE_SECRET_KEY": "test-secret",
-        "STORAGE_ENDPOINT": "http://localhost:9000",
-        "BACKEND_CALLBACK_URL": "http://localhost:8080/callback",
-    }
-    if extra_env:
-        base_env.update(extra_env)
-    for k, v in base_env.items():
-        monkeypatch.setenv(k, v)
-
-
-def test_env_overrides_yaml(monkeypatch):
-    _fresh_config(monkeypatch)
-    with patch("builtins.open", mock_open(read_data=MOCK_YAML)):
-        with patch("app.core.config.load_dotenv"):
-            cfg = get_config()
-    assert cfg["zhipuai"]["api_key"] == "test-key"
-    assert cfg["storage"]["access_key"] == "test-access"
-    assert cfg["storage"]["endpoint"] == "http://localhost:9000"
-    assert cfg["backend"]["callback_url"] == "http://localhost:8080/callback"
-    get_config.cache_clear()
-
-
-def test_yaml_values_preserved(monkeypatch):
-    _fresh_config(monkeypatch)
-    with patch("builtins.open", mock_open(read_data=MOCK_YAML)):
-        with patch("app.core.config.load_dotenv"):
-            cfg = get_config()
-    assert cfg["models"]["default_text"] == "glm-4-flash"
-    assert cfg["video"]["frame_sample_count"] == 8
-    assert cfg["storage"]["buckets"]["source_data"] == "source-data"
-    get_config.cache_clear()
-
-
-def test_missing_api_key_raises(monkeypatch):
-    get_config.cache_clear()
-    monkeypatch.delenv("ZHIPUAI_API_KEY", raising=False)
-    monkeypatch.setenv("STORAGE_ACCESS_KEY", "a")
-    monkeypatch.setenv("STORAGE_SECRET_KEY", "b")
-    with patch("builtins.open", mock_open(read_data=MOCK_YAML)):
-        with patch("app.core.config.load_dotenv"):
-            with pytest.raises(RuntimeError, match="ZHIPUAI_API_KEY"):
-                get_config()
-    get_config.cache_clear()
-
-
-def test_missing_storage_key_raises(monkeypatch):
-    get_config.cache_clear()
-    monkeypatch.setenv("ZHIPUAI_API_KEY", "key")
-    monkeypatch.delenv("STORAGE_ACCESS_KEY", raising=False)
-    monkeypatch.setenv("STORAGE_SECRET_KEY", "b")
-    with patch("builtins.open", mock_open(read_data=MOCK_YAML)):
-        with patch("app.core.config.load_dotenv"):
-            with pytest.raises(RuntimeError, match="STORAGE_ACCESS_KEY"):
-                get_config()
-    get_config.cache_clear()
-```
-
-- [ ] **Step 2: 运行，确认失败**
-
-```bash
-conda run -n label pytest tests/test_config.py -v
-```
-
-Expected: `ImportError: cannot import name 'get_config'`
-
-- [ ] **Step 3: 实现 `app/core/config.py`**
-
-```python
-import os
-import yaml
-from functools import lru_cache
-from pathlib import Path
-from dotenv import load_dotenv
-
-_ROOT = Path(__file__).parent.parent.parent
-
-_ENV_OVERRIDES = {
-    "ZHIPUAI_API_KEY":       ["zhipuai", "api_key"],
-    "STORAGE_ACCESS_KEY":    ["storage", "access_key"],
-    "STORAGE_SECRET_KEY":    ["storage", "secret_key"],
-    "STORAGE_ENDPOINT":      ["storage", "endpoint"],
-    "BACKEND_CALLBACK_URL":  ["backend", "callback_url"],
-    "LOG_LEVEL":             ["server", "log_level"],
-    "MAX_VIDEO_SIZE_MB":     ["video", "max_file_size_mb"],
-}
-
-
-def _set_nested(d: dict, keys: list[str], value: str) -> None:
-    for k in keys[:-1]:
-        d = d.setdefault(k, {})
-    d[keys[-1]] = value
-
-
-@lru_cache(maxsize=1)
-def get_config() -> dict:
-    load_dotenv(_ROOT / ".env")
-    with open(_ROOT / "config.yaml", encoding="utf-8") as f:
-        cfg = yaml.safe_load(f)
-    for env_key, yaml_path in _ENV_OVERRIDES.items():
-        val = os.environ.get(env_key)
-        if val:
-            _set_nested(cfg, yaml_path, val)
-    _validate(cfg)
-    return cfg
-
-
-def _validate(cfg: dict) -> None:
-    checks = [
-        (["zhipuai", "api_key"],    "ZHIPUAI_API_KEY"),
-        (["storage", "access_key"], "STORAGE_ACCESS_KEY"),
-        (["storage", "secret_key"], "STORAGE_SECRET_KEY"),
-    ]
-    for path, name in checks:
-        val = cfg
-        for k in path:
-            val = (val or {}).get(k, "")
-        if not val:
-            raise RuntimeError(f"缺少必要配置项：{name}")
-```
-
-- [ ] **Step 4: 运行，确认通过**
-
-```bash
-conda run -n label pytest tests/test_config.py -v
-```
-
-Expected: `4 passed`
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add app/core/config.py tests/test_config.py
-git commit -m "feat: core config module with YAML + env layered loading"
-```
-
----
-
-## Task 3: Core Logging、Exceptions、JSON Utils
-
-**Files:**
-- Create: `app/core/logging.py`
-- Create: `app/core/exceptions.py`
-- Create: `app/core/json_utils.py`
-
-- [ ] **Step 1: 实现 `app/core/logging.py`**
-
-```python
-import json
-import logging
-import time
-from typing import Callable
-
-from fastapi import Request, Response
-
-
-class _JSONFormatter(logging.Formatter):
-    def format(self, record: logging.LogRecord) -> str:
-        entry: dict = {
-            "time": self.formatTime(record),
-            "level": record.levelname,
-            "logger": record.name,
-            "message": record.getMessage(),
-        }
-        if record.exc_info:
-            entry["exception"] = self.formatException(record.exc_info)
-        return json.dumps(entry, ensure_ascii=False)
-
-
-def setup_logging(log_level: str = "INFO") -> None:
-    handler = logging.StreamHandler()
-    handler.setFormatter(_JSONFormatter())
-    root = logging.getLogger()
-    root.handlers.clear()
-    root.addHandler(handler)
-    root.setLevel(getattr(logging, log_level.upper(), logging.INFO))
-
-
-async def request_logging_middleware(request: Request, call_next: Callable) -> Response:
-    start = time.monotonic()
-    response = await call_next(request)
-    duration_ms = round((time.monotonic() - start) * 1000, 2)
-    logging.getLogger("api").info(
-        f"method={request.method} path={request.url.path} "
-        f"status={response.status_code} duration_ms={duration_ms}"
-    )
-    return response
-```
-
-- [ ] **Step 2: 实现 `app/core/exceptions.py`**
-
-```python
-import logging
-from fastapi import Request
-from fastapi.responses import JSONResponse
-
-
-class UnsupportedFileTypeError(Exception):
-    def __init__(self, ext: str):
-        super().__init__(f"不支持的文件类型：{ext}")
-
-
-class StorageDownloadError(Exception):
-    pass
-
-
-class LLMResponseParseError(Exception):
-    pass
-
-
-class LLMCallError(Exception):
-    pass
-
-
-async def unsupported_file_type_handler(request: Request, exc: UnsupportedFileTypeError):
-    return JSONResponse(
-        status_code=400,
-        content={"code": "UNSUPPORTED_FILE_TYPE", "message": str(exc)},
-    )
-
-
-async def storage_download_handler(request: Request, exc: StorageDownloadError):
-    return JSONResponse(
-        status_code=502,
-        content={"code": "STORAGE_ERROR", "message": str(exc)},
-    )
-
-
-async def llm_parse_handler(request: Request, exc: LLMResponseParseError):
-    return JSONResponse(
-        status_code=502,
-        content={"code": "LLM_PARSE_ERROR", "message": str(exc)},
-    )
-
-
-async def llm_call_handler(request: Request, exc: LLMCallError):
-    return JSONResponse(
-        status_code=503,
-        content={"code": "LLM_CALL_ERROR", "message": str(exc)},
-    )
-
-
-async def generic_error_handler(request: Request, exc: Exception):
-    logging.getLogger("error").exception("未捕获异常")
-    return JSONResponse(
-        status_code=500,
-        content={"code": "INTERNAL_ERROR", "message": "服务器内部错误"},
-    )
-```
-
-- [ ] **Step 3: 实现 `app/core/json_utils.py`**
-
-```python
-import json
-from app.core.exceptions import LLMResponseParseError
-
-
-def parse_json_response(raw: str) -> list | dict:
-    """从 GLM 响应中解析 JSON，兼容 markdown 代码块包裹格式。"""
-    content = raw.strip()
-    if "```json" in content:
-        content = content.split("```json")[1].split("```")[0]
-    elif "```" in content:
-        content = content.split("```")[1].split("```")[0]
-    content = content.strip()
-    try:
-        return json.loads(content)
-    except json.JSONDecodeError as e:
-        raise LLMResponseParseError(
-            f"GLM 返回内容无法解析为 JSON: {raw[:200]}"
-        ) from e
-```
-
-- [ ] **Step 4: Commit**
-
-```bash
-git add app/core/logging.py app/core/exceptions.py app/core/json_utils.py
-git commit -m "feat: core logging, exceptions, json utils"
-```
-
----
-
-## Task 4: LLM 适配层
-
-**Files:**
-- Create: `app/clients/llm/base.py`
-- Create: `app/clients/llm/zhipuai_client.py`
-- Create: `tests/test_llm_client.py`
-
-- [ ] **Step 1: 编写失败测试**
-
-`tests/test_llm_client.py`:
-
-```python
-import asyncio
-import pytest
-from unittest.mock import MagicMock, patch
-from app.clients.llm.zhipuai_client import ZhipuAIClient
-
-
-@pytest.fixture
-def zhipuai_client():
-    with patch("app.clients.llm.zhipuai_client.ZhipuAI") as MockZhipuAI:
-        mock_sdk = MagicMock()
-        MockZhipuAI.return_value = mock_sdk
-        client = ZhipuAIClient(api_key="test-key")
-        client._mock_sdk = mock_sdk
-        yield client
-
-
-def test_chat_returns_content(zhipuai_client):
-    mock_resp = MagicMock()
-    mock_resp.choices[0].message.content = "三元组提取结果"
-    zhipuai_client._mock_sdk.chat.completions.create.return_value = mock_resp
-
-    result = asyncio.run(
-        zhipuai_client.chat(
-            messages=[{"role": "user", "content": "提取三元组"}],
-            model="glm-4-flash",
-        )
-    )
-    assert result == "三元组提取结果"
-    zhipuai_client._mock_sdk.chat.completions.create.assert_called_once()
-
-
-def test_chat_vision_calls_same_endpoint(zhipuai_client):
-    mock_resp = MagicMock()
-    mock_resp.choices[0].message.content = "图像分析结果"
-    zhipuai_client._mock_sdk.chat.completions.create.return_value = mock_resp
-
-    result = asyncio.run(
-        zhipuai_client.chat_vision(
-            messages=[{"role": "user", "content": [{"type": "text", "text": "分析"}]}],
-            model="glm-4v-flash",
-        )
-    )
-    assert result == "图像分析结果"
-```
-
-- [ ] **Step 2: 运行，确认失败**
-
-```bash
-conda run -n label pytest tests/test_llm_client.py -v
-```
-
-Expected: `ImportError`
-
-- [ ] **Step 3: 实现 `app/clients/llm/base.py`**
-
-```python
-from abc import ABC, abstractmethod
-
-
-class LLMClient(ABC):
-    @abstractmethod
-    async def chat(self, messages: list[dict], model: str, **kwargs) -> str:
-        """纯文本对话，返回模型输出文本。"""
-
-    @abstractmethod
-    async def chat_vision(self, messages: list[dict], model: str, **kwargs) -> str:
-        """多模态对话（图文混合输入），返回模型输出文本。"""
-```
-
-- [ ] **Step 4: 实现 `app/clients/llm/zhipuai_client.py`**
-
-```python
-import asyncio
-from zhipuai import ZhipuAI
-from app.clients.llm.base import LLMClient
-
-
-class ZhipuAIClient(LLMClient):
-    def __init__(self, api_key: str):
-        self._client = ZhipuAI(api_key=api_key)
-
-    async def chat(self, messages: list[dict], model: str, **kwargs) -> str:
-        loop = asyncio.get_event_loop()
-        resp = await loop.run_in_executor(
-            None,
-            lambda: self._client.chat.completions.create(
-                model=model, messages=messages, **kwargs
-            ),
-        )
-        return resp.choices[0].message.content
-
-    async def chat_vision(self, messages: list[dict], model: str, **kwargs) -> str:
-        # GLM-4V 与文本接口相同，通过 image_url type 区分图文消息
-        return await self.chat(messages, model, **kwargs)
-```
-
-- [ ] **Step 5: 运行，确认通过**
-
-```bash
-conda run -n label pytest tests/test_llm_client.py -v
-```
-
-Expected: `2 passed`
-
-- [ ] **Step 6: Commit**
-
-```bash
-git add app/clients/llm/ tests/test_llm_client.py
-git commit -m "feat: LLMClient ABC and ZhipuAI implementation"
-```
-
----
-
-## Task 5: Storage 适配层
-
-**Files:**
-- Create: `app/clients/storage/base.py`
-- Create: `app/clients/storage/rustfs_client.py`
-- Create: `tests/test_storage_client.py`
-
-- [ ] **Step 1: 编写失败测试**
-
-`tests/test_storage_client.py`:
-
-```python
-import asyncio
-import pytest
-from unittest.mock import MagicMock, patch
-from app.clients.storage.rustfs_client import RustFSClient
-
-
-@pytest.fixture
-def rustfs_client():
-    with patch("app.clients.storage.rustfs_client.boto3") as mock_boto3:
-        mock_s3 = MagicMock()
-        mock_boto3.client.return_value = mock_s3
-        client = RustFSClient(
-            endpoint="http://localhost:9000",
-            access_key="minioadmin",
-            secret_key="minioadmin",
-        )
-        client._mock_s3 = mock_s3
-        yield client
-
-
-def test_download_bytes(rustfs_client):
-    mock_body = MagicMock()
-    mock_body.read.return_value = b"file content"
-    rustfs_client._mock_s3.get_object.return_value = {"Body": mock_body}
-
-    result = asyncio.run(
-        rustfs_client.download_bytes("source-data", "text/202404/1.txt")
-    )
-    assert result == b"file content"
-    rustfs_client._mock_s3.get_object.assert_called_once_with(
-        Bucket="source-data", Key="text/202404/1.txt"
-    )
-
-
-def test_upload_bytes(rustfs_client):
-    asyncio.run(
-        rustfs_client.upload_bytes("source-data", "crops/1/0.jpg", b"img", "image/jpeg")
-    )
-    rustfs_client._mock_s3.put_object.assert_called_once_with(
-        Bucket="source-data", Key="crops/1/0.jpg", Body=b"img", ContentType="image/jpeg"
-    )
-
-
-def test_get_presigned_url(rustfs_client):
-    rustfs_client._mock_s3.generate_presigned_url.return_value = "https://example.com/signed"
-    url = rustfs_client.get_presigned_url("source-data", "crops/1/0.jpg", expires=3600)
-    assert url == "https://example.com/signed"
-    rustfs_client._mock_s3.generate_presigned_url.assert_called_once_with(
-        "get_object",
-        Params={"Bucket": "source-data", "Key": "crops/1/0.jpg"},
-        ExpiresIn=3600,
-    )
-
-
-def test_get_object_size(rustfs_client):
-    rustfs_client._mock_s3.head_object.return_value = {"ContentLength": 1024 * 1024 * 50}
-    size = asyncio.run(rustfs_client.get_object_size("source-data", "video/1.mp4"))
-    assert size == 1024 * 1024 * 50
-    rustfs_client._mock_s3.head_object.assert_called_once_with(
-        Bucket="source-data", Key="video/1.mp4"
-    )
-```
-
-- [ ] **Step 2: 运行，确认失败**
-
-```bash
-conda run -n label pytest tests/test_storage_client.py -v
-```
-
-Expected: `ImportError`
-
-- [ ] **Step 3: 实现 `app/clients/storage/base.py`**
-
-```python
-from abc import ABC, abstractmethod
-
-
-class StorageClient(ABC):
-    @abstractmethod
-    async def download_bytes(self, bucket: str, path: str) -> bytes:
-        """从对象存储下载文件，返回字节内容。"""
-
-    @abstractmethod
-    async def upload_bytes(
-        self,
-        bucket: str,
-        path: str,
-        data: bytes,
-        content_type: str = "application/octet-stream",
-    ) -> None:
-        """上传字节内容到对象存储。"""
-
-    @abstractmethod
-    def get_presigned_url(self, bucket: str, path: str, expires: int = 3600) -> str:
-        """生成预签名访问 URL。"""
-
-    @abstractmethod
-    async def get_object_size(self, bucket: str, path: str) -> int:
-        """返回对象字节大小，用于在下载前进行大小校验。"""
-```
-
-- [ ] **Step 4: 实现 `app/clients/storage/rustfs_client.py`**
-
-```python
-import asyncio
-import boto3
-from app.clients.storage.base import StorageClient
-
-
-class RustFSClient(StorageClient):
-    def __init__(self, endpoint: str, access_key: str, secret_key: str):
-        self._s3 = boto3.client(
-            "s3",
-            endpoint_url=endpoint,
-            aws_access_key_id=access_key,
-            aws_secret_access_key=secret_key,
-        )
-
-    async def download_bytes(self, bucket: str, path: str) -> bytes:
-        loop = asyncio.get_event_loop()
-        resp = await loop.run_in_executor(
-            None, lambda: self._s3.get_object(Bucket=bucket, Key=path)
-        )
-        return resp["Body"].read()
-
-    async def upload_bytes(
-        self,
-        bucket: str,
-        path: str,
-        data: bytes,
-        content_type: str = "application/octet-stream",
-    ) -> None:
-        loop = asyncio.get_event_loop()
-        await loop.run_in_executor(
-            None,
-            lambda: self._s3.put_object(
-                Bucket=bucket, Key=path, Body=data, ContentType=content_type
-            ),
-        )
-
-    def get_presigned_url(self, bucket: str, path: str, expires: int = 3600) -> str:
-        return self._s3.generate_presigned_url(
-            "get_object",
-            Params={"Bucket": bucket, "Key": path},
-            ExpiresIn=expires,
-        )
-
-    async def get_object_size(self, bucket: str, path: str) -> int:
-        loop = asyncio.get_event_loop()
-        resp = await loop.run_in_executor(
-            None, lambda: self._s3.head_object(Bucket=bucket, Key=path)
-        )
-        return resp["ContentLength"]
-```
-
-- [ ] **Step 5: 运行，确认通过**
-
-```bash
-conda run -n label pytest tests/test_storage_client.py -v
-```
-
-Expected: `4 passed`
-
-- [ ] **Step 6: Commit**
-
-```bash
-git add app/clients/storage/ tests/test_storage_client.py
-git commit -m "feat: StorageClient ABC and RustFS S3 implementation"
-```
-
----
-
-## Task 6: 依赖注入 + FastAPI 应用入口
-
-**Files:**
-- Create: `app/core/dependencies.py`
-- Create: `app/main.py`
-
-- [ ] **Step 1: 实现 `app/core/dependencies.py`**
-
-```python
-from app.clients.llm.base import LLMClient
-from app.clients.storage.base import StorageClient
-
-_llm_client: LLMClient | None = None
-_storage_client: StorageClient | None = None
-
-
-def set_clients(llm: LLMClient, storage: StorageClient) -> None:
-    global _llm_client, _storage_client
-    _llm_client, _storage_client = llm, storage
-
-
-def get_llm_client() -> LLMClient:
-    return _llm_client
-
-
-def get_storage_client() -> StorageClient:
-    return _storage_client
-```
-
-- [ ] **Step 2: 实现 `app/main.py`**
-
-注意：routers 在后续任务中创建，先注释掉 include_router，待各路由实现后逐步取消注释。
-
-```python
-import logging
-from contextlib import asynccontextmanager
-
-from fastapi import FastAPI
-
-from app.core.config import get_config
-from app.core.dependencies import set_clients
-from app.core.exceptions import (
-    LLMCallError,
-    LLMResponseParseError,
-    StorageDownloadError,
-    UnsupportedFileTypeError,
-    generic_error_handler,
-    llm_call_handler,
-    llm_parse_handler,
-    storage_download_handler,
-    unsupported_file_type_handler,
-)
-from app.core.logging import request_logging_middleware, setup_logging
-from app.clients.llm.zhipuai_client import ZhipuAIClient
-from app.clients.storage.rustfs_client import RustFSClient
-
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    cfg = get_config()
-    setup_logging(cfg["server"]["log_level"])
-    set_clients(
-        llm=ZhipuAIClient(api_key=cfg["zhipuai"]["api_key"]),
-        storage=RustFSClient(
-            endpoint=cfg["storage"]["endpoint"],
-            access_key=cfg["storage"]["access_key"],
-            secret_key=cfg["storage"]["secret_key"],
-        ),
-    )
-    logging.getLogger("startup").info("AI 服务启动完成")
-    yield
-    logging.getLogger("startup").info("AI 服务关闭")
-
-
-app = FastAPI(title="Label AI Service", version="1.0.0", lifespan=lifespan)
-
-app.middleware("http")(request_logging_middleware)
-
-
-@app.get("/health", tags=["Health"])
-async def health():
-    return {"status": "ok"}
-
-app.add_exception_handler(UnsupportedFileTypeError, unsupported_file_type_handler)
-app.add_exception_handler(StorageDownloadError, storage_download_handler)
-app.add_exception_handler(LLMResponseParseError, llm_parse_handler)
-app.add_exception_handler(LLMCallError, llm_call_handler)
-app.add_exception_handler(Exception, generic_error_handler)
-
-# Routers registered after each task:
-# from app.routers import text, image, video, qa, finetune
-# app.include_router(text.router, prefix="/api/v1")
-# app.include_router(image.router, prefix="/api/v1")
-# app.include_router(video.router, prefix="/api/v1")
-# app.include_router(qa.router, prefix="/api/v1")
-# app.include_router(finetune.router, prefix="/api/v1")
-```
-
-- [ ] **Step 3: 验证 /health 端点**
-
-```bash
-conda run -n label python -c "
-from fastapi.testclient import TestClient
-from app.main import app
-client = TestClient(app)
-r = client.get('/health')
-assert r.status_code == 200 and r.json() == {'status': 'ok'}, r.json()
-print('health check OK')
-"
-```
-
-Expected: `health check OK`
-
-- [ ] **Step 4: Commit**
-
-```bash
-git add app/core/dependencies.py app/main.py
-git commit -m "feat: DI dependencies, FastAPI app entry with lifespan and /health endpoint"
-```
-
----
-
-## Task 7: Text Pydantic Models
-
-**Files:**
-- Create: `app/models/text_models.py`
-
-- [ ] **Step 1: 实现 `app/models/text_models.py`**
-
-```python
-from pydantic import BaseModel
-
-
-class SourceOffset(BaseModel):
-    start: int
-    end: int
-
-
-class TripleItem(BaseModel):
-    subject: str
-    predicate: str
-    object: str
-    source_snippet: str
-    source_offset: SourceOffset
-
-
-class TextExtractRequest(BaseModel):
-    file_path: str
-    file_name: str
-    model: str | None = None
-    prompt_template: str | None = None
-
-
-class TextExtractResponse(BaseModel):
-    items: list[TripleItem]
-```
-
-- [ ] **Step 2: 快速验证 schema**
-
-```bash
-conda run -n label python -c "
-from app.models.text_models import TextExtractRequest, TextExtractResponse, TripleItem, SourceOffset
-req = TextExtractRequest(file_path='text/1.txt', file_name='1.txt')
-item = TripleItem(subject='A', predicate='B', object='C', source_snippet='ABC', source_offset=SourceOffset(start=0, end=3))
-resp = TextExtractResponse(items=[item])
-print(resp.model_dump())
-"
-```
-
-Expected: 打印出完整字典，无报错
-
-- [ ] **Step 3: Commit**
-
-```bash
-git add app/models/text_models.py
-git commit -m "feat: text Pydantic models"
-```
-
----
-
-## Task 8: Text Service
-
-**Files:**
-- Create: `app/services/text_service.py`
-- Create: `tests/test_text_service.py`
-
-- [ ] **Step 1: 编写失败测试**
-
-`tests/test_text_service.py`:
-
-```python
-import pytest
-from app.services.text_service import extract_triples, _extract_text_from_bytes
-from app.core.exceptions import UnsupportedFileTypeError, LLMResponseParseError, StorageDownloadError
-
-TRIPLE_JSON = '[{"subject":"变压器","predicate":"额定电压","object":"110kV","source_snippet":"额定电压为110kV","source_offset":{"start":0,"end":10}}]'
-
-
-@pytest.mark.asyncio
-async def test_extract_triples_txt(mock_llm, mock_storage):
-    mock_storage.download_bytes.return_value = b"变压器额定电压为110kV"
-    mock_llm.chat.return_value = TRIPLE_JSON
-
-    result = await extract_triples(
-        file_path="text/1.txt",
-        file_name="test.txt",
-        model="glm-4-flash",
-        prompt_template="提取三元组：",
-        llm=mock_llm,
-        storage=mock_storage,
-    )
-    assert len(result) == 1
-    assert result[0].subject == "变压器"
-    assert result[0].predicate == "额定电压"
-    assert result[0].object == "110kV"
-    assert result[0].source_offset.start == 0
-
-
-@pytest.mark.asyncio
-async def test_extract_triples_markdown_wrapped_json(mock_llm, mock_storage):
-    mock_storage.download_bytes.return_value = b"some text"
-    mock_llm.chat.return_value = f"```json\n{TRIPLE_JSON}\n```"
-
-    result = await extract_triples(
-        file_path="text/1.txt",
-        file_name="test.txt",
-        model="glm-4-flash",
-        prompt_template="",
-        llm=mock_llm,
-        storage=mock_storage,
-    )
-    assert len(result) == 1
-
-
-@pytest.mark.asyncio
-async def test_extract_triples_storage_error(mock_llm, mock_storage):
-    mock_storage.download_bytes.side_effect = Exception("connection refused")
-
-    with pytest.raises(StorageDownloadError):
-        await extract_triples(
-            file_path="text/1.txt",
-            file_name="test.txt",
-            model="glm-4-flash",
-            prompt_template="",
-            llm=mock_llm,
-            storage=mock_storage,
-        )
-
-
-@pytest.mark.asyncio
-async def test_extract_triples_llm_parse_error(mock_llm, mock_storage):
-    mock_storage.download_bytes.return_value = b"some text"
-    mock_llm.chat.return_value = "这不是JSON"
-
-    with pytest.raises(LLMResponseParseError):
-        await extract_triples(
-            file_path="text/1.txt",
-            file_name="test.txt",
-            model="glm-4-flash",
-            prompt_template="",
-            llm=mock_llm,
-            storage=mock_storage,
-        )
-
-
-def test_unsupported_file_type_raises():
-    with pytest.raises(UnsupportedFileTypeError):
-        _extract_text_from_bytes(b"content", "doc.xlsx")
-
-
-def test_parse_txt_bytes():
-    result = _extract_text_from_bytes("你好世界".encode("utf-8"), "file.txt")
-    assert result == "你好世界"
-```
-
-- [ ] **Step 2: 运行，确认失败**
-
-```bash
-conda run -n label pytest tests/test_text_service.py -v
-```
-
-Expected: `ImportError`
-
-- [ ] **Step 3: 实现 `app/services/text_service.py`**
-
-```python
-import logging
-from pathlib import Path
-
-from app.clients.llm.base import LLMClient
-from app.clients.storage.base import StorageClient
-from app.core.exceptions import LLMCallError, LLMResponseParseError, StorageDownloadError, UnsupportedFileTypeError
-from app.core.json_utils import parse_json_response
-from app.models.text_models import SourceOffset, TripleItem
-
-logger = logging.getLogger(__name__)
-
-DEFAULT_PROMPT = """请从以下文本中提取知识三元组。
-对每个三元组提供：
-- subject：主语实体
-- predicate：谓语关系
-- object：宾语实体
-- source_snippet：原文中的证据片段（直接引用原文）
-- source_offset：证据片段字符偏移 {"start": N, "end": M}
-
-以 JSON 数组格式返回，例如：
-[{"subject":"...","predicate":"...","object":"...","source_snippet":"...","source_offset":{"start":0,"end":50}}]
-
-文本内容：
-"""
-
-
-def _parse_txt(data: bytes) -> str:
-    return data.decode("utf-8")
-
-
-def _parse_pdf(data: bytes) -> str:
-    import io
-    import pdfplumber
-    with pdfplumber.open(io.BytesIO(data)) as pdf:
-        return "\n".join(page.extract_text() or "" for page in pdf.pages)
-
-
-def _parse_docx(data: bytes) -> str:
-    import io
-    import docx
-    doc = docx.Document(io.BytesIO(data))
-    return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
-
-
-_PARSERS = {
-    ".txt": _parse_txt,
-    ".pdf": _parse_pdf,
-    ".docx": _parse_docx,
-}
-
-
-def _extract_text_from_bytes(data: bytes, filename: str) -> str:
-    ext = Path(filename).suffix.lower()
-    parser = _PARSERS.get(ext)
-    if parser is None:
-        raise UnsupportedFileTypeError(ext)
-    return parser(data)
-
-
-async def extract_triples(
-    file_path: str,
-    file_name: str,
-    model: str,
-    prompt_template: str,
-    llm: LLMClient,
-    storage: StorageClient,
-    bucket: str = "source-data",
-) -> list[TripleItem]:
-    try:
-        data = await storage.download_bytes(bucket, file_path)
-    except Exception as e:
-        raise StorageDownloadError(f"下载文件失败 {file_path}: {e}") from e
-
-    text = _extract_text_from_bytes(data, file_name)
-    prompt = prompt_template or DEFAULT_PROMPT
-
-    messages = [
-        {"role": "system", "content": "你是专业的知识图谱构建助手，擅长从文本中提取结构化知识三元组。"},
-        {"role": "user", "content": prompt + text},
-    ]
-
-    try:
-        raw = await llm.chat(messages, model)
-    except Exception as e:
-        raise LLMCallError(f"GLM 调用失败: {e}") from e
-
-    logger.info(f"text_extract file={file_path} model={model}")
-
-    items_raw = parse_json_response(raw)
-
-    result = []
-    for item in items_raw:
-        try:
-            offset = item.get("source_offset", {})
-            result.append(TripleItem(
-                subject=item["subject"],
-                predicate=item["predicate"],
-                object=item["object"],
-                source_snippet=item.get("source_snippet", ""),
-                source_offset=SourceOffset(
-                    start=offset.get("start", 0),
-                    end=offset.get("end", 0),
-                ),
-            ))
-        except (KeyError, TypeError) as e:
-            logger.warning(f"跳过不完整三元组: {item}, error: {e}")
-
-    return result
-```
-
-- [ ] **Step 4: 运行，确认通过**
-
-```bash
-conda run -n label pytest tests/test_text_service.py -v
-```
-
-Expected: `6 passed`
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add app/services/text_service.py tests/test_text_service.py
-git commit -m "feat: text service with txt/pdf/docx parsing and triple extraction"
-```
-
----
-
-## Task 9: Text Router
-
-**Files:**
-- Create: `app/routers/text.py`
-- Create: `tests/test_text_router.py`
-
-- [ ] **Step 1: 编写失败测试**
-
-`tests/test_text_router.py`:
-
-```python
-import pytest
-from fastapi.testclient import TestClient
-from unittest.mock import AsyncMock, patch
-from app.main import app
-from app.core.dependencies import set_clients
-from app.models.text_models import TripleItem, SourceOffset
-
-
-@pytest.fixture
-def client(mock_llm, mock_storage):
-    set_clients(mock_llm, mock_storage)
-    return TestClient(app)
-
-
-def test_text_extract_success(client, mock_llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=b"变压器额定电压110kV")
-    mock_llm.chat = AsyncMock(return_value='[{"subject":"变压器","predicate":"额定电压","object":"110kV","source_snippet":"额定电压110kV","source_offset":{"start":3,"end":10}}]')
-
-    resp = client.post("/api/v1/text/extract", json={
-        "file_path": "text/202404/1.txt",
-        "file_name": "规范.txt",
-    })
-    assert resp.status_code == 200
-    data = resp.json()
-    assert len(data["items"]) == 1
-    assert data["items"][0]["subject"] == "变压器"
-
-
-def test_text_extract_unsupported_file(client, mock_llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=b"content")
-    resp = client.post("/api/v1/text/extract", json={
-        "file_path": "text/202404/1.xlsx",
-        "file_name": "file.xlsx",
-    })
-    assert resp.status_code == 400
-    assert resp.json()["code"] == "UNSUPPORTED_FILE_TYPE"
-```
-
-- [ ] **Step 2: 实现 `app/routers/text.py`**
-
-```python
-from fastapi import APIRouter, Depends
-
-from app.clients.llm.base import LLMClient
-from app.clients.storage.base import StorageClient
-from app.core.config import get_config
-from app.core.dependencies import get_llm_client, get_storage_client
-from app.models.text_models import TextExtractRequest, TextExtractResponse
-from app.services import text_service
-
-router = APIRouter(tags=["Text"])
-
-
-@router.post("/text/extract", response_model=TextExtractResponse)
-async def extract_text(
-    req: TextExtractRequest,
-    llm: LLMClient = Depends(get_llm_client),
-    storage: StorageClient = Depends(get_storage_client),
-):
-    cfg = get_config()
-    model = req.model or cfg["models"]["default_text"]
-    prompt = req.prompt_template or text_service.DEFAULT_PROMPT
-
-    items = await text_service.extract_triples(
-        file_path=req.file_path,
-        file_name=req.file_name,
-        model=model,
-        prompt_template=prompt,
-        llm=llm,
-        storage=storage,
-        bucket=cfg["storage"]["buckets"]["source_data"],
-    )
-    return TextExtractResponse(items=items)
-```
-
-- [ ] **Step 3: 在 `app/main.py` 注册路由**
-
-取消注释以下两行：
-
-```python
-from app.routers import text
-app.include_router(text.router, prefix="/api/v1")
-```
-
-- [ ] **Step 4: 运行测试**
-
-```bash
-conda run -n label pytest tests/test_text_router.py -v
-```
-
-Expected: `2 passed`
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add app/routers/text.py tests/test_text_router.py app/main.py
-git commit -m "feat: text router POST /api/v1/text/extract"
-```
-
----
-
-## Task 10: Image Models + Service
-
-**Files:**
-- Create: `app/models/image_models.py`
-- Create: `app/services/image_service.py`
-- Create: `tests/test_image_service.py`
-
-- [ ] **Step 1: 实现 `app/models/image_models.py`**
-
-```python
-from pydantic import BaseModel
-
-
-class BBox(BaseModel):
-    x: int
-    y: int
-    w: int
-    h: int
-
-
-class QuadrupleItem(BaseModel):
-    subject: str
-    predicate: str
-    object: str
-    qualifier: str
-    bbox: BBox
-    cropped_image_path: str
-
-
-class ImageExtractRequest(BaseModel):
-    file_path: str
-    task_id: int
-    model: str | None = None
-    prompt_template: str | None = None
-
-
-class ImageExtractResponse(BaseModel):
-    items: list[QuadrupleItem]
-```
-
-- [ ] **Step 2: 编写失败测试**
-
-`tests/test_image_service.py`:
-
-```python
-import pytest
-import numpy as np
-import cv2
-from app.services.image_service import extract_quadruples, _crop_image
-from app.models.image_models import BBox
-from app.core.exceptions import LLMResponseParseError, StorageDownloadError
-
-QUAD_JSON = '[{"subject":"电缆接头","predicate":"位于","object":"配电箱左侧","qualifier":"2024年","bbox":{"x":10,"y":20,"w":50,"h":40}}]'
-
-
-def _make_test_image_bytes(width=200, height=200) -> bytes:
-    img = np.zeros((height, width, 3), dtype=np.uint8)
-    img[:] = (100, 150, 200)
-    _, buf = cv2.imencode(".jpg", img)
-    return buf.tobytes()
-
-
-def test_crop_image():
-    img_bytes = _make_test_image_bytes(200, 200)
-    bbox = BBox(x=10, y=20, w=50, h=40)
-    result = _crop_image(img_bytes, bbox)
-    assert isinstance(result, bytes)
-    arr = np.frombuffer(result, dtype=np.uint8)
-    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
-    assert img.shape[0] == 40  # height
-    assert img.shape[1] == 50  # width
-
-
-@pytest.mark.asyncio
-async def test_extract_quadruples_success(mock_llm, mock_storage):
-    mock_storage.download_bytes.return_value = _make_test_image_bytes()
-    mock_llm.chat_vision.return_value = QUAD_JSON
-    mock_storage.upload_bytes.return_value = None
-
-    result = await extract_quadruples(
-        file_path="image/202404/1.jpg",
-        task_id=789,
-        model="glm-4v-flash",
-        prompt_template="提取四元组",
-        llm=mock_llm,
-        storage=mock_storage,
-    )
-    assert len(result) == 1
-    assert result[0].subject == "电缆接头"
-    assert result[0].cropped_image_path == "crops/789/0.jpg"
-    mock_storage.upload_bytes.assert_called_once()
-
-
-@pytest.mark.asyncio
-async def test_extract_quadruples_storage_error(mock_llm, mock_storage):
-    mock_storage.download_bytes.side_effect = Exception("timeout")
-    with pytest.raises(StorageDownloadError):
-        await extract_quadruples(
-            file_path="image/1.jpg",
-            task_id=1,
-            model="glm-4v-flash",
-            prompt_template="",
-            llm=mock_llm,
-            storage=mock_storage,
-        )
-
-
-@pytest.mark.asyncio
-async def test_extract_quadruples_parse_error(mock_llm, mock_storage):
-    mock_storage.download_bytes.return_value = _make_test_image_bytes()
-    mock_llm.chat_vision.return_value = "不是JSON"
-    with pytest.raises(LLMResponseParseError):
-        await extract_quadruples(
-            file_path="image/1.jpg",
-            task_id=1,
-            model="glm-4v-flash",
-            prompt_template="",
-            llm=mock_llm,
-            storage=mock_storage,
-        )
-```
-
-- [ ] **Step 3: 运行，确认失败**
-
-```bash
-conda run -n label pytest tests/test_image_service.py -v
-```
-
-Expected: `ImportError`
-
-- [ ] **Step 4: 实现 `app/services/image_service.py`**
-
-```python
-import base64
-import logging
-from pathlib import Path
-
-import cv2
-import numpy as np
-
-from app.clients.llm.base import LLMClient
-from app.clients.storage.base import StorageClient
-from app.core.exceptions import LLMCallError, LLMResponseParseError, StorageDownloadError
-from app.core.json_utils import parse_json_response
-from app.models.image_models import BBox, QuadrupleItem
-
-logger = logging.getLogger(__name__)
-
-DEFAULT_PROMPT = """请分析这张图片，提取知识四元组。
-对每个四元组提供：
-- subject：主体实体
-- predicate：关系/属性
-- object：客体实体
-- qualifier：修饰信息（时间、条件、场景，无则填空字符串）
-- bbox：边界框 {"x": N, "y": N, "w": N, "h": N}（像素坐标，相对原图）
-
-以 JSON 数组格式返回：
-[{"subject":"...","predicate":"...","object":"...","qualifier":"...","bbox":{"x":0,"y":0,"w":100,"h":100}}]
-"""
-
-
-def _crop_image(image_bytes: bytes, bbox: BBox) -> bytes:
-    arr = np.frombuffer(image_bytes, dtype=np.uint8)
-    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
-    h, w = img.shape[:2]
-    x = max(0, bbox.x)
-    y = max(0, bbox.y)
-    x2 = min(w, bbox.x + bbox.w)
-    y2 = min(h, bbox.y + bbox.h)
-    cropped = img[y:y2, x:x2]
-    _, buf = cv2.imencode(".jpg", cropped, [cv2.IMWRITE_JPEG_QUALITY, 90])
-    return buf.tobytes()
-
-
-async def extract_quadruples(
-    file_path: str,
-    task_id: int,
-    model: str,
-    prompt_template: str,
-    llm: LLMClient,
-    storage: StorageClient,
-    source_bucket: str = "source-data",
-) -> list[QuadrupleItem]:
-    try:
-        data = await storage.download_bytes(source_bucket, file_path)
-    except Exception as e:
-        raise StorageDownloadError(f"下载图片失败 {file_path}: {e}") from e
-
-    ext = Path(file_path).suffix.lstrip(".") or "jpeg"
-    b64 = base64.b64encode(data).decode()
-
-    messages = [
-        {"role": "system", "content": "你是专业的视觉分析助手，擅长从图像中提取结构化知识四元组。"},
-        {"role": "user", "content": [
-            {"type": "image_url", "image_url": {"url": f"data:image/{ext};base64,{b64}"}},
-            {"type": "text", "text": prompt_template or DEFAULT_PROMPT},
-        ]},
-    ]
-
-    try:
-        raw = await llm.chat_vision(messages, model)
-    except Exception as e:
-        raise LLMCallError(f"GLM-4V 调用失败: {e}") from e
-
-    logger.info(f"image_extract file={file_path} model={model}")
-    items_raw = parse_json_response(raw)
-
-    result = []
-    for i, item in enumerate(items_raw):
-        try:
-            bbox = BBox(**item["bbox"])
-            cropped = _crop_image(data, bbox)
-            crop_path = f"crops/{task_id}/{i}.jpg"
-            await storage.upload_bytes(source_bucket, crop_path, cropped, "image/jpeg")
-            result.append(QuadrupleItem(
-                subject=item["subject"],
-                predicate=item["predicate"],
-                object=item["object"],
-                qualifier=item.get("qualifier", ""),
-                bbox=bbox,
-                cropped_image_path=crop_path,
-            ))
-        except Exception as e:
-            logger.warning(f"跳过不完整四元组 index={i}: {e}")
-
-    return result
-```
-
-- [ ] **Step 5: 运行，确认通过**
-
-```bash
-conda run -n label pytest tests/test_image_service.py -v
-```
-
-Expected: `4 passed`
-
-- [ ] **Step 6: Commit**
-
-```bash
-git add app/models/image_models.py app/services/image_service.py tests/test_image_service.py
-git commit -m "feat: image models, service with bbox crop and quadruple extraction"
-```
-
----
-
-## Task 11: Image Router
-
-**Files:**
-- Create: `app/routers/image.py`
-- Create: `tests/test_image_router.py`
-
-- [ ] **Step 1: 编写失败测试**
-
-`tests/test_image_router.py`:
-
-```python
-import numpy as np
-import cv2
-import pytest
-from fastapi.testclient import TestClient
-from unittest.mock import AsyncMock
-from app.main import app
-from app.core.dependencies import set_clients
-
-
-def _make_image_bytes() -> bytes:
-    img = np.zeros((100, 100, 3), dtype=np.uint8)
-    _, buf = cv2.imencode(".jpg", img)
-    return buf.tobytes()
-
-
-@pytest.fixture
-def client(mock_llm, mock_storage):
-    set_clients(mock_llm, mock_storage)
-    return TestClient(app)
-
-
-def test_image_extract_success(client, mock_llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=_make_image_bytes())
-    mock_storage.upload_bytes = AsyncMock(return_value=None)
-    mock_llm.chat_vision = AsyncMock(return_value='[{"subject":"A","predicate":"B","object":"C","qualifier":"","bbox":{"x":0,"y":0,"w":10,"h":10}}]')
-
-    resp = client.post("/api/v1/image/extract", json={
-        "file_path": "image/202404/1.jpg",
-        "task_id": 42,
-    })
-    assert resp.status_code == 200
-    data = resp.json()
-    assert len(data["items"]) == 1
-    assert data["items"][0]["cropped_image_path"] == "crops/42/0.jpg"
-```
-
-- [ ] **Step 2: 实现 `app/routers/image.py`**
-
-```python
-from fastapi import APIRouter, Depends
-
-from app.clients.llm.base import LLMClient
-from app.clients.storage.base import StorageClient
-from app.core.config import get_config
-from app.core.dependencies import get_llm_client, get_storage_client
-from app.models.image_models import ImageExtractRequest, ImageExtractResponse
-from app.services import image_service
-
-router = APIRouter(tags=["Image"])
-
-
-@router.post("/image/extract", response_model=ImageExtractResponse)
-async def extract_image(
-    req: ImageExtractRequest,
-    llm: LLMClient = Depends(get_llm_client),
-    storage: StorageClient = Depends(get_storage_client),
-):
-    cfg = get_config()
-    model = req.model or cfg["models"]["default_vision"]
-    prompt = req.prompt_template or image_service.DEFAULT_PROMPT
-
-    items = await image_service.extract_quadruples(
-        file_path=req.file_path,
-        task_id=req.task_id,
-        model=model,
-        prompt_template=prompt,
-        llm=llm,
-        storage=storage,
-        source_bucket=cfg["storage"]["buckets"]["source_data"],
-    )
-    return ImageExtractResponse(items=items)
-```
-
-- [ ] **Step 3: 在 `app/main.py` 注册路由**
-
-```python
-from app.routers import text, image
-app.include_router(image.router, prefix="/api/v1")
-```
-
-- [ ] **Step 4: 运行测试**
-
-```bash
-conda run -n label pytest tests/test_image_router.py -v
-```
-
-Expected: `1 passed`
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add app/routers/image.py tests/test_image_router.py app/main.py
-git commit -m "feat: image router POST /api/v1/image/extract"
-```
-
----
-
-## Task 12: Video Models + Service
-
-**Files:**
-- Create: `app/models/video_models.py`
-- Create: `app/services/video_service.py`
-- Create: `tests/test_video_service.py`
-
-- [ ] **Step 1: 实现 `app/models/video_models.py`**
-
-```python
-from pydantic import BaseModel
-
-
-class ExtractFramesRequest(BaseModel):
-    file_path: str
-    source_id: int
-    job_id: int
-    mode: str = "interval"      # interval | keyframe
-    frame_interval: int = 30
-
-
-class ExtractFramesResponse(BaseModel):
-    message: str
-    job_id: int
-
-
-class FrameInfo(BaseModel):
-    frame_index: int
-    time_sec: float
-    frame_path: str
-
-
-class VideoToTextRequest(BaseModel):
-    file_path: str
-    source_id: int
-    job_id: int
-    start_sec: float = 0.0
-    end_sec: float
-    model: str | None = None
-    prompt_template: str | None = None
-
-
-class VideoToTextResponse(BaseModel):
-    message: str
-    job_id: int
-
-
-class VideoJobCallback(BaseModel):
-    job_id: int
-    status: str                         # SUCCESS | FAILED
-    frames: list[FrameInfo] | None = None
-    output_path: str | None = None
-    error_message: str | None = None
-```
-
-- [ ] **Step 2: 编写失败测试**
-
-`tests/test_video_service.py`:
-
-```python
-import numpy as np
-import pytest
-from unittest.mock import AsyncMock, patch, MagicMock
-from app.services.video_service import _is_scene_change, extract_frames_background
-
-
-def test_is_scene_change_different_frames():
-    prev = np.zeros((100, 100), dtype=np.uint8)
-    curr = np.full((100, 100), 200, dtype=np.uint8)
-    assert _is_scene_change(prev, curr, threshold=30.0) is True
-
-
-def test_is_scene_change_similar_frames():
-    prev = np.full((100, 100), 100, dtype=np.uint8)
-    curr = np.full((100, 100), 101, dtype=np.uint8)
-    assert _is_scene_change(prev, curr, threshold=30.0) is False
-
-
-@pytest.mark.asyncio
-async def test_extract_frames_background_calls_callback_on_success(mock_storage):
-    import cv2
-    import tempfile, os
-
-    # 创建一个有效的真实测试视频（5帧，10x10）
-    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
-        tmp_path = f.name
-
-    out = cv2.VideoWriter(tmp_path, cv2.VideoWriter_fourcc(*"mp4v"), 10, (10, 10))
-    for _ in range(5):
-        out.write(np.zeros((10, 10, 3), dtype=np.uint8))
-    out.release()
-
-    with open(tmp_path, "rb") as f:
-        video_bytes = f.read()
-    os.unlink(tmp_path)
-
-    mock_storage.download_bytes.return_value = video_bytes
-    mock_storage.upload_bytes = AsyncMock(return_value=None)
-
-    with patch("app.services.video_service.httpx") as mock_httpx:
-        mock_client = AsyncMock()
-        mock_httpx.AsyncClient.return_value.__aenter__ = AsyncMock(return_value=mock_client)
-        mock_httpx.AsyncClient.return_value.__aexit__ = AsyncMock(return_value=False)
-        mock_client.post = AsyncMock()
-
-        await extract_frames_background(
-            file_path="video/1.mp4",
-            source_id=10,
-            job_id=42,
-            mode="interval",
-            frame_interval=1,
-            storage=mock_storage,
-            callback_url="http://backend/callback",
-        )
-
-        mock_client.post.assert_called_once()
-        call_kwargs = mock_client.post.call_args
-        payload = call_kwargs.kwargs.get("json") or call_kwargs.args[1] if len(call_kwargs.args) > 1 else call_kwargs.kwargs["json"]
-        assert payload["job_id"] == 42
-        assert payload["status"] == "SUCCESS"
-
-
-@pytest.mark.asyncio
-async def test_extract_frames_background_calls_callback_on_failure(mock_storage):
-    mock_storage.download_bytes.side_effect = Exception("storage error")
-
-    with patch("app.services.video_service.httpx") as mock_httpx:
-        mock_client = AsyncMock()
-        mock_httpx.AsyncClient.return_value.__aenter__ = AsyncMock(return_value=mock_client)
-        mock_httpx.AsyncClient.return_value.__aexit__ = AsyncMock(return_value=False)
-        mock_client.post = AsyncMock()
-
-        await extract_frames_background(
-            file_path="video/1.mp4",
-            source_id=10,
-            job_id=99,
-            mode="interval",
-            frame_interval=30,
-            storage=mock_storage,
-            callback_url="http://backend/callback",
-        )
-
-        mock_client.post.assert_called_once()
-        call_kwargs = mock_client.post.call_args
-        payload = call_kwargs.kwargs.get("json") or (call_kwargs.args[1] if len(call_kwargs.args) > 1 else {})
-        assert payload["status"] == "FAILED"
-        assert payload["job_id"] == 99
-```
-
-- [ ] **Step 3: 运行，确认失败**
-
-```bash
-conda run -n label pytest tests/test_video_service.py -v
-```
-
-Expected: `ImportError`
-
-- [ ] **Step 4: 实现 `app/services/video_service.py`**
-
-```python
-import base64
-import logging
-import tempfile
-import time
-from pathlib import Path
-
-import cv2
-import httpx
-import numpy as np
-
-from app.clients.llm.base import LLMClient
-from app.clients.storage.base import StorageClient
-from app.core.exceptions import LLMCallError
-from app.models.video_models import FrameInfo, VideoJobCallback
-
-logger = logging.getLogger(__name__)
-
-DEFAULT_VIDEO_TO_TEXT_PROMPT = """请分析这段视频的帧序列，用中文详细描述：
-1. 视频中出现的主要对象、设备、人物
-2. 发生的主要动作、操作步骤
-3. 场景的整体情况
-
-请输出结构化的文字描述，适合作为知识图谱构建的文本素材。"""
-
-
-def _is_scene_change(prev: np.ndarray, curr: np.ndarray, threshold: float = 30.0) -> bool:
-    """通过帧差分均值判断是否发生场景切换。"""
-    diff = cv2.absdiff(prev, curr)
-    return float(diff.mean()) > threshold
-
-
-def _extract_frames(
-    video_path: str, mode: str, frame_interval: int
-) -> list[tuple[int, float, bytes]]:
-    cap = cv2.VideoCapture(video_path)
-    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
-    results = []
-    prev_gray = None
-    idx = 0
-
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            break
-        time_sec = idx / fps
-        if mode == "interval":
-            if idx % frame_interval == 0:
-                _, buf = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 90])
-                results.append((idx, time_sec, buf.tobytes()))
-        else:
-            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-            if prev_gray is None or _is_scene_change(prev_gray, gray):
-                _, buf = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 90])
-                results.append((idx, time_sec, buf.tobytes()))
-            prev_gray = gray
-        idx += 1
-
-    cap.release()
-    return results
-
-
-def _sample_frames_as_base64(
-    video_path: str, start_sec: float, end_sec: float, count: int
-) -> list[str]:
-    cap = cv2.VideoCapture(video_path)
-    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
-    start_frame = int(start_sec * fps)
-    end_frame = int(end_sec * fps)
-    total = max(1, end_frame - start_frame)
-    step = max(1, total // count)
-    results = []
-    for i in range(count):
-        frame_pos = start_frame + i * step
-        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_pos)
-        ret, frame = cap.read()
-        if ret:
-            _, buf = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
-            results.append(base64.b64encode(buf.tobytes()).decode())
-    cap.release()
-    return results
-
-
-async def _send_callback(url: str, payload: VideoJobCallback) -> None:
-    async with httpx.AsyncClient(timeout=10) as client:
-        try:
-            await client.post(url, json=payload.model_dump())
-        except Exception as e:
-            logger.warning(f"回调失败 url={url}: {e}")
-
-
-async def extract_frames_background(
-    file_path: str,
-    source_id: int,
-    job_id: int,
-    mode: str,
-    frame_interval: int,
-    storage: StorageClient,
-    callback_url: str,
-    bucket: str = "source-data",
-) -> None:
-    try:
-        data = await storage.download_bytes(bucket, file_path)
-    except Exception as e:
-        await _send_callback(callback_url, VideoJobCallback(
-            job_id=job_id, status="FAILED", error_message=str(e)
-        ))
-        return
-
-    suffix = Path(file_path).suffix or ".mp4"
-    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
-        tmp.write(data)
-        tmp_path = tmp.name
-
-    try:
-        frames = _extract_frames(tmp_path, mode, frame_interval)
-        frame_infos = []
-        for i, (frame_idx, time_sec, frame_data) in enumerate(frames):
-            frame_path = f"frames/{source_id}/{i}.jpg"
-            await storage.upload_bytes(bucket, frame_path, frame_data, "image/jpeg")
-            frame_infos.append(FrameInfo(
-                frame_index=frame_idx,
-                time_sec=round(time_sec, 3),
-                frame_path=frame_path,
-            ))
-        await _send_callback(callback_url, VideoJobCallback(
-            job_id=job_id, status="SUCCESS", frames=frame_infos
-        ))
-        logger.info(f"extract_frames job_id={job_id} frames={len(frame_infos)}")
-    except Exception as e:
-        logger.exception(f"extract_frames failed job_id={job_id}")
-        await _send_callback(callback_url, VideoJobCallback(
-            job_id=job_id, status="FAILED", error_message=str(e)
-        ))
-    finally:
-        Path(tmp_path).unlink(missing_ok=True)
-
-
-async def video_to_text_background(
-    file_path: str,
-    source_id: int,
-    job_id: int,
-    start_sec: float,
-    end_sec: float,
-    model: str,
-    prompt_template: str,
-    frame_sample_count: int,
-    llm: LLMClient,
-    storage: StorageClient,
-    callback_url: str,
-    bucket: str = "source-data",
-) -> None:
-    try:
-        data = await storage.download_bytes(bucket, file_path)
-    except Exception as e:
-        await _send_callback(callback_url, VideoJobCallback(
-            job_id=job_id, status="FAILED", error_message=str(e)
-        ))
-        return
-
-    suffix = Path(file_path).suffix or ".mp4"
-    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
-        tmp.write(data)
-        tmp_path = tmp.name
-
-    try:
-        frames_b64 = _sample_frames_as_base64(tmp_path, start_sec, end_sec, frame_sample_count)
-        content: list = []
-        for b64 in frames_b64:
-            content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}})
-        content.append({
-            "type": "text",
-            "text": f"以上是视频第{start_sec}秒至{end_sec}秒的均匀采样帧。\n{prompt_template}",
-        })
-
-        messages = [
-            {"role": "system", "content": "你是专业的视频内容分析助手。"},
-            {"role": "user", "content": content},
-        ]
-
-        try:
-            description = await llm.chat_vision(messages, model)
-        except Exception as e:
-            raise LLMCallError(f"GLM-4V 调用失败: {e}") from e
-
-        timestamp = int(time.time())
-        output_path = f"video-text/{source_id}/{timestamp}.txt"
-        await storage.upload_bytes(bucket, output_path, description.encode("utf-8"), "text/plain")
-
-        await _send_callback(callback_url, VideoJobCallback(
-            job_id=job_id, status="SUCCESS", output_path=output_path
-        ))
-        logger.info(f"video_to_text job_id={job_id} output={output_path}")
-    except Exception as e:
-        logger.exception(f"video_to_text failed job_id={job_id}")
-        await _send_callback(callback_url, VideoJobCallback(
-            job_id=job_id, status="FAILED", error_message=str(e)
-        ))
-    finally:
-        Path(tmp_path).unlink(missing_ok=True)
-```
-
-- [ ] **Step 5: 运行，确认通过**
-
-```bash
-conda run -n label pytest tests/test_video_service.py -v
-```
-
-Expected: `4 passed`
-
-- [ ] **Step 6: Commit**
-
-```bash
-git add app/models/video_models.py app/services/video_service.py tests/test_video_service.py
-git commit -m "feat: video models and service with frame extraction and video-to-text"
-```
-
----
-
-## Task 13: Video Router
-
-**Files:**
-- Create: `app/routers/video.py`
-- Create: `tests/test_video_router.py`
-
-- [ ] **Step 1: 编写失败测试**
-
-`tests/test_video_router.py`:
-
-```python
-import pytest
-from fastapi.testclient import TestClient
-from app.main import app
-from app.core.dependencies import set_clients
-
-
-@pytest.fixture
-def client(mock_llm, mock_storage):
-    set_clients(mock_llm, mock_storage)
-    return TestClient(app)
-
-
-def test_extract_frames_returns_202(client, mock_storage):
-    mock_storage.get_object_size = AsyncMock(return_value=10 * 1024 * 1024)  # 10MB
-    resp = client.post("/api/v1/video/extract-frames", json={
-        "file_path": "video/202404/1.mp4",
-        "source_id": 10,
-        "job_id": 42,
-        "mode": "interval",
-        "frame_interval": 30,
-    })
-    assert resp.status_code == 202
-    assert resp.json()["job_id"] == 42
-    assert "后台处理中" in resp.json()["message"]
-
-
-def test_video_to_text_returns_202(client, mock_storage):
-    mock_storage.get_object_size = AsyncMock(return_value=10 * 1024 * 1024)  # 10MB
-    resp = client.post("/api/v1/video/to-text", json={
-        "file_path": "video/202404/1.mp4",
-        "source_id": 10,
-        "job_id": 43,
-        "start_sec": 0,
-        "end_sec": 60,
-    })
-    assert resp.status_code == 202
-    assert resp.json()["job_id"] == 43
-
-
-def test_extract_frames_rejects_oversized_video(client, mock_storage):
-    mock_storage.get_object_size = AsyncMock(return_value=300 * 1024 * 1024)  # 300MB > 200MB limit
-    resp = client.post("/api/v1/video/extract-frames", json={
-        "file_path": "video/202404/big.mp4",
-        "source_id": 10,
-        "job_id": 99,
-        "mode": "interval",
-        "frame_interval": 30,
-    })
-    assert resp.status_code == 400
-    assert "大小" in resp.json()["detail"]
-```
-
-- [ ] **Step 2: 实现 `app/routers/video.py`**
-
-```python
-from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
-
-from app.clients.llm.base import LLMClient
-from app.clients.storage.base import StorageClient
-from app.core.config import get_config
-from app.core.dependencies import get_llm_client, get_storage_client
-from app.models.video_models import (
-    ExtractFramesRequest,
-    ExtractFramesResponse,
-    VideoToTextRequest,
-    VideoToTextResponse,
-)
-from app.services import video_service
-
-router = APIRouter(tags=["Video"])
-
-
-async def _check_video_size(storage: StorageClient, bucket: str, file_path: str, max_mb: int) -> None:
-    """在触发后台任务前校验视频文件大小，超限时抛出 HTTP 400。"""
-    size_bytes = await storage.get_object_size(bucket, file_path)
-    if size_bytes > max_mb * 1024 * 1024:
-        raise HTTPException(
-            status_code=400,
-            detail=f"视频文件大小超出限制（最大 {max_mb}MB，当前 {size_bytes // 1024 // 1024}MB）",
-        )
-
-
-@router.post("/video/extract-frames", response_model=ExtractFramesResponse, status_code=202)
-async def extract_frames(
-    req: ExtractFramesRequest,
-    background_tasks: BackgroundTasks,
-    storage: StorageClient = Depends(get_storage_client),
-):
-    cfg = get_config()
-    bucket = cfg["storage"]["buckets"]["source_data"]
-    await _check_video_size(storage, bucket, req.file_path, cfg["video"]["max_file_size_mb"])
-    background_tasks.add_task(
-        video_service.extract_frames_background,
-        file_path=req.file_path,
-        source_id=req.source_id,
-        job_id=req.job_id,
-        mode=req.mode,
-        frame_interval=req.frame_interval,
-        storage=storage,
-        callback_url=cfg["backend"]["callback_url"],
-        bucket=bucket,
-    )
-    return ExtractFramesResponse(message="任务已接受，后台处理中", job_id=req.job_id)
-
-
-@router.post("/video/to-text", response_model=VideoToTextResponse, status_code=202)
-async def video_to_text(
-    req: VideoToTextRequest,
-    background_tasks: BackgroundTasks,
-    llm: LLMClient = Depends(get_llm_client),
-    storage: StorageClient = Depends(get_storage_client),
-):
-    cfg = get_config()
-    bucket = cfg["storage"]["buckets"]["source_data"]
-    await _check_video_size(storage, bucket, req.file_path, cfg["video"]["max_file_size_mb"])
-    model = req.model or cfg["models"]["default_vision"]
-    prompt = req.prompt_template or video_service.DEFAULT_VIDEO_TO_TEXT_PROMPT
-    background_tasks.add_task(
-        video_service.video_to_text_background,
-        file_path=req.file_path,
-        source_id=req.source_id,
-        job_id=req.job_id,
-        start_sec=req.start_sec,
-        end_sec=req.end_sec,
-        model=model,
-        prompt_template=prompt,
-        frame_sample_count=cfg["video"]["frame_sample_count"],
-        llm=llm,
-        storage=storage,
-        callback_url=cfg["backend"]["callback_url"],
-        bucket=bucket,
-    )
-    return VideoToTextResponse(message="任务已接受，后台处理中", job_id=req.job_id)
-```
-
-- [ ] **Step 3: 在 `app/main.py` 注册路由**
-
-```python
-from app.routers import text, image, video
-app.include_router(video.router, prefix="/api/v1")
-```
-
-- [ ] **Step 4: 运行测试**
-
-```bash
-conda run -n label pytest tests/test_video_router.py -v
-```
-
-Expected: `3 passed`
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add app/routers/video.py tests/test_video_router.py app/main.py
-git commit -m "feat: video router POST /api/v1/video/extract-frames and /to-text"
-```
-
----
-
-## Task 14: QA Models + Service
-
-**Files:**
-- Create: `app/models/qa_models.py`
-- Create: `app/services/qa_service.py`
-- Create: `tests/test_qa_service.py`
-
-- [ ] **Step 1: 实现 `app/models/qa_models.py`**
-
-```python
-from pydantic import BaseModel
-
-
-class TextTripleForQA(BaseModel):
-    subject: str
-    predicate: str
-    object: str
-    source_snippet: str
-
-
-class TextQARequest(BaseModel):
-    items: list[TextTripleForQA]
-    model: str | None = None
-    prompt_template: str | None = None
-
-
-class QAPair(BaseModel):
-    question: str
-    answer: str
-
-
-class TextQAResponse(BaseModel):
-    pairs: list[QAPair]
-
-
-class ImageQuadrupleForQA(BaseModel):
-    subject: str
-    predicate: str
-    object: str
-    qualifier: str
-    cropped_image_path: str
-
-
-class ImageQARequest(BaseModel):
-    items: list[ImageQuadrupleForQA]
-    model: str | None = None
-    prompt_template: str | None = None
-
-
-class ImageQAPair(BaseModel):
-    question: str
-    answer: str
-    image_path: str
-
-
-class ImageQAResponse(BaseModel):
-    pairs: list[ImageQAPair]
-```
-
-- [ ] **Step 2: 编写失败测试**
-
-`tests/test_qa_service.py`:
-
-```python
-import pytest
-from app.services.qa_service import gen_text_qa, gen_image_qa, _parse_qa_pairs
-from app.models.qa_models import TextTripleForQA, ImageQuadrupleForQA
-from app.core.exceptions import LLMResponseParseError, LLMCallError
-
-QA_JSON = '[{"question":"变压器额定电压是多少？","answer":"110kV"}]'
-
-
-def test_parse_qa_pairs_plain_json():
-    result = _parse_qa_pairs(QA_JSON)
-    assert len(result) == 1
-    assert result[0].question == "变压器额定电压是多少？"
-
-
-def test_parse_qa_pairs_markdown_wrapped():
-    result = _parse_qa_pairs(f"```json\n{QA_JSON}\n```")
-    assert len(result) == 1
-
-
-def test_parse_qa_pairs_invalid_raises():
-    with pytest.raises(LLMResponseParseError):
-        _parse_qa_pairs("这不是JSON")
-
-
-@pytest.mark.asyncio
-async def test_gen_text_qa(mock_llm):
-    mock_llm.chat.return_value = QA_JSON
-    items = [TextTripleForQA(subject="变压器", predicate="额定电压", object="110kV", source_snippet="额定电压为110kV")]
-
-    result = await gen_text_qa(items=items, model="glm-4-flash", prompt_template="", llm=mock_llm)
-    assert len(result) == 1
-    assert result[0].answer == "110kV"
-
-
-@pytest.mark.asyncio
-async def test_gen_text_qa_llm_error(mock_llm):
-    mock_llm.chat.side_effect = Exception("network error")
-    items = [TextTripleForQA(subject="A", predicate="B", object="C", source_snippet="ABC")]
-
-    with pytest.raises(LLMCallError):
-        await gen_text_qa(items=items, model="glm-4-flash", prompt_template="", llm=mock_llm)
-
-
-@pytest.mark.asyncio
-async def test_gen_image_qa(mock_llm, mock_storage):
-    mock_llm.chat_vision.return_value = '[{"question":"图中是什么？","answer":"电缆接头"}]'
-    mock_storage.download_bytes.return_value = b"fake-image-bytes"
-    items = [ImageQuadrupleForQA(
-        subject="电缆接头", predicate="位于", object="配电箱", qualifier="", cropped_image_path="crops/1/0.jpg"
-    )]
-
-    result = await gen_image_qa(items=items, model="glm-4v-flash", prompt_template="", llm=mock_llm, storage=mock_storage)
-    assert len(result) == 1
-    assert result[0].image_path == "crops/1/0.jpg"
-    # 验证使用 download_bytes（base64），而非 presigned URL
-    mock_storage.download_bytes.assert_called_once_with("source-data", "crops/1/0.jpg")
-    # 验证发送给 GLM-4V 的消息包含 base64 data URL
-    call_messages = mock_llm.chat_vision.call_args[0][0]
-    image_content = call_messages[1]["content"][0]
-    assert image_content["image_url"]["url"].startswith("data:image/jpeg;base64,")
-```
-
-- [ ] **Step 3: 运行，确认失败**
-
-```bash
-conda run -n label pytest tests/test_qa_service.py -v
-```
-
-Expected: `ImportError`
-
-- [ ] **Step 4: 实现 `app/services/qa_service.py`**
-
-```python
-import base64
-import json
-import logging
-
-from app.clients.llm.base import LLMClient
-from app.clients.storage.base import StorageClient
-from app.core.exceptions import LLMCallError, LLMResponseParseError, StorageDownloadError
-from app.core.json_utils import parse_json_response
-from app.models.qa_models import (
-    ImageQAPair,
-    ImageQuadrupleForQA,
-    QAPair,
-    TextTripleForQA,
-)
-
-logger = logging.getLogger(__name__)
-
-DEFAULT_TEXT_QA_PROMPT = """基于以下知识三元组和原文证据片段，生成高质量问答对。
-要求：
-1. 问题自然、具体，不能过于宽泛
-2. 答案基于原文片段，语言流畅
-3. 每个三元组生成1-2个问答对
-
-以 JSON 数组格式返回：[{"question":"...","answer":"..."}]
-
-三元组数据：
-"""
-
-DEFAULT_IMAGE_QA_PROMPT = """基于图片内容和以下四元组信息，生成高质量图文问答对。
-要求：
-1. 问题需要结合图片才能回答
-2. 答案基于图片中的实际内容
-3. 每个四元组生成1个问答对
-
-以 JSON 数组格式返回：[{"question":"...","answer":"..."}]
-
-四元组信息：
-"""
-
-
-def _parse_qa_pairs(raw: str) -> list[QAPair]:
-    items_raw = parse_json_response(raw)
-    result = []
-    for item in items_raw:
-        try:
-            result.append(QAPair(question=item["question"], answer=item["answer"]))
-        except KeyError as e:
-            logger.warning(f"跳过不完整问答对: {item}, error: {e}")
-    return result
-
-
-async def gen_text_qa(
-    items: list[TextTripleForQA],
-    model: str,
-    prompt_template: str,
-    llm: LLMClient,
-) -> list[QAPair]:
-    triples_text = json.dumps([i.model_dump() for i in items], ensure_ascii=False, indent=2)
-    messages = [
-        {"role": "system", "content": "你是专业的知识问答对生成助手。"},
-        {"role": "user", "content": (prompt_template or DEFAULT_TEXT_QA_PROMPT) + triples_text},
-    ]
-    try:
-        raw = await llm.chat(messages, model)
-    except Exception as e:
-        raise LLMCallError(f"GLM 调用失败: {e}") from e
-    logger.info(f"gen_text_qa model={model} items={len(items)}")
-    return _parse_qa_pairs(raw)
-
-
-async def gen_image_qa(
-    items: list[ImageQuadrupleForQA],
-    model: str,
-    prompt_template: str,
-    llm: LLMClient,
-    storage: StorageClient,
-    bucket: str = "source-data",
-) -> list[ImageQAPair]:
-    result = []
-    prompt = prompt_template or DEFAULT_IMAGE_QA_PROMPT
-    for item in items:
-        # 下载裁剪图并 base64 编码：RustFS 为内网部署，presigned URL 无法被云端 GLM-4V 访问
-        try:
-            image_bytes = await storage.download_bytes(bucket, item.cropped_image_path)
-        except Exception as e:
-            raise StorageDownloadError(f"下载裁剪图失败 {item.cropped_image_path}: {e}") from e
-        b64 = base64.b64encode(image_bytes).decode()
-        quad_text = json.dumps(
-            {k: v for k, v in item.model_dump().items() if k != "cropped_image_path"},
-            ensure_ascii=False,
-        )
-        messages = [
-            {"role": "system", "content": "你是专业的视觉问答对生成助手。"},
-            {"role": "user", "content": [
-                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
-                {"type": "text", "text": prompt + quad_text},
-            ]},
-        ]
-        try:
-            raw = await llm.chat_vision(messages, model)
-        except Exception as e:
-            raise LLMCallError(f"GLM-4V 调用失败: {e}") from e
-        for pair in _parse_qa_pairs(raw):
-            result.append(ImageQAPair(question=pair.question, answer=pair.answer, image_path=item.cropped_image_path))
-    logger.info(f"gen_image_qa model={model} items={len(items)} pairs={len(result)}")
-    return result
-```
-
-- [ ] **Step 5: 运行，确认通过**
-
-```bash
-conda run -n label pytest tests/test_qa_service.py -v
-```
-
-Expected: `6 passed`
-
-- [ ] **Step 6: Commit**
-
-```bash
-git add app/models/qa_models.py app/services/qa_service.py tests/test_qa_service.py
-git commit -m "feat: QA models and service for text and image QA generation"
-```
-
----
-
-## Task 15: QA Router
-
-**Files:**
-- Create: `app/routers/qa.py`
-- Create: `tests/test_qa_router.py`
-
-- [ ] **Step 1: 编写失败测试**
-
-`tests/test_qa_router.py`:
-
-```python
-import pytest
-from fastapi.testclient import TestClient
-from unittest.mock import AsyncMock
-from app.main import app
-from app.core.dependencies import set_clients
-
-
-@pytest.fixture
-def client(mock_llm, mock_storage):
-    set_clients(mock_llm, mock_storage)
-    return TestClient(app)
-
-
-def test_gen_text_qa_success(client, mock_llm):
-    mock_llm.chat = AsyncMock(return_value='[{"question":"额定电压？","answer":"110kV"}]')
-    resp = client.post("/api/v1/qa/gen-text", json={
-        "items": [{"subject": "变压器", "predicate": "额定电压", "object": "110kV", "source_snippet": "额定电压为110kV"}],
-    })
-    assert resp.status_code == 200
-    assert resp.json()["pairs"][0]["question"] == "额定电压？"
-
-
-def test_gen_image_qa_success(client, mock_llm, mock_storage):
-    mock_llm.chat_vision = AsyncMock(return_value='[{"question":"图中是什么？","answer":"接头"}]')
-    mock_storage.get_presigned_url.return_value = "https://example.com/crop.jpg"
-    resp = client.post("/api/v1/qa/gen-image", json={
-        "items": [{"subject": "A", "predicate": "B", "object": "C", "qualifier": "", "cropped_image_path": "crops/1/0.jpg"}],
-    })
-    assert resp.status_code == 200
-    data = resp.json()
-    assert data["pairs"][0]["image_path"] == "crops/1/0.jpg"
-```
-
-- [ ] **Step 2: 实现 `app/routers/qa.py`**
-
-```python
-from fastapi import APIRouter, Depends
-
-from app.clients.llm.base import LLMClient
-from app.clients.storage.base import StorageClient
-from app.core.config import get_config
-from app.core.dependencies import get_llm_client, get_storage_client
-from app.models.qa_models import ImageQARequest, ImageQAResponse, TextQARequest, TextQAResponse
-from app.services import qa_service
-
-router = APIRouter(tags=["QA"])
-
-
-@router.post("/qa/gen-text", response_model=TextQAResponse)
-async def gen_text_qa(
-    req: TextQARequest,
-    llm: LLMClient = Depends(get_llm_client),
-):
-    cfg = get_config()
-    pairs = await qa_service.gen_text_qa(
-        items=req.items,
-        model=req.model or cfg["models"]["default_text"],
-        prompt_template=req.prompt_template or qa_service.DEFAULT_TEXT_QA_PROMPT,
-        llm=llm,
-    )
-    return TextQAResponse(pairs=pairs)
-
-
-@router.post("/qa/gen-image", response_model=ImageQAResponse)
-async def gen_image_qa(
-    req: ImageQARequest,
-    llm: LLMClient = Depends(get_llm_client),
-    storage: StorageClient = Depends(get_storage_client),
-):
-    cfg = get_config()
-    pairs = await qa_service.gen_image_qa(
-        items=req.items,
-        model=req.model or cfg["models"]["default_vision"],
-        prompt_template=req.prompt_template or qa_service.DEFAULT_IMAGE_QA_PROMPT,
-        llm=llm,
-        storage=storage,
-        bucket=cfg["storage"]["buckets"]["source_data"],
-    )
-    return ImageQAResponse(pairs=pairs)
-```
-
-- [ ] **Step 3: 在 `app/main.py` 注册路由**
-
-```python
-from app.routers import text, image, video, qa
-app.include_router(qa.router, prefix="/api/v1")
-```
-
-- [ ] **Step 4: 运行测试**
-
-```bash
-conda run -n label pytest tests/test_qa_router.py -v
-```
-
-Expected: `2 passed`
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add app/routers/qa.py tests/test_qa_router.py app/main.py
-git commit -m "feat: QA router POST /api/v1/qa/gen-text and /gen-image"
-```
-
----
-
-## Task 16: Finetune Models + Service + Router
-
-**Files:**
-- Create: `app/models/finetune_models.py`
-- Create: `app/services/finetune_service.py`
-- Create: `app/routers/finetune.py`
-- Create: `tests/test_finetune_service.py`
-- Create: `tests/test_finetune_router.py`
-
-- [ ] **Step 1: 实现 `app/models/finetune_models.py`**
-
-```python
-from pydantic import BaseModel
-
-
-class FinetuneHyperparams(BaseModel):
-    learning_rate: float = 1e-4
-    epochs: int = 3
-
-
-class FinetuneStartRequest(BaseModel):
-    jsonl_url: str
-    base_model: str
-    hyperparams: FinetuneHyperparams = FinetuneHyperparams()
-
-
-class FinetuneStartResponse(BaseModel):
-    job_id: str
-
-
-class FinetuneStatusResponse(BaseModel):
-    job_id: str
-    status: str             # RUNNING | SUCCESS | FAILED
-    progress: int | None = None
-    error_message: str | None = None
-```
-
-- [ ] **Step 2: 编写失败测试**
-
-`tests/test_finetune_service.py`:
-
-```python
-import pytest
-from unittest.mock import MagicMock
-from app.services.finetune_service import start_finetune, get_finetune_status
-from app.models.finetune_models import FinetuneHyperparams
-
-
-@pytest.mark.asyncio
-async def test_start_finetune():
-    mock_job = MagicMock()
-    mock_job.id = "glm-ft-abc123"
-    mock_zhipuai = MagicMock()
-    mock_zhipuai.fine_tuning.jobs.create.return_value = mock_job
-
-    result = await start_finetune(
-        jsonl_url="https://example.com/export.jsonl",
-        base_model="glm-4-flash",
-        hyperparams=FinetuneHyperparams(learning_rate=1e-4, epochs=3),
-        client=mock_zhipuai,
-    )
-    assert result == "glm-ft-abc123"
-    mock_zhipuai.fine_tuning.jobs.create.assert_called_once()
-
-
-@pytest.mark.asyncio
-async def test_get_finetune_status_running():
-    mock_job = MagicMock()
-    mock_job.status = "running"
-    mock_job.progress = 50
-    mock_job.error = None
-    mock_zhipuai = MagicMock()
-    mock_zhipuai.fine_tuning.jobs.retrieve.return_value = mock_job
-
-    result = await get_finetune_status("glm-ft-abc123", mock_zhipuai)
-    assert result.status == "RUNNING"
-    assert result.progress == 50
-    assert result.job_id == "glm-ft-abc123"
-
-
-@pytest.mark.asyncio
-async def test_get_finetune_status_success():
-    mock_job = MagicMock()
-    mock_job.status = "succeeded"
-    mock_job.progress = 100
-    mock_job.error = None
-    mock_zhipuai = MagicMock()
-    mock_zhipuai.fine_tuning.jobs.retrieve.return_value = mock_job
-
-    result = await get_finetune_status("glm-ft-abc123", mock_zhipuai)
-    assert result.status == "SUCCESS"
-```
-
-- [ ] **Step 3: 运行，确认失败**
-
-```bash
-conda run -n label pytest tests/test_finetune_service.py -v
-```
-
-Expected: `ImportError`
-
-- [ ] **Step 4: 实现 `app/services/finetune_service.py`**
-
-```python
-import logging
-
-from app.models.finetune_models import FinetuneHyperparams, FinetuneStatusResponse
-
-logger = logging.getLogger(__name__)
-
-_STATUS_MAP = {
-    "running": "RUNNING",
-    "succeeded": "SUCCESS",
-    "failed": "FAILED",
-}
-
-
-async def start_finetune(
-    jsonl_url: str,
-    base_model: str,
-    hyperparams: FinetuneHyperparams,
-    client,  # ZhipuAI SDK client instance
-) -> str:
-    job = client.fine_tuning.jobs.create(
-        training_file=jsonl_url,
-        model=base_model,
-        hyperparameters={
-            "learning_rate_multiplier": hyperparams.learning_rate,
-            "n_epochs": hyperparams.epochs,
-        },
-    )
-    logger.info(f"finetune_start job_id={job.id} model={base_model}")
-    return job.id
-
-
-async def get_finetune_status(job_id: str, client) -> FinetuneStatusResponse:
-    job = client.fine_tuning.jobs.retrieve(job_id)
-    status = _STATUS_MAP.get(job.status, "RUNNING")
-    return FinetuneStatusResponse(
-        job_id=job_id,
-        status=status,
-        progress=getattr(job, "progress", None),
-        error_message=getattr(job, "error", None),
-    )
-```
-
-- [ ] **Step 5: 运行，确认通过**
-
-```bash
-conda run -n label pytest tests/test_finetune_service.py -v
-```
-
-Expected: `3 passed`
-
-- [ ] **Step 6: 实现 `app/routers/finetune.py`**
-
-```python
-from fastapi import APIRouter, Depends
-
-from app.clients.llm.base import LLMClient
-from app.clients.llm.zhipuai_client import ZhipuAIClient
-from app.core.dependencies import get_llm_client
-from app.models.finetune_models import (
-    FinetuneStartRequest,
-    FinetuneStartResponse,
-    FinetuneStatusResponse,
-)
-from app.services import finetune_service
-
-router = APIRouter(tags=["Finetune"])
-
-
-def _get_zhipuai(llm: LLMClient = Depends(get_llm_client)) -> ZhipuAIClient:
-    if not isinstance(llm, ZhipuAIClient):
-        raise RuntimeError("微调功能仅支持 ZhipuAI 后端")
-    return llm
-
-
-@router.post("/finetune/start", response_model=FinetuneStartResponse)
-async def start_finetune(
-    req: FinetuneStartRequest,
-    llm: ZhipuAIClient = Depends(_get_zhipuai),
-):
-    job_id = await finetune_service.start_finetune(
-        jsonl_url=req.jsonl_url,
-        base_model=req.base_model,
-        hyperparams=req.hyperparams,
-        client=llm._client,
-    )
-    return FinetuneStartResponse(job_id=job_id)
-
-
-@router.get("/finetune/status/{job_id}", response_model=FinetuneStatusResponse)
-async def get_finetune_status(
-    job_id: str,
-    llm: ZhipuAIClient = Depends(_get_zhipuai),
-):
-    return await finetune_service.get_finetune_status(job_id, llm._client)
-```
-
-- [ ] **Step 7: 编写路由测试**
-
-`tests/test_finetune_router.py`:
-
-```python
-import pytest
-from fastapi.testclient import TestClient
-from unittest.mock import MagicMock, patch
-from app.main import app
-from app.core.dependencies import set_clients
-from app.clients.llm.zhipuai_client import ZhipuAIClient
-from app.clients.storage.base import StorageClient
-
-
-@pytest.fixture
-def client(mock_storage):
-    with patch("app.clients.llm.zhipuai_client.ZhipuAI") as MockZhipuAI:
-        mock_sdk = MagicMock()
-        MockZhipuAI.return_value = mock_sdk
-        llm = ZhipuAIClient(api_key="test-key")
-        llm._mock_sdk = mock_sdk
-        set_clients(llm, mock_storage)
-        yield TestClient(app), mock_sdk
-
-
-def test_start_finetune(client):
-    test_client, mock_sdk = client
-    mock_job = MagicMock()
-    mock_job.id = "glm-ft-xyz"
-    mock_sdk.fine_tuning.jobs.create.return_value = mock_job
-
-    resp = test_client.post("/api/v1/finetune/start", json={
-        "jsonl_url": "https://example.com/export.jsonl",
-        "base_model": "glm-4-flash",
-        "hyperparams": {"learning_rate": 1e-4, "epochs": 3},
-    })
-    assert resp.status_code == 200
-    assert resp.json()["job_id"] == "glm-ft-xyz"
-
-
-def test_get_finetune_status(client):
-    test_client, mock_sdk = client
-    mock_job = MagicMock()
-    mock_job.status = "running"
-    mock_job.progress = 30
-    mock_job.error = None
-    mock_sdk.fine_tuning.jobs.retrieve.return_value = mock_job
-
-    resp = test_client.get("/api/v1/finetune/status/glm-ft-xyz")
-    assert resp.status_code == 200
-    data = resp.json()
-    assert data["status"] == "RUNNING"
-    assert data["progress"] == 30
-```
-
-- [ ] **Step 8: 在 `app/main.py` 注册路由（最终状态）**
-
-```python
-from app.routers import text, image, video, qa, finetune
-
-app.include_router(text.router, prefix="/api/v1")
-app.include_router(image.router, prefix="/api/v1")
-app.include_router(video.router, prefix="/api/v1")
-app.include_router(qa.router, prefix="/api/v1")
-app.include_router(finetune.router, prefix="/api/v1")
-```
-
-- [ ] **Step 9: 运行全部测试**
-
-```bash
-conda run -n label pytest tests/ -v
-```
-
-Expected: 所有测试通过，无失败
-
-- [ ] **Step 10: Commit**
-
-```bash
-git add app/models/finetune_models.py app/services/finetune_service.py app/routers/finetune.py tests/test_finetune_service.py tests/test_finetune_router.py app/main.py
-git commit -m "feat: finetune models, service, and router - complete all endpoints"
-```
-
----
-
-## Task 17: 部署文件
-
-**Files:**
-- Create: `Dockerfile`
-- Create: `docker-compose.yml`
-
-- [ ] **Step 1: 创建 `Dockerfile`**
-
-```dockerfile
-FROM python:3.12-slim
-
-WORKDIR /app
-
-# OpenCV 系统依赖
-RUN apt-get update && apt-get install -y \
-    libgl1 \
-    libglib2.0-0 \
-    && rm -rf /var/lib/apt/lists/*
-
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-
-COPY app/ ./app/
-COPY config.yaml .
-COPY .env .
-
-EXPOSE 8000
-
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
-```
-
-- [ ] **Step 2: 创建 `docker-compose.yml`**
-
-```yaml
-version: "3.9"
-
-services:
-  ai-service:
-    build: .
-    ports:
-      - "8000:8000"
-    env_file:
-      - .env
-    depends_on:
-      - rustfs
-    networks:
-      - label-net
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
-      interval: 30s
-      timeout: 5s
-      retries: 3
-      start_period: 10s
-
-  rustfs:
-    image: minio/minio:latest
-    command: server /data --console-address ":9001"
-    ports:
-      - "9000:9000"
-      - "9001:9001"
-    environment:
-      MINIO_ROOT_USER: minioadmin
-      MINIO_ROOT_PASSWORD: minioadmin
-    volumes:
-      - rustfs-data:/data
-    networks:
-      - label-net
-
-volumes:
-  rustfs-data:
-
-networks:
-  label-net:
-    driver: bridge
-```
-
-- [ ] **Step 3: 验证 Docker 构建**
-
-```bash
-docker build -t label-ai-service:dev .
-```
-
-Expected: 镜像构建成功，无错误
-
-- [ ] **Step 4: 运行全量测试，最终确认**
-
-```bash
-conda run -n label pytest tests/ -v --tb=short
-```
-
-Expected: 所有测试通过
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add Dockerfile docker-compose.yml
-git commit -m "feat: Dockerfile and docker-compose for containerized deployment"
-```
-
----
-
-## 自审检查结果
-
-**Spec coverage:**
-- ✅ 文本三元组提取（TXT/PDF/DOCX）— Task 8-9
-- ✅ 图像四元组提取 + bbox 裁剪 — Task 10-11
-- ✅ 视频帧提取（interval/keyframe）— Task 12-13
-- ✅ 视频转文本（BackgroundTask）— Task 12-13
-- ✅ 文本问答对生成 — Task 14-15
-- ✅ 图像问答对生成 — Task 14-15
-- ✅ 微调任务提交与状态查询 — Task 16
-- ✅ LLMClient / StorageClient ABC 适配层 — Task 4-5
-- ✅ config.yaml + .env 分层配置 — Task 2
-- ✅ 结构化日志 + 请求日志 — Task 3
-- ✅ 全局异常处理 — Task 3
-- ✅ Swagger 文档（FastAPI 自动生成） — Task 6
-- ✅ Dockerfile + docker-compose — Task 17
-- ✅ pytest 测试覆盖全部 service 和 router — 各 Task
-
-**类型一致性：** `TripleItem.source_offset` 在 Task 7 定义，Task 8 使用；`VideoJobCallback` 在 Task 12 定义，Task 12 service 使用 — 一致。
-
-**占位符：** 无 TBD / TODO，所有步骤均含完整代码。
-
----
-
-## GSTACK REVIEW REPORT
-
-| Review | Trigger | Why | Runs | Status | Findings |
-|--------|---------|-----|------|--------|----------|
-| Eng Review | `/plan-eng-review` | Architecture & tests (required) | 1 | ✅ DONE | 4 architecture issues found and resolved (see below) |
-| CEO Review | `/plan-ceo-review` | Scope & strategy | 0 | — | — |
-| Codex Review | `/codex review` | Independent 2nd opinion | 0 | — | — |
-| Design Review | `/plan-design-review` | UI/UX gaps | 0 | — | N/A（纯后端服务） |
-
-**VERDICT:** ENG REVIEW COMPLETE
-
-### Eng Review 发现和处理结果
-
-| # | 问题 | 决策 | 影响文件 |
-|---|------|------|---------|
-| 1 | `asyncio.get_event_loop()` 在 Python 3.10+ async 上下文中已弃用 | **保持不变**（用户明确选择） | Task 4, Task 5 |
-| 2 | 图像 QA 使用 presigned URL，但 RustFS 内网地址云端 GLM-4V 不可达 | **改为 base64**：download_bytes → base64 encode | Task 14 qa_service, test |
-| 3 | 缺少 `/health` 健康检查端点 | **已添加**：`GET /health` + Docker healthcheck | Task 6 main.py, Task 17 docker-compose |
-| 4 | 视频全量下载到内存，大文件 OOM 风险 | **加文件大小限制**：`get_object_size()` 路由层校验，可通过 `MAX_VIDEO_SIZE_MB` env 配置 | Task 5 StorageClient, Task 13 router |
-
-### 代码质量修复
-
-| 问题 | 修复 |
-|------|------|
-| `image_service.py` 中 `except (KeyError, TypeError, Exception)` 冗余 | 改为 `except Exception` |
-| `test_qa_service.py` 断言 `get_presigned_url` 被调用 | 改为断言 `download_bytes` 调用 + 验证 base64 data URL |
diff --git a/docs/superpowers/specs/2026-04-10-ai-service-design.md b/docs/superpowers/specs/2026-04-10-ai-service-design.md
deleted file mode 100644
index 55b01bb..0000000
--- a/docs/superpowers/specs/2026-04-10-ai-service-design.md
+++ /dev/null
@@ -1,835 +0,0 @@
-# 知识图谱智能标注平台 — AI 服务设计文档
-
-> 版本：v1.0 | 日期：2026-04-10  
-> 运行时：Python 3.12.13（conda `label` 环境）| 框架：FastAPI  
-> 上游系统：label-backend（Java Spring Boot）| 模型：ZhipuAI GLM 系列
-
----
-
-## 一、项目定位
-
-AI 服务（`label_ai_service`）是标注平台的智能计算层，独立部署为 Python FastAPI 服务，接收 Java 后端调用，完成以下核心任务：
-
-| 能力 | 说明 |
-|------|------|
-| 文本三元组提取 | 从 TXT / PDF / DOCX 文档中提取 subject / predicate / object + 原文定位信息 |
-| 图像四元组提取 | 调用 GLM-4V 分析图片，提取四元组 + bbox 坐标，自动裁剪区域图 |
-| 视频帧提取 | OpenCV 按间隔或关键帧模式抽帧，帧图上传 RustFS |
-| 视频转文本 | GLM-4V 理解视频片段，输出结构化文字描述，降维为文本标注流程 |
-| 问答对生成 | 基于三元组/四元组 + 原文/图像证据，生成 GLM 微调格式候选问答对 |
-| 微调任务管理 | 向 ZhipuAI 提交微调任务、查询状态 |
-
-系统只有两条标注流水线（文本线、图片线），视频是两种预处理入口，不构成第三条流水线。
-
----
-
-## 二、整体架构
-
-### 2.1 在平台中的位置
-
-```
-                    ┌─────────────┐
-                    │  Nginx 反代  │
-                    └──────┬──────┘
-             ┌─────────────┼─────────────┐
-             ▼             ▼             ▼
-        ┌─────────┐  ┌──────────┐  ┌──────────┐
-        │ Vue3 前端│  │ Spring   │  │ FastAPI  │
-        │ (静态)   │  │ Boot 后端 │  │ AI 服务  │◄── 本文档范围
-        └─────────┘  └────┬─────┘  └────┬─────┘
-                          │              │
-              ┌───────────┼──────────────┤
-              ▼           ▼              ▼
-        ┌──────────┐ ┌────────┐  ┌────────────┐
-        │PostgreSQL│ │ Redis  │  │   RustFS   │
-        └──────────┘ └────────┘  └────────────┘
-```
-
-AI 服务**不直接访问数据库**，只通过：
-- **RustFS S3 API**：读取原始文件、写入处理结果
-- **ZhipuAI API**：调用 GLM 系列模型
-- **Java 后端回调接口**：视频异步任务完成后回传结果
-
-### 2.2 目录结构
-
-```
-label_ai_service/
-├── app/
-│   ├── main.py                        # FastAPI 应用入口，注册路由、lifespan
-│   ├── core/
-│   │   ├── config.py                  # YAML + .env 分层配置，lru_cache 单例
-│   │   ├── logging.py                 # 统一结构化日志配置
-│   │   ├── exceptions.py              # 自定义异常类 + 全局异常处理器
-│   │   └── dependencies.py            # FastAPI Depends 工厂函数
-│   ├── clients/
-│   │   ├── llm/
-│   │   │   ├── base.py                # LLMClient ABC（抽象接口）
-│   │   │   └── zhipuai_client.py      # ZhipuAI 实现
-│   │   └── storage/
-│   │       ├── base.py                # StorageClient ABC（抽象接口）
-│   │       └── rustfs_client.py       # RustFS S3 兼容实现（boto3）
-│   ├── services/
-│   │   ├── text_service.py            # 文档解析 + 三元组提取
-│   │   ├── image_service.py           # 图像四元组提取 + bbox 裁剪
-│   │   ├── video_service.py           # OpenCV 抽帧 + 视频转文本
-│   │   ├── qa_service.py              # 文本/图像问答对生成
-│   │   └── finetune_service.py        # 微调任务提交与状态查询
-│   ├── routers/
-│   │   ├── text.py                    # POST /api/v1/text/extract
-│   │   ├── image.py                   # POST /api/v1/image/extract
-│   │   ├── video.py                   # POST /api/v1/video/extract-frames
-│   │   │                              # POST /api/v1/video/to-text
-│   │   ├── qa.py                      # POST /api/v1/qa/gen-text
-│   │   │                              # POST /api/v1/qa/gen-image
-│   │   └── finetune.py                # POST /api/v1/finetune/start
-│   │                                  # GET  /api/v1/finetune/status/{jobId}
-│   └── models/
-│       ├── text_models.py             # 三元组请求/响应 schema
-│       ├── image_models.py            # 四元组请求/响应 schema
-│       ├── video_models.py            # 视频处理请求/响应 schema
-│       ├── qa_models.py               # 问答对请求/响应 schema
-│       └── finetune_models.py         # 微调请求/响应 schema
-├── config.yaml                        # 非敏感配置（提交 git）
-├── .env                               # 密钥与环境差异项（提交 git）
-├── requirements.txt
-├── Dockerfile
-└── docker-compose.yml
-```
-
----
-
-## 三、配置设计
-
-### 3.1 分层配置原则
-
-| 文件 | 职责 | 提交 git |
-|------|------|----------|
-| `config.yaml` | 稳定配置：端口、路径规范、模型名、桶名、视频参数 | ✅ |
-| `.env` | 环境差异项：密钥、服务地址 | ✅ |
-
-环境变量优先级高于 `config.yaml`，Docker Compose 通过 `env_file` 加载 `.env`，本地开发由 `python-dotenv` 加载。
-
-### 3.2 `config.yaml`
-
-```yaml
-server:
-  port: 8000
-  log_level: INFO
-
-storage:
-  buckets:
-    source_data: "source-data"
-    finetune_export: "finetune-export"
-
-backend: {}   # callback_url 由 .env 注入
-
-video:
-  frame_sample_count: 8    # 视频转文本时均匀抽取的代表帧数
-  max_file_size_mb: 200    # 视频文件大小上限（超过则拒绝，防止 OOM）
-
-models:
-  default_text: "glm-4-flash"
-  default_vision: "glm-4v-flash"
-```
-
-### 3.3 `.env`
-
-```ini
-ZHIPUAI_API_KEY=your-zhipuai-api-key
-STORAGE_ACCESS_KEY=minioadmin
-STORAGE_SECRET_KEY=minioadmin
-STORAGE_ENDPOINT=http://rustfs:9000
-BACKEND_CALLBACK_URL=http://backend:8080/internal/video-job/callback
-# MAX_VIDEO_SIZE_MB=200   # 可选，覆盖 config.yaml 中的视频大小上限
-```
-
-### 3.4 config 模块实现
-
-```python
-# core/config.py
-import os, yaml
-from functools import lru_cache
-from pathlib import Path
-from dotenv import load_dotenv
-
-_ROOT = Path(__file__).parent.parent.parent
-
-# 环境变量 → YAML 路径映射
-_ENV_OVERRIDES = {
-    "ZHIPUAI_API_KEY":       ["zhipuai", "api_key"],
-    "STORAGE_ACCESS_KEY":    ["storage", "access_key"],
-    "STORAGE_SECRET_KEY":    ["storage", "secret_key"],
-    "STORAGE_ENDPOINT":      ["storage", "endpoint"],
-    "BACKEND_CALLBACK_URL":  ["backend", "callback_url"],
-    "LOG_LEVEL":             ["server", "log_level"],
-    "MAX_VIDEO_SIZE_MB":     ["video", "max_file_size_mb"],
-}
-
-def _set_nested(d: dict, keys: list[str], value: str):
-    for k in keys[:-1]:
-        d = d.setdefault(k, {})
-    d[keys[-1]] = value
-
-@lru_cache(maxsize=1)
-def get_config() -> dict:
-    load_dotenv(_ROOT / ".env")                          # 1. 加载 .env
-    with open(_ROOT / "config.yaml", encoding="utf-8") as f:
-        cfg = yaml.safe_load(f)                          # 2. 读取 YAML
-    for env_key, yaml_path in _ENV_OVERRIDES.items():    # 3. 环境变量覆盖
-        val = os.environ.get(env_key)
-        if val:
-            _set_nested(cfg, yaml_path, val)
-    _validate(cfg)
-    return cfg
-
-def _validate(cfg: dict):
-    checks = [
-        (["zhipuai", "api_key"],    "ZHIPUAI_API_KEY"),
-        (["storage", "access_key"], "STORAGE_ACCESS_KEY"),
-        (["storage", "secret_key"], "STORAGE_SECRET_KEY"),
-    ]
-    for path, name in checks:
-        val = cfg
-        for k in path:
-            val = (val or {}).get(k, "")
-        if not val:
-            raise RuntimeError(f"缺少必要配置项：{name}")
-```
-
----
-
-## 四、适配层设计
-
-### 4.1 LLM 适配层
-
-```python
-# clients/llm/base.py
-from abc import ABC, abstractmethod
-
-class LLMClient(ABC):
-    @abstractmethod
-    async def chat(self, messages: list[dict], model: str, **kwargs) -> str:
-        """纯文本对话，返回模型输出文本"""
-
-    @abstractmethod
-    async def chat_vision(self, messages: list[dict], model: str, **kwargs) -> str:
-        """多模态对话（图文混合输入），返回模型输出文本"""
-```
-
-```python
-# clients/llm/zhipuai_client.py
-import asyncio
-from zhipuai import ZhipuAI
-from .base import LLMClient
-
-class ZhipuAIClient(LLMClient):
-    def __init__(self, api_key: str):
-        self._client = ZhipuAI(api_key=api_key)
-
-    async def chat(self, messages: list[dict], model: str, **kwargs) -> str:
-        loop = asyncio.get_event_loop()
-        resp = await loop.run_in_executor(
-            None,
-            lambda: self._client.chat.completions.create(
-                model=model, messages=messages, **kwargs
-            ),
-        )
-        return resp.choices[0].message.content
-
-    async def chat_vision(self, messages: list[dict], model: str, **kwargs) -> str:
-        # GLM-4V 与文本接口相同，通过 image_url type 区分图文消息
-        return await self.chat(messages, model, **kwargs)
-```
-
-**扩展**：替换 GLM 只需新增 `class OpenAIClient(LLMClient)` 并在 `lifespan` 中注入，services 层零修改。
-
-### 4.2 Storage 适配层
-
-```python
-# clients/storage/base.py
-from abc import ABC, abstractmethod
-
-class StorageClient(ABC):
-    @abstractmethod
-    async def download_bytes(self, bucket: str, path: str) -> bytes: ...
-
-    @abstractmethod
-    async def upload_bytes(
-        self, bucket: str, path: str, data: bytes,
-        content_type: str = "application/octet-stream"
-    ) -> None: ...
-
-    @abstractmethod
-    def get_presigned_url(self, bucket: str, path: str, expires: int = 3600) -> str: ...
-```
-
-```python
-# clients/storage/rustfs_client.py
-import asyncio
-import boto3
-from .base import StorageClient
-
-class RustFSClient(StorageClient):
-    def __init__(self, endpoint: str, access_key: str, secret_key: str):
-        self._s3 = boto3.client(
-            "s3",
-            endpoint_url=endpoint,
-            aws_access_key_id=access_key,
-            aws_secret_access_key=secret_key,
-        )
-
-    async def download_bytes(self, bucket: str, path: str) -> bytes:
-        loop = asyncio.get_event_loop()
-        resp = await loop.run_in_executor(
-            None, lambda: self._s3.get_object(Bucket=bucket, Key=path)
-        )
-        return resp["Body"].read()
-
-    async def upload_bytes(self, bucket, path, data, content_type="application/octet-stream"):
-        loop = asyncio.get_event_loop()
-        await loop.run_in_executor(
-            None,
-            lambda: self._s3.put_object(
-                Bucket=bucket, Key=path, Body=data, ContentType=content_type
-            ),
-        )
-
-    def get_presigned_url(self, bucket: str, path: str, expires: int = 3600) -> str:
-        return self._s3.generate_presigned_url(
-            "get_object",
-            Params={"Bucket": bucket, "Key": path},
-            ExpiresIn=expires,
-        )
-```
-
-### 4.3 依赖注入
-
-```python
-# core/dependencies.py
-from app.clients.llm.base import LLMClient
-from app.clients.storage.base import StorageClient
-
-_llm_client: LLMClient | None = None
-_storage_client: StorageClient | None = None
-
-def set_clients(llm: LLMClient, storage: StorageClient):
-    global _llm_client, _storage_client
-    _llm_client, _storage_client = llm, storage
-
-def get_llm_client() -> LLMClient:
-    return _llm_client
-
-def get_storage_client() -> StorageClient:
-    return _storage_client
-```
-
-```python
-# main.py（lifespan 初始化）
-from contextlib import asynccontextmanager
-from fastapi import FastAPI
-from app.core.config import get_config
-from app.core.dependencies import set_clients
-from app.clients.llm.zhipuai_client import ZhipuAIClient
-from app.clients.storage.rustfs_client import RustFSClient
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    cfg = get_config()
-    set_clients(
-        llm=ZhipuAIClient(api_key=cfg["zhipuai"]["api_key"]),
-        storage=RustFSClient(
-            endpoint=cfg["storage"]["endpoint"],
-            access_key=cfg["storage"]["access_key"],
-            secret_key=cfg["storage"]["secret_key"],
-        ),
-    )
-    yield
-
-app = FastAPI(title="Label AI Service", lifespan=lifespan)
-```
-
----
-
-## 五、API 接口设计
-
-统一前缀：`/api/v1`。FastAPI 自动生成 Swagger 文档（`/docs`）。
-
-### 5.0 健康检查
-
-**`GET /health`**
-
-```json
-// 响应（200 OK）
-{"status": "ok"}
-```
-
-用于 Docker healthcheck、Nginx 上游探测、运维监控。无需认证，不访问外部依赖。
-
-### 5.1 文本三元组提取
-
-**`POST /api/v1/text/extract`**
-
-```json
-// 请求
-{
-  "file_path": "text/202404/123.txt",
-  "file_name": "设备规范.txt",
-  "model": "glm-4-flash",
-  "prompt_template": "..."        // 可选，不传使用 config 默认
-}
-
-// 响应
-{
-  "items": [
-    {
-      "subject": "变压器",
-      "predicate": "额定电压",
-      "object": "110kV",
-      "source_snippet": "该变压器额定电压为110kV，...",
-      "source_offset": {"start": 120, "end": 280}
-    }
-  ]
-}
-```
-
-### 5.2 图像四元组提取
-
-**`POST /api/v1/image/extract`**
-
-```json
-// 请求
-{
-  "file_path": "image/202404/456.jpg",
-  "task_id": 789,
-  "model": "glm-4v-flash",
-  "prompt_template": "..."
-}
-
-// 响应
-{
-  "items": [
-    {
-      "subject": "电缆接头",
-      "predicate": "位于",
-      "object": "配电箱左侧",
-      "qualifier": "2024年检修现场",
-      "bbox": {"x": 10, "y": 20, "w": 100, "h": 80},
-      "cropped_image_path": "crops/789/0.jpg"
-    }
-  ]
-}
-```
-
-裁剪图由 AI 服务自动完成并上传 RustFS，`cropped_image_path` 直接写入响应。
-
-### 5.3 视频帧提取（异步）
-
-**`POST /api/v1/video/extract-frames`**
-
-```json
-// 请求
-{
-  "file_path": "video/202404/001.mp4",
-  "source_id": 10,
-  "job_id": 42,
-  "mode": "interval",             // interval | keyframe
-  "frame_interval": 30            // interval 模式专用，单位：帧数
-}
-
-// 立即响应（202 Accepted）
-{
-  "message": "任务已接受，后台处理中",
-  "job_id": 42
-}
-```
-
-后台完成后，AI 服务调用 Java 后端回调接口：
-
-```json
-POST {BACKEND_CALLBACK_URL}
-{
-  "job_id": 42,
-  "status": "SUCCESS",
-  "frames": [
-    {"frame_index": 0,  "time_sec": 0.0,  "frame_path": "frames/10/0.jpg"},
-    {"frame_index": 30, "time_sec": 1.0,  "frame_path": "frames/10/1.jpg"}
-  ],
-  "error_message": null
-}
-```
-
-### 5.4 视频转文本（异步）
-
-**`POST /api/v1/video/to-text`**
-
-```json
-// 请求
-{
-  "file_path": "video/202404/001.mp4",
-  "source_id": 10,
-  "job_id": 43,
-  "start_sec": 0,
-  "end_sec": 120,
-  "model": "glm-4v-flash",
-  "prompt_template": "..."
-}
-
-// 立即响应（202 Accepted）
-{
-  "message": "任务已接受，后台处理中",
-  "job_id": 43
-}
-```
-
-后台完成后回调：
-
-```json
-POST {BACKEND_CALLBACK_URL}
-{
-  "job_id": 43,
-  "status": "SUCCESS",
-  "output_path": "video-text/10/1712800000.txt",
-  "error_message": null
-}
-```
-
-### 5.5 文本问答对生成
-
-**`POST /api/v1/qa/gen-text`**
-
-```json
-// 请求
-{
-  "items": [
-    {
-      "subject": "变压器",
-      "predicate": "额定电压",
-      "object": "110kV",
-      "source_snippet": "该变压器额定电压为110kV，..."
-    }
-  ],
-  "model": "glm-4-flash",
-  "prompt_template": "..."
-}
-
-// 响应
-{
-  "pairs": [
-    {
-      "question": "变压器的额定电压是多少？",
-      "answer": "该变压器额定电压为110kV。"
-    }
-  ]
-}
-```
-
-### 5.6 图像问答对生成
-
-**`POST /api/v1/qa/gen-image`**
-
-```json
-// 请求
-{
-  "items": [
-    {
-      "subject": "电缆接头",
-      "predicate": "位于",
-      "object": "配电箱左侧",
-      "qualifier": "2024年检修现场",
-      "cropped_image_path": "crops/789/0.jpg"
-    }
-  ],
-  "model": "glm-4v-flash",
-  "prompt_template": "..."
-}
-
-// 响应
-{
-  "pairs": [
-    {
-      "question": "图中电缆接头位于何处？",
-      "answer": "图中电缆接头位于配电箱左侧。",
-      "image_path": "crops/789/0.jpg"
-    }
-  ]
-}
-```
-
-图像 QA 生成时，AI 服务通过 `storage.download_bytes` 重新下载裁剪图，base64 编码后直接嵌入多模态消息，避免 RustFS 内网 presigned URL 无法被云端 GLM-4V 访问的问题。
-
-### 5.7 提交微调任务
-
-**`POST /api/v1/finetune/start`**
-
-```json
-// 请求
-{
-  "jsonl_url": "https://rustfs.example.com/finetune-export/export/xxx.jsonl",
-  "base_model": "glm-4-flash",
-  "hyperparams": {
-    "learning_rate": 1e-4,
-    "epochs": 3
-  }
-}
-
-// 响应
-{
-  "job_id": "glm-ft-xxxxxx"
-}
-```
-
-### 5.8 查询微调状态
-
-**`GET /api/v1/finetune/status/{jobId}`**
-
-```json
-// 响应
-{
-  "job_id": "glm-ft-xxxxxx",
-  "status": "RUNNING",            // RUNNING | SUCCESS | FAILED
-  "progress": 45,
-  "error_message": null
-}
-```
-
----
-
-## 六、Service 层设计
-
-### 6.1 text_service — 文档解析 + 三元组提取
-
-```
-1. storage.download_bytes("source-data", file_path) → bytes
-2. 按扩展名路由解析器：
-   .txt  → decode("utf-8")
-   .pdf  → pdfplumber.open() 提取全文
-   .docx → python-docx 遍历段落
-3. 拼装 Prompt（系统模板 + 文档正文）
-4. llm.chat(messages, model) → JSON 字符串
-5. 解析 JSON → 校验字段完整性 → 返回 TripleList
-```
-
-解析器注册表（消除 if-else）：
-
-```python
-PARSERS: dict[str, Callable[[bytes], str]] = {
-    ".txt":  parse_txt,
-    ".pdf":  parse_pdf,
-    ".docx": parse_docx,
-}
-def extract_text(data: bytes, filename: str) -> str:
-    ext = Path(filename).suffix.lower()
-    if ext not in PARSERS:
-        raise UnsupportedFileTypeError(ext)
-    return PARSERS[ext](data)
-```
-
-### 6.2 image_service — 四元组提取 + bbox 裁剪
-
-```
-1. storage.download_bytes("source-data", file_path) → bytes
-2. 图片 bytes 转 base64，构造 GLM-4V image_url 消息
-3. llm.chat_vision(messages, model) → JSON 字符串
-4. 解析四元组（含 bbox）
-5. 按 bbox 裁剪：
-   numpy 解码 bytes → cv2 裁剪区域 → cv2.imencode(".jpg") → bytes
-6. storage.upload_bytes("source-data", f"crops/{task_id}/{i}.jpg", ...)
-7. 返回 QuadrupleList（含 cropped_image_path）
-```
-
-### 6.3 video_service — OpenCV 抽帧 + 视频转文本
-
-**抽帧（BackgroundTask）**：
-
-```
-0. storage.get_object_size(bucket, file_path) → 字节数
-   超过 video.max_file_size_mb 限制 → 回调 FAILED（路由层提前校验，返回 400）
-1. storage.download_bytes → bytes → 写入 tempfile
-2. cv2.VideoCapture 打开临时文件
-3. interval 模式：按 frame_interval 步进读帧
-   keyframe 模式：逐帧计算与前帧的像素差均值，差值超过阈值则判定为场景切换关键帧
-                  （OpenCV 无原生 I 帧检测，用帧差分近似实现）
-4. 每帧 cv2.imencode(".jpg") → upload_bytes("source-data", f"frames/{source_id}/{i}.jpg")
-5. 清理临时文件
-6. httpx.post(BACKEND_CALLBACK_URL, json={job_id, status="SUCCESS", frames=[...]})
-异常：回调 status="FAILED", error_message=str(e)
-```
-
-**视频转文本（BackgroundTask）**：
-
-```
-1. download_bytes → tempfile
-2. cv2.VideoCapture 在 start_sec～end_sec 区间均匀抽 frame_sample_count 帧
-3. 每帧转 base64，构造多图 GLM-4V 消息（含时序说明）
-4. llm.chat_vision → 文字描述
-5. 描述文本 upload_bytes("source-data", f"video-text/{source_id}/{timestamp}.txt")
-6. 回调 Java 后端：output_path + status="SUCCESS"
-```
-
-### 6.4 qa_service — 问答对生成
-
-```
-文本 QA：
-  批量拼入三元组 + source_snippet 到 Prompt
-  llm.chat(messages, model) → 解析问答对 JSON → QAPairList
-
-图像 QA：
-  遍历四元组列表
-  storage.download_bytes(bucket, cropped_image_path) → bytes → base64 编码
-  构造多模态消息（data:image/jpeg;base64,... + 问题指令）
-  llm.chat_vision → 解析 → 含 image_path 的 QAPairList
-  （注：不使用 presigned URL，因 RustFS 为内网部署，云端 GLM-4V 无法访问内网地址）
-```
-
-### 6.5 finetune_service — GLM 微调对接
-
-微调 API 属 ZhipuAI 专有能力，无需抽象为通用接口。`finetune_service` 直接依赖 `ZhipuAIClient`（通过依赖注入获取后强转类型），不走 `LLMClient` ABC。
-
-```
-提交：
-  zhipuai_client._client.fine_tuning.jobs.create(
-      training_file=jsonl_url,
-      model=base_model,
-      hyperparameters=hyperparams
-  ) → job_id
-
-查询：
-  zhipuai_client._client.fine_tuning.jobs.retrieve(job_id)
-  → 映射 status 枚举 RUNNING / SUCCESS / FAILED
-```
-
----
-
-## 七、日志设计
-
-- 使用标准库 `logging`，JSON 格式输出，与 uvicorn 集成
-- 每个请求记录：`method / path / status_code / duration_ms`
-- 每次 GLM 调用记录：`model / prompt_tokens / completion_tokens / duration_ms`
-- BackgroundTask 记录：`job_id / stage / status / error`
-- **不记录文件内容原文**（防止敏感数据泄露）
-
----
-
-## 八、异常处理
-
-| 异常类 | HTTP 状态码 | 场景 |
-|--------|------------|------|
-| `UnsupportedFileTypeError` | 400 | 文件格式不支持 |
-| `StorageDownloadError` | 502 | RustFS 不可达或文件不存在 |
-| `LLMResponseParseError` | 502 | GLM 返回非合法 JSON |
-| `LLMCallError` | 503 | GLM API 限流 / 超时 |
-| 未捕获异常 | 500 | 记录完整 traceback |
-
-所有错误响应统一格式：
-
-```json
-{"code": "ERROR_CODE", "message": "具体描述"}
-```
-
----
-
-## 九、RustFS 存储路径规范
-
-| 资源类型 | 存储桶 | 路径格式 |
-|----------|--------|----------|
-| 上传文本文件 | `source-data` | `text/{年月}/{source_id}.txt` |
-| 上传图片 | `source-data` | `image/{年月}/{source_id}.jpg` |
-| 上传视频 | `source-data` | `video/{年月}/{source_id}.mp4` |
-| 视频帧模式抽取的帧图 | `source-data` | `frames/{source_id}/{frame_index}.jpg` |
-| 视频片段转译输出的文本 | `source-data` | `video-text/{source_id}/{timestamp}.txt` |
-| 图像/帧 bbox 裁剪图 | `source-data` | `crops/{task_id}/{item_index}.jpg` |
-| 导出 JSONL 文件 | `finetune-export` | `export/{batchUuid}.jsonl` |
-
----
-
-## 十、部署设计
-
-### 10.1 Dockerfile
-
-```dockerfile
-FROM python:3.12-slim
-
-WORKDIR /app
-
-# OpenCV 系统依赖
-RUN apt-get update && apt-get install -y \
-    libgl1 libglib2.0-0 \
-    && rm -rf /var/lib/apt/lists/*
-
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-
-COPY app/ ./app/
-COPY config.yaml .
-COPY .env .
-
-EXPOSE 8000
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
-```
-
-### 10.2 docker-compose.yml（ai-service 片段）
-
-```yaml
-ai-service:
-  build: ./label_ai_service
-  ports:
-    - "8000:8000"
-  env_file:
-    - ./label_ai_service/.env
-  depends_on:
-    - rustfs
-    - backend
-  networks:
-    - label-net
-  healthcheck:
-    test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
-    interval: 30s
-    timeout: 5s
-    retries: 3
-    start_period: 10s
-```
-
-### 10.3 requirements.txt
-
-```
-fastapi>=0.111
-uvicorn[standard]>=0.29
-pydantic>=2.7
-python-dotenv>=1.0
-pyyaml>=6.0
-zhipuai>=2.1
-boto3>=1.34
-pdfplumber>=0.11
-python-docx>=1.1
-opencv-python-headless>=4.9
-numpy>=1.26
-httpx>=0.27
-```
-
----
-
-## 十一、关键设计决策
-
-### 11.1 为何 LLMClient / StorageClient 使用 ABC
-
-当前只实现 ZhipuAI 和 RustFS，但模型选型和对象存储可能随项目演进变化。ABC 约束接口契约，保证替换实现时 services 层零修改。注入点集中在 `lifespan`，一处修改全局生效。
-
-### 11.2 为何 ZhipuAI 同步 SDK 在线程池中调用
-
-ZhipuAI 官方 SDK 是同步阻塞调用，直接 `await` 不生效。通过 `loop.run_in_executor(None, ...)` 在线程池中运行，不阻塞 FastAPI 的 asyncio 事件循环，保持并发处理能力。
-
-### 11.3 为何视频任务使用 BackgroundTasks 而非 Celery
-
-项目规模适中，视频处理任务由 ADMIN 手动触发，并发量可控。FastAPI `BackgroundTasks` 无需额外中间件（Redis 队列、Celery Worker），部署简单，任务状态通过回调接口传递给 Java 后端管理，符合整体架构风格。
-
-### 11.4 为何图像 QA 生成用 base64 而非 presigned URL
-
-RustFS 部署在 Docker 内网（`http://rustfs:9000`），presigned URL 指向内网地址，云端 GLM-4V API 无法访问，会导致所有图像 QA 请求失败。因此将裁剪图重新下载为 bytes，base64 编码后直接嵌入多模态消息体，与 `image_service` 处理原图的方式保持一致，无需 RustFS 有公网地址。
-
-### 11.5 config.yaml + .env 分层配置的原因
-
-`config.yaml` 存结构化、稳定的非敏感配置，可读性好，适合 git 追踪变更历史；`.env` 存密钥和环境差异项，格式简单，Docker `env_file` 原生支持，本地开发和容器启动行为一致，无需维护两套配置文件。
-
----
-
-*文档版本：v1.0 | 生成日期：2026-04-10*
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index 78c5011..0000000
--- a/pytest.ini
+++ /dev/null
@@ -1,3 +0,0 @@
-[pytest]
-asyncio_mode = auto
-testpaths = tests
diff --git a/specs/001-ai-service-requirements/checklists/requirements.md b/specs/001-ai-service-requirements/checklists/requirements.md
deleted file mode 100644
index bc0c9a6..0000000
--- a/specs/001-ai-service-requirements/checklists/requirements.md
+++ /dev/null
@@ -1,38 +0,0 @@
-# Specification Quality Checklist: AI 服务需求文档
-
-**Purpose**: Validate specification completeness and quality before proceeding to planning
-**Created**: 2026-04-10
-**Feature**: [../spec.md](../spec.md)
-
-## Content Quality
-
-- [x] No implementation details (languages, frameworks, APIs) — 注：Technical Environment 节单独列出，明确标注为已确认技术约束，不影响需求层表述
-- [x] Focused on user value and business needs
-- [x] Written for non-technical stakeholders（业务场景均以 ADMIN/标注员/系统为视角描述）
-- [x] All mandatory sections completed
-
-## Requirement Completeness
-
-- [x] No [NEEDS CLARIFICATION] markers remain
-- [x] Requirements are testable and unambiguous（每条 FR 含明确行为和可验证条件）
-- [x] Success criteria are measurable（SC 含具体时间、像素精度等量化指标）
-- [x] Success criteria are technology-agnostic (no implementation details)
-- [x] All acceptance scenarios are defined（8 个 User Story 均含 Acceptance Scenarios）
-- [x] Edge cases are identified（6 条边界情况，覆盖文件损坏、空结果、并发等）
-- [x] Scope is clearly bounded（明确：不处理上传逻辑，不管理训练资源，不对外暴露）
-- [x] Dependencies and assumptions identified（9 条假设，含内外网访问、ZhipuAI 托管等）
-
-## Feature Readiness
-
-- [x] All functional requirements have clear acceptance criteria（FR-001~FR-018 逐一可追溯到 User Story 验收场景）
-- [x] User scenarios cover primary flows（P1: 文本/图像提取；P2: 视频/QA；P3: 微调/健康检查）
-- [x] Feature meets measurable outcomes defined in Success Criteria
-- [x] No implementation details leak into specification（Technical Environment 节独立，不混入 FR/SC）
-
-## Notes
-
-- Technical Environment 节超出传统需求文档范围，但用户明确要求包含环境约束（Python 3.12.13、FastAPI、conda label 环境），已单独成节并说明其性质。
-- SC-009（测试覆盖）为工程质量指标，非用户感知需求，但对服务可靠性有实质影响，保留。
-- 所有 [NEEDS CLARIFICATION] 均已通过合理默认值或设计文档确认，无待用户回答的开放问题。
-
-**VERDICT**: ✅ 规格就绪，可进行 `/speckit.clarify` 或 `/speckit.plan`
diff --git a/specs/001-ai-service-requirements/contracts/api.md b/specs/001-ai-service-requirements/contracts/api.md
deleted file mode 100644
index 3a93151..0000000
--- a/specs/001-ai-service-requirements/contracts/api.md
+++ /dev/null
@@ -1,333 +0,0 @@
-# API Contract: AI 服务接口定义
-
-**Branch**: `001-ai-service-requirements` | **Date**: 2026-04-10  
-**Base URL**: `http://ai-service:8000`  
-**API Prefix**: `/api/v1`  
-**Swagger**: `/docs`（FastAPI 自动生成）
-
----
-
-## 通用约定
-
-### 请求格式
-- 所有请求体：`Content-Type: application/json`
-- 无认证机制（内网服务，仅 Java 后端调用）
-
-### 响应格式
-- 成功：HTTP 2xx，JSON 响应体
-- 错误：HTTP 4xx/5xx，统一错误格式：
-  ```json
-  {"code": "ERROR_CODE", "message": "具体描述"}
-  ```
-
-### 错误码
-
-| HTTP 状态码 | code | 触发条件 |
-|------------|------|---------|
-| 400 | UNSUPPORTED_FILE_TYPE | 文件格式不支持（如 .xlsx） |
-| 400 | VIDEO_TOO_LARGE | 视频文件超过大小上限 |
-| 502 | STORAGE_ERROR | RustFS 不可达或文件不存在 |
-| 502 | LLM_PARSE_ERROR | GLM 返回非合法 JSON |
-| 503 | LLM_CALL_ERROR | GLM API 限流 / 超时 |
-| 500 | INTERNAL_ERROR | 未捕获异常 |
-
----
-
-## 端点一览
-
-| 端点 | 方法 | 功能 | 响应码 |
-|------|------|------|--------|
-| `/health` | GET | 健康检查 | 200 |
-| `/api/v1/text/extract` | POST | 文档三元组提取 | 200 |
-| `/api/v1/image/extract` | POST | 图像四元组提取 | 200 |
-| `/api/v1/video/extract-frames` | POST | 视频帧提取（异步） | 202 |
-| `/api/v1/video/to-text` | POST | 视频转文本（异步） | 202 |
-| `/api/v1/qa/gen-text` | POST | 文本问答对生成 | 200 |
-| `/api/v1/qa/gen-image` | POST | 图像问答对生成 | 200 |
-| `/api/v1/finetune/start` | POST | 提交微调任务 | 200 |
-| `/api/v1/finetune/status/{jobId}` | GET | 查询微调状态 | 200 |
-
----
-
-## 端点详情
-
-### GET /health
-
-健康检查端点，无需认证，无请求体。
-
-**响应（200 OK）**:
-```json
-{"status": "ok"}
-```
-
----
-
-### POST /api/v1/text/extract
-
-从存储中指定路径的文档提取文本三元组。
-
-**请求体**:
-```json
-{
-  "file_path": "text/202404/123.txt",
-  "file_name": "设备规范.txt",
-  "model": "glm-4-flash",
-  "prompt_template": "..."
-}
-```
-
-| 字段 | 类型 | 必填 | 说明 |
-|------|------|------|------|
-| file_path | string | 是 | RustFS 中的文件路径 |
-| file_name | string | 是 | 带扩展名的文件名（用于判断格式） |
-| model | string | 否 | 模型名，默认使用 config 中的 default_text |
-| prompt_template | string | 否 | 自定义提示词，不传使用内置模板 |
-
-**支持格式**: `.txt`, `.pdf`, `.docx`
-
-**响应（200 OK）**:
-```json
-{
-  "items": [
-    {
-      "subject": "变压器",
-      "predicate": "额定电压",
-      "object": "110kV",
-      "source_snippet": "该变压器额定电压为110kV",
-      "source_offset": {"start": 120, "end": 150}
-    }
-  ]
-}
-```
-
----
-
-### POST /api/v1/image/extract
-
-从存储中指定路径的图片提取知识四元组，并自动裁剪 bbox 区域。
-
-**请求体**:
-```json
-{
-  "file_path": "image/202404/456.jpg",
-  "task_id": 789,
-  "model": "glm-4v-flash",
-  "prompt_template": "..."
-}
-```
-
-| 字段 | 类型 | 必填 | 说明 |
-|------|------|------|------|
-| file_path | string | 是 | RustFS 中的图片路径 |
-| task_id | int | 是 | 标注任务 ID（用于构造裁剪图存储路径） |
-| model | string | 否 | 默认使用 config 中的 default_vision |
-| prompt_template | string | 否 | 自定义提示词 |
-
-**响应（200 OK）**:
-```json
-{
-  "items": [
-    {
-      "subject": "电缆接头",
-      "predicate": "位于",
-      "object": "配电箱左侧",
-      "qualifier": "2024年检修现场",
-      "bbox": {"x": 10, "y": 20, "w": 100, "h": 80},
-      "cropped_image_path": "crops/789/0.jpg"
-    }
-  ]
-}
-```
-
----
-
-### POST /api/v1/video/extract-frames
-
-触发视频帧提取后台任务，立即返回。
-
-**请求体**:
-```json
-{
-  "file_path": "video/202404/001.mp4",
-  "source_id": 10,
-  "job_id": 42,
-  "mode": "interval",
-  "frame_interval": 30
-}
-```
-
-| 字段 | 类型 | 必填 | 说明 |
-|------|------|------|------|
-| file_path | string | 是 | RustFS 中的视频路径 |
-| source_id | int | 是 | 原始资料 ID（用于构造帧存储路径） |
-| job_id | int | 是 | 由 Java 后端分配的任务 ID |
-| mode | string | 否 | `interval`（默认）或 `keyframe` |
-| frame_interval | int | 否 | interval 模式专用，按帧数步进，默认 30 |
-
-**响应（202 Accepted）**:
-```json
-{"message": "任务已接受，后台处理中", "job_id": 42}
-```
-
-**完成后回调 Java 后端**（POST `{BACKEND_CALLBACK_URL}`）:
-```json
-{
-  "job_id": 42,
-  "status": "SUCCESS",
-  "frames": [
-    {"frame_index": 0, "time_sec": 0.0, "frame_path": "frames/10/0.jpg"}
-  ],
-  "error_message": null
-}
-```
-
----
-
-### POST /api/v1/video/to-text
-
-触发视频片段转文字后台任务，立即返回。
-
-**请求体**:
-```json
-{
-  "file_path": "video/202404/001.mp4",
-  "source_id": 10,
-  "job_id": 43,
-  "start_sec": 0,
-  "end_sec": 120,
-  "model": "glm-4v-flash",
-  "prompt_template": "..."
-}
-```
-
-| 字段 | 类型 | 必填 | 说明 |
-|------|------|------|------|
-| file_path | string | 是 | RustFS 中的视频路径 |
-| source_id | int | 是 | 原始资料 ID |
-| job_id | int | 是 | 由 Java 后端分配的任务 ID |
-| start_sec | float | 是 | 分析起始时间（秒） |
-| end_sec | float | 是 | 分析结束时间（秒） |
-| model | string | 否 | 默认使用 config 中的 default_vision |
-| prompt_template | string | 否 | 自定义提示词 |
-
-**响应（202 Accepted）**:
-```json
-{"message": "任务已接受，后台处理中", "job_id": 43}
-```
-
-**完成后回调 Java 后端**（POST `{BACKEND_CALLBACK_URL}`）:
-```json
-{
-  "job_id": 43,
-  "status": "SUCCESS",
-  "output_path": "video-text/10/1712800000.txt",
-  "error_message": null
-}
-```
-
----
-
-### POST /api/v1/qa/gen-text
-
-基于文本三元组批量生成候选问答对。
-
-**请求体**:
-```json
-{
-  "items": [
-    {
-      "subject": "变压器",
-      "predicate": "额定电压",
-      "object": "110kV",
-      "source_snippet": "该变压器额定电压为110kV"
-    }
-  ],
-  "model": "glm-4-flash",
-  "prompt_template": "..."
-}
-```
-
-**响应（200 OK）**:
-```json
-{
-  "pairs": [
-    {"question": "变压器的额定电压是多少？", "answer": "该变压器额定电压为110kV。"}
-  ]
-}
-```
-
----
-
-### POST /api/v1/qa/gen-image
-
-基于图像四元组生成候选图文问答对。图片由 AI 服务从存储自动获取，调用方只需提供路径。
-
-**请求体**:
-```json
-{
-  "items": [
-    {
-      "subject": "电缆接头",
-      "predicate": "位于",
-      "object": "配电箱左侧",
-      "qualifier": "2024年检修现场",
-      "cropped_image_path": "crops/789/0.jpg"
-    }
-  ],
-  "model": "glm-4v-flash",
-  "prompt_template": "..."
-}
-```
-
-**响应（200 OK）**:
-```json
-{
-  "pairs": [
-    {
-      "question": "图中电缆接头位于何处？",
-      "answer": "图中电缆接头位于配电箱左侧。",
-      "image_path": "crops/789/0.jpg"
-    }
-  ]
-}
-```
-
----
-
-### POST /api/v1/finetune/start
-
-向 ZhipuAI 提交微调任务。
-
-**请求体**:
-```json
-{
-  "jsonl_url": "https://rustfs.example.com/finetune-export/export/xxx.jsonl",
-  "base_model": "glm-4-flash",
-  "hyperparams": {"learning_rate": 1e-4, "epochs": 3}
-}
-```
-
-**响应（200 OK）**:
-```json
-{"job_id": "glm-ft-xxxxxx"}
-```
-
----
-
-### GET /api/v1/finetune/status/{jobId}
-
-查询微调任务状态。
-
-**路径参数**: `jobId` — 微调任务 ID（由 `/finetune/start` 返回）
-
-**响应（200 OK）**:
-```json
-{
-  "job_id": "glm-ft-xxxxxx",
-  "status": "RUNNING",
-  "progress": 45,
-  "error_message": null
-}
-```
-
-`status` 取值: `RUNNING` | `SUCCESS` | `FAILED`
diff --git a/specs/001-ai-service-requirements/data-model.md b/specs/001-ai-service-requirements/data-model.md
deleted file mode 100644
index 5ed2438..0000000
--- a/specs/001-ai-service-requirements/data-model.md
+++ /dev/null
@@ -1,167 +0,0 @@
-# Data Model: AI 服务
-
-**Branch**: `001-ai-service-requirements` | **Date**: 2026-04-10
-
----
-
-## 实体定义
-
-### TripleItem（文本三元组）
-
-从文档中提取的一条知识关系。
-
-| 字段 | 类型 | 约束 | 说明 |
-|------|------|------|------|
-| subject | string | 非空 | 主语实体 |
-| predicate | string | 非空 | 谓语/关系 |
-| object | string | 非空 | 宾语实体 |
-| source_snippet | string | 非空 | 原文中的证据片段（直接引用） |
-| source_offset.start | int | ≥0 | 证据片段在全文中的起始字符偏移 |
-| source_offset.end | int | >start | 证据片段在全文中的结束字符偏移 |
-
-**状态转换**: 无（只读输出）
-
----
-
-### QuadrupleItem（图像四元组）
-
-从图像中提取的一条知识关系，带图像位置信息。
-
-| 字段 | 类型 | 约束 | 说明 |
-|------|------|------|------|
-| subject | string | 非空 | 主体实体 |
-| predicate | string | 非空 | 关系/属性 |
-| object | string | 非空 | 客体实体 |
-| qualifier | string | 可为空 | 修饰信息（时间、条件、场景） |
-| bbox.x | int | ≥0 | 边界框左上角 x 像素坐标 |
-| bbox.y | int | ≥0 | 边界框左上角 y 像素坐标 |
-| bbox.w | int | >0 | 边界框宽度（像素） |
-| bbox.h | int | >0 | 边界框高度（像素） |
-| cropped_image_path | string | 非空 | 裁剪图在 RustFS 中的存储路径 |
-
-**派生规则**: `cropped_image_path = "crops/{task_id}/{item_index}.jpg"`，由 image_service 自动生成并上传
-
----
-
-### QAPair（文本问答对）
-
-由文本三元组生成的训练候选问答对。
-
-| 字段 | 类型 | 约束 | 说明 |
-|------|------|------|------|
-| question | string | 非空 | 问题文本 |
-| answer | string | 非空 | 答案文本 |
-
----
-
-### ImageQAPair（图像问答对）
-
-由图像四元组生成的训练候选图文问答对。
-
-| 字段 | 类型 | 约束 | 说明 |
-|------|------|------|------|
-| question | string | 非空 | 问题文本 |
-| answer | string | 非空 | 答案文本 |
-| image_path | string | 非空 | 对应裁剪图的存储路径（来源于 QuadrupleItem.cropped_image_path） |
-
----
-
-### FrameInfo（视频帧信息）
-
-视频帧提取任务中单帧的元数据。
-
-| 字段 | 类型 | 约束 | 说明 |
-|------|------|------|------|
-| frame_index | int | ≥0 | 帧在视频中的原始帧序号 |
-| time_sec | float | ≥0.0 | 帧对应的时间点（秒） |
-| frame_path | string | 非空 | 帧图在 RustFS 中的存储路径 |
-
-**派生规则**: `frame_path = "frames/{source_id}/{upload_index}.jpg"`
-
----
-
-### VideoJobCallback（视频任务回调）
-
-异步视频任务完成后发送给 Java 后端的通知载荷。
-
-| 字段 | 类型 | 约束 | 说明 |
-|------|------|------|------|
-| job_id | int | 非空 | 由 Java 后端分配的任务 ID |
-| status | string | SUCCESS \| FAILED | 任务最终状态 |
-| frames | FrameInfo[] \| null | 仅帧提取时非 null | 提取的帧列表（可为空列表） |
-| output_path | string \| null | 仅视频转文本时非 null | 输出文字描述的存储路径 |
-| error_message | string \| null | 仅 FAILED 时非 null | 错误描述 |
-
----
-
-### FinetuneJob（微调任务）
-
-微调任务的状态快照。
-
-| 字段 | 类型 | 约束 | 说明 |
-|------|------|------|------|
-| job_id | string | 非空 | 由 ZhipuAI 平台分配的任务 ID（如 "glm-ft-xxxxxx"） |
-| status | string | RUNNING \| SUCCESS \| FAILED | 当前状态 |
-| progress | int \| null | 0-100 \| null | 完成百分比（ZhipuAI 支持时） |
-| error_message | string \| null | 仅 FAILED 时非 null | 错误描述 |
-
-**状态映射**:
-```
-ZhipuAI "running"   → RUNNING
-ZhipuAI "succeeded" → SUCCESS
-ZhipuAI "failed"    → FAILED
-其他               → RUNNING（保守处理）
-```
-
----
-
-## RustFS 存储路径规范
-
-| 资源类型 | 存储桶 | 路径格式 |
-|----------|--------|----------|
-| 上传文本文件 | `source-data` | `text/{年月}/{source_id}.txt` |
-| 上传图片 | `source-data` | `image/{年月}/{source_id}.jpg` |
-| 上传视频 | `source-data` | `video/{年月}/{source_id}.mp4` |
-| 视频帧图 | `source-data` | `frames/{source_id}/{upload_index}.jpg` |
-| 视频转译文本 | `source-data` | `video-text/{source_id}/{timestamp}.txt` |
-| 图像/帧 bbox 裁剪图 | `source-data` | `crops/{task_id}/{item_index}.jpg` |
-| 导出 JSONL 文件 | `finetune-export` | `export/{batchUuid}.jsonl` |
-
----
-
-## 配置模型
-
-### config.yaml（非敏感，提交 git）
-
-```yaml
-server:
-  port: 8000
-  log_level: INFO
-
-storage:
-  buckets:
-    source_data: "source-data"
-    finetune_export: "finetune-export"
-
-backend: {}   # callback_url 由 .env 注入
-
-video:
-  frame_sample_count: 8    # 视频转文本时均匀采样帧数
-  max_file_size_mb: 200    # 视频大小上限（可通过 MAX_VIDEO_SIZE_MB 覆盖）
-
-models:
-  default_text: "glm-4-flash"
-  default_vision: "glm-4v-flash"
-```
-
-### 环境变量覆盖映射
-
-| 环境变量 | YAML 路径 | 说明 |
-|----------|-----------|------|
-| ZHIPUAI_API_KEY | zhipuai.api_key | 必填 |
-| STORAGE_ACCESS_KEY | storage.access_key | 必填 |
-| STORAGE_SECRET_KEY | storage.secret_key | 必填 |
-| STORAGE_ENDPOINT | storage.endpoint | RustFS 地址 |
-| BACKEND_CALLBACK_URL | backend.callback_url | Java 后端回调接口 |
-| LOG_LEVEL | server.log_level | 日志级别 |
-| MAX_VIDEO_SIZE_MB | video.max_file_size_mb | 视频大小上限 |
diff --git a/specs/001-ai-service-requirements/plan.md b/specs/001-ai-service-requirements/plan.md
deleted file mode 100644
index 900cffb..0000000
--- a/specs/001-ai-service-requirements/plan.md
+++ /dev/null
@@ -1,120 +0,0 @@
-# Implementation Plan: AI 服务需求文档
-
-**Branch**: `001-ai-service-requirements` | **Date**: 2026-04-10 | **Spec**: [spec.md](spec.md)  
-**Input**: Feature specification from `/specs/001-ai-service-requirements/spec.md`
-
-> **参考实现计划（主计划）**: `docs/superpowers/plans/2026-04-10-ai-service-impl.md`  
-> 本文件为 speckit 规划框架文档，详细 TDD 任务（17 个步骤含完整代码）见上述主计划。
-
-## Summary
-
-实现一个独立部署的 Python FastAPI AI 服务，为知识图谱标注平台提供文本三元组提取、图像四元组提取、视频帧处理、问答对生成和 GLM 微调管理能力。服务通过 RustFS S3 API 读写文件，通过 ZhipuAI GLM API 调用大模型，通过回调接口通知 Java 后端异步任务结果。采用 ABC 适配层（LLMClient / StorageClient）保证可扩展性，FastAPI BackgroundTasks 处理视频长任务，全量 TDD 开发。
-
-## Technical Context
-
-**Language/Version**: Python 3.12.13（conda `label` 环境）  
-**Primary Dependencies**: FastAPI ≥0.111, uvicorn[standard] ≥0.29, pydantic ≥2.7, zhipuai ≥2.1, boto3 ≥1.34, pdfplumber ≥0.11, python-docx ≥1.1, opencv-python-headless ≥4.9, numpy ≥1.26, httpx ≥0.27, python-dotenv ≥1.0, pyyaml ≥6.0  
-**Storage**: RustFS（S3 兼容协议，boto3 访问）  
-**Testing**: pytest ≥8.0 + pytest-asyncio ≥0.23，所有 service 和 router 均有单元测试  
-**Target Platform**: Linux 容器（Docker + Docker Compose）  
-**Project Type**: web-service  
-**Performance Goals**: 文本提取 <60s；图像提取 <30s；视频任务接受 <1s；健康检查 <1s；QA 生成（≤10条）<90s  
-**Constraints**: 视频文件大小上限默认 200MB（可通过 MAX_VIDEO_SIZE_MB 环境变量配置）；不访问数据库；GLM 为云端 API，图片须以 base64 传输；ZhipuAI SDK 同步阻塞，须在线程池中执行  
-**Scale/Scope**: 低并发（ADMIN 手动触发），同时不超过 5 个视频任务
-
-## Constitution Check
-
-*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.*
-
-> 项目 constitution 为未填充的模板，无项目特定约束规则。以下采用通用工程原则进行评估。
-
-| 原则 | 状态 | 说明 |
-|------|------|------|
-| 测试优先（TDD） | ✅ 通过 | 实现计划采用红绿重构循环，所有模块先写测试 |
-| 简单性（YAGNI） | ✅ 通过 | BackgroundTasks 而非 Celery；无数据库；适配层仅当前实现 |
-| 可观测性 | ✅ 通过 | JSON 结构化日志，含请求/GLM/视频任务维度 |
-| 错误分类 | ✅ 通过 | 4 种异常类（400/502/503/500），结构化响应 |
-| 可扩展性 | ✅ 通过 | LLMClient / StorageClient ABC 适配层 |
-| 配置分层 | ✅ 通过 | config.yaml + .env + 环境变量覆盖 |
-
-**GATE RESULT**: ✅ 无违规，可进入 Phase 0。
-
-## Project Structure
-
-### Documentation (this feature)
-
-```text
-specs/001-ai-service-requirements/
-├── plan.md              # 本文件 (/speckit.plan 输出)
-├── research.md          # Phase 0 输出
-├── data-model.md        # Phase 1 输出
-├── quickstart.md        # Phase 1 输出
-├── contracts/           # Phase 1 输出
-│   └── api.md
-└── tasks.md             # Phase 2 输出 (/speckit.tasks - 未由本命令创建)
-```
-
-### Source Code (repository root)
-
-```text
-label_ai_service/
-├── app/
-│   ├── main.py                    # FastAPI 应用入口，lifespan，/health 端点
-│   ├── core/
-│   │   ├── config.py              # YAML + .env 分层配置，lru_cache 单例
-│   │   ├── logging.py             # JSON 结构化日志，请求日志中间件
-│   │   ├── exceptions.py          # 自定义异常 + 全局处理器
-│   │   ├── json_utils.py          # GLM 响应 JSON 解析（兼容 Markdown 代码块）
-│   │   └── dependencies.py        # FastAPI Depends 工厂函数
-│   ├── clients/
-│   │   ├── llm/
-│   │   │   ├── base.py            # LLMClient ABC（chat / chat_vision）
-│   │   │   └── zhipuai_client.py  # ZhipuAI 实现（线程池包装同步 SDK）
-│   │   └── storage/
-│   │       ├── base.py            # StorageClient ABC（download/upload/presigned/size）
-│   │       └── rustfs_client.py   # RustFS S3 兼容实现
-│   ├── services/
-│   │   ├── text_service.py        # TXT/PDF/DOCX 解析 + 三元组提取
-│   │   ├── image_service.py       # 四元组提取 + bbox 裁剪
-│   │   ├── video_service.py       # 帧提取 + 视频转文本（BackgroundTask）
-│   │   ├── qa_service.py          # 文本/图像问答对生成（图像用 base64）
-│   │   └── finetune_service.py    # 微调任务提交与查询
-│   ├── routers/
-│   │   ├── text.py                # POST /api/v1/text/extract
-│   │   ├── image.py               # POST /api/v1/image/extract
-│   │   ├── video.py               # POST /api/v1/video/extract-frames, /to-text
-│   │   ├── qa.py                  # POST /api/v1/qa/gen-text, /gen-image
-│   │   └── finetune.py            # POST /api/v1/finetune/start, GET /status/{id}
-│   └── models/
-│       ├── text_models.py
-│       ├── image_models.py
-│       ├── video_models.py
-│       ├── qa_models.py
-│       └── finetune_models.py
-├── tests/
-│   ├── conftest.py                # mock_llm, mock_storage fixtures
-│   ├── test_config.py
-│   ├── test_llm_client.py
-│   ├── test_storage_client.py
-│   ├── test_text_service.py
-│   ├── test_text_router.py
-│   ├── test_image_service.py
-│   ├── test_image_router.py
-│   ├── test_video_service.py
-│   ├── test_video_router.py
-│   ├── test_qa_service.py
-│   ├── test_qa_router.py
-│   ├── test_finetune_service.py
-│   └── test_finetune_router.py
-├── config.yaml
-├── .env
-├── requirements.txt
-├── Dockerfile
-└── docker-compose.yml
-```
-
-**Structure Decision**: 单项目结构（Option 1），分层为 routers → services → clients，测试与源码并列。
-
-## Complexity Tracking
-
-> Constitution 无违规，此节无需填写。
diff --git a/specs/001-ai-service-requirements/quickstart.md b/specs/001-ai-service-requirements/quickstart.md
deleted file mode 100644
index 53b6133..0000000
--- a/specs/001-ai-service-requirements/quickstart.md
+++ /dev/null
@@ -1,109 +0,0 @@
-# Quickstart: AI 服务开发指南
-
-**Branch**: `001-ai-service-requirements` | **Date**: 2026-04-10
-
----
-
-## 环境准备
-
-```bash
-# 激活 conda 环境
-conda activate label
-
-# 安装依赖（在 label_ai_service 目录下）
-pip install -r requirements.txt
-```
-
----
-
-## 本地开发启动
-
-```bash
-# 1. 复制并配置 .env（已提交模板）
-# 编辑 .env 填写真实的 ZHIPUAI_API_KEY 和 STORAGE_ENDPOINT
-
-# 2. 启动开发服务器
-conda run -n label uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
-
-# 3. 访问 Swagger 文档
-# http://localhost:8000/docs
-```
-
----
-
-## 运行测试
-
-```bash
-# 运行全部测试
-conda run -n label pytest tests/ -v
-
-# 运行指定模块测试
-conda run -n label pytest tests/test_text_service.py -v
-
-# 运行带覆盖率报告
-conda run -n label pytest tests/ --cov=app --cov-report=term-missing
-```
-
----
-
-## Docker 部署
-
-```bash
-# 构建镜像
-docker build -t label-ai-service:dev .
-
-# 使用 docker-compose 启动（含 RustFS）
-docker-compose up -d
-
-# 查看日志
-docker-compose logs -f ai-service
-
-# 健康检查
-curl http://localhost:8000/health
-```
-
----
-
-## 关键配置说明
-
-### 视频大小上限调整
-
-无需重建镜像，在 `.env` 中添加：
-```ini
-MAX_VIDEO_SIZE_MB=500
-```
-
-### 切换大模型
-
-修改 `config.yaml`：
-```yaml
-models:
-  default_text: "glm-4-flash"    # 文本模型
-  default_vision: "glm-4v-flash" # 视觉模型
-```
-
----
-
-## 开发流程（TDD）
-
-详细的 17 个任务步骤（含完整代码）见主实现计划：  
-`docs/superpowers/plans/2026-04-10-ai-service-impl.md`
-
-每个任务的开发步骤：
-1. 编写失败测试（`pytest ... -v` 验证失败）
-2. 实现最小代码使测试通过（`pytest ... -v` 验证通过）
-3. Commit
-
----
-
-## 目录结构速查
-
-```
-app/
-├── main.py          # 入口，/health 端点，路由注册
-├── core/            # 配置、日志、异常、工具
-├── clients/         # LLM 和 Storage 适配层（ABC + 实现）
-├── services/        # 业务逻辑（text/image/video/qa/finetune）
-├── routers/         # HTTP 路由处理
-└── models/          # Pydantic 请求/响应 Schema
-```
diff --git a/specs/001-ai-service-requirements/research.md b/specs/001-ai-service-requirements/research.md
deleted file mode 100644
index b703aa8..0000000
--- a/specs/001-ai-service-requirements/research.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# Research: AI 服务实现方案
-
-**Branch**: `001-ai-service-requirements` | **Date**: 2026-04-10  
-**Status**: 完成（所有决策已在设计阶段确定，无待研究项）
-
----
-
-## 决策记录
-
-### D-001: 异步框架选型
-
-**Decision**: FastAPI + uvicorn  
-**Rationale**: 原生 async/await 支持、Pydantic 自动校验、自动生成 Swagger 文档，是 Python 生态系统中性能和开发效率的最优权衡。  
-**Alternatives considered**: Django（过重）、Flask（无原生异步）、aiohttp（无自动文档和类型校验）
-
----
-
-### D-002: ZhipuAI SDK 调用方式
-
-**Decision**: 同步 SDK 通过 `asyncio.get_event_loop().run_in_executor(None, ...)` 在线程池中调用  
-**Rationale**: ZhipuAI 官方 SDK 为同步阻塞设计，直接在 async 函数中调用会阻塞事件循环。`run_in_executor` 将阻塞调用卸载到线程池，保持 FastAPI 事件循环响应能力。  
-**Alternatives considered**: 使用 `asyncio.to_thread()`（Python 3.9+ 语法糖，等效实现，选择 run_in_executor 保持向后兼容性）；使用 httpx 直接调用 ZhipuAI HTTP API（绕过 SDK 但增加维护负担）
-
----
-
-### D-003: 图像 QA 生成的图片传输方式
-
-**Decision**: base64 编码嵌入消息体（`data:image/jpeg;base64,...`）  
-**Rationale**: RustFS 部署在 Docker 内网（endpoint: `http://rustfs:9000`），presigned URL 指向内网地址，云端 GLM-4V 无法访问。base64 编码将图片内容直接内联到 API 请求，不依赖网络可达性。  
-**Alternatives considered**: presigned URL（不可行，内网地址云端不可达）；公网 RustFS 暴露（增加安全风险）
-
----
-
-### D-004: 视频长任务处理机制
-
-**Decision**: FastAPI BackgroundTasks + HTTP 回调通知 Java 后端  
-**Rationale**: 视频处理耗时不可控（几秒到几分钟），同步等待会超时。BackgroundTasks 无需额外中间件（Redis/Celery），部署简单，任务状态通过回调接口由 Java 后端管理，符合整体架构风格。并发量有限（≤5个同时任务），BackgroundTasks 完全够用。  
-**Alternatives considered**: Celery（需 Redis broker，引入额外运维负担）；asyncio.create_task（进程重启会丢失任务）
-
----
-
-### D-005: 分层配置方案
-
-**Decision**: config.yaml（稳定非敏感配置）+ .env（密钥和环境差异项），环境变量优先级高于 YAML  
-**Rationale**: YAML 提供结构化可读性，适合 git 追踪非敏感配置变更；.env 格式为 Docker `env_file` 原生支持；环境变量覆盖机制使容器部署时无需重建镜像即可切换配置。  
-**Alternatives considered**: 纯 .env 文件（缺乏结构化，复杂配置难维护）；数据库存储配置（过重）
-
----
-
-### D-006: 视频大文件 OOM 防护
-
-**Decision**: 在视频路由层（接受请求后、启动后台任务前）通过 `storage.get_object_size()` 查询文件大小，超限返回 HTTP 400  
-**Rationale**: 在下载前拒绝，避免实际 OOM；大小限制通过 config.yaml + MAX_VIDEO_SIZE_MB 环境变量运行时可配置，无需重建镜像；实现简单，无需引入流式下载的新抽象。  
-**Alternatives considered**: 流式下载（Completeness: 9/10，但 YAGNI，当前规模不需要）；不限制（Completeness: 4/10，有 OOM 风险）
-
----
-
-### D-007: 视频关键帧检测算法
-
-**Decision**: 帧差分（frame difference）近似检测：计算当前帧与前帧灰度图的像素差均值，差值超过阈值（默认 30.0）判定为场景切换  
-**Rationale**: OpenCV 无原生 I 帧检测 API（`CAP_PROP_POS_FRAMES` 是帧定位，非 I 帧标识）。帧差分简单有效，对场景切换检测准确，且无需视频解码器底层支持。  
-**Alternatives considered**: 基于编码信息的 I 帧检测（需 FFmpeg 支持，引入额外依赖）；固定间隔（不够智能，不适合关键帧模式）
-
----
-
-### D-008: 测试策略
-
-**Decision**: pytest + pytest-asyncio，Service 层和 Router 层分别测试，使用 AsyncMock 模拟外部依赖  
-**Rationale**: Service 层测试业务逻辑，不依赖 HTTP；Router 层使用 TestClient 测试完整请求流程。视频 service 测试使用真实小视频文件（OpenCV VideoWriter 生成），验证帧提取逻辑正确性。  
-**Alternatives considered**: 仅集成测试（需要真实 RustFS 和 ZhipuAI，CI 成本高）；全部单元测试（无法覆盖路由和异常处理器集成）
-
----
-
-## 无待解决项
-
-所有 NEEDS CLARIFICATION 均已在设计阶段通过用户确认或合理默认值解决。本 research.md 仅作决策存档。
diff --git a/specs/001-ai-service-requirements/spec.md b/specs/001-ai-service-requirements/spec.md
deleted file mode 100644
index fbc8915..0000000
--- a/specs/001-ai-service-requirements/spec.md
+++ /dev/null
@@ -1,258 +0,0 @@
-# Feature Specification: AI 服务需求文档
-
-**Feature Branch**: `001-ai-service-requirements`  
-**Created**: 2026-04-10  
-**Status**: Draft  
-**Input**: User description: "@docs/superpowers/specs/2026-04-10-ai-service-design.md 根据设计文档完成需求文档"
-
----
-
-## 概述
-
-知识图谱智能标注平台需要一个独立的 AI 计算服务，接收 Java 后端的调用，完成文档结构化提取、图像分析、视频预处理、训练数据生成和模型微调管理等智能化任务，将大模型能力嵌入标注工作流，大幅降低人工标注成本。
-
----
-
-## User Scenarios & Testing *(mandatory)*
-
-### User Story 1 - ADMIN 从文档中提取知识三元组 (Priority: P1)
-
-ADMIN 在标注平台上选择一份已上传的文本文件（TXT、PDF 或 Word 文档），触发 AI 辅助提取。AI 服务从存储系统中读取该文档，分析内容，识别其中的主谓宾知识关系（三元组），并为每个三元组标注原文出处片段和字符偏移位置，返回结构化结果供标注员审核确认。
-
-**Why this priority**: 文本三元组提取是平台文本标注流水线的核心入口，所有文本类标注任务都依赖此能力。无此功能，平台的主要价值无法实现。
-
-**Independent Test**: 向 AI 服务发送一个包含已知知识点的测试文档路径，验证返回结果包含正确的主语/谓语/宾语和对应的原文位置信息，即可独立验证此功能完整运行。
-
-**Acceptance Scenarios**:
-
-1. **Given** 存储系统中存有一份 TXT 格式文档，**When** AI 服务收到该文档路径和提取请求，**Then** 返回包含至少一条三元组的结果，每条含 subject、predicate、object、原文片段和字符偏移。
-2. **Given** 存储系统中存有一份 PDF 格式文档，**When** AI 服务收到提取请求，**Then** 正确解析 PDF 内容并返回三元组结果。
-3. **Given** 存储系统中存有一份 Word（.docx）格式文档，**When** AI 服务收到提取请求，**Then** 正确解析文档内容并返回三元组结果。
-4. **Given** 请求包含不支持的文件格式（如 .xlsx），**When** AI 服务收到请求，**Then** 返回明确的格式不支持错误，不崩溃。
-5. **Given** 存储系统不可达，**When** AI 服务尝试下载文件，**Then** 返回存储故障错误，而非通用服务器错误。
-
----
-
-### User Story 2 - ADMIN 从图片中提取知识四元组并自动裁剪 (Priority: P1)
-
-ADMIN 在标注平台选择一张已上传的图片，触发 AI 辅助提取。AI 服务读取该图片，通过多模态大模型分析图像内容，识别图中的知识实体关系（四元组：主体、关系、客体、修饰信息），同时给出每个知识点在图像中的位置框（bbox 坐标），并自动将对应区域裁剪保存，供标注员对照审核。
-
-**Why this priority**: 图像四元组提取是图片标注流水线的核心入口，与文本三元组提取并列为平台两大主流水线的起点。
-
-**Independent Test**: 向 AI 服务发送一张包含可识别对象关系的测试图片路径，验证返回结果包含四元组信息和裁剪图的存储路径，即可独立验证此功能完整运行。
-
-**Acceptance Scenarios**:
-
-1. **Given** 存储系统中存有一张图片，**When** AI 服务收到该图片路径和提取请求，**Then** 返回包含至少一条四元组的结果，每条含 subject、predicate、object、qualifier 和 bbox 坐标。
-2. **Given** AI 服务成功提取四元组，**When** 处理完成，**Then** 每个四元组对应的图像区域已自动裁剪并上传至存储，响应中包含裁剪图的存储路径。
-3. **Given** bbox 坐标超出图像边界，**When** 裁剪时，**Then** 自动截断至图像有效区域，不报错。
-4. **Given** 大模型返回格式异常（非 JSON），**When** 解析响应，**Then** 返回解析失败错误，不返回部分结果。
-
----
-
-### User Story 3 - ADMIN 对视频进行帧提取（帧模式预处理） (Priority: P2)
-
-ADMIN 在标注平台选择一段已上传的视频，选择"帧提取"模式（按固定间隔或关键帧），触发 AI 服务处理。AI 服务在后台异步完成帧提取，将每一帧图片上传至存储，处理完成后主动通知 Java 后端，后端随即为每一帧创建图片标注任务，进入图片标注流程。
-
-**Why this priority**: 视频帧提取是视频进入图片标注流水线的预处理步骤，依赖图片提取流水线（P1）已就绪。
-
-**Independent Test**: 向 AI 服务发送一个测试视频的存储路径和 job_id，服务立即返回 202 Accepted，稍后验证回调接口收到含帧路径列表的成功通知，即可独立验证。
-
-**Acceptance Scenarios**:
-
-1. **Given** 存储系统中存有一段视频（大小在限制内），**When** AI 服务收到帧提取请求（interval 模式），**Then** 立即返回 202 Accepted 和 job_id，不等待处理完成。
-2. **Given** 帧提取任务在后台成功完成，**When** 处理完成，**Then** AI 服务向 Java 后端发送回调，包含 job_id、status=SUCCESS 和帧图存储路径列表。
-3. **Given** keyframe 模式，**When** AI 服务处理视频，**Then** 仅提取画面发生显著变化的帧，而非固定间隔。
-4. **Given** 视频文件大小超过系统上限（默认 200MB，可配置），**When** 收到请求，**Then** 立即返回 400 错误，不启动后台任务。
-5. **Given** 帧提取过程中发生错误，**When** 任务失败，**Then** AI 服务仍向 Java 后端发送回调，status=FAILED，包含错误描述。
-
----
-
-### User Story 4 - ADMIN 将视频片段转换为文字描述（片段模式预处理） (Priority: P2)
-
-ADMIN 在标注平台选择一段已上传视频的时间段，触发"视频转文本"预处理。AI 服务在后台均匀采样该时间段的视频帧，用多模态大模型理解视频内容，生成结构化文字描述，将描述文本上传存储，完成后通知 Java 后端，后端将其创建为新的文本类原始资料，进入文本标注流程。
-
-**Why this priority**: 视频转文本预处理使视频内容能够通过文本标注流水线处理，扩展了平台的数据来源范围。
-
-**Independent Test**: 向 AI 服务发送测试视频路径、时间段和 job_id，验证回调收到 output_path 指向一个可读的文字描述文件，即可独立验证。
-
-**Acceptance Scenarios**:
-
-1. **Given** 存储系统中存有一段视频（大小在限制内），**When** AI 服务收到视频转文本请求，**Then** 立即返回 202 Accepted 和 job_id。
-2. **Given** 视频转文本任务在后台成功完成，**When** 处理完成，**Then** AI 服务向 Java 后端发送回调，包含 job_id、status=SUCCESS 和文字描述的存储路径。
-3. **Given** 请求指定了起止时间段（start_sec、end_sec），**When** 处理视频，**Then** 仅分析该时间段内的内容，不处理其他片段。
-4. **Given** 视频文件大小超过上限，**When** 收到请求，**Then** 立即返回 400 错误。
-5. **Given** 大模型调用失败，**When** 任务异常，**Then** 回调 status=FAILED，包含错误描述。
-
----
-
-### User Story 5 - 系统自动为已审批三元组生成候选问答对 (Priority: P2)
-
-标注员提交的文本三元组经审批员审批通过后，系统自动调用 AI 服务，将三元组列表和对应原文片段批量输入大模型，生成符合微调格式的候选问答对，作为后续训练数据的来源。
-
-**Why this priority**: 问答对生成是平台训练数据产出流程的关键环节，依赖三元组提取（P1）已完成并通过审批。
-
-**Independent Test**: 向 AI 服务发送一组测试三元组（含原文片段），验证返回包含可读、合理的问答对列表，即可独立验证。
-
-**Acceptance Scenarios**:
-
-1. **Given** 一组已审批的文本三元组，**When** AI 服务收到文本 QA 生成请求，**Then** 返回包含 question 和 answer 的问答对列表，每个三元组至少对应一个问答对。
-2. **Given** 大模型返回合法 JSON，**When** 解析响应，**Then** 正确提取每对问答并返回。
-3. **Given** 大模型返回格式异常，**When** 解析响应，**Then** 返回解析失败错误。
-4. **Given** 大模型服务不可用，**When** 调用失败，**Then** 返回明确的服务不可用错误。
-
----
-
-### User Story 6 - 系统自动为已审批四元组生成候选图文问答对 (Priority: P2)
-
-图像四元组经审批通过后，系统自动调用 AI 服务，将四元组信息与对应裁剪图一起输入多模态大模型，生成图文问答对，用于后续图像类训练数据集。
-
-**Why this priority**: 图像 QA 生成是图片标注流水线产出训练数据的最终步骤，优先级与文本 QA 生成（P2）相同。
-
-**Independent Test**: 向 AI 服务发送一组四元组（含裁剪图存储路径），验证返回的问答对引用了图片路径，即可独立验证。
-
-**Acceptance Scenarios**:
-
-1. **Given** 一组已审批的图像四元组（含裁剪图路径），**When** AI 服务收到图像 QA 生成请求，**Then** 返回包含 question、answer 和 image_path 的问答对列表。
-2. **Given** 裁剪图存储路径有效，**When** AI 服务处理，**Then** 自动获取图片内容并结合四元组信息生成问答，无需调用方额外传输图片数据。
-3. **Given** 裁剪图无法从存储获取，**When** 处理请求，**Then** 返回存储错误，不返回空结果。
-
----
-
-### User Story 7 - ADMIN 提交微调任务并查询进度 (Priority: P3)
-
-ADMIN 在标注平台完成训练数据集导出后，选择提交大模型微调任务。平台调用 AI 服务提交微调请求（包含训练数据文件地址、基础模型和超参数），获取微调任务 ID。此后，ADMIN 可随时查询该任务的运行状态（进行中/成功/失败）和完成进度。
-
-**Why this priority**: 微调任务管理是平台最终目标（产出定制化模型）的关键步骤，但需要前置数据准备流程全部完成，故列为 P3。
-
-**Independent Test**: 向 AI 服务发送微调请求，获取 job_id，再调用状态查询接口，验证能正确返回当前状态，即可独立验证。
-
-**Acceptance Scenarios**:
-
-1. **Given** 训练数据 JSONL 文件已在存储中准备就绪，**When** AI 服务收到微调提交请求（含文件地址、基础模型、超参数），**Then** 返回微调任务 ID。
-2. **Given** 微调任务已提交，**When** 查询任务状态，**Then** 返回 job_id、当前状态（RUNNING/SUCCESS/FAILED）和进度百分比。
-3. **Given** 任务处于运行中，**When** 多次查询状态，**Then** 每次均返回最新状态，不缓存旧状态。
-4. **Given** 传入不存在的 job_id 查询状态，**When** 处理请求，**Then** 返回明确错误，不崩溃。
-
----
-
-### User Story 8 - 运维监控服务健康状态 (Priority: P3)
-
-运维人员或监控系统定期探测 AI 服务的健康状态，判断服务是否正常运行，以便在异常时及时告警或自动重启。
-
-**Why this priority**: 健康检查是服务稳定运行的基础保障，但不属于业务功能，列为 P3。
-
-**Independent Test**: 对健康检查接口发起 HTTP GET 请求，验证收到表示正常的响应，即可独立验证。
-
-**Acceptance Scenarios**:
-
-1. **Given** AI 服务正常运行，**When** 任何系统对健康检查接口发起请求，**Then** 立即返回服务正常的响应，响应时间不超过 1 秒。
-2. **Given** 容器运行中，**When** 容器编排系统定期发起健康探测，**Then** 通过探测的容器才被标记为可用状态并接收流量。
-
----
-
-### Edge Cases
-
-- 文件存在于存储系统但内容损坏（如 PDF 页面为空）时，如何处理？→ 返回解析结果为空，不报错，日志记录警告。
-- 视频帧提取结果为零帧（如视频文件损坏或间隔过大）时，如何处理？→ 回调 SUCCESS，返回空帧列表，Java 后端决定是否重试。
-- 大模型返回的三元组/四元组超过合理数量（如数百条）时，如何处理？→ 全量返回，由 Java 后端或标注员筛选，AI 服务不做截断。
-- 多个视频任务并发执行时，是否会互相影响？→ 每个任务独立使用临时文件，处理完成后清理，互不干扰。
-- 视频文件大小恰好等于上限时，如何处理？→ 视为超限，拒绝处理，避免边界情况下的内存压力。
-- 大模型以 Markdown 代码块格式（\`\`\`json ... \`\`\`）返回 JSON 时，如何处理？→ 自动提取代码块内的 JSON 内容，兼容此格式。
-
----
-
-## Requirements *(mandatory)*
-
-### Functional Requirements
-
-**文本处理**
-
-- **FR-001**: 系统 MUST 支持从 TXT、PDF、DOCX 三种格式的文档中提取知识三元组（subject / predicate / object），并为每条三元组提供原文出处片段和字符偏移位置。
-- **FR-002**: 系统 MUST 在文件格式不受支持时，返回明确的格式不支持错误（HTTP 400），拒绝处理请求。
-
-**图像处理**
-
-- **FR-003**: 系统 MUST 支持从图片中提取知识四元组（subject / predicate / object / qualifier），并提供每个知识点在图像中的位置框（bbox：x, y, w, h 像素坐标）。
-- **FR-004**: 系统 MUST 在返回四元组结果时，自动将每个知识点对应的图像区域裁剪并保存至存储，响应中包含裁剪图的存储路径。
-
-**视频处理**
-
-- **FR-005**: 系统 MUST 支持视频帧提取，提供两种模式：固定间隔模式（按帧数间隔）和关键帧模式（场景切换时提取）。
-- **FR-006**: 系统 MUST 以异步方式处理视频任务，接受请求后立即返回接受确认（HTTP 202），在后台完成处理后主动通知调用方。
-- **FR-007**: 系统 MUST 支持视频片段转文字描述，输入起止时间段，输出视频内容的结构化文字描述，并将描述文本保存至存储。
-- **FR-008**: 系统 MUST 在视频文件大小超过上限时，拒绝处理并返回明确错误；大小上限 MUST 支持运行时配置（默认 200MB），不需要重新构建服务即可调整。
-
-**问答对生成**
-
-- **FR-009**: 系统 MUST 支持基于文本三元组（含原文片段）批量生成候选问答对，每条三元组至少生成一个问答对。
-- **FR-010**: 系统 MUST 支持基于图像四元组（含裁剪图存储路径）生成图文候选问答对，图片内容由系统自动从存储获取，调用方只需提供存储路径。
-
-**微调管理**
-
-- **FR-011**: 系统 MUST 支持向大模型服务提交微调任务，输入训练数据文件地址、基础模型名称和超参数，返回微调任务 ID。
-- **FR-012**: 系统 MUST 支持通过任务 ID 查询微调任务当前状态（RUNNING / SUCCESS / FAILED）和完成进度。
-
-**服务运维**
-
-- **FR-013**: 系统 MUST 提供轻量健康检查接口，可被容器编排系统、反向代理和监控工具调用，无需认证，响应时间不超过 1 秒。
-- **FR-014**: 系统 MUST 对每次请求记录结构化日志，包含请求路径、响应状态和耗时；对每次大模型调用记录模型名称和耗时；对视频后台任务记录任务 ID、阶段和结果；日志 MUST NOT 包含文件原文内容。
-- **FR-015**: 系统 MUST 在大模型返回非法格式时（HTTP 502）、存储不可达时（HTTP 502）、大模型服务不可用时（HTTP 503），分别返回不同的结构化错误响应，便于调用方判断根因。
-- **FR-016**: 系统 MUST 提供 Swagger/OpenAPI 自动文档，描述所有接口的请求和响应格式。
-
-**可扩展性**
-
-- **FR-017**: 系统 MUST 将大模型调用和存储访问封装为可替换的适配层，当前实现 ZhipuAI GLM 系列和 RustFS，替换实现时业务逻辑层无需修改。
-- **FR-018**: 系统 MUST 通过配置文件和环境变量管理所有可变参数（模型名称、存储地址、密钥、视频大小上限等），支持不重建服务镜像的情况下切换环境配置。
-
-### Key Entities
-
-- **三元组（Triple）**: 从文本中提取的知识关系，由主语（subject）、谓语（predicate）、宾语（object）、原文片段（source_snippet）和字符偏移（source_offset: start/end）组成。
-- **四元组（Quadruple）**: 从图像中提取的知识关系，在三元组基础上增加修饰信息（qualifier）和图像位置框（bbox: x/y/w/h），并关联裁剪图存储路径（cropped_image_path）。
-- **问答对（QA Pair）**: 由 question 和 answer 组成，文本类关联三元组上下文，图像类额外携带图片存储路径（image_path）。
-- **视频任务回调（Video Job Callback）**: 异步任务完成通知，包含 job_id、status（SUCCESS/FAILED）、结果数据（帧路径列表或文字描述路径）和错误信息。
-- **微调任务（Finetune Job）**: 包含任务 ID、当前状态（RUNNING/SUCCESS/FAILED）和进度百分比。
-
----
-
-## Success Criteria *(mandatory)*
-
-### Measurable Outcomes
-
-- **SC-001**: 对于长度在 10,000 字以内的文档，三元组提取请求在 60 秒内完成并返回结果，满足标注员实时等待的体验预期。
-- **SC-002**: 对于分辨率在 4K 以内的图片，四元组提取和裁剪图上传在 30 秒内完成，裁剪图区域与 bbox 坐标对应准确（误差 ≤2 像素）。
-- **SC-003**: 视频帧提取和视频转文本任务提交后，接受响应在 1 秒内返回；后台处理完成后回调通知在 10 分钟内送达（针对 200MB 以内的视频）。
-- **SC-004**: 视频大小超限的请求，拒绝响应在 3 秒内返回（含存储查询耗时），不启动任何后台处理。
-- **SC-005**: 问答对生成请求（≤10 条三元组/四元组），在 90 秒内完成并返回全部问答对。
-- **SC-006**: 健康检查接口在服务正常运行时，响应时间不超过 1 秒，容器编排系统依此判断服务可用状态。
-- **SC-007**: 所有错误响应均返回结构化错误信息（含错误类型和描述），不返回通用服务器错误，便于调用方在不查看日志的情况下判断根因。
-- **SC-008**: 替换大模型服务商或存储实现时，业务逻辑层代码零修改，仅需变更配置和适配层实现。
-- **SC-009**: 所有业务接口通过自动化单元测试覆盖，包括正常路径、存储错误、大模型错误、格式解析错误等场景。
-
----
-
-## Technical Environment *(mandatory)*
-
-> 注：本节记录项目已确定的技术约束，这些决定已由团队确认，不作为需求变更点。
-
-- **运行时**: Python 3.12.13
-- **Web 框架**: FastAPI（含 uvicorn 服务器）
-- **运行环境**: conda 虚拟环境，环境名称 `label`
-- **大模型**: ZhipuAI GLM 系列（文本：glm-4-flash，视觉：glm-4v-flash），通过官方 SDK 调用
-- **对象存储**: RustFS，通过 S3 兼容 API（boto3）访问
-- **文档解析**: TXT（UTF-8 解码）、PDF（pdfplumber）、DOCX（python-docx）
-- **视频处理**: OpenCV（帧提取 + 帧差分关键帧检测）
-- **容器化**: Docker + Docker Compose，提供 Dockerfile 和 docker-compose.yml
-
----
-
-## Assumptions
-
-- Java 后端（label-backend）是 AI 服务的唯一调用方，AI 服务不对外直接暴露，无需用户认证机制。
-- 大模型服务部署在公网（ZhipuAI 云端 API），RustFS 部署在 Docker 内网；因此图片内容必须以 base64 方式传递给大模型，不能依赖 RustFS 内网地址被云端服务访问。
-- 文档、图片、视频等原始文件由 Java 后端负责上传至存储，AI 服务仅通过存储路径读取，不处理文件上传逻辑。
-- 微调任务提交后的训练过程由 ZhipuAI 平台托管，AI 服务仅负责提交和查询，不管理训练算力资源。
-- 视频任务为低频操作（由 ADMIN 手动触发），并发量有限（预计同时不超过 5 个视频任务），当前无需专用任务队列。
-- 日志仅输出到标准输出（stdout），由容器运行时或日志收集系统负责落盘和归档；不记录文件原文内容，防止敏感信息泄露。
-- ZhipuAI SDK 为同步阻塞调用；为保持服务并发能力，SDK 调用将在线程池中执行，不阻塞主事件循环。
-- 视频大小上限默认 200MB，可通过环境变量（MAX_VIDEO_SIZE_MB）在容器运行时覆盖，无需重建镜像。
diff --git a/specs/001-ai-service-requirements/tasks.md b/specs/001-ai-service-requirements/tasks.md
deleted file mode 100644
index 2f26606..0000000
--- a/specs/001-ai-service-requirements/tasks.md
+++ /dev/null
@@ -1,318 +0,0 @@
-# Tasks: AI 服务（知识图谱标注平台 AI 计算服务）
-
-**Input**: Design documents from `/specs/001-ai-service-requirements/`  
-**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, data-model.md ✅, contracts/api.md ✅  
-**Tests**: Included — spec and plan explicitly mandate TDD（全量 TDD 开发）
-
-**Organization**: Tasks grouped by user story. Each phase is independently implementable and testable.
-
-## Format: `[ID] [P?] [Story?] Description`
-
-- **[P]**: Can run in parallel (different files, no shared dependencies)
-- **[Story]**: Which user story this task belongs to (US1–US8)
-- All paths are relative to project root `label_ai_service/`
-
----
-
-## Phase 1: Setup（项目初始化）
-
-**Purpose**: Create project skeleton and configuration files before any code is written.
-
-- [ ] T001 Create directory structure: `app/core/`, `app/clients/llm/`, `app/clients/storage/`, `app/services/`, `app/routers/`, `app/models/`, `tests/`
-- [ ] T002 Create `requirements.txt` with minimum-version constraints for all dependencies: fastapi≥0.111, uvicorn[standard]≥0.29, pydantic≥2.7, zhipuai≥2.1, boto3≥1.34, pdfplumber≥0.11, python-docx≥1.1, opencv-python-headless≥4.9, numpy≥1.26, httpx≥0.27, python-dotenv≥1.0, pyyaml≥6.0, pytest≥8.0, pytest-asyncio≥0.23
-- [ ] T003 [P] Create `config.yaml` with default server/storage/video/models configuration (port 8000, buckets, max_file_size_mb 200, glm-4-flash / glm-4v-flash)
-- [ ] T004 [P] Create `.env` template with required env var keys (ZHIPUAI_API_KEY, STORAGE_ACCESS_KEY, STORAGE_SECRET_KEY, STORAGE_ENDPOINT, BACKEND_CALLBACK_URL, LOG_LEVEL, MAX_VIDEO_SIZE_MB)
-- [ ] T005 [P] Create `Dockerfile` (python:3.12-slim base, install requirements, expose 8000, CMD uvicorn)
-- [ ] T006 [P] Create `docker-compose.yml` with ai-service and rustfs services, env_file, healthcheck (curl /health every 30s)
-
----
-
-## Phase 2: Foundational（核心基础设施）
-
-**Purpose**: Core infrastructure that MUST be complete before ANY user story can be implemented.
-
-**⚠️ CRITICAL**: No user story work can begin until this phase is complete.
-
-### Config & Core Utilities
-
-- [ ] T007 Implement `app/core/config.py`: load `config.yaml` with PyYAML + override via `_ENV_OVERRIDES` dict mapping env vars to nested YAML paths (including `MAX_VIDEO_SIZE_MB → video.max_file_size_mb`), expose `get_config()` with `@lru_cache`
-- [ ] T008 [P] Implement `app/core/logging.py`: JSON structured logging via `logging` module, `RequestLoggingMiddleware` that logs path/status/latency, helper `get_logger(name)`
-- [ ] T009 [P] Implement `app/core/exceptions.py`: custom exception classes `UnsupportedFileTypeError(400)`, `VideoTooLargeError(400)`, `StorageError(502)`, `LLMParseError(502)`, `LLMCallError(503)`, plus global exception handler that returns `{"code": ..., "message": ...}` JSON
-- [ ] T010 [P] Implement `app/core/json_utils.py`: `extract_json(text) -> dict | list` that strips Markdown code fences (` ```json ... ``` `) before `json.loads` (LLM responses may be a JSON object or a JSON array), raises `LLMParseError` on invalid JSON
-- [ ] T011 Write `tests/test_config.py`: verify YAML defaults load correctly; verify `MAX_VIDEO_SIZE_MB=500` env var overrides `video.max_file_size_mb`; verify missing required env vars surface clear errors
-
-### LLM Client（大模型适配层）
-
-- [ ] T012 [P] Implement `app/clients/llm/base.py`: `LLMClient` ABC with abstract methods `chat(model, messages) -> str` and `chat_vision(model, messages) -> str`
-- [ ] T013 Implement `app/clients/llm/zhipuai_client.py`: `ZhipuAIClient(LLMClient)` that wraps synchronous ZhipuAI SDK calls via `asyncio.get_event_loop().run_in_executor(None, ...)` in a thread pool; raise `LLMCallError` on SDK exceptions
-- [ ] T014 [P] Write `tests/test_llm_client.py`: mock ZhipuAI SDK to verify `chat()` and `chat_vision()` call the SDK correctly; verify `LLMCallError` is raised on SDK exception; verify thread-pool wrapping does not block the event loop
-
-### Storage Client（存储适配层）
-
-- [ ] T015 [P] Implement `app/clients/storage/base.py`: `StorageClient` ABC with abstract methods `download_bytes(bucket, path) -> bytes`, `upload_bytes(bucket, path, data, content_type) -> None`, `get_presigned_url(bucket, path, expires) -> str`, `get_object_size(bucket, path) -> int`
-- [ ] T016 Implement `app/clients/storage/rustfs_client.py`: `RustFSClient(StorageClient)` using boto3 S3 client; all calls wrapped via `run_in_executor`; `get_object_size` uses `head_object`; raise `StorageError` on `ClientError`
-- [ ] T017 [P] Write `tests/test_storage_client.py`: mock boto3 S3 client; verify `download_bytes` returns correct bytes; verify `get_object_size` calls `head_object` and returns `ContentLength`; verify `StorageError` raised on S3 exception
-
-### FastAPI Application Entry
-
-- [ ] T018 Implement `app/main.py`: create FastAPI app with lifespan, register `RequestLoggingMiddleware`, register global exception handlers from `exceptions.py`, mount all routers (empty stubs initially), expose `GET /health → {"status": "ok"}`
-- [ ] T019 [P] Implement `app/core/dependencies.py`: `get_llm_client() -> LLMClient` and `get_storage_client() -> StorageClient` as `@lru_cache` singletons, instantiated from `get_config()` values
-- [ ] T020 Write `tests/conftest.py`: `mock_llm` fixture (AsyncMock implementing LLMClient), `mock_storage` fixture (AsyncMock implementing StorageClient with `get_object_size` returning 10MB), `test_app` fixture overriding Depends, `client` fixture using `TestClient`
-
-**Checkpoint**: Foundation complete — all user story phases can now begin in parallel.
-
----
-
-## Phase 3: User Story 1 — ADMIN 从文档中提取知识三元组 (Priority: P1) 🎯 MVP
-
-**Goal**: `POST /api/v1/text/extract` reads a TXT/PDF/DOCX file from RustFS, calls GLM, returns structured triples with source offsets.
-
-**Independent Test**: Send `{"file_path": "text/test.txt", "file_name": "test.txt"}` to the endpoint; verify response contains `items` with `subject`, `predicate`, `object`, `source_snippet`, `source_offset.start/end`.
-
-### Tests for User Story 1 ⚠️ Write FIRST — verify FAIL before implementing
-
-- [ ] T021 [P] [US1] Write `tests/test_text_service.py`: test TXT parsing returns triples; test PDF parsing (mock pdfplumber); test DOCX parsing (mock python-docx); test unsupported format raises `UnsupportedFileTypeError`; test storage failure raises `StorageError`; test LLM parse error raises `LLMParseError`
-
-### Implementation for User Story 1
-
-- [ ] T022 [P] [US1] Create `app/models/text_models.py`: `SourceOffset(start: int, end: int)`, `TripleItem(subject, predicate, object, source_snippet, source_offset)`, `TextExtractRequest(file_path, file_name, model?, prompt_template?)`, `TextExtractResponse(items: list[TripleItem])`
-- [ ] T023 [US1] Implement `app/services/text_service.py`: `extract_triples(req, llm, storage) -> TextExtractResponse`; dispatch to `_parse_txt / _parse_pdf / _parse_docx` by file extension; build prompt from content + optional `prompt_template`; call `llm.chat()`; parse JSON response via `extract_json()`; validate triple fields; raise typed exceptions
-- [ ] T024 [US1] Write `tests/test_text_router.py`: POST `/api/v1/text/extract` returns 200 with items; unsupported format returns 400 with `UNSUPPORTED_FILE_TYPE`; storage error returns 502 with `STORAGE_ERROR`; LLM parse error returns 502 with `LLM_PARSE_ERROR`
-- [ ] T025 [US1] Implement `app/routers/text.py`: `APIRouter(prefix="/api/v1")` with `POST /text/extract` handler that injects `storage` and `llm` via Depends, calls `text_service.extract_triples()`; register router in `app/main.py`
-
-**Checkpoint**: `POST /api/v1/text/extract` fully functional. Run `pytest tests/test_text_service.py tests/test_text_router.py -v` — all green.
-
----
-
-## Phase 4: User Story 2 — ADMIN 从图片中提取知识四元组并自动裁剪 (Priority: P1)
-
-**Goal**: `POST /api/v1/image/extract` downloads an image from RustFS, calls GLM-4V, crops bbox regions, uploads crops, returns quads with cropped_image_path.
-
-**Independent Test**: Send `{"file_path": "image/test.jpg", "task_id": 1}` to the endpoint; verify response contains `items` each with `bbox`, `qualifier`, and `cropped_image_path` matching pattern `crops/1/{n}.jpg`.
-
-### Tests for User Story 2 ⚠️ Write FIRST — verify FAIL before implementing
-
-- [ ] T026 [P] [US2] Write `tests/test_image_service.py`: test full quad extraction pipeline with mock LLM returning valid JSON; test bbox crop uses correct pixel coordinates; test out-of-bounds bbox is clamped to image dimensions; test crop upload path follows `crops/{task_id}/{index}.jpg` convention; test LLM parse error raises `LLMParseError`
-
-### Implementation for User Story 2
-
-- [ ] T027 [P] [US2] Create `app/models/image_models.py`: `BBox(x, y, w, h: int)`, `QuadrupleItem(subject, predicate, object, qualifier?, bbox, cropped_image_path)`, `ImageExtractRequest(file_path, task_id, model?, prompt_template?)`, `ImageExtractResponse(items: list[QuadrupleItem])`
-- [ ] T028 [US2] Implement `app/services/image_service.py`: `extract_quads(req, llm, storage) -> ImageExtractResponse`; download image bytes → decode with OpenCV (`cv2.imdecode`); base64 encode image for GLM-4V multimodal message; call `llm.chat_vision()`; parse JSON via `extract_json()`; for each quad, clamp bbox to image dimensions, crop with numpy slicing, encode as JPEG, upload to `crops/{task_id}/{index}.jpg`; return quads with paths
-- [ ] T029 [US2] Write `tests/test_image_router.py`: POST `/api/v1/image/extract` returns 200 with items; LLM parse error returns 502; storage download failure returns 502
-- [ ] T030 [US2] Implement `app/routers/image.py`: `POST /image/extract` handler; register in `app/main.py`
-
-**Checkpoint**: `POST /api/v1/image/extract` fully functional. Run `pytest tests/test_image_service.py tests/test_image_router.py -v` — all green.
-
----
-
-## Phase 5: User Stories 3 & 4 — 视频帧提取 + 视频转文本 (Priority: P2)
-
-**Goal**: `POST /api/v1/video/extract-frames` and `POST /api/v1/video/to-text` immediately return 202, process video in background via FastAPI BackgroundTasks, then POST callback to Java backend with results.
-
-**Independent Test (US3)**: Send extract-frames request; verify immediate 202 with job_id; mock storage and callback URL; verify callback received with `status=SUCCESS` and non-empty `frames` list.
-
-**Independent Test (US4)**: Send to-text request with `start_sec=0, end_sec=10`; verify immediate 202; verify callback received with `status=SUCCESS` and `output_path` pointing to an uploaded text file.
-
-### Tests for User Stories 3 & 4 ⚠️ Write FIRST — verify FAIL before implementing
-
-- [ ] T031 [P] [US3] Write `tests/test_video_service.py` (frame extraction tests): generate small test video via `cv2.VideoWriter`; test interval mode extracts correct frame indices; test keyframe mode only extracts frames exceeding difference threshold; test each extracted frame is uploaded to `frames/{source_id}/{index}.jpg`; test failed extraction triggers FAILED callback with error_message
-- [ ] T032 [P] [US4] Append to `tests/test_video_service.py` (to-text tests): test uniform sampling selects `frame_sample_count` frames from `[start_sec, end_sec]` window; test sampled frames are passed as base64 to `llm.chat_vision()`; test output text is uploaded to `video-text/{source_id}/{timestamp}.txt`; test LLM failure triggers FAILED callback
-
-### Implementation for User Stories 3 & 4
-
-- [ ] T033 [US3] Create `app/models/video_models.py`: `ExtractFramesRequest(file_path, source_id, job_id, mode="interval", frame_interval=30)`, `VideoToTextRequest(file_path, source_id, job_id, start_sec, end_sec, model?, prompt_template?)`, `FrameInfo(frame_index, time_sec, frame_path)`, `VideoJobCallback(job_id, status, frames?, output_path?, error_message?)`, `VideoAcceptedResponse(message, job_id)`
-- [ ] T034 [US3] Implement frame extraction in `app/services/video_service.py`: `extract_frames_task(req, llm, storage, callback_url)` background function; download video to temp file; open with `cv2.VideoCapture`; interval mode: step by `frame_interval`; keyframe mode: compute grayscale frame diff, extract when diff > threshold (default 30.0); upload each frame JPEG; POST callback with `FrameInfo` list; clean up temp file; catch all exceptions and POST FAILED callback
-- [ ] T035 [US4] Implement to-text in `app/services/video_service.py`: `video_to_text_task(req, llm, storage, callback_url)` background function; download video to temp file; sample `frame_sample_count` frames uniformly within `[start_sec, end_sec]`; base64 encode frames; call `llm.chat_vision()` with all frames in one multimodal message; upload text result to `video-text/{source_id}/{timestamp}.txt`; POST callback with `output_path`; clean up temp file
-- [ ] T036 [US3] Write `tests/test_video_router.py`: POST `/api/v1/video/extract-frames` returns 202 immediately; video exceeding `max_file_size_mb` returns 400 with `VIDEO_TOO_LARGE`; background task is registered (mock BackgroundTasks)
-- [ ] T037 [US4] Append to `tests/test_video_router.py`: POST `/api/v1/video/to-text` returns 202; size limit applies equally
-- [ ] T038 [US3] Implement `app/routers/video.py`: `_check_video_size(storage, bucket, file_path, max_mb)` helper that calls `storage.get_object_size()` and raises `VideoTooLargeError`; `POST /video/extract-frames` and `POST /video/to-text` handlers check size then enqueue background task; register router in `app/main.py`
-
-**Checkpoint**: Both video endpoints fully functional. Run `pytest tests/test_video_service.py tests/test_video_router.py -v` — all green.
-
----
-
-## Phase 6: User Stories 5 & 6 — 文本QA生成 + 图像QA生成 (Priority: P2)
-
-**Goal**: `POST /api/v1/qa/gen-text` generates QA pairs from text triples; `POST /api/v1/qa/gen-image` generates multimodal QA pairs from image quads (images fetched and base64-encoded internally).
-
-**Independent Test (US5)**: Send `{"items": [{"subject":"变压器","predicate":"额定电压","object":"110kV","source_snippet":"..."}]}` to gen-text; verify response contains `pairs` with non-empty `question` and `answer`.
-
-**Independent Test (US6)**: Send `{"items": [{"subject":"...","cropped_image_path":"crops/1/0.jpg",...}]}` to gen-image; verify response contains `pairs` with `image_path` matching `crops/1/0.jpg`.
-
-### Tests for User Stories 5 & 6 ⚠️ Write FIRST — verify FAIL before implementing
-
-- [ ] T039 [P] [US5] Write `tests/test_qa_service.py` (text QA tests): test triples are formatted into prompt correctly; test LLM response JSON is parsed into `QAPair` list; test `LLMParseError` on malformed LLM response; test `LLMCallError` propagates correctly
-- [ ] T040 [P] [US6] Append to `tests/test_qa_service.py` (image QA tests): test storage downloads cropped image and encodes as base64 before LLM call; test multimodal message includes both text (quad info) and inline image data URI; test `StorageError` on failed image download
-
-### Implementation for User Stories 5 & 6
-
-- [ ] T041 [P] [US5] Create `app/models/qa_models.py`: `TextQAItem(subject, predicate, object, source_snippet)`, `GenTextQARequest(items, model?, prompt_template?)`, `QAPair(question, answer)`, `ImageQAItem(subject, predicate, object, qualifier?, cropped_image_path)`, `GenImageQARequest(items, model?, prompt_template?)`, `ImageQAPair(question, answer, image_path)`, `TextQAResponse(pairs)`, `ImageQAResponse(pairs)`
-- [ ] T042 [US5] Implement `gen_text_qa(req, llm) -> TextQAResponse` in `app/services/qa_service.py`: format all triples + source snippets into a single batch prompt; call `llm.chat()`; parse JSON array via `extract_json()`; return `QAPair` list
-- [ ] T043 [US6] Implement `gen_image_qa(req, llm, storage) -> ImageQAResponse` in `app/services/qa_service.py`: for each `ImageQAItem`, download `cropped_image_path` bytes from `source-data` bucket; base64 encode; build multimodal message with quad text + `data:image/jpeg;base64,...` inline URL; call `llm.chat_vision()`; parse JSON; return `ImageQAPair` with `image_path = item.cropped_image_path`
-- [ ] T044 [US5] Write `tests/test_qa_router.py`: POST `/api/v1/qa/gen-text` returns 200 with pairs; POST `/api/v1/qa/gen-image` returns 200 with pairs including image_path; LLM errors return 502/503
-- [ ] T045 [US5] Implement `app/routers/qa.py`: `POST /qa/gen-text` and `POST /qa/gen-image` handlers; register router in `app/main.py`
-
-**Checkpoint**: Both QA endpoints fully functional. Run `pytest tests/test_qa_service.py tests/test_qa_router.py -v` — all green.
-
----
-
-## Phase 7: User Stories 7 & 8 — 微调任务管理 + 健康检查 (Priority: P3)
-
-**Goal**: `POST /api/v1/finetune/start` submits a ZhipuAI fine-tune job; `GET /api/v1/finetune/status/{jobId}` queries its state; `GET /health` returns service liveness.
-
-**Independent Test (US7)**: Call `POST /finetune/start` with mock LLM returning a job ID; then call `GET /finetune/status/{jobId}`; verify `status` is one of `RUNNING/SUCCESS/FAILED` and `progress` is an integer.
-
-**Independent Test (US8)**: `GET /health` returns `{"status": "ok"}` with HTTP 200 in under 1 second.
-
-### Tests for User Stories 7 & 8 ⚠️ Write FIRST — verify FAIL before implementing
-
-- [ ] T046 [P] [US7] Write `tests/test_finetune_service.py`: test `submit_finetune()` calls ZhipuAI finetune API with correct params and returns job_id; test `get_status()` maps ZhipuAI `"running"→RUNNING`, `"succeeded"→SUCCESS`, `"failed"→FAILED`, unknown status→RUNNING (conservative); test `LLMCallError` on SDK failure
-- [ ] T047 [P] [US8] Write health check test in `tests/test_finetune_router.py` (or new `tests/test_health.py`): `GET /health` returns 200 with `{"status": "ok"}`
-
-### Implementation for User Stories 7 & 8
-
-- [ ] T048 [P] [US7] Create `app/models/finetune_models.py`: `FinetuneStartRequest(jsonl_url, base_model, hyperparams?)`, `FinetuneStartResponse(job_id)`, `FinetuneStatusResponse(job_id, status, progress?, error_message?)`
-- [ ] T049 [US7] Implement `app/services/finetune_service.py`: `submit_finetune(req, llm) -> FinetuneStartResponse` calls ZhipuAI fine-tune create API via `run_in_executor`; `get_finetune_status(job_id, llm) -> FinetuneStatusResponse` calls ZhipuAI fine-tune retrieve API and maps status strings; raise `LLMCallError` on failure
-- [ ] T050 [US7] Write `tests/test_finetune_router.py`: `POST /api/v1/finetune/start` returns 200 with job_id; `GET /api/v1/finetune/status/{jobId}` returns 200 with status fields; unknown job_id propagates error response
-- [ ] T051 [US7] Implement `app/routers/finetune.py`: `POST /finetune/start` and `GET /finetune/status/{job_id}` handlers; register router in `app/main.py`
-
-**Checkpoint**: All 8 user stories complete. Run `pytest tests/ -v` — all green.
-
----
-
-## Phase 8: Polish & Cross-Cutting Concerns
-
-**Purpose**: Final integration, documentation verification, and deployment readiness.
-
-- [ ] T052 [P] Create `.gitignore` for Python project (`.env`, `__pycache__/`, `*.pyc`, `.pytest_cache/`, `tmp/` for video temp files)
-- [ ] T053 Run full test suite `conda run -n label pytest tests/ -v --cov=app --cov-report=term-missing` and fix any remaining failures or coverage gaps
-- [ ] T054 [P] Verify Swagger/OpenAPI docs at `http://localhost:8000/docs` show all 9 endpoints with correct request/response schemas
-- [ ] T055 Validate quickstart.md end-to-end: `conda activate label && pip install -r requirements.txt && conda run -n label uvicorn app.main:app --reload` starts cleanly; `GET /health` returns 200; `docker-compose up -d` builds and healthcheck passes
-
----
-
-## Dependencies & Execution Order
-
-### Phase Dependencies
-
-```
-Phase 1 (Setup)
-    └─→ Phase 2 (Foundational) ← BLOCKS everything
-            ├─→ Phase 3 (US1, P1) ─┐
-            ├─→ Phase 4 (US2, P1) ─┤ Can run in parallel after Phase 2
-            ├─→ Phase 5 (US3+4, P2)─┤
-            ├─→ Phase 6 (US5+6, P2)─┤
-            └─→ Phase 7 (US7+8, P3)─┘
-                    └─→ Phase 8 (Polish)
-```
-
-### User Story Dependencies
-
-| Story | Priority | Depends On | Blocking |
-|-------|----------|-----------|---------|
-| US1 (文本三元组) | P1 | Phase 2 only | Nothing |
-| US2 (图像四元组) | P1 | Phase 2 only | Nothing (US6 reuses the same image downloading pattern but does not depend on US2) |
-| US3 (视频帧提取) | P2 | Phase 2 only | Nothing |
-| US4 (视频转文本) | P2 | Phase 2, US3 (shares video_service.py) | Nothing |
-| US5 (文本QA) | P2 | Phase 2 only | Nothing |
-| US6 (图像QA) | P2 | Phase 2 only | Nothing |
-| US7 (微调管理) | P3 | Phase 2 only | Nothing |
-| US8 (健康检查) | P3 | T018 (main.py) | Nothing |
-
-### Within Each User Story
-
-1. Tests MUST be written first and verified to **FAIL** before implementation
-2. Models → Services → Routers (in dependency order)
-3. Register router in `main.py` after router file is complete
-4. Run story-specific tests before marking story done
-
-### Parallel Opportunities
-
-All tasks marked `[P]` within a phase can run concurrently (different files):
-- **Phase 2**: T008, T009, T010 (core utilities) + T012, T014 (LLM) + T015, T017 (Storage) + T019 (dependencies)
-- **Phase 3**: T021 (tests) and T022 (models) can start together
-- **Phase 4**: T026 (tests) and T027 (models) can start together
-- **Phase 5**: T031 (US3 tests) and T032 (US4 tests) can start together
-- **Phase 6**: T039 (US5 tests), T040 (US6 tests), and T041 (QA models shared by US5 and US6) can start together
-- **Phase 7**: T046, T047, T048 can start together
-
----
-
-## Parallel Example: Phase 2 Foundational
-
-```bash
-# Kick off these in parallel (all different files):
-[T008] app/core/logging.py
-[T009] app/core/exceptions.py
-[T010] app/core/json_utils.py
-[T012] app/clients/llm/base.py
-[T014] tests/test_llm_client.py
-[T015] app/clients/storage/base.py
-[T017] tests/test_storage_client.py
-[T019] app/core/dependencies.py
-
-# Then in sequence (each depends on previous):
-[T007] app/core/config.py  →  [T011] tests/test_config.py
-[T013] app/clients/llm/zhipuai_client.py (needs T012)
-[T016] app/clients/storage/rustfs_client.py (needs T015)
-[T018] app/main.py (needs T009, T008)
-[T020] tests/conftest.py (needs T018, T013, T016)
-```
-
----
-
-## Implementation Strategy
-
-### MVP First (US1 + US2 — P1 Stories Only)
-
-1. Complete Phase 1: Setup
-2. Complete Phase 2: Foundational (CRITICAL — blocks all stories)
-3. Complete Phase 3: US1 (文本三元组提取) → validate independently
-4. Complete Phase 4: US2 (图像四元组提取) → validate independently
-5. **STOP and DEMO**: Core extraction pipeline is production-ready
-
-### Incremental Delivery
-
-```
-Phase 1+2 complete  →  Foundation ready (commit)
-Phase 3 complete    →  Text extraction works  (commit, demo)
-Phase 4 complete    →  Image extraction works (commit, demo)
-Phase 5 complete    →  Video processing works (commit, demo)
-Phase 6 complete    →  QA generation works   (commit, demo)
-Phase 7 complete    →  Fine-tune management  (commit, demo)
-Phase 8 complete    →  Production-ready      (tag release)
-```
-
-### Parallel Team Strategy
-
-With two developers after Phase 2 completes:
-- **Dev A**: US1 (text) → US5 (text QA) → US7 (finetune)
-- **Dev B**: US2 (image) → US6 (image QA) → US3+US4 (video)
-
----
-
-## Summary
-
-| Phase | Tasks | User Story | Priority |
-|-------|-------|-----------|---------|
-| Phase 1: Setup | T001–T006 (6) | — | — |
-| Phase 2: Foundational | T007–T020 (14) | — | — |
-| Phase 3 | T021–T025 (5) | US1 文本三元组 | P1 🎯 MVP |
-| Phase 4 | T026–T030 (5) | US2 图像四元组 | P1 |
-| Phase 5 | T031–T038 (8) | US3+US4 视频处理 | P2 |
-| Phase 6 | T039–T045 (7) | US5+US6 QA生成 | P2 |
-| Phase 7 | T046–T051 (6) | US7+US8 微调+健康检查 | P3 |
-| Phase 8: Polish | T052–T055 (4) | — | — |
-| **Total** | **55 tasks** | **8 user stories** | |
-
----
-
-## Notes
-
-- `[P]` tasks = different files, no shared dependencies within the same phase
-- `[US?]` label maps each task to its user story for traceability
-- Fixtures in `tests/conftest.py` (T020) use `AsyncMock` — no real ZhipuAI or RustFS calls in unit tests
-- Video tasks use a real small video file generated by `cv2.VideoWriter` in tests — no external media needed
-- All config is loaded via `get_config()` — never hardcode model names or bucket names in services
-- Commit after each phase checkpoint at minimum; commit after each task for clean git history
-- Stop at any checkpoint to validate the story independently before proceeding
diff --git a/tests/__init__.py b/tests/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/conftest.py b/tests/conftest.py
deleted file mode 100644
index ae81f4d..0000000
--- a/tests/conftest.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import pytest
-from unittest.mock import AsyncMock, MagicMock
-from fastapi.testclient import TestClient
-
-from app.clients.llm.base import LLMClient
-from app.clients.storage.base import StorageClient
-from app.core.dependencies import get_llm_client, get_storage_client
-
-
-@pytest.fixture
-def mock_llm() -> LLMClient:
-    client = MagicMock(spec=LLMClient)
-    client.chat = AsyncMock(return_value='[]')
-    client.chat_vision = AsyncMock(return_value='[]')
-    return client
-
-
-@pytest.fixture
-def mock_storage() -> StorageClient:
-    client = MagicMock(spec=StorageClient)
-    client.download_bytes = AsyncMock(return_value=b"")
-    client.upload_bytes = AsyncMock(return_value=None)
-    client.get_presigned_url = AsyncMock(return_value="http://example.com/presigned")
-    client.get_object_size = AsyncMock(return_value=10 * 1024 * 1024)  # 10 MB default
-    return client
-
-
-@pytest.fixture
-def test_app(mock_llm, mock_storage):
-    from app.main import app
-    app.dependency_overrides[get_llm_client] = lambda: mock_llm
-    app.dependency_overrides[get_storage_client] = lambda: mock_storage
-    yield app
-    app.dependency_overrides.clear()
-
-
-@pytest.fixture
-def client(test_app):
-    return TestClient(test_app)
diff --git a/tests/test_config.py b/tests/test_config.py
deleted file mode 100644
index aa8f464..0000000
--- a/tests/test_config.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import os
-import pytest
-
-
-def test_yaml_defaults_load(monkeypatch):
-    # Clear lru_cache so each test gets a fresh load
-    from app.core import config as cfg_module
-    cfg_module.get_config.cache_clear()
-
-    # Remove env overrides that might bleed from shell environment
-    for var in ["MAX_VIDEO_SIZE_MB", "LOG_LEVEL", "STORAGE_ENDPOINT"]:
-        monkeypatch.delenv(var, raising=False)
-
-    cfg = cfg_module.get_config()
-
-    assert cfg["server"]["port"] == 8000
-    assert cfg["video"]["max_file_size_mb"] == 200
-    assert cfg["models"]["default_text"] == "glm-4-flash"
-    assert cfg["models"]["default_vision"] == "glm-4v-flash"
-    assert cfg["storage"]["buckets"]["source_data"] == "source-data"
-
-
-def test_max_video_size_env_override(monkeypatch):
-    from app.core import config as cfg_module
-    cfg_module.get_config.cache_clear()
-
-    monkeypatch.setenv("MAX_VIDEO_SIZE_MB", "500")
-    cfg = cfg_module.get_config()
-
-    assert cfg["video"]["max_file_size_mb"] == 500
-
-
-def test_log_level_env_override(monkeypatch):
-    from app.core import config as cfg_module
-    cfg_module.get_config.cache_clear()
-
-    monkeypatch.setenv("LOG_LEVEL", "DEBUG")
-    cfg = cfg_module.get_config()
-
-    assert cfg["server"]["log_level"] == "DEBUG"
diff --git a/tests/test_finetune_router.py b/tests/test_finetune_router.py
deleted file mode 100644
index 6678195..0000000
--- a/tests/test_finetune_router.py
+++ /dev/null
@@ -1,112 +0,0 @@
-"""T050: Integration tests for finetune router endpoints."""
-import pytest
-from unittest.mock import MagicMock, patch
-
-from app.core.exceptions import LLMCallError
-from app.models.finetune_models import FinetuneStartResponse, FinetuneStatusResponse
-
-
-# ---------------------------------------------------------------------------
-# POST /api/v1/finetune/start
-# ---------------------------------------------------------------------------
-
-def test_finetune_start_returns_200_with_job_id(client):
-    start_resp = FinetuneStartResponse(job_id="glm-ft-router-test")
-
-    with patch("app.routers.finetune.finetune_service.submit_finetune") as mock_submit:
-        mock_submit.return_value = start_resp
-
-        resp = client.post(
-            "/api/v1/finetune/start",
-            json={
-                "jsonl_url": "s3://bucket/train.jsonl",
-                "base_model": "glm-4",
-                "hyperparams": {"n_epochs": 3},
-            },
-        )
-
-    assert resp.status_code == 200
-    data = resp.json()
-    assert data["job_id"] == "glm-ft-router-test"
-
-
-def test_finetune_start_without_hyperparams(client):
-    start_resp = FinetuneStartResponse(job_id="glm-ft-nohp")
-
-    with patch("app.routers.finetune.finetune_service.submit_finetune") as mock_submit:
-        mock_submit.return_value = start_resp
-
-        resp = client.post(
-            "/api/v1/finetune/start",
-            json={
-                "jsonl_url": "s3://bucket/train.jsonl",
-                "base_model": "glm-4",
-            },
-        )
-
-    assert resp.status_code == 200
-    assert resp.json()["job_id"] == "glm-ft-nohp"
-
-
-def test_finetune_start_llm_call_error_returns_503(client):
-    with patch("app.routers.finetune.finetune_service.submit_finetune") as mock_submit:
-        mock_submit.side_effect = LLMCallError("SDK failed")
-
-        resp = client.post(
-            "/api/v1/finetune/start",
-            json={
-                "jsonl_url": "s3://bucket/train.jsonl",
-                "base_model": "glm-4",
-            },
-        )
-
-    assert resp.status_code == 503
-    assert resp.json()["code"] == "LLM_CALL_ERROR"
-
-
-# ---------------------------------------------------------------------------
-# GET /api/v1/finetune/status/{job_id}
-# ---------------------------------------------------------------------------
-
-def test_finetune_status_returns_200_with_fields(client):
-    status_resp = FinetuneStatusResponse(
-        job_id="glm-ft-router-test",
-        status="RUNNING",
-        progress=30,
-    )
-
-    with patch("app.routers.finetune.finetune_service.get_finetune_status") as mock_status:
-        mock_status.return_value = status_resp
-
-        resp = client.get("/api/v1/finetune/status/glm-ft-router-test")
-
-    assert resp.status_code == 200
-    data = resp.json()
-    assert data["job_id"] == "glm-ft-router-test"
-    assert data["status"] == "RUNNING"
-    assert data["progress"] == 30
-
-
-def test_finetune_status_succeeded(client):
-    status_resp = FinetuneStatusResponse(
-        job_id="glm-ft-done",
-        status="SUCCESS",
-    )
-
-    with patch("app.routers.finetune.finetune_service.get_finetune_status") as mock_status:
-        mock_status.return_value = status_resp
-
-        resp = client.get("/api/v1/finetune/status/glm-ft-done")
-
-    assert resp.status_code == 200
-    assert resp.json()["status"] == "SUCCESS"
-
-
-def test_finetune_status_llm_call_error_returns_503(client):
-    with patch("app.routers.finetune.finetune_service.get_finetune_status") as mock_status:
-        mock_status.side_effect = LLMCallError("SDK failed")
-
-        resp = client.get("/api/v1/finetune/status/glm-ft-bad")
-
-    assert resp.status_code == 503
-    assert resp.json()["code"] == "LLM_CALL_ERROR"
diff --git a/tests/test_finetune_service.py b/tests/test_finetune_service.py
deleted file mode 100644
index 51d93dd..0000000
--- a/tests/test_finetune_service.py
+++ /dev/null
@@ -1,151 +0,0 @@
-"""Tests for finetune_service — uses LLMClient interface (no internal SDK access)."""
-import pytest
-from unittest.mock import MagicMock, AsyncMock
-
-from app.clients.llm.base import LLMClient
-from app.core.exceptions import LLMCallError
-from app.models.finetune_models import (
-    FinetuneStartRequest,
-    FinetuneStartResponse,
-    FinetuneStatusResponse,
-)
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def _make_llm(job_id: str = "glm-ft-test", status: str = "running", progress: int | None = None):
-    """Return a MagicMock(spec=LLMClient) with submit_finetune and get_finetune_status as AsyncMocks."""
-    llm = MagicMock(spec=LLMClient)
-    llm.submit_finetune = AsyncMock(return_value=job_id)
-    llm.get_finetune_status = AsyncMock(return_value={
-        "job_id": job_id,
-        "status": status,
-        "progress": progress,
-        "error_message": None,
-    })
-    return llm
-
-
-# ---------------------------------------------------------------------------
-# submit_finetune
-# ---------------------------------------------------------------------------
-
-@pytest.mark.asyncio
-async def test_submit_finetune_returns_job_id():
-    from app.services.finetune_service import submit_finetune
-
-    llm = _make_llm(job_id="glm-ft-abc123")
-    req = FinetuneStartRequest(
-        jsonl_url="s3://bucket/train.jsonl",
-        base_model="glm-4",
-        hyperparams={"n_epochs": 3},
-    )
-
-    result = await submit_finetune(req, llm)
-
-    assert isinstance(result, FinetuneStartResponse)
-    assert result.job_id == "glm-ft-abc123"
-
-
-@pytest.mark.asyncio
-async def test_submit_finetune_calls_interface_with_correct_params():
-    from app.services.finetune_service import submit_finetune
-
-    llm = _make_llm(job_id="glm-ft-xyz")
-    req = FinetuneStartRequest(
-        jsonl_url="s3://bucket/train.jsonl",
-        base_model="glm-4",
-        hyperparams={"n_epochs": 5},
-    )
-
-    await submit_finetune(req, llm)
-
-    llm.submit_finetune.assert_awaited_once_with(
-        "s3://bucket/train.jsonl",
-        "glm-4",
-        {"n_epochs": 5},
-    )
-
-
-@pytest.mark.asyncio
-async def test_submit_finetune_none_hyperparams_passes_empty_dict():
-    """hyperparams=None should be passed as {} to the interface."""
-    from app.services.finetune_service import submit_finetune
-
-    llm = _make_llm(job_id="glm-ft-nohp")
-    req = FinetuneStartRequest(
-        jsonl_url="s3://bucket/train.jsonl",
-        base_model="glm-4",
-    )
-
-    await submit_finetune(req, llm)
-
-    llm.submit_finetune.assert_awaited_once_with(
-        "s3://bucket/train.jsonl",
-        "glm-4",
-        {},
-    )
-
-
-@pytest.mark.asyncio
-async def test_submit_finetune_raises_llm_call_error_on_failure():
-    from app.services.finetune_service import submit_finetune
-
-    llm = MagicMock(spec=LLMClient)
-    llm.submit_finetune = AsyncMock(side_effect=LLMCallError("微调任务提交失败: SDK exploded"))
-
-    req = FinetuneStartRequest(
-        jsonl_url="s3://bucket/train.jsonl",
-        base_model="glm-4",
-    )
-
-    with pytest.raises(LLMCallError):
-        await submit_finetune(req, llm)
-
-
-# ---------------------------------------------------------------------------
-# get_finetune_status — status mapping
-# ---------------------------------------------------------------------------
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("sdk_status,expected", [
-    ("running", "RUNNING"),
-    ("succeeded", "SUCCESS"),
-    ("failed", "FAILED"),
-    ("pending", "RUNNING"),      # unknown → conservative RUNNING
-    ("queued", "RUNNING"),       # unknown → conservative RUNNING
-    ("cancelled", "RUNNING"),    # unknown → conservative RUNNING
-])
-async def test_get_finetune_status_maps_status(sdk_status, expected):
-    from app.services.finetune_service import get_finetune_status
-
-    llm = _make_llm(status=sdk_status)
-
-    result = await get_finetune_status("glm-ft-test", llm)
-
-    assert isinstance(result, FinetuneStatusResponse)
-    assert result.status == expected
-    assert result.job_id == "glm-ft-test"
-
-
-@pytest.mark.asyncio
-async def test_get_finetune_status_includes_progress():
-    from app.services.finetune_service import get_finetune_status
-
-    llm = _make_llm(status="running", progress=42)
-    result = await get_finetune_status("glm-ft-test", llm)
-
-    assert result.progress == 42
-
-
-@pytest.mark.asyncio
-async def test_get_finetune_status_raises_llm_call_error_on_failure():
-    from app.services.finetune_service import get_finetune_status
-
-    llm = MagicMock(spec=LLMClient)
-    llm.get_finetune_status = AsyncMock(side_effect=LLMCallError("查询微调任务失败: SDK exploded"))
-
-    with pytest.raises(LLMCallError):
-        await get_finetune_status("glm-ft-bad", llm)
diff --git a/tests/test_health.py b/tests/test_health.py
deleted file mode 100644
index 0f2b3e4..0000000
--- a/tests/test_health.py
+++ /dev/null
@@ -1,8 +0,0 @@
-"""T047: Health check endpoint test — GET /health → 200 {"status": "ok"}"""
-from fastapi.testclient import TestClient
-
-
-def test_health_returns_ok(client: TestClient):
-    response = client.get("/health")
-    assert response.status_code == 200
-    assert response.json() == {"status": "ok"}
diff --git a/tests/test_image_router.py b/tests/test_image_router.py
deleted file mode 100644
index e98ce31..0000000
--- a/tests/test_image_router.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import json
-import numpy as np
-import cv2
-import pytest
-from unittest.mock import AsyncMock
-
-from app.core.exceptions import StorageError
-
-
-def _make_test_image_bytes() -> bytes:
-    img = np.zeros((80, 100, 3), dtype=np.uint8)
-    _, buf = cv2.imencode(".jpg", img)
-    return buf.tobytes()
-
-
-SAMPLE_QUADS_JSON = json.dumps([
-    {
-        "subject": "电缆接头",
-        "predicate": "位于",
-        "object": "配电箱左侧",
-        "qualifier": "2024年检修",
-        "bbox": {"x": 5, "y": 5, "w": 20, "h": 15},
-    }
-])
-
-
-def test_image_extract_returns_200(client, mock_llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=_make_test_image_bytes())
-    mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QUADS_JSON)
-    mock_storage.upload_bytes = AsyncMock(return_value=None)
-
-    resp = client.post(
-        "/api/v1/image/extract",
-        json={"file_path": "image/test.jpg", "task_id": 1},
-    )
-    assert resp.status_code == 200
-    data = resp.json()
-    assert "items" in data
-    assert data["items"][0]["subject"] == "电缆接头"
-    assert data["items"][0]["cropped_image_path"] == "crops/1/0.jpg"
-
-
-def test_image_extract_llm_parse_error_returns_502(client, mock_llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=_make_test_image_bytes())
-    mock_llm.chat_vision = AsyncMock(return_value="not json {{")
-
-    resp = client.post(
-        "/api/v1/image/extract",
-        json={"file_path": "image/test.jpg", "task_id": 1},
-    )
-    assert resp.status_code == 502
-    assert resp.json()["code"] == "LLM_PARSE_ERROR"
-
-
-def test_image_extract_storage_error_returns_502(client, mock_storage):
-    mock_storage.download_bytes = AsyncMock(side_effect=StorageError("RustFS down"))
-
-    resp = client.post(
-        "/api/v1/image/extract",
-        json={"file_path": "image/test.jpg", "task_id": 1},
-    )
-    assert resp.status_code == 502
-    assert resp.json()["code"] == "STORAGE_ERROR"
diff --git a/tests/test_image_service.py b/tests/test_image_service.py
deleted file mode 100644
index ee6e8ae..0000000
--- a/tests/test_image_service.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import io
-import json
-import pytest
-import numpy as np
-import cv2
-from unittest.mock import AsyncMock
-
-from app.core.exceptions import LLMParseError
-from app.models.image_models import ImageExtractRequest
-
-
-def _make_test_image_bytes(width=100, height=80) -> bytes:
-    img = np.zeros((height, width, 3), dtype=np.uint8)
-    img[10:50, 10:60] = (255, 0, 0)  # blue rectangle
-    _, buf = cv2.imencode(".jpg", img)
-    return buf.tobytes()
-
-
-SAMPLE_QUADS_JSON = json.dumps([
-    {
-        "subject": "电缆接头",
-        "predicate": "位于",
-        "object": "配电箱左侧",
-        "qualifier": "2024年检修",
-        "bbox": {"x": 10, "y": 10, "w": 40, "h": 30},
-    }
-])
-
-
-@pytest.fixture
-def image_bytes():
-    return _make_test_image_bytes()
-
-
-@pytest.fixture
-def req():
-    return ImageExtractRequest(file_path="image/test.jpg", task_id=1)
-
-
-@pytest.mark.asyncio
-async def test_extract_quads_returns_items(mock_llm, mock_storage, image_bytes, req):
-    mock_storage.download_bytes = AsyncMock(return_value=image_bytes)
-    mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QUADS_JSON)
-    mock_storage.upload_bytes = AsyncMock(return_value=None)
-
-    from app.services.image_service import extract_quads
-    result = await extract_quads(req, mock_llm, mock_storage)
-
-    assert len(result.items) == 1
-    item = result.items[0]
-    assert item.subject == "电缆接头"
-    assert item.predicate == "位于"
-    assert item.bbox.x == 10
-    assert item.bbox.y == 10
-    assert item.cropped_image_path == "crops/1/0.jpg"
-
-
-@pytest.mark.asyncio
-async def test_crop_is_uploaded(mock_llm, mock_storage, image_bytes, req):
-    mock_storage.download_bytes = AsyncMock(return_value=image_bytes)
-    mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QUADS_JSON)
-    mock_storage.upload_bytes = AsyncMock(return_value=None)
-
-    from app.services.image_service import extract_quads
-    await extract_quads(req, mock_llm, mock_storage)
-
-    # upload_bytes called once for the crop
-    mock_storage.upload_bytes.assert_called_once()
-    call_args = mock_storage.upload_bytes.call_args
-    assert call_args.args[1] == "crops/1/0.jpg"
-
-
-@pytest.mark.asyncio
-async def test_out_of_bounds_bbox_is_clamped(mock_llm, mock_storage, req):
-    img = _make_test_image_bytes(width=50, height=40)
-    mock_storage.download_bytes = AsyncMock(return_value=img)
-
-    # bbox goes outside image boundary
-    oob_json = json.dumps([{
-        "subject": "test",
-        "predicate": "rel",
-        "object": "obj",
-        "qualifier": None,
-        "bbox": {"x": 30, "y": 20, "w": 100, "h": 100},  # extends beyond 50x40
-    }])
-    mock_llm.chat_vision = AsyncMock(return_value=oob_json)
-    mock_storage.upload_bytes = AsyncMock(return_value=None)
-
-    from app.services.image_service import extract_quads
-    # Should not raise; bbox is clamped
-    result = await extract_quads(req, mock_llm, mock_storage)
-    assert len(result.items) == 1
-
-
-@pytest.mark.asyncio
-async def test_llm_parse_error_raised(mock_llm, mock_storage, image_bytes, req):
-    mock_storage.download_bytes = AsyncMock(return_value=image_bytes)
-    mock_llm.chat_vision = AsyncMock(return_value="bad json {{")
-
-    from app.services.image_service import extract_quads
-    with pytest.raises(LLMParseError):
-        await extract_quads(req, mock_llm, mock_storage)
diff --git a/tests/test_llm_client.py b/tests/test_llm_client.py
deleted file mode 100644
index e5d0734..0000000
--- a/tests/test_llm_client.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import pytest
-from unittest.mock import MagicMock, patch
-
-from app.clients.llm.zhipuai_client import ZhipuAIClient
-from app.core.exceptions import LLMCallError
-
-
-@pytest.fixture
-def mock_sdk_response():
-    resp = MagicMock()
-    resp.choices[0].message.content = '{"result": "ok"}'
-    return resp
-
-
-@pytest.fixture
-def client():
-    with patch("app.clients.llm.zhipuai_client.ZhipuAI"):
-        c = ZhipuAIClient(api_key="test-key")
-        return c
-
-
-@pytest.mark.asyncio
-async def test_chat_returns_content(client, mock_sdk_response):
-    client._client.chat.completions.create.return_value = mock_sdk_response
-    result = await client.chat("glm-4-flash", [{"role": "user", "content": "hello"}])
-    assert result == '{"result": "ok"}'
-
-
-@pytest.mark.asyncio
-async def test_chat_vision_returns_content(client, mock_sdk_response):
-    client._client.chat.completions.create.return_value = mock_sdk_response
-    result = await client.chat_vision("glm-4v-flash", [{"role": "user", "content": []}])
-    assert result == '{"result": "ok"}'
-
-
-@pytest.mark.asyncio
-async def test_llm_call_error_on_sdk_exception(client):
-    client._client.chat.completions.create.side_effect = RuntimeError("quota exceeded")
-    with pytest.raises(LLMCallError, match="大模型调用失败"):
-        await client.chat("glm-4-flash", [{"role": "user", "content": "hi"}])
-
-
-@pytest.mark.asyncio
-async def test_submit_finetune_returns_job_id(client):
-    """submit_finetune should call the SDK and return the job id."""
-    resp = MagicMock()
-    resp.id = "glm-ft-newjob"
-    client._client.fine_tuning.jobs.create.return_value = resp
-
-    job_id = await client.submit_finetune(
-        jsonl_url="s3://bucket/train.jsonl",
-        base_model="glm-4",
-        hyperparams={"n_epochs": 2},
-    )
-
-    assert job_id == "glm-ft-newjob"
-    client._client.fine_tuning.jobs.create.assert_called_once_with(
-        training_file="s3://bucket/train.jsonl",
-        model="glm-4",
-        hyperparameters={"n_epochs": 2},
-    )
-
-
-@pytest.mark.asyncio
-async def test_get_finetune_status_returns_correct_dict(client):
-    """get_finetune_status should return a normalized dict with progress coerced to int."""
-    resp = MagicMock()
-    resp.id = "glm-ft-abc"
-    resp.status = "running"
-    resp.progress = "75"  # SDK may return string; should be coerced to int
-    resp.error_message = None
-    client._client.fine_tuning.jobs.retrieve.return_value = resp
-
-    result = await client.get_finetune_status("glm-ft-abc")
-
-    assert result == {
-        "job_id": "glm-ft-abc",
-        "status": "running",
-        "progress": 75,
-        "error_message": None,
-    }
diff --git a/tests/test_qa_router.py b/tests/test_qa_router.py
deleted file mode 100644
index 8f82575..0000000
--- a/tests/test_qa_router.py
+++ /dev/null
@@ -1,121 +0,0 @@
-"""Tests for QA router: /api/v1/qa/gen-text and /api/v1/qa/gen-image."""
-import json
-import pytest
-from unittest.mock import AsyncMock
-
-from app.core.exceptions import LLMCallError, LLMParseError, StorageError
-
-
-SAMPLE_QA_JSON = json.dumps([
-    {"question": "电缆接头位于哪里？", "answer": "配电箱左侧"},
-])
-
-FAKE_IMAGE_BYTES = b"\xff\xd8\xff\xe0fake_jpeg_content"
-
-TEXT_QA_PAYLOAD = {
-    "items": [
-        {
-            "subject": "电缆接头",
-            "predicate": "位于",
-            "object": "配电箱左侧",
-            "source_snippet": "电缆接头位于配电箱左侧",
-        }
-    ]
-}
-
-IMAGE_QA_PAYLOAD = {
-    "items": [
-        {
-            "subject": "电缆接头",
-            "predicate": "位于",
-            "object": "配电箱左侧",
-            "cropped_image_path": "crops/1/0.jpg",
-        }
-    ]
-}
-
-
-# ---------------------------------------------------------------------------
-# POST /api/v1/qa/gen-text
-# ---------------------------------------------------------------------------
-
-
-def test_gen_text_qa_returns_200(client, mock_llm):
-    mock_llm.chat = AsyncMock(return_value=SAMPLE_QA_JSON)
-
-    resp = client.post("/api/v1/qa/gen-text", json=TEXT_QA_PAYLOAD)
-
-    assert resp.status_code == 200
-    data = resp.json()
-    assert "pairs" in data
-    assert len(data["pairs"]) == 1
-    assert data["pairs"][0]["question"] == "电缆接头位于哪里？"
-    assert data["pairs"][0]["answer"] == "配电箱左侧"
-
-
-def test_gen_text_qa_llm_parse_error_returns_502(client, mock_llm):
-    mock_llm.chat = AsyncMock(return_value="not valid json {{")
-
-    resp = client.post("/api/v1/qa/gen-text", json=TEXT_QA_PAYLOAD)
-
-    assert resp.status_code == 502
-    assert resp.json()["code"] == "LLM_PARSE_ERROR"
-
-
-def test_gen_text_qa_llm_call_error_returns_503(client, mock_llm):
-    mock_llm.chat = AsyncMock(side_effect=LLMCallError("GLM timeout"))
-
-    resp = client.post("/api/v1/qa/gen-text", json=TEXT_QA_PAYLOAD)
-
-    assert resp.status_code == 503
-    assert resp.json()["code"] == "LLM_CALL_ERROR"
-
-
-# ---------------------------------------------------------------------------
-# POST /api/v1/qa/gen-image
-# ---------------------------------------------------------------------------
-
-
-def test_gen_image_qa_returns_200(client, mock_llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=FAKE_IMAGE_BYTES)
-    mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QA_JSON)
-
-    resp = client.post("/api/v1/qa/gen-image", json=IMAGE_QA_PAYLOAD)
-
-    assert resp.status_code == 200
-    data = resp.json()
-    assert "pairs" in data
-    assert len(data["pairs"]) == 1
-    pair = data["pairs"][0]
-    assert pair["question"] == "电缆接头位于哪里？"
-    assert pair["answer"] == "配电箱左侧"
-    assert pair["image_path"] == "crops/1/0.jpg"
-
-
-def test_gen_image_qa_llm_parse_error_returns_502(client, mock_llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=FAKE_IMAGE_BYTES)
-    mock_llm.chat_vision = AsyncMock(return_value="bad json {{")
-
-    resp = client.post("/api/v1/qa/gen-image", json=IMAGE_QA_PAYLOAD)
-
-    assert resp.status_code == 502
-    assert resp.json()["code"] == "LLM_PARSE_ERROR"
-
-
-def test_gen_image_qa_llm_call_error_returns_503(client, mock_llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=FAKE_IMAGE_BYTES)
-    mock_llm.chat_vision = AsyncMock(side_effect=LLMCallError("GLM vision timeout"))
-
-    resp = client.post("/api/v1/qa/gen-image", json=IMAGE_QA_PAYLOAD)
-
-    assert resp.status_code == 503
-    assert resp.json()["code"] == "LLM_CALL_ERROR"
-
-
-def test_gen_image_qa_storage_error_returns_502(client, mock_storage):
-    mock_storage.download_bytes = AsyncMock(side_effect=StorageError("RustFS down"))
-
-    resp = client.post("/api/v1/qa/gen-image", json=IMAGE_QA_PAYLOAD)
-
-    assert resp.status_code == 502
-    assert resp.json()["code"] == "STORAGE_ERROR"
diff --git a/tests/test_qa_service.py b/tests/test_qa_service.py
deleted file mode 100644
index 7a6e258..0000000
--- a/tests/test_qa_service.py
+++ /dev/null
@@ -1,236 +0,0 @@
-"""Tests for qa_service: text QA (US5) and image QA (US6)."""
-import base64
-import json
-import pytest
-from unittest.mock import AsyncMock
-
-from app.core.exceptions import LLMCallError, LLMParseError, StorageError
-
-
-# ---------------------------------------------------------------------------
-# Shared fixtures / helpers
-# ---------------------------------------------------------------------------
-
-SAMPLE_QA_JSON = json.dumps([
-    {"question": "电缆接头位于哪里？", "answer": "配电箱左侧"},
-])
-
-
-# ---------------------------------------------------------------------------
-# T039 — Text QA service tests (US5)
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.asyncio
-async def test_gen_text_qa_prompt_contains_triples(mock_llm):
-    """Triple fields and source_snippet must appear in the message sent to LLM."""
-    from app.models.qa_models import GenTextQARequest, TextQAItem
-    from app.services.qa_service import gen_text_qa
-
-    mock_llm.chat = AsyncMock(return_value=SAMPLE_QA_JSON)
-
-    req = GenTextQARequest(items=[
-        TextQAItem(
-            subject="电缆接头",
-            predicate="位于",
-            object="配电箱左侧",
-            source_snippet="电缆接头位于配电箱左侧",
-        )
-    ])
-
-    await gen_text_qa(req, mock_llm)
-
-    assert mock_llm.chat.called
-    call_args = mock_llm.chat.call_args
-    messages = call_args.args[1] if call_args.args else call_args.kwargs["messages"]
-    prompt_text = messages[0]["content"]
-    assert "电缆接头" in prompt_text
-    assert "位于" in prompt_text
-    assert "配电箱左侧" in prompt_text
-    assert "电缆接头位于配电箱左侧" in prompt_text
-
-
-@pytest.mark.asyncio
-async def test_gen_text_qa_returns_qa_pair_list(mock_llm):
-    """Parsed JSON must be returned as QAPair list."""
-    from app.models.qa_models import GenTextQARequest, QAPair, TextQAItem
-    from app.services.qa_service import gen_text_qa
-
-    mock_llm.chat = AsyncMock(return_value=SAMPLE_QA_JSON)
-
-    req = GenTextQARequest(items=[
-        TextQAItem(
-            subject="电缆接头",
-            predicate="位于",
-            object="配电箱左侧",
-            source_snippet="电缆接头位于配电箱左侧",
-        )
-    ])
-
-    result = await gen_text_qa(req, mock_llm)
-
-    assert len(result.pairs) == 1
-    pair = result.pairs[0]
-    assert isinstance(pair, QAPair)
-    assert pair.question == "电缆接头位于哪里？"
-    assert pair.answer == "配电箱左侧"
-
-
-@pytest.mark.asyncio
-async def test_gen_text_qa_llm_parse_error_on_malformed_response(mock_llm):
-    """LLMParseError must be raised when LLM returns non-JSON."""
-    from app.models.qa_models import GenTextQARequest, TextQAItem
-    from app.services.qa_service import gen_text_qa
-
-    mock_llm.chat = AsyncMock(return_value="this is not json {{")
-
-    req = GenTextQARequest(items=[
-        TextQAItem(subject="s", predicate="p", object="o", source_snippet="snip")
-    ])
-
-    with pytest.raises(LLMParseError):
-        await gen_text_qa(req, mock_llm)
-
-
-@pytest.mark.asyncio
-async def test_gen_text_qa_llm_call_error_propagates(mock_llm):
-    """LLMCallError from LLM client must propagate unchanged."""
-    from app.models.qa_models import GenTextQARequest, TextQAItem
-    from app.services.qa_service import gen_text_qa
-
-    mock_llm.chat = AsyncMock(side_effect=LLMCallError("GLM timeout"))
-
-    req = GenTextQARequest(items=[
-        TextQAItem(subject="s", predicate="p", object="o", source_snippet="snip")
-    ])
-
-    with pytest.raises(LLMCallError):
-        await gen_text_qa(req, mock_llm)
-
-
-# ---------------------------------------------------------------------------
-# T040 — Image QA service tests (US6)
-# ---------------------------------------------------------------------------
-
-FAKE_IMAGE_BYTES = b"\xff\xd8\xff\xe0fake_jpeg_content"
-
-
-@pytest.mark.asyncio
-async def test_gen_image_qa_downloads_image_and_encodes_base64(mock_llm, mock_storage):
-    """Storage.download_bytes must be called, result base64-encoded in LLM message."""
-    from app.models.qa_models import GenImageQARequest, ImageQAItem
-    from app.services.qa_service import gen_image_qa
-
-    mock_storage.download_bytes = AsyncMock(return_value=FAKE_IMAGE_BYTES)
-    mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QA_JSON)
-
-    req = GenImageQARequest(items=[
-        ImageQAItem(
-            subject="电缆接头",
-            predicate="位于",
-            object="配电箱左侧",
-            cropped_image_path="crops/1/0.jpg",
-        )
-    ])
-
-    await gen_image_qa(req, mock_llm, mock_storage)
-
-    # Storage download must have been called with the correct path
-    mock_storage.download_bytes.assert_called_once()
-    call_args = mock_storage.download_bytes.call_args
-    path_arg = call_args.args[1] if len(call_args.args) > 1 else call_args.kwargs.get("path", call_args.kwargs.get("key"))
-    assert path_arg == "crops/1/0.jpg"
-
-
-@pytest.mark.asyncio
-async def test_gen_image_qa_multimodal_message_format(mock_llm, mock_storage):
-    """Multimodal message must contain inline base64 image_url and text."""
-    from app.models.qa_models import GenImageQARequest, ImageQAItem
-    from app.services.qa_service import gen_image_qa
-
-    mock_storage.download_bytes = AsyncMock(return_value=FAKE_IMAGE_BYTES)
-    mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QA_JSON)
-
-    req = GenImageQARequest(items=[
-        ImageQAItem(
-            subject="电缆接头",
-            predicate="位于",
-            object="配电箱左侧",
-            qualifier="2024检修",
-            cropped_image_path="crops/1/0.jpg",
-        )
-    ])
-
-    await gen_image_qa(req, mock_llm, mock_storage)
-
-    assert mock_llm.chat_vision.called
-    call_args = mock_llm.chat_vision.call_args
-    messages = call_args.args[1] if call_args.args else call_args.kwargs["messages"]
-
-    # Find the content list in messages
-    content = messages[0]["content"]
-    assert isinstance(content, list)
-
-    # Must have an image_url part with inline base64 data URI
-    image_parts = [p for p in content if p.get("type") == "image_url"]
-    assert len(image_parts) >= 1
-    url = image_parts[0]["image_url"]["url"]
-    expected_b64 = base64.b64encode(FAKE_IMAGE_BYTES).decode()
-    assert url == f"data:image/jpeg;base64,{expected_b64}"
-
-    # Must have a text part containing quad info
-    text_parts = [p for p in content if p.get("type") == "text"]
-    assert len(text_parts) >= 1
-    text = text_parts[0]["text"]
-    assert "电缆接头" in text
-    assert "位于" in text
-    assert "配电箱左侧" in text
-
-
-@pytest.mark.asyncio
-async def test_gen_image_qa_returns_image_qa_pair_with_image_path(mock_llm, mock_storage):
-    """Result ImageQAPair must include image_path from the item."""
-    from app.models.qa_models import GenImageQARequest, ImageQAItem, ImageQAPair
-    from app.services.qa_service import gen_image_qa
-
-    mock_storage.download_bytes = AsyncMock(return_value=FAKE_IMAGE_BYTES)
-    mock_llm.chat_vision = AsyncMock(return_value=SAMPLE_QA_JSON)
-
-    req = GenImageQARequest(items=[
-        ImageQAItem(
-            subject="电缆接头",
-            predicate="位于",
-            object="配电箱左侧",
-            cropped_image_path="crops/1/0.jpg",
-        )
-    ])
-
-    result = await gen_image_qa(req, mock_llm, mock_storage)
-
-    assert len(result.pairs) == 1
-    pair = result.pairs[0]
-    assert isinstance(pair, ImageQAPair)
-    assert pair.question == "电缆接头位于哪里？"
-    assert pair.answer == "配电箱左侧"
-    assert pair.image_path == "crops/1/0.jpg"
-
-
-@pytest.mark.asyncio
-async def test_gen_image_qa_storage_error_propagates(mock_llm, mock_storage):
-    """StorageError from download must propagate unchanged."""
-    from app.models.qa_models import GenImageQARequest, ImageQAItem
-    from app.services.qa_service import gen_image_qa
-
-    mock_storage.download_bytes = AsyncMock(side_effect=StorageError("RustFS down"))
-
-    req = GenImageQARequest(items=[
-        ImageQAItem(
-            subject="s",
-            predicate="p",
-            object="o",
-            cropped_image_path="crops/1/0.jpg",
-        )
-    ])
-
-    with pytest.raises(StorageError):
-        await gen_image_qa(req, mock_llm, mock_storage)
diff --git a/tests/test_storage_client.py b/tests/test_storage_client.py
deleted file mode 100644
index d124563..0000000
--- a/tests/test_storage_client.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import pytest
-from unittest.mock import MagicMock, patch
-from botocore.exceptions import ClientError
-
-from app.clients.storage.rustfs_client import RustFSClient
-from app.core.exceptions import StorageError
-
-
-@pytest.fixture
-def client():
-    with patch("app.clients.storage.rustfs_client.boto3") as mock_boto3:
-        c = RustFSClient(
-            endpoint="http://rustfs:9000",
-            access_key="key",
-            secret_key="secret",
-        )
-        c._s3 = MagicMock()
-        return c
-
-
-@pytest.mark.asyncio
-async def test_download_bytes_returns_bytes(client):
-    client._s3.get_object.return_value = {"Body": MagicMock(read=lambda: b"hello")}
-    result = await client.download_bytes("source-data", "text/test.txt")
-    assert result == b"hello"
-    client._s3.get_object.assert_called_once_with(Bucket="source-data", Key="text/test.txt")
-
-
-@pytest.mark.asyncio
-async def test_download_bytes_raises_storage_error(client):
-    client._s3.get_object.side_effect = ClientError(
-        {"Error": {"Code": "NoSuchKey", "Message": "Not Found"}}, "GetObject"
-    )
-    with pytest.raises(StorageError, match="存储下载失败"):
-        await client.download_bytes("source-data", "missing.txt")
-
-
-@pytest.mark.asyncio
-async def test_get_object_size_returns_content_length(client):
-    client._s3.head_object.return_value = {"ContentLength": 1024}
-    size = await client.get_object_size("source-data", "video/test.mp4")
-    assert size == 1024
-    client._s3.head_object.assert_called_once_with(Bucket="source-data", Key="video/test.mp4")
-
-
-@pytest.mark.asyncio
-async def test_get_object_size_raises_storage_error(client):
-    client._s3.head_object.side_effect = ClientError(
-        {"Error": {"Code": "NoSuchKey", "Message": "Not Found"}}, "HeadObject"
-    )
-    with pytest.raises(StorageError, match="获取文件大小失败"):
-        await client.get_object_size("source-data", "video/missing.mp4")
-
-
-@pytest.mark.asyncio
-async def test_upload_bytes_calls_put_object(client):
-    client._s3.put_object.return_value = {}
-    await client.upload_bytes("source-data", "frames/1/0.jpg", b"jpeg-data", "image/jpeg")
-    client._s3.put_object.assert_called_once()
-    call_kwargs = client._s3.put_object.call_args
-    assert call_kwargs.kwargs["Bucket"] == "source-data"
-    assert call_kwargs.kwargs["Key"] == "frames/1/0.jpg"
diff --git a/tests/test_text_router.py b/tests/test_text_router.py
deleted file mode 100644
index cbee082..0000000
--- a/tests/test_text_router.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import pytest
-from unittest.mock import AsyncMock
-
-
-SAMPLE_TRIPLES_JSON = '''[
-  {
-    "subject": "变压器",
-    "predicate": "额定电压",
-    "object": "110kV",
-    "source_snippet": "该变压器额定电压为110kV",
-    "source_offset": {"start": 0, "end": 12}
-  }
-]'''
-
-
-def test_text_extract_returns_200(client, mock_llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=b"some text content")
-    mock_llm.chat = AsyncMock(return_value=SAMPLE_TRIPLES_JSON)
-
-    resp = client.post(
-        "/api/v1/text/extract",
-        json={"file_path": "text/test.txt", "file_name": "test.txt"},
-    )
-    assert resp.status_code == 200
-    data = resp.json()
-    assert "items" in data
-    assert data["items"][0]["subject"] == "变压器"
-    assert data["items"][0]["source_offset"]["start"] == 0
-
-
-def test_text_extract_unsupported_format_returns_400(client, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=b"data")
-
-    resp = client.post(
-        "/api/v1/text/extract",
-        json={"file_path": "text/test.xlsx", "file_name": "data.xlsx"},
-    )
-    assert resp.status_code == 400
-    assert resp.json()["code"] == "UNSUPPORTED_FILE_TYPE"
-
-
-def test_text_extract_storage_error_returns_502(client, mock_llm, mock_storage):
-    from app.core.exceptions import StorageError
-    mock_storage.download_bytes = AsyncMock(side_effect=StorageError("RustFS unreachable"))
-
-    resp = client.post(
-        "/api/v1/text/extract",
-        json={"file_path": "text/test.txt", "file_name": "test.txt"},
-    )
-    assert resp.status_code == 502
-    assert resp.json()["code"] == "STORAGE_ERROR"
-
-
-def test_text_extract_llm_parse_error_returns_502(client, mock_llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=b"content")
-    mock_llm.chat = AsyncMock(return_value="not json {{{{")
-
-    resp = client.post(
-        "/api/v1/text/extract",
-        json={"file_path": "text/test.txt", "file_name": "test.txt"},
-    )
-    assert resp.status_code == 502
-    assert resp.json()["code"] == "LLM_PARSE_ERROR"
diff --git a/tests/test_text_service.py b/tests/test_text_service.py
deleted file mode 100644
index 4f85e45..0000000
--- a/tests/test_text_service.py
+++ /dev/null
@@ -1,122 +0,0 @@
-import pytest
-from unittest.mock import AsyncMock, MagicMock
-
-from app.core.exceptions import LLMParseError, StorageError, UnsupportedFileTypeError
-from app.models.text_models import TextExtractRequest
-
-
-SAMPLE_TRIPLES_JSON = '''[
-  {
-    "subject": "变压器",
-    "predicate": "额定电压",
-    "object": "110kV",
-    "source_snippet": "该变压器额定电压为110kV",
-    "source_offset": {"start": 0, "end": 12}
-  }
-]'''
-
-
-@pytest.fixture
-def req_txt():
-    return TextExtractRequest(file_path="text/test.txt", file_name="test.txt")
-
-
-@pytest.fixture
-def req_pdf():
-    return TextExtractRequest(file_path="text/test.pdf", file_name="report.pdf")
-
-
-@pytest.fixture
-def req_docx():
-    return TextExtractRequest(file_path="text/test.docx", file_name="doc.docx")
-
-
-@pytest.fixture
-def llm(mock_llm):
-    mock_llm.chat = AsyncMock(return_value=SAMPLE_TRIPLES_JSON)
-    return mock_llm
-
-
-@pytest.mark.asyncio
-async def test_txt_extraction_returns_triples(llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=b"test content")
-    from app.services.text_service import extract_triples
-    req = TextExtractRequest(file_path="text/test.txt", file_name="test.txt")
-    result = await extract_triples(req, llm, mock_storage)
-    assert len(result.items) == 1
-    assert result.items[0].subject == "变压器"
-    assert result.items[0].predicate == "额定电压"
-    assert result.items[0].object == "110kV"
-    assert result.items[0].source_offset.start == 0
-    assert result.items[0].source_offset.end == 12
-
-
-@pytest.mark.asyncio
-async def test_pdf_extraction(llm, mock_storage, tmp_path):
-    import pdfplumber, io
-    # We mock download_bytes to return a minimal PDF-like response
-    # and mock pdfplumber.open to return pages with text
-    mock_storage.download_bytes = AsyncMock(return_value=b"%PDF fake")
-
-    with pytest.MonkeyPatch().context() as mp:
-        mock_page = MagicMock()
-        mock_page.extract_text.return_value = "PDF content here"
-        mock_pdf = MagicMock()
-        mock_pdf.__enter__ = lambda s: s
-        mock_pdf.__exit__ = MagicMock(return_value=False)
-        mock_pdf.pages = [mock_page]
-        mp.setattr("pdfplumber.open", lambda f: mock_pdf)
-
-        from app.services import text_service
-        import importlib
-        importlib.reload(text_service)
-        req = TextExtractRequest(file_path="text/test.pdf", file_name="doc.pdf")
-        result = await text_service.extract_triples(req, llm, mock_storage)
-    assert len(result.items) == 1
-
-
-@pytest.mark.asyncio
-async def test_docx_extraction(llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=b"PK fake docx bytes")
-
-    with pytest.MonkeyPatch().context() as mp:
-        mock_para = MagicMock()
-        mock_para.text = "Word paragraph content"
-        mock_doc = MagicMock()
-        mock_doc.paragraphs = [mock_para]
-        mp.setattr("docx.Document", lambda f: mock_doc)
-
-        from app.services import text_service
-        import importlib
-        importlib.reload(text_service)
-        req = TextExtractRequest(file_path="text/test.docx", file_name="doc.docx")
-        result = await text_service.extract_triples(req, llm, mock_storage)
-    assert len(result.items) == 1
-
-
-@pytest.mark.asyncio
-async def test_unsupported_format_raises_error(llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=b"data")
-    from app.services.text_service import extract_triples
-    req = TextExtractRequest(file_path="text/test.xlsx", file_name="data.xlsx")
-    with pytest.raises(UnsupportedFileTypeError):
-        await extract_triples(req, llm, mock_storage)
-
-
-@pytest.mark.asyncio
-async def test_storage_error_propagates(llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(side_effect=StorageError("not found"))
-    from app.services.text_service import extract_triples
-    req = TextExtractRequest(file_path="text/test.txt", file_name="test.txt")
-    with pytest.raises(StorageError):
-        await extract_triples(req, llm, mock_storage)
-
-
-@pytest.mark.asyncio
-async def test_llm_parse_error_propagates(mock_llm, mock_storage):
-    mock_storage.download_bytes = AsyncMock(return_value=b"content")
-    mock_llm.chat = AsyncMock(return_value="not json {{")
-    from app.services.text_service import extract_triples
-    req = TextExtractRequest(file_path="text/test.txt", file_name="test.txt")
-    with pytest.raises(LLMParseError):
-        await extract_triples(req, mock_llm, mock_storage)
diff --git a/tests/test_video_router.py b/tests/test_video_router.py
deleted file mode 100644
index 703dc40..0000000
--- a/tests/test_video_router.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import pytest
-from unittest.mock import AsyncMock, patch
-
-from app.core.exceptions import VideoTooLargeError
-
-
-def test_extract_frames_returns_202(client, mock_storage):
-    mock_storage.get_object_size = AsyncMock(return_value=10 * 1024 * 1024)  # 10 MB
-
-    with patch("app.routers.video.BackgroundTasks.add_task"):
-        resp = client.post(
-            "/api/v1/video/extract-frames",
-            json={
-                "file_path": "video/test.mp4",
-                "source_id": 10,
-                "job_id": 42,
-            },
-        )
-    assert resp.status_code == 202
-    data = resp.json()
-    assert data["job_id"] == 42
-
-
-def test_extract_frames_video_too_large_returns_400(client, mock_storage):
-    mock_storage.get_object_size = AsyncMock(return_value=300 * 1024 * 1024)  # 300 MB > 200 MB
-
-    resp = client.post(
-        "/api/v1/video/extract-frames",
-        json={
-            "file_path": "video/big.mp4",
-            "source_id": 10,
-            "job_id": 99,
-        },
-    )
-    assert resp.status_code == 400
-    assert resp.json()["code"] == "VIDEO_TOO_LARGE"
-
-
-def test_video_to_text_returns_202(client, mock_storage):
-    mock_storage.get_object_size = AsyncMock(return_value=10 * 1024 * 1024)
-
-    with patch("app.routers.video.BackgroundTasks.add_task"):
-        resp = client.post(
-            "/api/v1/video/to-text",
-            json={
-                "file_path": "video/test.mp4",
-                "source_id": 10,
-                "job_id": 43,
-                "start_sec": 0,
-                "end_sec": 60,
-            },
-        )
-    assert resp.status_code == 202
-    assert resp.json()["job_id"] == 43
-
-
-def test_video_to_text_too_large_returns_400(client, mock_storage):
-    mock_storage.get_object_size = AsyncMock(return_value=300 * 1024 * 1024)
-
-    resp = client.post(
-        "/api/v1/video/to-text",
-        json={
-            "file_path": "video/big.mp4",
-            "source_id": 10,
-            "job_id": 99,
-            "start_sec": 0,
-            "end_sec": 60,
-        },
-    )
-    assert resp.status_code == 400
-    assert resp.json()["code"] == "VIDEO_TOO_LARGE"
diff --git a/tests/test_video_service.py b/tests/test_video_service.py
deleted file mode 100644
index 3e33483..0000000
--- a/tests/test_video_service.py
+++ /dev/null
@@ -1,195 +0,0 @@
-import io
-import json
-import os
-import tempfile
-import pytest
-import numpy as np
-import cv2
-from unittest.mock import AsyncMock, MagicMock, patch
-
-from app.models.video_models import ExtractFramesRequest, VideoToTextRequest
-
-
-def _make_test_video(path: str, num_frames: int = 10, fps: float = 10.0, width=64, height=64):
-    """Write a small test video to `path` using cv2.VideoWriter."""
-    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
-    out = cv2.VideoWriter(path, fourcc, fps, (width, height))
-    for i in range(num_frames):
-        frame = np.full((height, width, 3), (i * 20) % 256, dtype=np.uint8)
-        out.write(frame)
-    out.release()
-
-
-# ── US3: Frame Extraction ──────────────────────────────────────────────────────
-
-@pytest.fixture
-def frames_req():
-    return ExtractFramesRequest(
-        file_path="video/test.mp4",
-        source_id=10,
-        job_id=42,
-        mode="interval",
-        frame_interval=3,
-    )
-
-
-@pytest.mark.asyncio
-async def test_interval_mode_extracts_correct_frames(mock_storage, frames_req, tmp_path):
-    video_path = str(tmp_path / "test.mp4")
-    _make_test_video(video_path, num_frames=10, fps=10.0)
-
-    with open(video_path, "rb") as f:
-        video_bytes = f.read()
-
-    mock_storage.download_bytes = AsyncMock(return_value=video_bytes)
-    mock_storage.upload_bytes = AsyncMock(return_value=None)
-
-    callback_payloads = []
-
-    async def fake_callback(url, payload):
-        callback_payloads.append(payload)
-
-    with patch("app.services.video_service._post_callback", new=fake_callback):
-        from app.services.video_service import extract_frames_task
-        await extract_frames_task(frames_req, mock_storage, "http://backend/callback")
-
-    assert len(callback_payloads) == 1
-    cb = callback_payloads[0]
-    assert cb["status"] == "SUCCESS"
-    assert cb["job_id"] == 42
-    # With 10 frames and interval=3, we expect frames at indices 0, 3, 6, 9 → 4 frames
-    assert len(cb["frames"]) == 4
-
-
-@pytest.mark.asyncio
-async def test_keyframe_mode_extracts_scene_changes(mock_storage, tmp_path):
-    video_path = str(tmp_path / "kf.mp4")
-    # Create video with 2 distinct scenes separated by sudden color change
-    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
-    out = cv2.VideoWriter(video_path, fourcc, 10.0, (64, 64))
-    for _ in range(5):
-        out.write(np.zeros((64, 64, 3), dtype=np.uint8))        # black frames
-    for _ in range(5):
-        out.write(np.full((64, 64, 3), 200, dtype=np.uint8))    # bright frames
-    out.release()
-
-    with open(video_path, "rb") as f:
-        video_bytes = f.read()
-
-    mock_storage.download_bytes = AsyncMock(return_value=video_bytes)
-    mock_storage.upload_bytes = AsyncMock(return_value=None)
-
-    callback_payloads = []
-
-    async def fake_callback(url, payload):
-        callback_payloads.append(payload)
-
-    req = ExtractFramesRequest(
-        file_path="video/kf.mp4",
-        source_id=10,
-        job_id=43,
-        mode="keyframe",
-    )
-    with patch("app.services.video_service._post_callback", new=fake_callback):
-        from app.services.video_service import extract_frames_task
-        await extract_frames_task(req, mock_storage, "http://backend/callback")
-
-    cb = callback_payloads[0]
-    assert cb["status"] == "SUCCESS"
-    # Should capture at least the scene-change frame
-    assert len(cb["frames"]) >= 1
-
-
-@pytest.mark.asyncio
-async def test_frame_upload_path_convention(mock_storage, frames_req, tmp_path):
-    video_path = str(tmp_path / "test.mp4")
-    _make_test_video(video_path, num_frames=3, fps=10.0)
-    with open(video_path, "rb") as f:
-        mock_storage.download_bytes = AsyncMock(return_value=f.read())
-    mock_storage.upload_bytes = AsyncMock(return_value=None)
-
-    callback_payloads = []
-    async def fake_callback(url, payload):
-        callback_payloads.append(payload)
-
-    req = ExtractFramesRequest(
-        file_path="video/test.mp4", source_id=10, job_id=99, mode="interval", frame_interval=1
-    )
-    with patch("app.services.video_service._post_callback", new=fake_callback):
-        from app.services.video_service import extract_frames_task
-        await extract_frames_task(req, mock_storage, "http://backend/callback")
-
-    uploaded_paths = [call.args[1] for call in mock_storage.upload_bytes.call_args_list]
-    for i, path in enumerate(uploaded_paths):
-        assert path == f"frames/10/{i}.jpg"
-
-
-@pytest.mark.asyncio
-async def test_failed_extraction_sends_failed_callback(mock_storage, frames_req):
-    mock_storage.download_bytes = AsyncMock(side_effect=Exception("storage failure"))
-
-    callback_payloads = []
-    async def fake_callback(url, payload):
-        callback_payloads.append(payload)
-
-    with patch("app.services.video_service._post_callback", new=fake_callback):
-        from app.services.video_service import extract_frames_task
-        await extract_frames_task(frames_req, mock_storage, "http://backend/callback")
-
-    assert callback_payloads[0]["status"] == "FAILED"
-    assert callback_payloads[0]["error_message"] is not None
-
-
-# ── US4: Video To Text ─────────────────────────────────────────────────────────
-
-@pytest.fixture
-def totext_req():
-    return VideoToTextRequest(
-        file_path="video/test.mp4",
-        source_id=10,
-        job_id=44,
-        start_sec=0.0,
-        end_sec=1.0,
-    )
-
-
-@pytest.mark.asyncio
-async def test_video_to_text_samples_frames_and_calls_llm(mock_llm, mock_storage, totext_req, tmp_path):
-    video_path = str(tmp_path / "totext.mp4")
-    _make_test_video(video_path, num_frames=20, fps=10.0)
-    with open(video_path, "rb") as f:
-        mock_storage.download_bytes = AsyncMock(return_value=f.read())
-    mock_llm.chat_vision = AsyncMock(return_value="视频描述内容")
-    mock_storage.upload_bytes = AsyncMock(return_value=None)
-
-    callback_payloads = []
-    async def fake_callback(url, payload):
-        callback_payloads.append(payload)
-
-    with patch("app.services.video_service._post_callback", new=fake_callback):
-        from app.services.video_service import video_to_text_task
-        await video_to_text_task(totext_req, mock_llm, mock_storage, "http://backend/callback")
-
-    assert callback_payloads[0]["status"] == "SUCCESS"
-    assert "output_path" in callback_payloads[0]
-    assert callback_payloads[0]["output_path"].startswith("video-text/10/")
-    mock_llm.chat_vision.assert_called_once()
-
-
-@pytest.mark.asyncio
-async def test_video_to_text_llm_failure_sends_failed_callback(mock_llm, mock_storage, totext_req, tmp_path):
-    video_path = str(tmp_path / "fail.mp4")
-    _make_test_video(video_path, num_frames=5, fps=10.0)
-    with open(video_path, "rb") as f:
-        mock_storage.download_bytes = AsyncMock(return_value=f.read())
-    mock_llm.chat_vision = AsyncMock(side_effect=Exception("LLM unavailable"))
-
-    callback_payloads = []
-    async def fake_callback(url, payload):
-        callback_payloads.append(payload)
-
-    with patch("app.services.video_service._post_callback", new=fake_callback):
-        from app.services.video_service import video_to_text_task
-        await video_to_text_task(totext_req, mock_llm, mock_storage, "http://backend/callback")
-
-    assert callback_payloads[0]["status"] == "FAILED"