Merge branch '001-label-backend-spec'

This commit is contained in:
wh
2026-04-09 14:09:02 +08:00
54 changed files with 4850 additions and 58 deletions

7
.dockerignore Normal file
View File

@@ -0,0 +1,7 @@
.git
.claude
specs
docs
target
*.md
.gitignore

69
.gitignore vendored
View File

@@ -1,37 +1,32 @@
target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst
target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst
target/surefire-reports/2026-04-09T13-02-42_141.dumpstream
target/surefire-reports/2026-04-09T13-05-35_797.dumpstream
target/surefire-reports/2026-04-09T13-10-00_741.dumpstream
target/surefire-reports/2026-04-09T13-12-36_692.dumpstream
target/surefire-reports/2026-04-09T13-12-48_346.dumpstream
.specify/init-options.json
.specify/integration.json
.specify/integrations/claude.manifest.json
.specify/integrations/speckit.manifest.json
.specify/integrations/claude/scripts/update-context.ps1
.specify/integrations/claude/scripts/update-context.sh
.specify/memory/constitution.md
.specify/scripts/powershell/check-prerequisites.ps1
.specify/scripts/powershell/common.ps1
.specify/scripts/powershell/create-new-feature.ps1
.specify/scripts/powershell/setup-plan.ps1
.specify/scripts/powershell/update-agent-context.ps1
.specify/templates/agent-file-template.md
.specify/templates/checklist-template.md
.specify/templates/constitution-template.md
.specify/templates/plan-template.md
.specify/templates/spec-template.md
.specify/templates/tasks-template.md
.claude/settings.local.json
.claude/skills/speckit-analyze/SKILL.md
.claude/skills/speckit-checklist/SKILL.md
.claude/skills/speckit-clarify/SKILL.md
.claude/skills/speckit-constitution/SKILL.md
.claude/skills/speckit-implement/SKILL.md
.claude/skills/speckit-plan/SKILL.md
.claude/skills/speckit-specify/SKILL.md
.claude/skills/speckit-tasks/SKILL.md
.claude/skills/speckit-taskstoissues/SKILL.md
# ==========================================
# 1. Maven/Java 构建产物 (一键忽略整个目录)
# ==========================================
target/
*.class
*.jar
*.war
*.ear
# ==========================================
# 2. IDE 配置文件
# ==========================================
.idea/
.vscode/
*.iml
*.ipr
*.iws
# ==========================================
# 3. 项目特定工具目录 (根据你的文件列表)
# ==========================================
# 忽略 Specify 工具生成的所有配置和脚本
.specify/
# 忽略 Claude 本地设置和技能文件
.claude/
# ==========================================
# 4. 操作系统文件
# ==========================================
.DS_Store
Thumbs.db

3
CLAUDE.md Normal file
View File

@@ -0,0 +1,3 @@
# language
请始终使用简体中文与我对话,并保持回答专业、简洁。

18
Dockerfile Normal file
View File

@@ -0,0 +1,18 @@
# Build stage: uses Maven + JDK 17 (Alpine) to compile and package the application.
FROM maven:3.9-eclipse-temurin-17-alpine AS builder
WORKDIR /app
# Copy pom.xml first to leverage Docker layer caching for dependency downloads.
COPY pom.xml .
RUN mvn dependency:go-offline -q
# Copy source and build the fat JAR, skipping tests.
COPY src ./src
RUN mvn clean package -DskipTests -q
# Runtime stage: slim JRE-only image for a smaller production footprint.
FROM eclipse-temurin:17-jre-alpine
WORKDIR /app
COPY --from=builder /app/target/*.jar app.jar
EXPOSE 8080
ENTRYPOINT ["java", "-jar", "app.jar"]

96
docker-compose.yml Normal file
View File

@@ -0,0 +1,96 @@
version: "3.9"
services:
postgres:
image: postgres:16-alpine
environment:
POSTGRES_DB: label_db
POSTGRES_USER: label
POSTGRES_PASSWORD: label_password
ports:
- "5432:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
- ./sql/init.sql:/docker-entrypoint-initdb.d/init.sql
healthcheck:
test: ["CMD-SHELL", "pg_isready -U label -d label_db"]
interval: 10s
timeout: 5s
retries: 5
redis:
image: redis:7-alpine
command: redis-server --requirepass redis_password
ports:
- "6379:6379"
healthcheck:
test: ["CMD", "redis-cli", "-a", "redis_password", "ping"]
interval: 10s
timeout: 5s
retries: 5
# RustFS is an S3-compatible object storage service.
# Using MinIO as a drop-in S3 API substitute for development/testing.
# Replace with the actual RustFS image in production environments.
rustfs:
image: minio/minio:latest
command: server /data --console-address ":9001"
environment:
MINIO_ROOT_USER: minioadmin
MINIO_ROOT_PASSWORD: minioadmin
ports:
- "9000:9000"
- "9001:9001"
volumes:
- rustfs_data:/data
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
interval: 10s
timeout: 5s
retries: 5
backend:
build: .
ports:
- "8080:8080"
environment:
SPRING_DATASOURCE_URL: jdbc:postgresql://postgres:5432/label_db
SPRING_DATASOURCE_USERNAME: label
SPRING_DATASOURCE_PASSWORD: label_password
SPRING_DATA_REDIS_HOST: redis
SPRING_DATA_REDIS_PORT: 6379
SPRING_DATA_REDIS_PASSWORD: redis_password
RUSTFS_ENDPOINT: http://rustfs:9000
RUSTFS_ACCESS_KEY: minioadmin
RUSTFS_SECRET_KEY: minioadmin
AI_SERVICE_BASE_URL: http://ai-service:8000
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
rustfs:
condition: service_healthy
healthcheck:
test: ["CMD-SHELL", "wget -q --spider http://localhost:8080/actuator/health 2>/dev/null || exit 1"]
interval: 30s
timeout: 10s
retries: 3
start_period: 60s
# Placeholder AI service — replace with the actual FastAPI image in production.
ai-service:
image: python:3.11-slim
command: ["python3", "-m", "http.server", "8000"]
ports:
- "8000:8000"
# Placeholder frontend — replace with the actual Nginx + static build in production.
frontend:
image: nginx:alpine
ports:
- "80:80"
volumes:
postgres_data:
rustfs_data:

View File

@@ -208,7 +208,7 @@ CREATE TABLE source_data (
bucket_name VARCHAR(100) NOT NULL,
parent_source_id BIGINT REFERENCES source_data(id), -- 视频转文本时指向原视频
status VARCHAR(20) NOT NULL DEFAULT 'PENDING',
-- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED / REJECTED
-- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED(无 REJECTED 状态;QA 驳回作用于 annotation_task)
reject_reason TEXT,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
@@ -961,10 +961,12 @@ public void unclaim(Long taskId) {
| 方法 | 路径 | 最低权限 | 说明 |
|------|------|----------|------|
| POST | `/api/tasks` | ADMIN | 为指定 source 创建 EXTRACTION 任务 |
| GET | `/api/tasks/pool` | ANNOTATOR | 查看可领取任务列表(按角色过滤,分页) |
| POST | `/api/tasks/{id}/claim` | ANNOTATOR | 领取任务(争抢式) |
| GET | `/api/tasks/pool` | ANNOTATOR | 查看可领取任务(UNCLAIMED 状态)。ANNOTATOR 只看到 EXTRACTION 类型;REVIEWER 只看到 SUBMITTED 状态(即审批队列,与 pending-review 等价);两者均分页,不可无界查询 |
| POST | `/api/tasks/{id}/claim` | ANNOTATOR | 领取任务(争抢式:Redis SET NX + DB 乐观锁) |
| POST | `/api/tasks/{id}/unclaim` | ANNOTATOR | 放弃任务,退回任务池 |
| GET | `/api/tasks/mine` | ANNOTATOR | 查询我领取的任务列表(分页) |
| GET | `/api/tasks/mine` | ANNOTATOR | 查询我领取的任务列表(包含 IN_PROGRESS、SUBMITTED、REJECTED 状态,分页) |
| POST | `/api/tasks/{id}/reclaim` | ANNOTATOR | 重领被驳回的任务task.status 必须为 REJECTED 且 claimedBy = 当前用户),状态流转 REJECTED → IN_PROGRESS |
| GET | `/api/tasks/pending-review` | REVIEWER | 查看待我审批的任务列表(status = SUBMITTED,分页),REVIEWER 的专属审批入口 |
| GET | `/api/tasks/{id}` | ANNOTATOR | 查看任务详情 |
| GET | `/api/tasks` | ADMIN | 查询全部任务(支持过滤,分页) |
| PUT | `/api/tasks/{id}/reassign` | ADMIN | 强制转移任务归属 |
@@ -996,11 +998,16 @@ public void updateResult(Long taskId, String resultJsonStr) {
annotationResultMapper.updateResultJson(taskId, resultJsonStr, CompanyContext.get());
}
// 审批通过——级联触发,必须在同一事务内完成
// 审批通过——两阶段:事务内完成同步步骤,事务提交后异步触发 QA 生成
@Transactional
@OperationLog(type = "EXTRACTION_APPROVE")
public void approve(Long taskId) {
AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED");
// 自审校验:提交者不能审批自己的任务
if (task.getClaimedBy().equals(getCurrentUserId()))
throw new BusinessException("SELF_REVIEW_FORBIDDEN", "不允许审批自己提交的任务");
AnnotationResult result = annotationResultMapper.selectByTaskId(taskId);
// 1. annotation_result.is_final = true
@@ -1016,22 +1023,55 @@ public void approve(Long taskId) {
// 3. 写入任务历史
insertHistory(taskId, "SUBMITTED", "APPROVED", getCurrentUserId(), null);
// 4. 调用 AI 生成候选问答对
String promptKey = "IMAGE".equals(getSourceType(task)) ? "prompt_qa_gen_image" : "prompt_qa_gen_text";
// 4. 发布领域事件,事务提交后异步执行 QA 生成(步骤 5-7)
// 注AI HTTP 调用禁止在 @Transactional 内同步执行——会占用数据库连接直至 AI 响应,
// 且 AI 失败会错误地回滚已完成的审批。
// 使用 @TransactionalEventListener(phase = AFTER_COMMIT) 保证先提交再触发。
eventPublisher.publishEvent(new ExtractionApprovedEvent(taskId, task.getSourceId(),
getSourceType(task), CompanyContext.get()));
}
// 驳回——状态回退,标注员可重领
@Transactional
@OperationLog(type = "EXTRACTION_REJECT")
public void reject(Long taskId, String reason) {
AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED");
// 自审校验
if (task.getClaimedBy().equals(getCurrentUserId()))
throw new BusinessException("SELF_REVIEW_FORBIDDEN", "不允许驳回自己提交的任务");
StateValidator.assertTransition(TaskStatus.SUBMITTED, TaskStatus.REJECTED, TaskStatus.TRANSITIONS);
task.setStatus("REJECTED");
taskMapper.updateById(task);
insertHistory(taskId, "SUBMITTED", "REJECTED", getCurrentUserId(), reason);
// source_data.status 保持 EXTRACTING 不变,待标注员重新提交后再推进
}
// ExtractionApprovedEventListener(@TransactionalEventListener,独立事务)
// 负责 5-7 步:AI 调用 → 写 training_dataset → 创建 QA 任务 → 更新 source_data
@TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT)
@Transactional(propagation = Propagation.REQUIRES_NEW)
public void onExtractionApproved(ExtractionApprovedEvent event) {
AnnotationTask task = taskMapper.selectById(event.getTaskId());
AnnotationResult result = annotationResultMapper.selectByTaskId(event.getTaskId());
// 5. 调用 AI 生成候选问答对(在事务外执行,失败不影响审批结果)
String promptKey = "IMAGE".equals(event.getSourceType()) ? "prompt_qa_gen_image" : "prompt_qa_gen_text";
String promptTemplate = sysConfigService.get(promptKey);
QaGenResponse qaResponse = generateQa(task, result, promptTemplate);
// 5. 将候选问答对写入 training_dataset(PENDING_REVIEW)
// 6. 将候选问答对写入 training_dataset(PENDING_REVIEW)
List<TrainingDataset> samples = buildTrainingSamples(task, result, qaResponse);
trainingDatasetMapper.batchInsert(samples);
// 6. 创建 QA_GENERATION 阶段任务(UNCLAIMED)
// 7. 创建 QA_GENERATION 阶段任务(UNCLAIMED)
AnnotationTask qaTask = buildQaTask(task);
taskMapper.insert(qaTask);
insertHistory(qaTask.getId(), null, "UNCLAIMED", getCurrentUserId(), null);
insertHistory(qaTask.getId(), null, "UNCLAIMED", task.getClaimedBy(), null);
// 7. source_data.status → QA_REVIEW
sourceDataMapper.updateStatus(task.getSourceId(), "QA_REVIEW", CompanyContext.get());
// 8. source_data.status → QA_REVIEW
sourceDataMapper.updateStatus(event.getSourceId(), "QA_REVIEW", event.getCompanyId());
}
```
@@ -1057,21 +1097,48 @@ public void approve(Long taskId) {
@Transactional
@OperationLog(type = "QA_APPROVE")
public void approve(Long taskId) {
// 1. training_dataset.status → APPROVED
// 1. 先校验任务合法性(必须在任何 DB 写入之前执行,避免校验失败时数据已被修改)
AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED");
// 自审校验:提交者不能审批自己的任务
if (task.getClaimedBy().equals(getCurrentUserId()))
throw new BusinessException("SELF_REVIEW_FORBIDDEN", "不允许审批自己提交的任务");
// 2. training_dataset.status → APPROVED
trainingDatasetMapper.approveByTaskId(taskId, getCurrentUserId(), CompanyContext.get());
// 2. annotation_task.status → APPROVED
AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED");
// 3. annotation_task.status → APPROVED
StateValidator.assertTransition(TaskStatus.SUBMITTED, TaskStatus.APPROVED, TaskStatus.TRANSITIONS);
task.setStatus("APPROVED");
task.setCompletedAt(LocalDateTime.now());
taskMapper.updateById(task);
// 3. source_data.status → APPROVED整条流水线完成
// 4. source_data.status → APPROVED整条流水线完成
sourceDataMapper.updateStatus(task.getSourceId(), "APPROVED", CompanyContext.get());
// 4. 写入任务历史
// 5. 写入任务历史
insertHistory(taskId, "SUBMITTED", "APPROVED", getCurrentUserId(), null);
}
// 驳回问答对——任务退回 IN_PROGRESS,training_dataset 删除候选记录
@Transactional
@OperationLog(type = "QA_REJECT")
public void reject(Long taskId, String reason) {
AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED");
// 自审校验
if (task.getClaimedBy().equals(getCurrentUserId()))
throw new BusinessException("SELF_REVIEW_FORBIDDEN", "不允许驳回自己提交的任务");
// 删除本次生成的候选问答对PENDING_REVIEW 状态),待标注员修改后重新提交
trainingDatasetMapper.deleteByTaskId(taskId, CompanyContext.get());
StateValidator.assertTransition(TaskStatus.SUBMITTED, TaskStatus.REJECTED, TaskStatus.TRANSITIONS);
task.setStatus("REJECTED");
taskMapper.updateById(task);
insertHistory(taskId, "SUBMITTED", "REJECTED", getCurrentUserId(), reason);
// source_data.status 保持 QA_REVIEW 不变
}
```
**接口清单:**
@@ -1241,14 +1308,15 @@ public final class StateValidator {
```java
public enum SourceStatus {
PENDING, PREPROCESSING, EXTRACTING, QA_REVIEW, APPROVED, REJECTED;
PENDING, PREPROCESSING, EXTRACTING, QA_REVIEW, APPROVED;
// 注:source_data 无 REJECTED 状态。QA 阶段驳回的是 annotation_task(→ REJECTED),
// 不改变 source_data.status(保持 QA_REVIEW),重新提交后 source_data 随任务推进。
public static final Map<SourceStatus, Set<SourceStatus>> TRANSITIONS = Map.of(
PENDING, Set.of(EXTRACTING, PREPROCESSING),
PREPROCESSING, Set.of(PENDING),
EXTRACTING, Set.of(QA_REVIEW),
QA_REVIEW, Set.of(APPROVED, REJECTED),
REJECTED, Set.of(EXTRACTING) // 驳回后可重提
QA_REVIEW, Set.of(APPROVED)
);
}
```
@@ -1263,7 +1331,7 @@ public enum TaskStatus {
UNCLAIMED, Set.of(IN_PROGRESS),
IN_PROGRESS, Set.of(SUBMITTED, UNCLAIMED, IN_PROGRESS),
// IN_PROGRESS → IN_PROGRESS 用于 ADMIN 强制转移(持有人变更,状态不变)
SUBMITTED, Set.oAPPROVED, REJECTED),
SUBMITTED, Set.of(APPROVED, REJECTED),
REJECTED, Set.of(IN_PROGRESS) // 驳回后重拾
);
}

163
pom.xml Normal file
View File

@@ -0,0 +1,163 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.2.5</version>
<relativePath/>
</parent>
<groupId>com.label</groupId>
<artifactId>label-backend</artifactId>
<version>1.0.0-SNAPSHOT</version>
<packaging>jar</packaging>
<properties>
<java.version>17</java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencyManagement>
<dependencies>
<!-- AWS SDK v2 BOM -->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>bom</artifactId>
<version>2.26.31</version>
<type>pom</type>
<scope>import</scope>
</dependency>
<!-- Testcontainers BOM -->
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>testcontainers-bom</artifactId>
<version>1.20.1</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<!-- Spring Boot Web -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Spring Boot Actuator (health check endpoint) -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
<!-- Spring Boot Data Redis (Lettuce) -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
<!-- Spring Boot AOP -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-aop</artifactId>
</dependency>
<!-- PostgreSQL JDBC Driver -->
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<scope>runtime</scope>
</dependency>
<!-- MyBatis Plus -->
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-boot-starter</artifactId>
<version>3.5.9</version>
</dependency>
<!-- MyBatis Plus JSqlParser (required for TenantLineInnerInterceptor in 3.5.7+) -->
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-jsqlparser</artifactId>
<version>3.5.9</version>
</dependency>
<!-- Apache Shiro -->
<dependency>
<groupId>org.apache.shiro</groupId>
<artifactId>shiro-spring-boot-web-starter</artifactId>
<version>1.13.0</version>
</dependency>
<!-- AWS SDK v2 - S3 -->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>s3</artifactId>
</dependency>
<!-- AWS SDK v2 - STS -->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>sts</artifactId>
</dependency>
<!-- Spring Security Crypto (BCrypt only, no web filter chain) -->
<dependency>
<groupId>org.springframework.security</groupId>
<artifactId>spring-security-crypto</artifactId>
</dependency>
<!-- Lombok -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- Spring Boot Test -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- Testcontainers - PostgreSQL -->
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>postgresql</artifactId>
<scope>test</scope>
</dependency>
<!-- Testcontainers - JUnit Jupiter -->
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<excludes>
<exclude>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,34 @@
# 规格质量检查清单label_backend 知识图谱智能标注平台
**用途**: 在进入规划阶段前验证规格说明的完整性和质量
**创建日期**: 2026-04-09
**功能**: [查看规格说明](../spec.md)
## 内容质量
- [x] 无实现细节无编程语言、框架、API 引用)
- [x] 聚焦用户价值和业务需求
- [x] 面向非技术干系人编写
- [x] 所有必填章节均已完成
## 需求完整性
- [x] 无 [NEEDS CLARIFICATION] 标记残留
- [x] 需求可测试且无歧义
- [x] 成功标准可度量
- [x] 成功标准与技术无关(无实现细节)
- [x] 所有验收场景均已定义
- [x] 已识别边界情况
- [x] 范围边界清晰
- [x] 已识别依赖和假设
## 功能就绪性
- [x] 所有功能性需求均有明确验收标准
- [x] 用户场景覆盖主流程(认证、上传、标注、审批、导出)
- [x] 功能满足成功标准中定义的可度量结果
- [x] 无实现细节渗入规格说明
## 备注
所有检查项均通过。规格说明已就绪,可进行 `/speckit.plan` 规划阶段。

View File

@@ -0,0 +1,148 @@
# API 契约:认证与用户管理
**统一响应格式**:
- 成功:`{"code": "SUCCESS", "data": {...}}`
- 成功(无数据):`{"code": "SUCCESS", "data": null}`
- 失败:`{"code": "ERROR_CODE", "message": "描述"}`
- 分页成功:`{"code": "SUCCESS", "data": {"items": [...], "total": 100, "page": 1, "pageSize": 20}}`
---
## POST /api/auth/login
**权限**: 匿名
**描述**: 用户登录,返回会话凭证
**请求体**:
```json
{
"companyCode": "COMPANY_A",
"username": "zhangsan",
"password": "plaintext_password"
}
```
**成功响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"token": "550e8400-e29b-41d4-a716-446655440000",
"userId": 1,
"username": "zhangsan",
"role": "ANNOTATOR",
"expiresIn": 7200
}
}
```
**失败响应**:
- `401` `USER_NOT_FOUND`: 用户名或密码错误(不区分哪个错误,防止枚举)
- `403` `USER_DISABLED`: 账号已禁用
---
## POST /api/auth/logout
**权限**: 已登录Bearer Token
**描述**: 退出登录,立即删除 Redis 会话
**请求头**: `Authorization: Bearer {token}`
**响应** `200`: `{"code": "SUCCESS", "data": null}`
---
## GET /api/auth/me
**权限**: 已登录
**描述**: 获取当前用户信息
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"id": 1,
"username": "zhangsan",
"realName": "张三",
"role": "ANNOTATOR",
"companyId": 10,
"companyName": "测试公司"
}
}
```
---
## GET /api/users
**权限**: ADMIN
**描述**: 分页查询本公司用户列表
**查询参数**: `page`(默认 1)、`pageSize`(默认 20,最大 100)、`role`(可选过滤)、`status`(可选过滤)
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"items": [
{"id": 1, "username": "zhangsan", "realName": "张三", "role": "ANNOTATOR", "status": "ACTIVE"}
],
"total": 50,
"page": 1,
"pageSize": 20
}
}
```
---
## POST /api/users
**权限**: ADMIN
**描述**: 创建用户
**请求体**:
```json
{
"username": "lisi",
"password": "initial_password",
"realName": "李四",
"role": "ANNOTATOR"
}
```
**响应** `201`: `{"code": "SUCCESS", "data": {"id": 2, "username": "lisi", ...}}`
**失败**: `409` `USERNAME_EXISTS`: 用户名已存在
---
## PUT /api/users/{id}
**权限**: ADMIN
**描述**: 更新用户基本信息
**请求体**: `{"realName": "新姓名"}`
**响应** `200`: `{"code": "SUCCESS", "data": null}`
---
## PUT /api/users/{id}/status
**权限**: ADMIN
**描述**: 启用或禁用账号,立即驱逐权限缓存
**请求体**: `{"status": "DISABLED"}`
**响应** `200`: `{"code": "SUCCESS", "data": null}`
---
## PUT /api/users/{id}/role
**权限**: ADMIN
**描述**: 变更用户角色,立即驱逐权限缓存
**请求体**: `{"role": "REVIEWER"}`
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**: `400` `INVALID_ROLE`: 角色值不合法

View File

@@ -0,0 +1,53 @@
# API 契约:系统配置
*所有接口需要 ADMIN 权限*
---
## GET /api/config
**描述**: 获取所有配置项(公司级配置 + 全局默认配置合并,公司级优先)
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"items": [
{
"configKey": "prompt_extract_text",
"configValue": "请提取以下文本中的主语-谓语-宾语三元组...",
"description": "文本三元组提取 Prompt 模板",
"scope": "GLOBAL",
"updatedAt": "2026-04-09T00:00:00"
},
{
"configKey": "model_default",
"configValue": "glm-4-turbo",
"description": "默认 AI 辅助模型",
"scope": "COMPANY",
"updatedAt": "2026-04-09T09:00:00"
}
]
}
}
```
`scope` 字段:`GLOBAL`(来自全局默认)、`COMPANY`(来自公司级覆盖)
---
## PUT /api/config/{key}
**描述**: 更新单项配置(若公司级配置不存在则创建;若存在则覆盖)
**请求体**:
```json
{
"configValue": "glm-4-turbo",
"description": "升级到 GLM-4-Turbo 模型"
}
```
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**: `400` `UNKNOWN_CONFIG_KEY`: 未知的配置键(防止拼写错误创建无效配置)

View File

@@ -0,0 +1,113 @@
# API 契约:训练数据导出与微调
*所有接口需要 ADMIN 权限*
---
## GET /api/training/samples
**描述**: 分页查询已审批、可导出的训练样本
**查询参数**: `page`、`pageSize`、`sampleType`(TEXT / IMAGE / VIDEO_FRAME,可选)、`exported`(true/false,可选)
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"items": [
{
"id": 1001,
"sampleType": "TEXT",
"status": "APPROVED",
"exportBatchId": null,
"sourceId": 50,
"createdAt": "2026-04-09T12:00:00"
}
],
"total": 500,
"page": 1,
"pageSize": 20
}
}
```
---
## POST /api/export/batch
**描述**: 创建导出批次,合并选定样本为 JSONL 并上传 RustFS
**请求体**:
```json
{
"sampleIds": [1001, 1002, 1003]
}
```
**成功响应** `201`:
```json
{
"code": "SUCCESS",
"data": {
"id": 10,
"batchUuid": "550e8400-e29b-41d4-a716-446655440000",
"sampleCount": 3,
"datasetFilePath": "export/550e8400.jsonl",
"finetuneStatus": "NOT_STARTED"
}
}
```
**失败**:
- `400` `INVALID_SAMPLES`: 部分样本不处于 APPROVED 状态
- `400` `EMPTY_SAMPLES`: sampleIds 为空
---
## POST /api/export/{batchId}/finetune
**描述**: 向 GLM AI 服务提交微调任务
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"glmJobId": "glm-finetune-abc123",
"finetuneStatus": "RUNNING"
}
}
```
**失败**: `409` `FINETUNE_ALREADY_STARTED`: 微调任务已提交
---
## GET /api/export/{batchId}/status
**描述**: 查询微调任务状态(向 AI 服务实时查询)
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"batchId": 10,
"glmJobId": "glm-finetune-abc123",
"finetuneStatus": "RUNNING",
"progress": 45,
"errorMessage": null
}
}
```
---
## GET /api/export/list
**描述**: 分页查询所有导出批次
**查询参数**: `page``pageSize`
**响应** `200`: 批次列表(含 finetuneStatus、sampleCount、createdAt 等字段)

View File

@@ -0,0 +1,97 @@
# API 契约:提取阶段标注工作台
---
## GET /api/extraction/{taskId}
**权限**: ANNOTATOR且为任务持有者
**描述**: 获取当前提取结果(含 AI 预标注候选,供人工编辑)
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"taskId": 101,
"sourceType": "TEXT",
"sourceFilePath": "text/202604/50.txt",
"isFinal": false,
"resultJson": {
"items": [
{
"subject": "北京",
"predicate": "是...首都",
"object": "中国",
"sourceText": "北京是中国的首都",
"startOffset": 0,
"endOffset": 8
}
]
}
}
}
```
---
## PUT /api/extraction/{taskId}
**权限**: ANNOTATOR且为任务持有者
**描述**: 更新提取结果(**整体 JSONB 覆盖PUT 语义,禁止局部 PATCH**
**请求体**:
```json
{
"items": [
{
"subject": "北京",
"predicate": "是...首都",
"object": "中国",
"sourceText": "北京是中国的首都",
"startOffset": 0,
"endOffset": 8
}
]
}
```
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**: `400` `INVALID_JSON`: 提交的 JSON 格式不合法
---
## POST /api/extraction/{taskId}/submit
**权限**: ANNOTATOR且为任务持有者
**描述**: 提交提取结果,任务状态 IN_PROGRESS → SUBMITTED进入审批队列
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**: `409` `INVALID_STATE`: 任务当前状态不允许提交
---
## POST /api/extraction/{taskId}/approve
**权限**: REVIEWER
**描述**: 审批通过。**两阶段操作**
1. 同步(同一事务):`annotation_result.is_final = true`,任务状态 SUBMITTED → APPROVED写任务历史
2. 异步事务提交后AI 生成候选问答对 → 写 training_dataset → 创建 QA_GENERATION 任务 → source_data 状态推进
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**:
- `403` `SELF_REVIEW_FORBIDDEN`: 不允许审批自己提交的任务
- `409` `INVALID_STATE`: 任务状态不为 SUBMITTED
---
## POST /api/extraction/{taskId}/reject
**权限**: REVIEWER
**描述**: 驳回提取结果,任务状态 SUBMITTED → REJECTED标注员可重领
**请求体**: `{"reason": "三元组边界不准确,请重新标注"}`
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**:
- `403` `SELF_REVIEW_FORBIDDEN`: 不允许驳回自己提交的任务
- `409` `INVALID_STATE`: 任务状态不为 SUBMITTED
- `400` `REASON_REQUIRED`: 驳回原因不能为空

View File

@@ -0,0 +1,83 @@
# API 契约:问答生成阶段
---
## GET /api/qa/{taskId}
**权限**: ANNOTATOR且为任务持有者
**描述**: 获取候选问答对列表(由提取阶段审批触发 AI 生成)
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"taskId": 202,
"sourceType": "TEXT",
"items": [
{
"id": 1001,
"question": "北京是哪个国家的首都?",
"answer": "中国",
"status": "PENDING_REVIEW"
}
]
}
}
```
---
## PUT /api/qa/{taskId}
**权限**: ANNOTATOR且为任务持有者
**描述**: 修改问答对(**整体覆盖PUT 语义**,每次提交包含完整 items 数组)
**请求体**:
```json
{
"items": [
{
"question": "北京是哪个国家的首都?",
"answer": "中国。北京自1949年起成为中华人民共和国的首都。"
}
]
}
```
**响应** `200`: `{"code": "SUCCESS", "data": null}`
---
## POST /api/qa/{taskId}/submit
**权限**: ANNOTATOR且为任务持有者
**描述**: 提交问答对,任务状态 IN_PROGRESS → SUBMITTED
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**: `409` `INVALID_STATE`: 任务当前状态不允许提交
---
## POST /api/qa/{taskId}/approve
**权限**: REVIEWER
**描述**: 审批通过。同一事务中:先校验任务 → training_dataset 状态 → 任务状态 SUBMITTED → APPROVED → source_data 状态 → 写任务历史
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**:
- `403` `SELF_REVIEW_FORBIDDEN`: 不允许审批自己提交的任务
- `409` `INVALID_STATE`: 任务状态不为 SUBMITTED
---
## POST /api/qa/{taskId}/reject
**权限**: REVIEWER
**描述**: 驳回问答对,删除候选记录,任务状态 SUBMITTED → REJECTED
**请求体**: `{"reason": "问题描述不准确,请修改"}`
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**:
- `403` `SELF_REVIEW_FORBIDDEN`: 不允许驳回自己提交的任务
- `400` `REASON_REQUIRED`: 驳回原因不能为空

View File

@@ -0,0 +1,96 @@
# API 契约:资料管理
---
## POST /api/source/upload
**权限**: UPLOADER
**描述**: 上传文件,创建 source_data 记录,文件字节流写入 RustFS
**请求**: `multipart/form-data`,字段:`file`(必填)、`dataType`(TEXT / IMAGE / VIDEO)
**响应** `201`:
```json
{
"code": "SUCCESS",
"data": {
"id": 50,
"fileName": "document.txt",
"dataType": "TEXT",
"fileSize": 204800,
"status": "PENDING",
"createdAt": "2026-04-09T10:00:00"
}
}
```
**失败**:
- `400` `INVALID_TYPE`: 不支持的资料类型
- `400` `FILE_EMPTY`: 文件为空
---
## GET /api/source/list
**权限**: UPLOADER
**描述**: 分页查询资料列表。UPLOADER 只见自己上传的资料ADMIN 见本公司全部资料
**查询参数**: `page`(默认 1)、`pageSize`(默认 20)、`dataType`(可选)、`status`(可选)
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"items": [
{
"id": 50,
"fileName": "document.txt",
"dataType": "TEXT",
"status": "PENDING",
"uploaderId": 1,
"createdAt": "2026-04-09T10:00:00"
}
],
"total": 120,
"page": 1,
"pageSize": 20
}
}
```
---
## GET /api/source/{id}
**权限**: UPLOADER
**描述**: 查看资料详情,含 RustFS 预签名临时下载链接(有效期 15 分钟)
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"id": 50,
"dataType": "TEXT",
"fileName": "document.txt",
"fileSize": 204800,
"status": "EXTRACTING",
"presignedUrl": "https://rustfs.example.com/...",
"parentSourceId": null,
"createdAt": "2026-04-09T10:00:00"
}
}
```
---
## DELETE /api/source/{id}
**权限**: ADMIN
**描述**: 删除资料(同时删除 RustFS 文件及元数据)
**前置条件**: 资料状态为 PENDING(不允许删除已进入流水线的资料)
**响应** `204`: 无响应体
**失败**: `409` `SOURCE_IN_PIPELINE`: 资料已进入标注流程,不可删除

View File

@@ -0,0 +1,150 @@
# API 契约:任务管理
---
## GET /api/tasks/pool
**权限**: ANNOTATOR
**描述**: 查看可领取任务池。角色过滤规则:
- ANNOTATOR:仅返回 EXTRACTION 阶段、status=UNCLAIMED 的任务
- REVIEWER/ADMIN:仅返回 SUBMITTED 状态(待审批队列)的任务
**查询参数**: `page`(默认 1)、`pageSize`(默认 20)
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"items": [
{
"id": 101,
"sourceId": 50,
"sourceType": "TEXT",
"phase": "EXTRACTION",
"status": "UNCLAIMED",
"createdAt": "2026-04-09T10:00:00"
}
],
"total": 30,
"page": 1,
"pageSize": 20
}
}
```
---
## GET /api/tasks/pending-review
**权限**: REVIEWER
**描述**: REVIEWER 专属审批入口,查看 status=SUBMITTED 的任务列表
**查询参数**: `page`、`pageSize`、`phase`(可选:EXTRACTION / QA_GENERATION)
**响应**: 同 `/api/tasks/pool` 结构
---
## POST /api/tasks/{id}/claim
**权限**: ANNOTATOR
**描述**: 领取任务(双重并发保障:Redis SET NX + DB 乐观约束)
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**:
- `409` `TASK_CLAIMED`: 任务已被他人领取
- `404` `TASK_NOT_FOUND`: 任务不存在
---
## POST /api/tasks/{id}/unclaim
**权限**: ANNOTATOR且为任务持有者
**描述**: 放弃任务,退回任务池(status: IN_PROGRESS → UNCLAIMED)
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**: `403` `NOT_TASK_OWNER`: 非任务持有者
---
## GET /api/tasks/mine
**权限**: ANNOTATOR
**描述**: 查询当前用户领取的任务(含 IN_PROGRESS、SUBMITTED、REJECTED 三种状态)
**查询参数**: `page``pageSize``status`(可选过滤)
**响应**: 同任务列表结构,含 `rejectReason` 字段(REJECTED 状态时非空)
---
## POST /api/tasks/{id}/reclaim
**权限**: ANNOTATOR
**描述**: 重领被驳回的任务(status 必须为 REJECTED 且 claimedBy = 当前用户),流转 REJECTED → IN_PROGRESS
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**:
- `403` `NOT_TASK_OWNER`: 非原持有者
- `409` `INVALID_STATE`: 任务状态不为 REJECTED
---
## GET /api/tasks/{id}
**权限**: ANNOTATOR
**描述**: 查看任务详情(含驳回原因、历史记录摘要)
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"id": 101,
"sourceId": 50,
"phase": "EXTRACTION",
"status": "IN_PROGRESS",
"claimedBy": 1,
"claimedAt": "2026-04-09T10:05:00",
"rejectReason": null,
"historyCount": 2
}
}
```
---
## GET /api/tasks
**权限**: ADMIN
**描述**: 查询全部任务(支持过滤,分页)
**查询参数**: `page``pageSize``phase``status``claimedBy``sourceId`
---
## PUT /api/tasks/{id}/reassign
**权限**: ADMIN
**描述**: 强制转移任务归属(status 保持 IN_PROGRESS,仅 claimedBy 变更)
**请求体**: `{"newOwnerId": 5, "reason": "原持有者长期未操作"}`
**响应** `200`: `{"code": "SUCCESS", "data": null}`
---
## POST /api/tasks
**权限**: ADMIN
**描述**: 为指定资料创建 EXTRACTION 任务
**请求体**:
```json
{
"sourceId": 50,
"taskType": "AI_ASSISTED",
"aiModel": "glm-4"
}
```
**响应** `201`: `{"code": "SUCCESS", "data": {"id": 101, ...}}`

View File

@@ -0,0 +1,87 @@
# API 契约:视频处理
---
## POST /api/video/process
**权限**: ADMIN
**描述**: 为已上传的视频资料创建异步处理任务
**请求体**:
```json
{
"sourceId": 50,
"jobType": "FRAME_EXTRACT",
"params": {
"frameInterval": 30,
"mode": "FRAME"
}
}
```
jobType 可选值:`FRAME_EXTRACT`(帧提取)、`VIDEO_TO_TEXT`(片段转文字)
**响应** `201`:
```json
{
"code": "SUCCESS",
"data": {
"jobId": 200,
"sourceId": 50,
"jobType": "FRAME_EXTRACT",
"status": "PENDING"
}
}
```
---
## GET /api/video/jobs/{jobId}
**权限**: ADMIN
**描述**: 查询视频处理任务状态
**响应** `200`:
```json
{
"code": "SUCCESS",
"data": {
"id": 200,
"status": "RUNNING",
"processedUnits": 15,
"totalUnits": 50,
"retryCount": 0,
"errorMessage": null,
"startedAt": "2026-04-09T10:05:00"
}
}
```
---
## POST /api/video/jobs/{jobId}/reset
**权限**: ADMIN
**描述**: 手动重置 FAILED 状态的任务为 PENDING,允许重新触发(FAILED → PENDING 不在自动状态机中)
**响应** `200`: `{"code": "SUCCESS", "data": null}`
**失败**: `409` `INVALID_STATE`: 任务状态不为 FAILED
---
## POST /api/video/callback(内部接口)
**权限**: AI 服务内部调用IP 白名单 / 服务密钥)
**描述**: AI 服务回调,通知视频处理结果(幂等:重复成功回调静默忽略)
**请求体**:
```json
{
"jobId": 200,
"success": true,
"outputPath": "frames/50/",
"errorMessage": null
}
```
**响应** `200`: `{"code": "SUCCESS", "data": null}`

View File

@@ -0,0 +1,355 @@
# 数据模型label_backend
**日期**: 2026-04-09
**分支**: `001-label-backend-spec`
---
## 实体关系概览
```
sys_company ─┬─ sys_user (company_id FK)
├─ source_data (company_id FK)
│ └─ source_data (parent_source_id 自引用,视频溯源链)
├─ annotation_task (company_id FK)
│ ├─ annotation_result (task_id FK)
│ └─ annotation_task_history (task_id FK)
├─ training_dataset (company_id FK)
├─ export_batch (company_id FK)
             ├─ sys_config (company_id FK,可为 NULL 表示全局默认)
├─ sys_operation_log (company_id FK)
└─ video_process_job (company_id FK)
```
**多租户规则**:除 `sys_company` 本身外,所有业务表均包含 `company_id NOT NULL`。查询时由 `TenantLineInnerInterceptor` 自动注入 `WHERE company_id = ?`。唯一例外:`sys_config` 允许 `company_id = NULL` 表示全局默认配置。
---
## 实体详情
### 1. sys_company — 公司(租户)
| 字段 | 类型 | 约束 | 说明 |
|------|------|------|------|
| id | BIGSERIAL | PK | 自增主键 |
| company_name | VARCHAR(100) | NOT NULL UNIQUE | 公司名称 |
| company_code | VARCHAR(50) | NOT NULL UNIQUE | 公司编码 |
| status | VARCHAR(10) | NOT NULL DEFAULT 'ACTIVE' | ACTIVE / DISABLED |
| created_at | TIMESTAMP | NOT NULL DEFAULT NOW() | |
| updated_at | TIMESTAMP | NOT NULL DEFAULT NOW() | |
**状态**: 无状态机(仅 ACTIVE/DISABLED 标志)
---
### 2. sys_user — 用户
| 字段 | 类型 | 约束 | 说明 |
|------|------|------|------|
| id | BIGSERIAL | PK | |
| company_id | BIGINT | NOT NULL FK→sys_company | 租户隔离键 |
| username | VARCHAR(50) | NOT NULL | 同公司内唯一 |
| password_hash | VARCHAR(255) | NOT NULL | BCrypt 强度≥10禁止序列化到响应 |
| real_name | VARCHAR(50) | — | |
| role | VARCHAR(20) | NOT NULL | UPLOADER / ANNOTATOR / REVIEWER / ADMIN |
| status | VARCHAR(10) | NOT NULL DEFAULT 'ACTIVE' | ACTIVE / DISABLED |
| created_at / updated_at | TIMESTAMP | NOT NULL | |
**约束**: `UNIQUE(company_id, username)`
**索引**: `(company_id)`
**角色继承**: ADMIN ⊃ REVIEWER ⊃ ANNOTATOR ⊃ UPLOADER由 Shiro Realm 的 addInheritedRoles() 实现)
---
### 3. source_data — 原始资料
| 字段 | 类型 | 约束 | 说明 |
|------|------|------|------|
| id | BIGSERIAL | PK | |
| company_id | BIGINT | NOT NULL FK→sys_company | |
| uploader_id | BIGINT | FK→sys_user | |
| data_type | VARCHAR(20) | NOT NULL | TEXT / IMAGE / VIDEO |
| file_path | VARCHAR(500) | NOT NULL | RustFS 对象路径 |
| file_name | VARCHAR(255) | NOT NULL | 原始文件名 |
| file_size | BIGINT | — | 字节数 |
| bucket_name | VARCHAR(100) | NOT NULL | RustFS 桶名 |
| parent_source_id | BIGINT | FK→source_data | 视频片段转文本时指向原视频 |
| status | VARCHAR(20) | NOT NULL DEFAULT 'PENDING' | 见状态机 |
| reject_reason | TEXT | — | 保留字段(当前无 REJECTED 状态) |
| created_at / updated_at | TIMESTAMP | NOT NULL | |
**索引**: `(company_id)``(company_id, status)``(parent_source_id)`
**状态机**:
```
PENDING → EXTRACTING直接上传的文本/图片)
PENDING → PREPROCESSING视频上传后
PREPROCESSING → PENDING视频预处理完成后进入标注流程
EXTRACTING → QA_REVIEW提取任务审批通过后
QA_REVIEW → APPROVEDQA 任务审批通过后,整条流水线完成)
```
*注:source_data 无 REJECTED 状态。QA 阶段驳回作用于 annotation_task(→REJECTED),source_data 保持 QA_REVIEW 不变。*
---
### 4. annotation_task — 标注任务
| 字段 | 类型 | 约束 | 说明 |
|------|------|------|------|
| id | BIGSERIAL | PK | |
| company_id | BIGINT | NOT NULL FK→sys_company | |
| source_id | BIGINT | NOT NULL FK→source_data | |
| phase | VARCHAR(20) | NOT NULL | EXTRACTION / QA_GENERATION |
| task_type | VARCHAR(20) | NOT NULL | AI_ASSISTED / MANUAL |
| ai_model | VARCHAR(50) | — | 使用的 AI 模型 |
| video_unit_type | VARCHAR(20) | — | FRAME视频帧模式/ NULL |
| video_unit_info | JSONB | — | `{frame_index, time_sec, frame_path}` |
| claimed_by | BIGINT | FK→sys_user | 当前持有者 |
| claimed_at | TIMESTAMP | — | |
| status | VARCHAR(20) | NOT NULL DEFAULT 'UNCLAIMED' | 见状态机 |
| reject_reason | TEXT | — | 驳回原因 |
| submitted_at | TIMESTAMP | — | |
| completed_at | TIMESTAMP | — | |
| created_at / updated_at | TIMESTAMP | NOT NULL | |
**索引**: `(company_id)``(company_id, phase, status)`(任务池查询)、`(claimed_by, status)`(我的任务)
**状态机**:
```
UNCLAIMED → IN_PROGRESS领取
IN_PROGRESS → SUBMITTED提交
IN_PROGRESS → UNCLAIMED放弃
IN_PROGRESS → IN_PROGRESSADMIN 强制转移,持有人变更,状态不变)
SUBMITTED → APPROVED审批通过
SUBMITTED → REJECTED审批驳回
REJECTED → IN_PROGRESS标注员重领
```
**并发控制**: 领取时双重保障:① Redis `SET NX task:claim:{taskId}` TTL 30s② DB `UPDATE ... WHERE status='UNCLAIMED'` 影响行数为 0 时返回错误
---
### 5. annotation_result — 标注结果(提取阶段)
| 字段 | 类型 | 约束 | 说明 |
|------|------|------|------|
| id | BIGSERIAL | PK | |
| company_id | BIGINT | NOT NULL FK→sys_company | |
| task_id | BIGINT | NOT NULL FK→annotation_task | |
| result_json | JSONB | NOT NULL | 整体覆盖,禁止局部 PATCH |
| is_final | BOOLEAN | NOT NULL DEFAULT FALSE | 审批通过后置 TRUE |
| submitted_by | BIGINT | FK→sys_user | |
| created_at / updated_at | TIMESTAMP | NOT NULL | |
**result_json 结构**(文本三元组示例):
```json
{
"items": [
{
"subject": "北京",
"predicate": "是...首都",
"object": "中国",
"source_text": "北京是中国的首都",
"start_offset": 0,
"end_offset": 8
}
]
}
```
**result_json 结构**(图片四元组示例):
```json
{
"items": [
{
"subject": "猫",
"relation": "坐在",
"object": "椅子",
"modifier": "白色的",
"bbox": [100, 200, 300, 400],
"crop_path": "crops/123/0.jpg"
}
]
}
```
**索引**: `(task_id)``(company_id, is_final)`
---
### 6. training_dataset — 训练样本
| 字段 | 类型 | 约束 | 说明 |
|------|------|------|------|
| id | BIGSERIAL | PK | |
| company_id | BIGINT | NOT NULL FK→sys_company | |
| task_id | BIGINT | NOT NULL FK→annotation_task | |
| source_id | BIGINT | NOT NULL FK→source_data | |
| extraction_result_id | BIGINT | NOT NULL FK→annotation_result | |
| sample_type | VARCHAR(20) | NOT NULL | TEXT / IMAGE / VIDEO_FRAME |
| glm_format_json | JSONB | NOT NULL | GLM 微调格式 |
| export_batch_id | VARCHAR(50) | — | NULL 表示未导出 |
| status | VARCHAR(20) | NOT NULL DEFAULT 'PENDING_REVIEW' | 见状态机 |
| reject_reason | TEXT | — | |
| reviewed_by | BIGINT | FK→sys_user | |
| exported_at | TIMESTAMP | — | |
| created_at / updated_at | TIMESTAMP | NOT NULL | |
**状态机**:
```
PENDING_REVIEW → APPROVEDQA 审批通过)
PENDING_REVIEW → REJECTEDQA 审批驳回)
REJECTED → PENDING_REVIEW标注员修改后重提
```
**glm_format_json 结构**:
```json
{
"conversations": [
{"role": "user", "content": "..."},
{"role": "assistant", "content": "..."}
],
"source_type": "TEXT"
}
```
**索引**: `(company_id)``(company_id, status)``(export_batch_id)`
---
### 7. export_batch — 导出批次
| 字段 | 类型 | 约束 | 说明 |
|------|------|------|------|
| id | BIGSERIAL | PK | |
| company_id | BIGINT | NOT NULL FK→sys_company | |
| batch_uuid | VARCHAR(50) | NOT NULL UNIQUE | 批次标识符 |
| dataset_file_path | VARCHAR(500) | — | RustFS JSONL 路径 |
| sample_count | INT | NOT NULL DEFAULT 0 | |
| glm_job_id | VARCHAR(100) | — | 微调任务 ID |
| finetune_status | VARCHAR(20) | NOT NULL DEFAULT 'NOT_STARTED' | 见状态 |
| error_message | TEXT | — | |
| created_by | BIGINT | FK→sys_user | |
| created_at / updated_at | TIMESTAMP | NOT NULL | |
**finetune_status 值**: NOT_STARTED / RUNNING / SUCCESS / FAILED
**索引**: `(company_id)`
---
### 8. sys_config — 系统配置
| 字段 | 类型 | 约束 | 说明 |
|------|------|------|------|
| id | BIGSERIAL | PK | |
| company_id | BIGINT | FK→sys_company(可 NULL) | NULL = 全局默认配置 |
| config_key | VARCHAR(100) | NOT NULL | |
| config_value | TEXT | NOT NULL | |
| description | TEXT | — | |
| updated_by | BIGINT | FK→sys_user | |
| updated_at | TIMESTAMP | NOT NULL | |
**约束**: `UNIQUE(company_id, config_key)`
**查询规则**: 先按 `(companyId, configKey)` 查;未命中则按 `(NULL, configKey)` 查全局默认。
**预置全局配置键**:
- `prompt_extract_text``prompt_extract_image``prompt_video_to_text`
- `prompt_qa_gen_text``prompt_qa_gen_image`
- `model_default`(默认:`glm-4`
- `video_frame_interval`(默认:`30`
- `token_ttl_seconds`(默认:`7200`
- `glm_api_base_url`
---
### 9. sys_operation_log — 操作审计日志
| 字段 | 类型 | 约束 | 说明 |
|------|------|------|------|
| id | BIGSERIAL | PK | |
| company_id | BIGINT | FK→sys_company | |
| operator_id | BIGINT | FK→sys_user | 登录失败时可为 NULL |
| operator_name | VARCHAR(50) | NOT NULL | **操作时用户名快照**(不随改名变化) |
| operation_type | VARCHAR(50) | NOT NULL | 见枚举列表 |
| target_type | VARCHAR(30) | — | |
| target_id | BIGINT | — | |
| detail | JSONB | — | 补充信息 |
| ip_address | VARCHAR(50) | — | |
| result | VARCHAR(10) | NOT NULL | SUCCESS / FAIL |
| error_message | TEXT | — | |
| created_at | TIMESTAMP | NOT NULL DEFAULT NOW() | 分区键 |
**只追加**:应用层禁止 UPDATE/DELETE,建议 DB 层添加触发器强制执行
**分区**:按 `created_at` Range 分区,以月为单位(`sys_operation_log_YYYY_MM`)
**operation_type 枚举**:
`USER_LOGIN`、`USER_LOGOUT`、`USER_CREATE`、`USER_UPDATE`、`USER_DISABLE`、`USER_ROLE_CHANGE`、`SOURCE_UPLOAD`、`SOURCE_DELETE`、`TASK_CREATE`、`TASK_CLAIM`、`TASK_UNCLAIM`、`TASK_SUBMIT`、`EXTRACTION_APPROVE`、`EXTRACTION_REJECT`、`QA_APPROVE`、`QA_REJECT`、`TASK_REASSIGN`、`EXPORT_CREATE`、`FINETUNE_START`、`CONFIG_UPDATE`、`VIDEO_JOB_RESET`
---
### 10. annotation_task_history — 任务流转历史
| 字段 | 类型 | 约束 | 说明 |
|------|------|------|------|
| id | BIGSERIAL | PK | |
| company_id | BIGINT | NOT NULL FK→sys_company | |
| task_id | BIGINT | NOT NULL FK→annotation_task | |
| from_status | VARCHAR(20) | — | 任务初建时为 NULL |
| to_status | VARCHAR(20) | NOT NULL | |
| operator_id | BIGINT | NOT NULL FK→sys_user | |
| operator_role | VARCHAR(20) | NOT NULL | **操作时角色快照** |
| note | TEXT | — | 驳回原因、转移说明等 |
| created_at | TIMESTAMP | NOT NULL | |
**只追加**:每次 annotation_task.status 变更时同步插入,与业务操作在同一事务中
**索引**: `(task_id)`
---
### 11. video_process_job — 视频异步处理任务
| 字段 | 类型 | 约束 | 说明 |
|------|------|------|------|
| id | BIGSERIAL | PK | |
| company_id | BIGINT | NOT NULL FK→sys_company | |
| source_id | BIGINT | NOT NULL FK→source_data | |
| job_type | VARCHAR(20) | NOT NULL | FRAME_EXTRACT / VIDEO_TO_TEXT |
| status | VARCHAR(20) | NOT NULL DEFAULT 'PENDING' | 见状态机 |
| params | JSONB | NOT NULL | 处理参数 |
| total_units | INT | — | 总帧数/片段数 |
| processed_units | INT | NOT NULL DEFAULT 0 | |
| output_path | VARCHAR(500) | — | |
| retry_count | INT | NOT NULL DEFAULT 0 | |
| max_retries | INT | NOT NULL DEFAULT 3 | |
| error_message | TEXT | — | |
| started_at / completed_at | TIMESTAMP | — | |
| created_at / updated_at | TIMESTAMP | NOT NULL | |
**状态机**:
```
PENDING → RUNNING
RUNNING → SUCCESS处理成功
RUNNING → RETRYING失败且 retry_count < max_retries
RUNNING → FAILED失败且 retry_count >= max_retries
RETRYING → RUNNINGAI 服务自动重试)
RETRYING → FAILED超过最大重试次数
```
*FAILED → PENDING:由 ADMIN 手动触发接口,不在状态机自动流转中*
**幂等规则**: 回调时若 `status == SUCCESS` 则静默忽略,不执行任何 DB 写入
**索引**: `(source_id)``(status)`
---
## Redis 数据结构
| Key 模式 | 类型 | TTL | 内容 |
|---------|------|-----|------|
| `token:{uuid}` | Hash | 2h(滑动) | `{userId, role, companyId, username}` |
| `user:perm:{userId}` | String | 5min | 用户角色字符串 |
| `task:claim:{taskId}` | String | 30s | 持有者 userId |
*禁止在上述三类命名空间之外自造 Key 用于认证、权限或锁目的。*

View File

@@ -0,0 +1,137 @@
# 实施计划label_backend 知识图谱智能标注平台
**分支**: `001-label-backend-spec` | **日期**: 2026-04-09 | **规格说明**: [spec.md](spec.md)
**输入**: 功能规格说明 `/specs/001-label-backend-spec/spec.md`
---
## 摘要
构建面向多租户的知识图谱智能标注平台后端服务,驱动**文本线**(三元组提取 → 问答对生成 → 训练样本)和**图片线**(四元组提取 → 问答对生成 → 训练样本)两条流水线。视频作为预处理入口异步汇入两条流水线。系统基于 Spring Boot 3 + Apache Shiro + MyBatis Plus + PostgreSQL + Redis + RustFS 构建,通过 HTTP 调用 Python FastAPI AI 服务完成 AI 辅助标注和问答生成能力。
---
## 技术上下文
**语言/版本**: Java 17LTS
**主要依赖**: Spring Boot ≥ 3.0.x、Apache Shiro ≥ 1.13.x、MyBatis Plus ≥ 3.5.x、Spring Data Redis
**存储**: PostgreSQL ≥ 14主库、Redis ≥ 6.x会话/权限缓存/分布式锁、RustFSS3 兼容对象存储)
**测试**: JUnit 5 + Testcontainers真实 PostgreSQL + Redis 实例、Spring Boot Test
**目标平台**: Linux 服务器Docker Compose 容器化部署
**项目类型**: Web ServiceREST API
**性能目标**: 任务领取并发下有且仅有一人成功;权限变更延迟 < 1 秒生效
**约束**: 禁止 JWT;禁止 Spring Security;禁止文件字节流存入数据库;AI HTTP 调用禁止在 @Transactional 内同步执行;所有列表接口强制分页
**规模**: 多租户多公司每公司独立数据空间11 张核心业务表
---
## 宪章合规检查
*门控Phase 0 研究前必须通过。Phase 1 设计后重检。*
| # | 宪章原则 | 状态 | 说明 |
|---|---------|------|------|
| 1 | 环境约束(JDK 17、SB 3、Shiro、MyBatis Plus) | ✅ 通过 | pom.xml 中版本约束与宪章完全对齐,无 Spring Security 引入 |
| 2 | 多租户数据隔离(company_id + ThreadLocal) | ✅ 通过 | TenantLineInnerInterceptor 自动注入,CompanyContext finally 块清理 |
| 3 | BCrypt 密码 + UUID Token + 禁止 JWT | ✅ 通过 | AuthService 使用 BCrypt 强度 10,UUID v4 Token 存 Redis,无 JWT |
| 4 | 分级 RBAC + 权限注解 + 角色变更驱逐缓存 | ✅ 通过 | @RequiresRoles 声明权限,updateRole() 立即删 user:perm:{userId} |
| 5 | 双流水线 + 级联触发 + parent_source_id 溯源 | ✅ 通过 | 仅文本线/图片线审批通过后用 @TransactionalEventListener 触发 QA |
| 6 | 状态机完整性(StateValidator) | ✅ 通过 | 所有状态变更经 StateValidator.assertTransition(),禁止绕过 Mapper 直写 |
| 7 | 任务争抢双重保障(Redis SET NX + DB 乐观锁) | ✅ 通过 | task:claim:{taskId} TTL 30s + WHERE status='UNCLAIMED' |
| 8 | 异步视频处理幂等 + 重试上限 + FAILED 手动重置 | ✅ 通过 | SUCCESS 回调静默忽略,retry_count 达 max_retries 置 FAILED |
| 9 | 只追加审计日志 + AOP 切面 + 审计失败不回滚业务 | ✅ 通过 | @OperationLog AOP,sys_operation_log 禁 UPDATE/DELETE,异常仅 error 日志 |
| 10 | RESTful URL + 统一响应格式 + 强制分页 | ✅ 通过 | Result<T> 包装,无动词路径,PageResult<T> 分页 |
| 11 | YAGNI业务在 ServiceController 只处理 HTTP | ✅ 通过 | 分层明确;无预测性抽象层 |
**门控结果:全部通过,可进入 Phase 0。**
---
## 项目结构
### 规格说明文档(本功能)
```text
specs/001-label-backend-spec/
├── plan.md # 本文件(/speckit.plan 输出)
├── research.md # Phase 0 输出
├── data-model.md # Phase 1 输出
├── quickstart.md # Phase 1 输出
├── contracts/ # Phase 1 输出REST API 契约)
│ ├── auth.md
│ ├── source.md
│ ├── tasks.md
│ ├── extraction.md
│ ├── qa.md
│ ├── export.md
│ ├── config.md
│ └── video.md
└── tasks.md # Phase 2 输出(/speckit.tasks 命令创建,非本命令)
```
### 源代码(仓库根目录)
```text
src/
└── main/
└── java/com/label/
├── LabelBackendApplication.java
├── common/
│ ├── result/ # Result<T>、ResultCode、PageResult<T>
│ ├── exception/ # BusinessException、GlobalExceptionHandler
│ ├── context/ # CompanyContextThreadLocal
│ ├── shiro/ # TokenFilter、UserRealm、ShiroConfig
│ ├── redis/ # RedisKeyManager、RedisService
│ ├── aop/ # AuditAspect、@OperationLog 注解
│ ├── storage/ # RustFsClientS3 兼容封装)
│ ├── ai/ # AiServiceClientRestClient 封装 8 个端点)
│ └── statemachine/ # StateValidator、各状态枚举
└── module/
├── user/ # AuthController、UserController、AuthService、UserService
├── source/ # SourceController、SourceService
├── task/ # TaskController、TaskService、TaskClaimService
├── annotation/ # ExtractionController、QaController、ExtractionService、QaService
├── export/ # ExportController、ExportService、FinetuneService
├── config/ # SysConfigController、SysConfigService
└── video/ # VideoController、VideoProcessService
src/
└── test/
└── java/com/label/
├── integration/ # Testcontainers真实 PG + Redis集成测试
│ ├── AuthIntegrationTest.java
│ ├── TaskClaimConcurrencyTest.java
│ ├── VideoCallbackIdempotencyTest.java
│ ├── MultiTenantIsolationTest.java
│ └── ShiroFilterIntegrationTest.java
└── unit/ # 纯单元测试(状态机、业务逻辑)
└── StateMachineTest.java
sql/
└── init.sql # 全部 DDL11 张表,按依赖顺序执行)
docker-compose.yml # postgres、redis、rustfs、backend、ai-service、frontend
Dockerfile # eclipse-temurin:17-jre-alpine
pom.xml
```
**结构决策**单一后端服务Web Service无前端代码。标准 Maven 项目布局,源代码在 `src/main/java/com/label/`,测试在 `src/test/java/com/label/`,按 `common/` + `module/` 两层分包。
---
## 复杂度追踪
> 宪章检查无违规,本节留空。
---
## GSTACK REVIEW REPORT
| Review | Trigger | Why | Runs | Status | Findings |
|--------|---------|-----|------|--------|----------|
| CEO Review | `/plan-ceo-review` | Scope & strategy | 0 | — | — |
| Codex Review | `/codex review` | Independent 2nd opinion | 0 | — | — |
| Eng Review | `/plan-eng-review` | Architecture & tests (required) | 0 | — | — |
| Design Review | `/plan-design-review` | UI/UX gaps | 0 | — | — |
**VERDICT:** NO REVIEWS YET — run `/autoplan` for full review pipeline, or individual reviews above.

View File

@@ -0,0 +1,179 @@
# 快速启动指南label_backend
**日期**: 2026-04-09
**分支**: `001-label-backend-spec`
---
## 前置条件
- Docker Desktop ≥ 4.x(含 Docker Compose v2)
- JDK 17本地开发时
- Maven ≥ 3.8(本地开发时)
---
## 一、使用 Docker Compose 启动完整环境
```bash
# 克隆仓库
git clone <repo-url>
cd label_backend
# 启动所有服务PostgreSQL + Redis + RustFS + AI Service + Backend + Frontend
docker compose up -d
# 查看后端启动日志
docker compose logs -f backend
# 检查健康状态
docker compose ps
```
**服务端口**:
| 服务 | 端口 |
|------|------|
| 前端Nginx | http://localhost:80 |
| 后端 REST API | http://localhost:8080 |
| AI 服务FastAPI | http://localhost:8000 |
| PostgreSQL | localhost:5432 |
| Redis | localhost:6379 |
| RustFS S3 API | http://localhost:9000 |
| RustFS Web 控制台 | http://localhost:9001 |
---
## 二、初始化数据库
数据库 DDL 通过 `./sql/init.sql` 在 PostgreSQL 容器启动时自动执行(`docker-entrypoint-initdb.d`)。
若需手动执行:
```bash
docker compose exec postgres psql -U label -d label_db -f /docker-entrypoint-initdb.d/init.sql
```
**初始账号**(由 `init.sql` 中的 INSERT 语句创建):
| 用户名 | 密码 | 角色 | 公司 |
|--------|------|------|------|
| admin | admin123 | ADMIN | 演示公司 |
| reviewer01 | review123 | REVIEWER | 演示公司 |
| annotator01 | annot123 | ANNOTATOR | 演示公司 |
| uploader01 | upload123 | UPLOADER | 演示公司 |
---
## 三、本地开发模式(不使用 Docker
```bash
# 启动依赖服务(仅 PostgreSQL + Redis + RustFS不启动后端
docker compose up -d postgres redis rustfs
# 设置环境变量
export SPRING_DATASOURCE_URL=jdbc:postgresql://localhost:5432/label_db
export SPRING_DATASOURCE_USERNAME=label
export SPRING_DATASOURCE_PASSWORD=label_password
export SPRING_REDIS_HOST=localhost
export SPRING_REDIS_PORT=6379
export SPRING_REDIS_PASSWORD=redis_password
export RUSTFS_ENDPOINT=http://localhost:9000
export RUSTFS_ACCESS_KEY=minioadmin
export RUSTFS_SECRET_KEY=minioadmin
export AI_SERVICE_BASE_URL=http://localhost:8000
# 编译并启动
mvn clean spring-boot:run
```
---
## 四、验证安装
```bash
# 1. 登录(获取 Token
curl -X POST http://localhost:8080/api/auth/login \
-H "Content-Type: application/json" \
-d '{"companyCode":"DEMO","username":"admin","password":"admin123"}'
# 期望响应:{"code":"SUCCESS","data":{"token":"...","role":"ADMIN",...}}
# 2. 使用 Token 访问受保护接口(将 {TOKEN} 替换为上一步返回的 token
curl http://localhost:8080/api/auth/me \
-H "Authorization: Bearer {TOKEN}"
# 期望响应:{"code":"SUCCESS","data":{"username":"admin","role":"ADMIN",...}}
```
---
## 五、运行测试
```bash
# 运行所有测试Testcontainers 会自动启动真实 PG + Redis 容器)
mvn test
# 运行特定测试(并发任务领取)
mvn test -Dtest=TaskClaimConcurrencyTest
# 运行集成测试套件
mvn test -Dtest=*IntegrationTest
```
**注意**: Testcontainers 需要本地 Docker 可用。首次运行会拉取 PostgreSQL 和 Redis 镜像(约 200MB)
---
## 六、关键配置项说明
配置文件位于 `src/main/resources/application.yml`。以下配置项可在运行时通过 `PUT /api/config/{key}` 接口(ADMIN 权限)动态调整,无需重启服务:
| 配置键 | 说明 | 默认值 |
|--------|------|--------|
| `token_ttl_seconds` | 会话凭证有效期(秒) | 7200(2小时) |
| `model_default` | AI 辅助默认模型 | glm-4 |
| `video_frame_interval` | 视频帧提取间隔(帧数) | 30 |
| `prompt_extract_text` | 文本三元组提取 Prompt | 见 init.sql |
| `prompt_extract_image` | 图片四元组提取 Prompt | 见 init.sql |
| `prompt_qa_gen_text` | 文本问答生成 Prompt | 见 init.sql |
| `prompt_qa_gen_image` | 图片问答生成 Prompt | 见 init.sql |
---
## 七、标注流水线快速验证
```bash
TOKEN="your-admin-token"
# 步骤 1上传文本资料
curl -X POST http://localhost:8080/api/source/upload \
-H "Authorization: Bearer $TOKEN" \
-F "file=@sample.txt" -F "dataType=TEXT"
# 步骤 2为资料创建提取任务sourceId 从上一步响应中获取)
curl -X POST http://localhost:8080/api/tasks \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type: application/json" \
-d '{"sourceId": 1, "taskType": "AI_ASSISTED", "aiModel": "glm-4"}'
# 步骤 3标注员领取任务使用 annotator01 的 Token
ANNOTATOR_TOKEN="annotator-token"
curl -X POST http://localhost:8080/api/tasks/1/claim \
-H "Authorization: Bearer $ANNOTATOR_TOKEN"
# 步骤 4获取 AI 预标注结果
curl http://localhost:8080/api/extraction/1 \
-H "Authorization: Bearer $ANNOTATOR_TOKEN"
# 步骤 5提交标注结果
curl -X PUT http://localhost:8080/api/extraction/1 \
-H "Authorization: Bearer $ANNOTATOR_TOKEN" \
-H "Content-Type: application/json" \
-d '{"items":[{"subject":"北京","predicate":"是首都","object":"中国","sourceText":"北京是中国的首都","startOffset":0,"endOffset":8}]}'
curl -X POST http://localhost:8080/api/extraction/1/submit \
-H "Authorization: Bearer $ANNOTATOR_TOKEN"
# 步骤 6审批员审批通过使用 reviewer01 的 Token
REVIEWER_TOKEN="reviewer-token"
curl -X POST http://localhost:8080/api/extraction/1/approve \
-H "Authorization: Bearer $REVIEWER_TOKEN"
```

View File

@@ -0,0 +1,150 @@
# Phase 0 研究报告label_backend
**日期**: 2026-04-09
**分支**: `001-label-backend-spec`
---
## 技术决策汇总
所有技术选型均由宪章强制约束,无需评估备选方案。本报告记录关键设计决策的理由,供后续实施参考。
---
## 决策 1认证机制
**决策**: UUID v4 Token 存储于 Redis,滑动过期;禁止 JWT
**理由**:
- JWT 自包含令牌无法按需吊销,无法满足"管理员禁用账号立即生效"的安全要求
- UUID Token 在 Redis 中可精确控制生命周期:退出登录或禁用账号时同步删除 Key下一次请求立即失效
- 滑动过期(每次有效请求重置 TTL)确保活跃用户不被意外踢出
**备选方案放弃理由**:
- JWT无法即时吊销存在安全窗口
- Session Cookie在无状态 REST API 架构中不适用
- OAuth2过度设计当前场景无第三方授权需求
---
## 决策 2多租户隔离机制
**决策**: MyBatis Plus `TenantLineInnerInterceptor` + `ThreadLocal CompanyContext`
**理由**:
- `TenantLineInnerInterceptor` 在 SQL 拦截器层自动在每条查询的 WHERE 子句中注入 `company_id`,覆盖范围广且无需逐方法手动添加条件
- ThreadLocal 存储当前请求的 `companyId`,由 Shiro TokenFilter 在解析 Token 时从 Redis 会话数据注入,确保 companyId 来自服务端权威来源而非客户端参数
- `finally` 块强制清理 ThreadLocal防止线程池复用时数据串漏
**备选方案放弃理由**:
- 行级安全RLSPostgreSQL 原生支持,但与 MyBatis Plus 集成复杂,且宪章已指定 ThreadLocal 方案
- 逐方法手动添加 WHERE容易遗漏维护成本高
---
## 决策 3任务并发领取控制
**决策**: Redis `SET NX`(分布式锁)+ 数据库乐观约束(`WHERE status = 'UNCLAIMED'`)双重保障
**理由**:
- 单纯使用数据库乐观锁在高并发下存在写放大问题(大量 UPDATE 竞争)
- 单纯使用 Redis 锁若锁过期后 DB 写入失败可能导致数据不一致
- 双重保障:Redis 锁(TTL 30s)快速拦截大部分并发请求,减少数据库压力;DB 乐观约束作为最终一致性兜底
**Key 命名**: `task:claim:{taskId}`(TTL 30s,与宪章 Redis Key 规范一致)
---
## 决策 4审批触发 QA 任务的异步解耦
**决策**: Spring `@TransactionalEventListener(phase = AFTER_COMMIT)` + `@Transactional(REQUIRES_NEW)`
**理由**:
- 提取阶段审批通过后需调用 AI HTTP 生成候选问答对,该 HTTP 调用延迟不确定(秒级到分钟级)
- 若在 `@Transactional` 内同步调用,数据库连接被长时间占用,且 AI 失败会错误地回滚已完成的审批操作
- `AFTER_COMMIT` 保证业务审批先提交再触发事件,避免事务回滚导致的幽灵任务
- `REQUIRES_NEW` 为 QA 生成开启独立事务AI 失败仅影响 QA 任务创建,不影响审批结果
**事件流**: `approve()` → publish `ExtractionApprovedEvent` → 事务提交 → `onExtractionApproved()` 异步执行(AI 调用 + 创建 QA 任务)
---
## 决策 5标注结果存储语义
**决策**: JSONB 整体覆盖PUT 语义),禁止局部 PATCH
**理由**:
- 三元组/四元组条目具有强关联性(主语-谓语-宾语作为整体,或主体-关系-客体-修饰词作为整体),局部更新易导致不一致
- 整体替换简化服务端逻辑,前端每次提交完整 items 数组,服务端直接执行 UPDATE `result_json = ?`
- 避免局部追加导致的索引层数据不一致(如删除某条目后残留旧数据)
---
## 决策 6审计日志事务边界
**决策**: 审计日志写入不要求与业务操作在同一事务AOP `finally` 块中独立写入
**理由**:
- 审计写入失败不应回滚业务操作(用户的标注/审批结果比审计日志更重要)
- `@Around` 通知在业务方法执行完成(commit 或 rollback)后捕获最终 `result`,可记录准确的成功/失败状态
- 审计失败仅 error 级别日志 + 告警,不影响用户体验
---
## 决策 7视频预处理幂等回调
**决策**: 回调处理时检查 `video_process_job.status`,已为 `SUCCESS` 则静默忽略
**理由**:
- AI 服务可能因网络抖动对同一 jobId 发起多次成功回调
- 幂等检查确保第一次成功回调创建标注任务,后续重复回调无任何副作用
- 检查粒度:`status == SUCCESS` 即返回,不进行任何 DB 写入
---
## 决策 8对象存储路径规范
**决策**: RustFSS3 兼容),文件字节流禁止入库,路径按资源类型分桶分目录
**路径规范**:
| 资源 | 桶 | 路径格式 |
|------|-----|---------|
| 文本文件 | `source-data` | `text/{yyyyMM}/{source_id}.txt` |
| 图片 | `source-data` | `image/{yyyyMM}/{source_id}.jpg` |
| 视频 | `source-data` | `video/{yyyyMM}/{source_id}.mp4` |
| 视频帧 | `source-data` | `frames/{source_id}/{frame_index}.jpg` |
| 视频转文本 | `source-data` | `video-text/{parent_source_id}/{timestamp}.txt` |
| bbox 裁剪图 | `source-data` | `crops/{task_id}/{item_index}.jpg` |
| 导出 JSONL | `finetune-export` | `export/{batchUuid}.jsonl` |
---
## 决策 9测试策略
**决策**: 集成测试使用 Testcontainers真实 PG + Redis不允许 Mock 数据库
**必须覆盖的测试场景**:
1. **并发任务领取**10 线程同时争抢同一任务,验证恰好 1 人成功Redis + DB 双重锁)
2. **视频回调幂等**:同一 jobId 两次成功回调,验证只创建 1 个 annotation_task
3. **状态机越界拒绝**:非法状态转换(如 APPROVED → IN_PROGRESS抛出 BusinessException
4. **多租户隔离**:公司 A 身份访问公司 B 资源,验证被拒绝
5. **Shiro 过滤器链**:无 Token → 401Token 有效但角色不足 → 403
---
## 无需澄清事项汇总
| 项目 | 状态 | 来源 |
|------|------|------|
| 认证方案 | ✅ 已确定UUID Token | 宪章原则三 |
| 数据库选型 | ✅ 已确定PostgreSQL | 宪章原则一 |
| ORM | ✅ 已确定MyBatis Plus | 宪章原则一 |
| 缓存/锁 | ✅ 已确定Redis | 宪章原则一 |
| 对象存储 | ✅ 已确定RustFS S3 | 宪章原则一 |
| AI 集成方式 | ✅ 已确定HTTP RestClient | 宪章原则一 |
| 多租户隔离 | ✅ 已确定ThreadLocal + Interceptor | 宪章原则二 |
| 并发控制 | ✅ 已确定(双重锁) | 宪章原则七 |
| 审批事务边界 | ✅ 已确定(@TransactionalEventListener | 宪章原则五 |
| 测试策略 | ✅ 已确定Testcontainers | 宪章开发工作流 |

View File

@@ -0,0 +1,273 @@
# 功能规格说明label_backend 知识图谱智能标注平台
**功能分支**: `001-label-backend-spec`
**创建日期**: 2026-04-09
**状态**: 草稿
**输入**: 根据文档 docs/superpowers/specs/2026-04-09-label-backend-design.md 生成需求规格文档
---
## 用户场景与测试 *(必填)*
### 用户故事 1 - 用户登录与身份认证 (优先级: P1)
公司员工使用用户名和密码登录平台,获取会话凭证后访问受权限保护的功能。会话在持续活跃时保持有效,用户主动退出或管理员禁用账号后会话立即失效。
**优先级理由**: 认证是所有其他功能的前提,无法登录则所有功能均不可用。
**独立测试**: 可独立通过以下方式测试:用正确凭证登录,携带返回凭证请求受保护接口,验证正常访问;携带错误凭证或过期凭证,验证被拒绝。
**验收场景**:
1. **给定** 用户持有效用户名和密码,**当** 提交登录请求,**则** 系统返回会话凭证,且该凭证可用于后续请求
2. **给定** 用户已登录并持有效凭证,**当** 发起正常业务请求,**则** 会话有效期自动延长
3. **给定** 用户主动退出登录,**当** 使用旧凭证访问任意受保护接口,**则** 系统立即拒绝,返回未授权响应
4. **给定** 管理员禁用某用户账号,**当** 被禁用用户使用现有凭证访问接口,**则** 系统立即拒绝,不设任何宽限期
5. **给定** 用户使用错误密码,**当** 提交登录请求,**则** 系统返回认证失败,不泄露用户是否存在
---
### 用户故事 2 - 原始资料上传 (优先级: P1)
上传员将文本文件、图片或视频上传至平台,系统存储文件并记录元数据,视频文件额外触发异步预处理流程(帧提取或转文字)。
**优先级理由**: 资料上传是整条标注流水线的起点,没有资料则无法产生任何标注任务。
**独立测试**: 可独立测试:上传一个文本文件或图片,验证系统成功接收并记录;上传视频,验证系统创建预处理任务并开始异步处理。
**验收场景**:
1. **给定** 上传员已登录,**当** 上传一个文本文件,**则** 系统保存文件并创建资料记录,状态为"待提取"
2. **给定** 上传员已登录,**当** 上传一张图片,**则** 系统保存图片并创建资料记录,状态为"待提取"
3. **给定** 上传员已登录,**当** 上传一个视频文件,**则** 系统保存视频,创建预处理任务,资料状态变为"预处理中"
4. **给定** 视频预处理成功完成,**当** AI 服务回调成功,**则** 每帧(帧模式)或每段转译文本(片段模式)均作为独立资料进入标注队列,原视频状态变为"已完成"
5. **给定** 视频预处理因 AI 服务故障失败且已达最大重试次数,**当** 回调失败,**则** 任务标记为失败,管理员可查阅错误信息并手动重新触发
---
### 用户故事 3 - 提取阶段标注EXTRACTION (优先级: P1)
标注员从任务池中领取一个提取任务,借助 AI 辅助预标注对文本资料完成三元组标注或对图片资料完成四元组标注,提交后由审批员审核。
**优先级理由**: 提取阶段是双流水线的第一个生产阶段,直接产出结构化知识。
**独立测试**: 可独立测试:标注员领取任务,修改 AI 预标注结果,提交后验证任务进入"待审批"状态;同一任务被多人同时尝试领取,验证只有一人成功。
**验收场景**:
1. **给定** 存在未被领取的提取任务,**当** 标注员请求领取,**则** 任务归属到该标注员,状态变为"进行中"
2. **给定** 同一任务被 10 名标注员同时争抢,**当** 所有人同时发起领取请求,**则** 恰好一名标注员领取成功,其余人收到"任务已被他人领取"响应
3. **给定** 标注员已领取任务,**当** 请求 AI 辅助预标注,**则** 系统调用 AI 服务返回结构化候选结果(不直接提交,供人工编辑)
4. **给定** 标注员完成人工编辑,**当** 提交标注结果,**则** 任务状态变为"已提交",进入审批队列
5. **给定** 标注员领取任务后决定放弃,**当** 放弃任务,**则** 任务回到任务池,可被其他标注员重新领取
---
### 用户故事 4 - 提取阶段审批 (优先级: P1)
审批员查看提交的提取标注结果,选择通过或驳回。审批通过后系统自动创建问答生成任务;驳回时需填写驳回原因,标注员可重新领取该任务修改后再次提交。
**优先级理由**: 审批控制标注质量,是推进流水线到下一阶段的门控节点。
**独立测试**: 可独立测试:审批通过一个提取任务,验证系统自动创建 QA 生成任务;驳回一个任务,验证标注员可重领并修改。
**验收场景**:
1. **给定** 审批员进入待审批队列,**当** 查看列表,**则** 只看到状态为"已提交"的任务
2. **给定** 审批员查看某提取任务的标注结果,**当** 点击通过,**则** 标注结果标记为最终版,系统自动创建对应的问答生成任务并置于任务池中
3. **给定** 审批员本人提交了某提取任务,**当** 该审批员尝试审批自己提交的任务,**则** 系统拒绝,提示不允许自审
4. **给定** 审批员认为标注结果不合格,**当** 附带驳回原因并驳回,**则** 任务状态变为"已驳回",标注员可在我的任务列表中看到该任务及原因
5. **给定** 标注员查看被驳回的任务,**当** 重新领取并修改后提交,**则** 任务重新进入审批队列
---
### 用户故事 5 - 问答生成阶段标注与审批QA_GENERATION (优先级: P2)
标注员领取问答生成任务,在 AI 候选问答对基础上完成人工编辑,提交后由审批员审批。审批通过即写入训练样本库;驳回则退回标注员修改。
**优先级理由**: QA 阶段是流水线的最后生产阶段,直接决定训练样本质量。
**独立测试**: 可独立测试:领取 QA 任务,修改候选问答对并提交;审批员通过后,验证训练样本库中出现对应记录。
**验收场景**:
1. **给定** 存在由提取阶段审批通过自动创建的问答生成任务,**当** 标注员进入任务池,**则** 可以看到并领取该任务
2. **给定** 标注员已领取问答生成任务,**当** 整体提交修改后的问答对列表,**则** 任务进入审批队列(每次提交均为完整列表替换,不允许部分追加)
3. **给定** 审批员通过问答生成任务,**当** 审批完成,**则** 对应训练样本状态变为"已审批",整条资料流水线标记为完成
4. **给定** 审批员驳回问答生成任务,**当** 驳回完成,**则** 候选问答对记录被清除,标注员可重领任务重新生成
---
### 用户故事 6 - 训练数据导出与微调提交 (优先级: P2)
管理员从已审批的训练样本中选择一批次,导出为 GLM 微调格式的 JSONL 文件,并可选择一键提交至 GLM 微调服务。
**优先级理由**: 导出是将标注成果转化为 AI 训练价值的最终步骤。
**独立测试**: 可独立测试:选择若干已审批样本创建导出批次,验证生成 JSONL 文件;将批次提交微调服务,验证可查询到微调任务状态。
**验收场景**:
1. **给定** 管理员查看样本库,**当** 筛选已审批样本,**则** 只返回状态为"已审批"的样本(分页,不可无界查询)
2. **给定** 管理员选择若干已审批样本,**当** 创建导出批次,**则** 系统生成 JSONL 文件并存储,返回批次标识;若任意样本不处于已审批状态则整批失败
3. **给定** 导出批次已创建,**当** 管理员提交微调任务,**则** 系统向 AI 服务发起微调请求,记录微调任务标识,状态变为"进行中"
4. **给定** 微调任务已提交,**当** 管理员查询状态,**则** 返回最新的微调进度信息
---
### 用户故事 7 - 用户与权限管理 (优先级: P2)
管理员管理本公司用户,包括创建用户、分配角色、启用/禁用账号。角色变更和账号禁用在保存后立即生效,无延迟窗口。
**优先级理由**: 人员和权限管理是平台运营的基础管控能力。
**独立测试**: 可独立测试:创建一个标注员角色用户,验证该用户可以领取任务但无法执行审批;将其角色升为审批员,立即验证可以审批;禁用该用户,验证其现有会话立即失效。
**验收场景**:
1. **给定** 管理员创建一个新用户并分配角色,**当** 新用户登录,**则** 该用户拥有该角色对应的权限(高级角色自动包含低级角色权限)
2. **给定** 管理员将用户角色从标注员升为审批员,**当** 角色变更保存后,**则** 该用户无需重新登录即可使用审批功能
3. **给定** 管理员禁用某用户账号,**当** 被禁用用户下次发起请求,**则** 系统立即返回拒绝响应,不设过渡期
4. **给定** 管理员查询用户列表,**当** 获取结果,**则** 仅返回本公司用户,不可看到其他公司用户数据
---
### 用户故事 8 - 系统配置管理 (优先级: P3)
管理员维护 AI Prompt 模板、模型参数、Token 有效期等系统配置项,支持公司级配置覆盖全局默认值。
**优先级理由**: 配置管理是运营支撑能力,可在系统运行后按需调整,不影响核心标注流程。
**独立测试**: 可独立测试:修改某公司的 Prompt 模板配置,验证该公司后续标注使用新模板,其他公司仍使用全局默认值。
**验收场景**:
1. **给定** 管理员查看配置列表,**当** 获取结果,**则** 同时展示本公司专属配置和全局默认配置,公司专属配置对同一 Key 优先
2. **给定** 管理员更新某配置项,**当** 保存成功,**则** 后续相关操作立即使用新配置值
3. **给定** 某配置项仅有全局默认值无公司级覆盖,**当** 系统查询该配置,**则** 返回全局默认值
---
### 边界情况
- 标注员领取任务后长时间未操作——管理员可强制转移任务给其他标注员(状态保持"进行中",持有人变更)
- 视频预处理回调因网络抖动发生重复投递——系统对同一任务的重复成功回调静默忽略,不重复创建标注任务
- 某租户上传量极大时的无界查询——所有列表接口强制分页,无法绕过分页限制获取全量数据
- 审批员同时兼任标注员角色时尝试自审——系统按提交者身份校验,自审请求被拒绝
- 跨公司数据访问尝试——每次数据查询自动注入当前用户所属公司标识,无法通过参数篡改访问其他公司数据
- 操作日志写入失败——审计写入失败不影响业务操作,仅记录错误并触发告警
- 同一账号在多设备登录——每次登录生成独立会话凭证,互不影响;退出某设备仅使该设备凭证失效
---
## 需求说明 *(必填)*
### 功能性需求
**认证与会话**
- **FR-001**: 系统必须支持基于用户名和密码的登录认证,验证通过后返回会话凭证
- **FR-002**: 系统必须在每次有效请求时自动延长会话有效期(滑动过期)
- **FR-003**: 系统必须支持主动退出登录,退出后凭证立即失效
- **FR-004**: 系统必须在管理员禁用账号后立即使该账号所有有效凭证失效,不设任何宽限期
- **FR-005**: 系统必须拒绝无凭证或过期凭证的请求,返回未授权响应
**访问控制**
- **FR-006**: 系统必须实现四级角色体系:上传员 ⊂ 标注员 ⊂ 审批员 ⊂ 管理员,高级角色自动继承低级角色权限
- **FR-007**: 系统必须在接口层声明每个接口所需的最低角色,角色不足时拒绝访问
- **FR-008**: 系统必须在角色变更保存后立即生效,无需等待会话自然过期
**多租户数据隔离**
- **FR-009**: 系统必须保证每个公司的数据完全隔离,任何查询均只返回当前用户所属公司的数据
- **FR-010**: 系统必须禁止调用方通过请求参数指定公司标识来访问其他公司数据;公司标识必须从服务端会话中获取
- **FR-011**: 全局系统配置对所有公司可见,公司级配置对同一配置项优先覆盖全局值
**资料管理**
- **FR-012**: 系统必须支持文本、图片、视频三种原始资料的上传,文件内容存储至对象存储服务,数据库只保存元数据和存储路径
- **FR-013**: 视频上传后必须触发异步预处理任务,不阻塞上传响应
- **FR-014**: 系统必须支持视频帧提取模式(每帧作为独立图片进入图片标注流水线)和视频片段转文本模式(派生文本资料进入文本标注流水线)
- **FR-015**: 视频片段转文本产生的派生资料必须记录对原始视频资料的引用,可追溯来源
**任务管理**
- **FR-016**: 系统必须支持并发安全的任务领取机制,确保同一任务不会被两名标注员同时持有
- **FR-017**: 系统必须支持任务放弃(退回任务池)和管理员强制转移任务归属
- **FR-018**: 每次任务状态变更必须记录历史快照(含操作人、操作时间、驳回原因等),不可修改或删除历史记录
- **FR-019**: 所有任务列表接口必须强制分页,不允许无界查询
**提取阶段标注工作台**
- **FR-020**: 系统必须调用 AI 服务生成候选提取结果供标注员参考编辑,不直接写入最终结果
- **FR-021**: 标注员提交的提取结果以整体替换方式存储,禁止局部追加修改
- **FR-022**: 审批员审批通过时,系统必须在同一操作中将提取结果标记为最终版并自动创建问答生成任务,该级联操作不得由前端发起独立请求触发
- **FR-023**: 系统必须拒绝提交者本人审批或驳回自己提交的任务(禁止自审)
- **FR-024**: 审批驳回时,标注员必须可以看到被驳回任务及驳回原因,并可重新领取修改后再次提交
**问答生成阶段**
- **FR-025**: 问答生成任务的标注结果采用整体替换,每次提交包含完整问答对列表
- **FR-026**: 问答生成阶段审批通过时,对应训练样本必须写入训练样本库,资料状态标记为"已完成"
- **FR-027**: 问答生成阶段审批驳回时,候选问答对记录必须被清除,标注员可重领任务重新生成
**训练数据导出**
- **FR-028**: 系统必须支持将已审批的训练样本批量导出为 GLM 微调格式,每条样本一行
- **FR-029**: 导出时若任意选定样本不处于已审批状态,整批导出请求必须失败
- **FR-030**: 系统必须支持将导出批次提交至外部 AI 微调服务,并可追踪微调任务进度
**审计日志**
- **FR-031**: 系统必须对所有状态变更操作自动记录审计日志包含操作人姓名快照、操作类型、结果、IP 地址等信息
- **FR-032**: 审计日志只追加不修改,禁止对审计记录执行更新或删除
- **FR-033**: 审计日志写入失败不得导致业务操作失败或回滚
**视频异步处理**
- **FR-034**: 视频预处理任务必须支持自动重试,达到最大重试次数后置为失败状态,需管理员手动重新触发
- **FR-035**: AI 服务对同一视频处理任务的重复成功回调必须被幂等处理,不得重复创建标注任务
### 核心实体
- **公司Company**: 多租户根节点,每个公司拥有独立的用户、资料和任务数据空间
- **用户User**: 属于某公司,拥有角色(上传员/标注员/审批员/管理员),通过会话凭证访问系统
- **原始资料(SourceData)**: 待标注的文件(文本/图片/视频),拥有状态流转(待处理→提取中→QA审核中→已完成);视频派生资料通过父资料引用保留溯源链
- **标注任务AnnotationTask**: 标注工作单元,分提取阶段和问答生成阶段,拥有领取、提交、审批、驳回完整生命周期
- **标注结果AnnotationResult**: 提取阶段的结构化输出(三元组或四元组),以整体 JSON 存储
- **训练样本TrainingDataset**: 经审批的问答对GLM 微调格式,待导出
- **导出批次ExportBatch**: 一批训练样本的导出记录,关联外部微调任务标识
- **视频处理任务VideoProcessJob**: 视频预处理的异步任务跟踪,包含重试计数和最终输出路径
- **系统配置SysConfig**: 配置键值对,分全局默认和公司级两层,公司级优先
---
## 成功标准 *(必填)*
### 可度量结果
- **SC-001**: 同一标注任务被多人同时争抢时,有且仅有一人领取成功,其余人立即收到明确的"已被领取"响应,成功率 100%,无数据竞争导致的双重持有
- **SC-002**: 管理员禁用账号或变更角色后,该账号的权限变更在下一次请求时立即生效(延迟小于 1 秒)
- **SC-003**: 提取阶段审批通过时,问答生成任务在同一次操作中自动出现在任务池,无需任何人工干预步骤
- **SC-004**: 视频预处理回调的重复投递(同一任务多次成功回调)不产生重复标注任务,幂等处理成功率 100%
- **SC-005**: 跨公司数据访问尝试 100% 被系统拒绝,无任何数据泄露至非所属租户
- **SC-006**: 审计日志对所有状态变更操作的覆盖率达到 100%,审计写入失败不影响业务成功率
- **SC-007**: 所有列表接口在数据量增长时保持稳定响应,用户无法绕过分页限制一次性获取不受限制数量的记录
- **SC-008**: 标注员完成一次任务领取→标注→提交的完整操作流程(不含 AI 辅助预标注等待时间)可在 5 分钟内完成
- **SC-009**: 从资料上传到训练样本进入样本库的完整流水线(含两次人工标注和两次审批)中,每个节点的操作人、时间、结果均可查询追溯
---
## 假设与前提
- 系统服务于多个公司,每家公司的用户、资料和标注数据完全独立,不存在跨公司协作场景
- 每位用户在同一时刻只属于一家公司,不存在用户跨公司兼职的场景
- 视频预处理(帧提取、转文字)由外部 AI 服务异步完成,后端只负责触发和回调处理
- 微调结果的质量评估不在本平台范围内,平台只负责提交微调任务并查询状态
- 前端应用已独立开发,本规格仅覆盖后端 API 能力
- 所有文件二进制内容存储在兼容 S3 协议的对象存储服务中,不存入关系型数据库
- 生产环境使用容器化部署,后端服务、数据库、缓存、对象存储均为独立容器
- AI 服务通过 HTTP 提供结构化的提取和问答生成能力,后端不内嵌 AI 模型
- 标注流水线中一条资料同一时间只有一个活跃的提取任务或问答生成任务,不支持并行多版本标注
- 审计日志的长期归档(超过月分区范围)由数据库运维团队负责,不在本系统范围内

View File

@@ -0,0 +1,310 @@
# 任务清单label_backend 知识图谱智能标注平台
**输入**: `/specs/001-label-backend-spec/` 全部设计文档
**前置条件**: plan.md ✅ | spec.md ✅ | research.md ✅ | data-model.md ✅ | contracts/ ✅ | quickstart.md ✅
## 格式说明
- **[P]**: 可并行执行(不同文件,无未完成任务的依赖)
- **[USn]**: 对应 spec.md 中的用户故事编号
- 每条任务包含精确的文件路径
---
## Phase 1: 项目初始化
**目标**: 创建 Maven 项目骨架、基础配置和 Docker 环境
- [ ] T001 创建 Maven 项目骨架(`com.label` GroupId`label-backend` ArtifactIdJava 17 编译目标)
- [ ] T002 配置 `pom.xml`Spring Boot 3、Apache Shiro 1.13.x、MyBatis Plus 3.5.x、Spring Data Redis、AWS S3 SDK v2、Testcontainers、Lombok
- [ ] T003 [P] 创建 `sql/init.sql`(按依赖顺序建全部 11 张表:sys_company → sys_user → source_data → annotation_task → annotation_result → training_dataset → export_batch → sys_config → sys_operation_log → annotation_task_history → video_process_job,含所有索引和初始配置数据)
- [ ] T004 [P] 创建 `docker-compose.yml`postgres、redis、rustfs、backend、ai-service、frontend 六个服务,含健康检查)和后端 `Dockerfile`eclipse-temurin:17-jre-alpine
- [ ] T005 创建 `src/main/resources/application.yml`数据源、Redis、RustFS、AI 服务 base-url、Shiro 相关配置项)
**检查点**: Maven 编译通过(`mvn compile`Docker Compose `up -d` 全部服务健康
---
## Phase 2: 公共基础设施(阻塞性前置条件)
**目标**: 所有业务模块依赖的公共组件。**必须全部完成后用户故事阶段才能开始**
**⚠️ 重要**: 此阶段未完成前任何用户故事均不可开始实现
- [ ] T006 创建 `Result<T>``ResultCode``PageResult<T>``src/main/java/com/label/common/result/`(统一响应格式:`{"code":"SUCCESS","data":{...}}`
- [ ] T007 [P] 创建 `BusinessException`(含 `code``message``httpStatus`)和 `GlobalExceptionHandler``@RestControllerAdvice`)— `src/main/java/com/label/common/exception/`
- [ ] T008 [P] 创建 `CompanyContext`ThreadLocal`set/get/clear` 三个方法clear 必须在 finally 块调用)— `src/main/java/com/label/common/context/CompanyContext.java`
- [ ] T009 创建 `RedisKeyManager`(三个静态方法:`tokenKey``userPermKey``taskClaimKey`)和 `RedisService``src/main/java/com/label/common/redis/`
- [ ] T010 创建 MyBatis Plus 配置类 `MybatisPlusConfig`,注册 `TenantLineInnerInterceptor`(从 `CompanyContext` 获取 `companyId` 自动注入 WHERE 子句;`sys_company``sys_config` 加入忽略表列表)— `src/main/java/com/label/common/config/MybatisPlusConfig.java`
- [ ] T011 创建 `StateValidator``assertTransition` 泛型方法,违规时抛出 `BusinessException("INVALID_STATE_TRANSITION",...)`)— `src/main/java/com/label/common/statemachine/StateValidator.java`
- [ ] T012 [P] 创建 `SourceStatus` 枚举PENDING/PREPROCESSING/EXTRACTING/QA_REVIEW/APPROVED含 TRANSITIONS Map`src/main/java/com/label/common/statemachine/SourceStatus.java`
- [ ] T013 [P] 创建 `TaskStatus` 枚举UNCLAIMED/IN_PROGRESS/SUBMITTED/APPROVED/REJECTED含 TRANSITIONS Map含 IN_PROGRESS→IN_PROGRESS 用于 ADMIN 强制转移)— `src/main/java/com/label/common/statemachine/TaskStatus.java`
- [ ] T014 [P] 创建 `DatasetStatus` 枚举PENDING_REVIEW/APPROVED/REJECTED含 TRANSITIONS Map`src/main/java/com/label/common/statemachine/DatasetStatus.java`
- [ ] T015 [P] 创建 `VideoJobStatus` 枚举PENDING/RUNNING/SUCCESS/FAILED/RETRYING含 TRANSITIONS Map注释说明 FAILED→PENDING 由 ADMIN 手动触发)— `src/main/java/com/label/common/statemachine/VideoJobStatus.java`
- [ ] T016 创建 `@OperationLog` 注解(`type``targetType` 两个属性,`@Around` 级别)— `src/main/java/com/label/common/aop/OperationLog.java`
- [ ] T017 创建 `AuditAspect``@Around("@annotation(operationLog)")`,在 finally 块以独立操作写入 `sys_operation_log`;审计写入失败只记录 error 日志,禁止抛出异常回滚业务)— `src/main/java/com/label/common/aop/AuditAspect.java`
- [ ] T018 [P] 创建 `RustFsClient`AWS S3 SDK v2 封装endpoint 指向 RustFS实现 `upload``download``delete``getPresignedUrl`)— `src/main/java/com/label/common/storage/RustFsClient.java`
- [ ] T019 [P] 创建 `AiServiceClient``RestClient` 封装8 个端点:`extractText``extractImage``extractFrames``videoToText``genTextQa``genImageQa``startFinetune``getFinetuneStatus`)— `src/main/java/com/label/common/ai/AiServiceClient.java`
- [ ] T020 创建 Shiro 三件套:`TokenFilter`(解析 `Authorization: Bearer {uuid}`,查 Redis `token:{uuid}`,注入 `CompanyContext`,请求结束 finally 清理 ThreadLocal`UserRealm`(先查 Redis `user:perm:{userId}` TTL 5min未命中查 PG`addInheritedRoles`)、`ShiroConfig`(过滤器链:`/api/auth/login``anon``/api/**``tokenFilter`)— `src/main/java/com/label/common/shiro/`
- [ ] T021 创建 `AbstractIntegrationTest`Testcontainers启动真实 PostgreSQL + Redis 容器,执行 sql/init.sql注入测试用的公司和用户数据`src/test/java/com/label/AbstractIntegrationTest.java`
- [ ] T022 集成测试:`ShiroFilterIntegrationTest`(无 Token → 401有效 Token 但角色不足 → 403有效 Token 且角色满足 → 200`src/test/java/com/label/integration/ShiroFilterIntegrationTest.java`
- [ ] T023 单元测试:`StateMachineTest`(验证所有枚举的合法转换通过;非法转换抛出 `BusinessException("INVALID_STATE_TRANSITION")`)— `src/test/java/com/label/unit/StateMachineTest.java`
**检查点**: 基础设施就绪,所有 Phase 3+ 的用户故事可并行开始
---
## Phase 3: 用户故事 1 — 用户登录与身份认证(优先级: P1🎯 MVP
**目标**: 用户可以用用户名和密码登录,获得会话凭证,使用凭证访问受保护接口,退出后凭证立即失效
**独立测试**: 登录 → 获取 Token → 访问 `/api/auth/me` 返回用户信息 → 退出 → 再次访问返回 401
- [ ] T024 [P] [US1] 创建 `SysCompany` 实体MyBatis Plus `@TableName`)和 `SysCompanyMapper``src/main/java/com/label/module/user/entity/SysCompany.java` + `mapper/SysCompanyMapper.java`
- [ ] T025 [P] [US1] 创建 `SysUser` 实体(`passwordHash` 字段加 `@JsonIgnore`)和 `SysUserMapper`(含 `selectByCompanyAndUsername` 方法)— `src/main/java/com/label/module/user/entity/SysUser.java` + `mapper/SysUserMapper.java`
- [ ] T026 [US1] 实现 `AuthService``login()`BCrypt 校验密码 → UUID v4 Token → Redis Hash 存储 userId/role/companyId/username → 设置 TTL = `token_ttl_seconds` 配置值);`logout()`(删除 Redis Token Key`src/main/java/com/label/module/user/service/AuthService.java`
- [ ] T027 [US1] 实现 `AuthController``POST /api/auth/login``anon`,调用 `AuthService.login()`)、`POST /api/auth/logout`(已登录)、`GET /api/auth/me`(返回当前用户信息);所有响应用 `Result<T>` 包装 — `src/main/java/com/label/module/user/controller/AuthController.java`
- [ ] T028 [US1] 集成测试:正确密码登录返回 TokenToken 有效时 `/api/auth/me` 返回 200主动退出后再访问返回 401错误密码登录返回 401 — `src/test/java/com/label/integration/AuthIntegrationTest.java`
**检查点**: US1 独立可测试 — 登录/退出流程完整可用
---
## Phase 4: 用户故事 2 — 原始资料上传(优先级: P1
**目标**: 上传员可以上传文本/图片/视频,查询自己的资料列表;管理员可查看全公司资料
**独立测试**: 上传文本文件 → 列表查到 → 详情含预签名 URL → 管理员可删除
- [ ] T029 [P] [US2] 创建 `SourceData` 实体(含 `parentSourceId` 自引用字段)和 `SourceDataMapper`(含 `updateStatus` 方法)— `src/main/java/com/label/module/source/entity/SourceData.java` + `mapper/SourceDataMapper.java`
- [ ] T030 [US2] 实现 `SourceService``upload()`(先 insert 获取 ID → 构造路径 → 上传 RustFS → 更新 filePath`list()`UPLOADER 按 `uploaderId` 过滤ADMIN 不过滤,强制分页);`findById()`(含 15 分钟预签名 URL`delete()`(仅 PENDING 状态可删,同步删 RustFS 文件)— `src/main/java/com/label/module/source/service/SourceService.java`
- [ ] T031 [US2] 实现 `SourceController``POST /api/source/upload``GET /api/source/list``GET /api/source/{id}``DELETE /api/source/{id}``@RequiresRoles` 注解声明权限;所有响应 `Result<T>` 包装)— `src/main/java/com/label/module/source/controller/SourceController.java`
- [ ] T032 [US2] 集成测试UPLOADER 上传文本/图片 → 列表仅返回自己的资料ADMIN 查看列表返回全部;上传视频 → source_data 状态为 PENDING视频预处理 Phase 9 覆盖);已进入流水线的资料删除返回 409 — `src/test/java/com/label/integration/SourceIntegrationTest.java`
**检查点**: US2 独立可测试 — 上传/查询/删除流程完整可用
---
## Phase 5: 用户故事 3+4 — 提取阶段标注与审批(优先级: P1
**目标**: 标注员可以领取任务并发安全、AI 辅助预标注、编辑并提交;审批员可以通过(自动触发 QA 任务)或驳回(标注员可重领)
**独立测试**: 创建任务 → 标注员领取 → AI 预标注 → 提交 → 审批通过 → QA 任务自动出现在任务池
### 实体与数据层
- [ ] T033 [P] [US3] 创建 `AnnotationTask` 实体 + `AnnotationTaskMapper`(含 `claimTask(taskId, userId, companyId)` 方法SQL`UPDATE ... SET status='IN_PROGRESS', claimed_by=?, claimed_at=NOW() WHERE id=? AND status='UNCLAIMED' AND company_id=?`,返回影响行数)— `src/main/java/com/label/module/task/entity/AnnotationTask.java` + `mapper/AnnotationTaskMapper.java`
- [ ] T034 [P] [US3] 创建 `AnnotationTaskHistory` 实体 + `TaskHistoryMapper``src/main/java/com/label/module/task/entity/AnnotationTaskHistory.java` + `mapper/TaskHistoryMapper.java`
- [ ] T035 [P] [US3] 创建 `AnnotationResult` 实体 + `AnnotationResultMapper`(含 `updateResultJson` 整体覆盖方法和 `selectByTaskId` 方法)— `src/main/java/com/label/module/annotation/entity/AnnotationResult.java` + `mapper/AnnotationResultMapper.java`
### 任务管理服务与控制器
- [ ] T036 [US3] 实现 `TaskClaimService.claim()`(① Redis `SET NX task:claim:{taskId}` TTL 30s失败抛 `TASK_CLAIMED`;② DB `claimTask()` 影响行数为 0 时抛 `TASK_CLAIMED`;③ `insertHistory(UNCLAIMED→IN_PROGRESS)`)和 `unclaim()`StateValidator + 清 Redis 锁 + 历史)和 `reclaim()`(校验 REJECTED + claimedBy = 当前用户 + REJECTED→IN_PROGRESS + 历史)— `src/main/java/com/label/module/task/service/TaskClaimService.java`
- [ ] T037 [US3] 实现 `TaskService``createTask``getPool`按角色过滤ANNOTATOR→UNCLAIMED/EXTRACTIONREVIEWER→SUBMITTED`getMine`(含 IN_PROGRESS/SUBMITTED/REJECTED`getPendingReview`SUBMITTED分页`getById``reassign`ADMIN仅更新 claimedBy + 历史))— `src/main/java/com/label/module/task/service/TaskService.java`
- [ ] T038 [US3] 实现 `TaskController`10 个端点:`POST /api/tasks``GET /api/tasks/pool``POST /api/tasks/{id}/claim``POST /api/tasks/{id}/unclaim``GET /api/tasks/mine``POST /api/tasks/{id}/reclaim``GET /api/tasks/pending-review``GET /api/tasks/{id}``GET /api/tasks``PUT /api/tasks/{id}/reassign`)— `src/main/java/com/label/module/task/controller/TaskController.java`
### 提取标注服务与控制器
- [ ] T039 [US3] 实现 `ExtractionService.aiPreAnnotate()`(调用 `AiServiceClient.extractText/extractImage`,写入 `annotation_result`)和 `updateResult()`(整体覆盖 `result_json`,校验 JSON 格式)— `src/main/java/com/label/module/annotation/service/ExtractionService.java`
- [ ] T040 [US3] 实现 `ExtractionService.submit()``@Transactional`IN_PROGRESS→SUBMITTED + `submitted_at` + insertHistory`src/main/java/com/label/module/annotation/service/ExtractionService.java`
- [ ] T041 [US4] 创建 `ExtractionApprovedEvent`(携带 `taskId``sourceId``sourceType``companyId`)— `src/main/java/com/label/module/annotation/event/ExtractionApprovedEvent.java`
- [ ] T042 [US4] 实现 `ExtractionService.approve()``@Transactional`:① 自审校验;② `is_final=true`;③ SUBMITTED→APPROVED + `completedAt` + 历史;④ `publishEvent(ExtractionApprovedEvent)`AI 调用禁止在此事务内执行)— `src/main/java/com/label/module/annotation/service/ExtractionService.java`
- [ ] T043 [US4] 实现 `ExtractionApprovedEventListener``@TransactionalEventListener(AFTER_COMMIT)` + `@Transactional(REQUIRES_NEW)`:调用 AI 生成候选问答对 → 写 `training_dataset`PENDING_REVIEW→ 创建 QA_GENERATION 任务UNCLAIMED`source_data` 状态→ QA_REVIEW`src/main/java/com/label/module/annotation/service/ExtractionApprovedEventListener.java`
- [ ] T044 [US4] 实现 `ExtractionService.reject()``@Transactional`:① 自审校验;② StateValidator③ SUBMITTED→REJECTED + 历史)— `src/main/java/com/label/module/annotation/service/ExtractionService.java`
- [ ] T045 [US4] 实现 `ExtractionController`5 个端点:`GET /api/extraction/{taskId}``PUT /api/extraction/{taskId}``POST /api/extraction/{taskId}/submit``POST /api/extraction/{taskId}/approve``POST /api/extraction/{taskId}/reject`)— `src/main/java/com/label/module/annotation/controller/ExtractionController.java`
### 集成测试
- [ ] T046 [US3] 并发集成测试10 个线程同时争抢同一 UNCLAIMED 任务,验证恰好 1 人成功、其余均收到 `TASK_CLAIMED` 错误、DB 中 `claimed_by` 唯一 — `src/test/java/com/label/integration/TaskClaimConcurrencyTest.java`
- [ ] T047 [US4] 集成测试:审批通过 → QA 任务自动出现在任务池;自审返回 `SELF_REVIEW_FORBIDDEN` 403驳回后标注员可重领并再次提交 — `src/test/java/com/label/integration/ExtractionApprovalIntegrationTest.java`
**检查点**: US3+US4 独立可测试 — 完整提取流水线领取→标注→提交→审批→QA任务自动创建可用
---
## Phase 6: 用户故事 5 — 问答生成阶段标注与审批(优先级: P2
**目标**: 标注员领取 QA 任务、修改候选问答对并提交;审批员通过后训练样本入库,整条流水线完成
**独立测试**: 领取 QA 任务 → 修改问答对 → 提交 → 审批通过 → training_dataset 状态 APPROVEDsource_data 状态 APPROVED
- [ ] T048 [P] [US5] 创建 `TrainingDataset` 实体 + `TrainingDatasetMapper`(含 `approveByTaskId``deleteByTaskId` 方法)— `src/main/java/com/label/module/annotation/entity/TrainingDataset.java` + `mapper/TrainingDatasetMapper.java`
- [ ] T049 [US5] 实现 `QaService.updateResult()`(整体覆盖问答对 JSONB`submit()``@Transactional`IN_PROGRESS→SUBMITTED + 历史)— `src/main/java/com/label/module/annotation/service/QaService.java`
- [ ] T050 [US5] 实现 `QaService.approve()``@Transactional`:① `validateAndGetTask` 先于一切 DB 写入;② 自审校验;③ `training_dataset` → APPROVED`annotation_task` → APPROVED + 历史;⑤ `source_data` → APPROVED`src/main/java/com/label/module/annotation/service/QaService.java`
- [ ] T051 [US5] 实现 `QaService.reject()``@Transactional`:① 自审校验;② `deleteByTaskId` 清除候选问答对;③ SUBMITTED→REJECTED + 历史;④ `source_data` 保持 QA_REVIEW 不变)— `src/main/java/com/label/module/annotation/service/QaService.java`
- [ ] T052 [US5] 实现 `QaController`5 个端点:`GET /api/qa/{taskId}``PUT /api/qa/{taskId}``POST /api/qa/{taskId}/submit``POST /api/qa/{taskId}/approve``POST /api/qa/{taskId}/reject`)— `src/main/java/com/label/module/annotation/controller/QaController.java`
- [ ] T053 [US5] 集成测试QA 审批通过 → `training_dataset.status = APPROVED``source_data.status = APPROVED`QA 驳回 → 候选记录被删除,标注员可重领 — `src/test/java/com/label/integration/QaApprovalIntegrationTest.java`
**检查点**: US5 独立可测试 — 完整 QA 流水线可用training_dataset 产出验证通过
---
## Phase 7: 用户故事 6 — 训练数据导出与微调提交(优先级: P2
**目标**: 管理员将已审批样本批量导出为 JSONL并可提交 GLM 微调任务
**独立测试**: 选取已审批样本 → 创建批次 → RustFS 中存在 JSONL 文件 → 提交微调 → 可查询状态
- [ ] T054 [P] [US6] 创建 `ExportBatch` 实体 + `ExportBatchMapper``src/main/java/com/label/module/export/entity/ExportBatch.java` + `mapper/ExportBatchMapper.java`
- [ ] T055 [US6] 实现 `ExportService.createBatch()``@Transactional`:① 校验全部样本为 APPROVED② 生成 JSONL每行一个 `glm_format_json`);③ 上传 RustFS `finetune-export/export/{batchUuid}.jsonl`;④ 批量更新 `export_batch_id`/`exported_at`;⑤ 插入 `export_batch` 记录)— `src/main/java/com/label/module/export/service/ExportService.java`
- [ ] T056 [US6] 实现 `FinetuneService``trigger()`(调用 `AiServiceClient.startFinetune()`,更新 `glm_job_id``finetune_status = RUNNING`)和 `getStatus()`(调用 `AiServiceClient.getFinetuneStatus()`)— `src/main/java/com/label/module/export/service/FinetuneService.java`
- [ ] T057 [US6] 实现 `ExportController``GET /api/training/samples``POST /api/export/batch``POST /api/export/{batchId}/finetune``GET /api/export/{batchId}/status``GET /api/export/list`;全部 `@RequiresRoles("ADMIN")`)— `src/main/java/com/label/module/export/controller/ExportController.java`
- [ ] T058 [US6] 集成测试:成功创建批次后 JSONL 文件存在于 RustFS包含非 APPROVED 样本时返回 `INVALID_SAMPLES` 400 — `src/test/java/com/label/integration/ExportIntegrationTest.java`
**检查点**: US6 独立可测试 — 导出批次创建和微调提交流程可用
---
## Phase 8: 用户故事 7 — 用户与权限管理(优先级: P2
**目标**: 管理员可以创建用户、变更角色(立即生效)、禁用账号(立即失效)
**独立测试**: 创建标注员用户 → 验证其能领取任务 → 升为审批员 → 验证立即可以审批 → 禁用账号 → 已有 Token 立即失效
- [ ] T059 [US7] 实现 `UserService``createUser()`BCrypt 哈希密码,强度因子 ≥ 10`updateUser()``updateRole()`DB 写入后立即 `redisTemplate.delete(userPermKey(userId))``updateStatus()`(禁用时删 Redis Token + 权限缓存)— `src/main/java/com/label/module/user/service/UserService.java`
- [ ] T060 [US7] 实现 `UserController``GET /api/users``POST /api/users``PUT /api/users/{id}``PUT /api/users/{id}/status``PUT /api/users/{id}/role`;全部 `@RequiresRoles("ADMIN")`)— `src/main/java/com/label/module/user/controller/UserController.java`
- [ ] T061 [US7] 集成测试:变更角色后权限下一次请求立即生效(无需重新登录);禁用账号后现有 Token 下一次请求立即返回 401 — `src/test/java/com/label/integration/UserManagementIntegrationTest.java`
**检查点**: US7 独立可测试 — 用户管理和即时权限变更可用
---
## Phase 9: 用户故事 8 — 视频处理与系统配置(优先级: P3
**目标**: 上传视频后触发异步预处理(帧提取/转文字AI 回调幂等处理;管理员可配置 Prompt 模板等系统参数
**独立测试(视频)**: 上传视频 → 创建处理任务 → 模拟成功回调 → annotation_task 出现在任务池;重复成功回调 → 任务数量不增加
**独立测试(配置)**: 为公司设置专属 Prompt → 验证该公司使用新值;其他公司使用全局默认
- [ ] T062 [P] [US8] 创建 `VideoProcessJob` 实体 + `VideoProcessJobMapper``src/main/java/com/label/module/video/entity/VideoProcessJob.java` + `mapper/VideoProcessJobMapper.java`
- [ ] T063 [P] [US8] 创建 `SysConfig` 实体 + `SysConfigMapper`(含 `selectByCompanyAndKey(companyId, configKey)` 方法,支持 `companyId IS NULL` 查询)— `src/main/java/com/label/module/config/entity/SysConfig.java` + `mapper/SysConfigMapper.java`
- [ ] T064 [US8] 实现 `VideoProcessService``createJob()``@Transactional``source_data.status → PREPROCESSING` + 插入 job + 触发 AI 异步调用);`handleCallback()``@Transactional`:幂等检查 status==SUCCESS 则 return成功 → SUCCESS + `source_data.status → PENDING`;失败 → 按 retry_count 决定 RETRYING 或 FAILED`reset()`FAILED → PENDING`src/main/java/com/label/module/video/service/VideoProcessService.java`
- [ ] T065 [US8] 实现 `VideoController``POST /api/video/process``GET /api/video/jobs/{jobId}``POST /api/video/jobs/{jobId}/reset``POST /api/video/callback`内部接口IP 白名单或服务密钥保护))— `src/main/java/com/label/module/video/controller/VideoController.java`
- [ ] T066 [US8] 实现 `SysConfigService.get(configKey)`(先按 `(companyId, key)` 查;未命中按 `(NULL, key)` 查全局默认)和 `update(key, value)`UPSERT公司级配置不存在则创建存在则覆盖`src/main/java/com/label/module/config/service/SysConfigService.java`
- [ ] T067 [US8] 实现 `SysConfigController``GET /api/config`(合并公司级 + 全局,标注 scope`PUT /api/config/{key}`;均 `@RequiresRoles("ADMIN")`)— `src/main/java/com/label/module/config/controller/SysConfigController.java`
- [ ] T068 [US8] 集成测试:同一 jobId 两次成功回调,`annotation_task` 记录数为 1幂等达最大重试次数后 status = FAILED — `src/test/java/com/label/integration/VideoCallbackIdempotencyTest.java`
- [ ] T069 [US8] 集成测试:公司级配置覆盖同 Key 的全局默认;其他公司读取全局默认 — `src/test/java/com/label/integration/SysConfigIntegrationTest.java`
**检查点**: US8 独立可测试 — 视频处理幂等和配置管理可用
---
## Phase 10: 收尾与横切关注点
**目标**: 多租户隔离验证、整体合规检查、快速启动验证
- [ ] T070 集成测试:`MultiTenantIsolationTest`(公司 A 身份查询公司 B 的资料/任务 → 返回空列表或 404不泄露数据`src/test/java/com/label/integration/MultiTenantIsolationTest.java`
- [ ] T071 [P] 代码审查:检查所有 Controller 方法返回值均为 `Result<T>``Result<PageResult<T>>`,无裸 POJO 或裸 List 返回
- [ ] T072 [P] 代码审查:检查所有列表查询方法均含分页参数(`page`/`pageSize`),无 `selectAll()` 或不分页的查询
- [ ] T073 [P] 代码审查:检查 `sys_operation_log` 相关代码,确认应用层零处 UPDATE 或 DELETE
- [ ] T074 [P] 代码审查:检查所有 `@Transactional` 方法内无 `AiServiceClient` 的同步 HTTP 调用(审批触发 AI 必须通过 `@TransactionalEventListener`
- [ ] T075 运行 `quickstart.md` 端到端验证:`docker compose up -d` → 登录 → 上传文件 → 创建任务 → 领取 → 提交 → 审批通过 → 确认 QA 任务出现
---
## 依赖关系与执行顺序
### 阶段依赖
```
Phase 1初始化
Phase 2基础设施[全部完成后解锁所有用户故事]
Phase 3US1 认证) ← 可与 Phase 4/5/6/7/8/9 并行
Phase 4US2 上传) ← 依赖 Phase 2独立于其他用户故事
Phase 5US3+4 提取) ← 依赖 Phase 2上传已有资料的集成测试依赖 US2
Phase 6US5 QA ← 依赖 Phase 5 完成QA 任务由提取审批自动创建)
Phase 7US6 导出) ← 依赖 Phase 6 完成(需要 APPROVED 的 training_dataset
Phase 8US7 用户管理) ← 依赖 Phase 3UserService 在 AuthService 基础上扩展)
Phase 9US8 视频+配置) ← 依赖 Phase 2其余独立
Phase 10收尾
```
### 用户故事间依赖
- **US1认证**: 仅依赖 Phase 2完全独立
- **US2上传**: 仅依赖 Phase 2完全独立
- **US3+4提取**: 依赖 Phase 2集成测试中使用已上传资料需 US2
- **US5QA**: 依赖 US3+4QA 任务来源于提取阶段审批通过的级联触发)
- **US6导出**: 依赖 US5需要 APPROVED 状态的 training_dataset
- **US7用户管理**: 依赖 US1UserService 扩展 AuthService 的用户实体)
- **US8视频+配置)**: 仅依赖 Phase 2
### 阶段内并行机会
- Phase 2T007-T010、T012-T015、T018-T019 均可并行(独立文件)
- Phase 3T024、T025 可并行(独立文件)
- Phase 5T033、T034、T035 可并行(独立文件)
- Phase 9T062、T063 可并行(独立文件)
- Phase 10T071-T074 全部可并行(仅代码审查,无文件修改)
---
## 并行执行示例
### Phase 2 基础设施并行
```
同时启动:
任务: "创建 BusinessException、GlobalExceptionHandler — common/exception/" [T007]
任务: "创建 CompanyContextThreadLocal— common/context/" [T008]
任务: "创建 RustFsClient — common/storage/" [T018]
任务: "创建 AiServiceClient — common/ai/" [T019]
任务: "创建 SourceStatus 枚举" [T012]
任务: "创建 TaskStatus 枚举" [T013]
```
### Phase 5 提取阶段并行
```
同时启动(实体/Mapper
任务: "创建 AnnotationTask 实体 + Mapper" [T033]
任务: "创建 AnnotationTaskHistory 实体 + Mapper" [T034]
任务: "创建 AnnotationResult 实体 + Mapper" [T035]
```
---
## 实施策略
### MVP 优先(仅用户故事 1
1. 完成 Phase 1初始化
2. 完成 Phase 2基础设施**关键,阻塞所有故事**
3. 完成 Phase 3US1 认证)
4. **停止并验证**: 登录/退出/权限校验全流程可用
5. 可以独立部署演示认证功能
### 增量交付
1. Phase 1 + Phase 2 → 基础就绪
2. Phase 3US1→ 验证 → 演示MVP
3. Phase 4US2→ 验证 → 演示(上传功能)
4. Phase 5US3+4→ 验证 → 演示(标注流程)
5. Phase 6US5→ 验证 → 演示(完整双阶段流水线)
6. Phase 7US6→ 验证 → 演示(训练数据产出)
7. Phase 8+9 → 验证 → 演示(完整平台)
8. Phase 10 → 收尾
### 多人协作策略
Phase 2 完成后:
- 开发者 APhase 3US1 认证)+ Phase 8US7 用户管理)
- 开发者 BPhase 4US2 上传)+ Phase 5US3+4 提取)
- 开发者 CPhase 9US8 视频+配置)
Phase 5 完成后:
- 开发者 A/B 合力Phase 6US5 QA→ Phase 7US6 导出)
---
## 说明
- `[P]` 任务 = 不同文件,无依赖,可并行
- `[USn]` 标签将任务映射到具体用户故事,便于追踪
- 每个用户故事应独立可完成和可测试
- 每完成一个阶段后提交 git commit
- 在每个检查点停下来独立验证该用户故事
- 避免:模糊任务、同文件并发冲突、破坏独立性的跨故事依赖

332
sql/init.sql Normal file
View File

@@ -0,0 +1,332 @@
-- label_backend init.sql
-- PostgreSQL 14+
-- 按依赖顺序建全部 11 张表:
-- sys_company → sys_user → source_data → annotation_task → annotation_result
-- → training_dataset → export_batch → sys_config → sys_operation_log
-- → annotation_task_history → video_process_job
-- 含所有索引及初始配置数据
-- ============================================================
-- Extensions
-- ============================================================
-- pgcrypto supplies gen_random_uuid(), used by export_batch.batch_uuid.
-- NOTE(review): gen_random_uuid() is built in since PostgreSQL 13, so on
-- the targeted 14+ this is redundant but harmless.
CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- ============================================================
-- 1. sys_company (tenant root)
-- ============================================================
-- One row per tenant; every business table references this id, which is
-- the basis of multi-tenant data isolation.
CREATE TABLE IF NOT EXISTS sys_company (
id BIGSERIAL PRIMARY KEY,
company_name VARCHAR(100) NOT NULL,
company_code VARCHAR(50) NOT NULL,
status VARCHAR(10) NOT NULL DEFAULT 'ACTIVE', -- ACTIVE / DISABLED
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
CONSTRAINT uk_sys_company_name UNIQUE (company_name),
CONSTRAINT uk_sys_company_code UNIQUE (company_code)
);
-- ============================================================
-- 2. sys_user (platform user)
-- ============================================================
-- A user belongs to exactly one company; usernames are unique per
-- company (not globally), hence the composite unique constraint.
CREATE TABLE IF NOT EXISTS sys_user (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
username VARCHAR(50) NOT NULL,
password_hash VARCHAR(255) NOT NULL, -- BCrypt, strength >= 10
real_name VARCHAR(50),
role VARCHAR(20) NOT NULL, -- UPLOADER / ANNOTATOR / REVIEWER / ADMIN
status VARCHAR(10) NOT NULL DEFAULT 'ACTIVE', -- ACTIVE / DISABLED
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
CONSTRAINT uk_sys_user_company_username UNIQUE (company_id, username)
);
-- Tenant-scoped user listings.
CREATE INDEX IF NOT EXISTS idx_sys_user_company_id
ON sys_user (company_id);
-- ============================================================
-- 3. source_data (raw source material)
-- ============================================================
-- One row per uploaded file; video-derived artifacts (frames / text
-- fragments) point back to the original via parent_source_id, which
-- preserves the provenance chain.
CREATE TABLE IF NOT EXISTS source_data (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
uploader_id BIGINT REFERENCES sys_user(id),
data_type VARCHAR(20) NOT NULL, -- TEXT / IMAGE / VIDEO
file_path VARCHAR(500) NOT NULL, -- RustFS object path
file_name VARCHAR(255) NOT NULL,
file_size BIGINT,
bucket_name VARCHAR(100) NOT NULL,
parent_source_id BIGINT REFERENCES source_data(id), -- video frame / text fragment derived from a parent
status VARCHAR(20) NOT NULL DEFAULT 'PENDING',
-- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED
reject_reason TEXT, -- reserved field (no REJECTED status at present)
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
-- Tenant-scoped listings.
CREATE INDEX IF NOT EXISTS idx_source_data_company_id
ON source_data (company_id);
-- Status-filtered listings within a tenant.
CREATE INDEX IF NOT EXISTS idx_source_data_company_status
ON source_data (company_id, status);
-- Traversal from a video to its derived frames/fragments.
CREATE INDEX IF NOT EXISTS idx_source_data_parent_source_id
ON source_data (parent_source_id);
-- ============================================================
-- 4. annotation_task (annotation task)
-- ============================================================
-- Work unit of the two-stage pipeline (EXTRACTION then QA_GENERATION).
-- Claims are made via a conditional UPDATE guarded by status =
-- 'UNCLAIMED', so no extra lock column is needed here.
CREATE TABLE IF NOT EXISTS annotation_task (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
source_id BIGINT NOT NULL REFERENCES source_data(id),
task_type VARCHAR(30) NOT NULL, -- EXTRACTION / QA_GENERATION
status VARCHAR(20) NOT NULL DEFAULT 'UNCLAIMED',
-- UNCLAIMED / IN_PROGRESS / SUBMITTED / APPROVED / REJECTED
claimed_by BIGINT REFERENCES sys_user(id),
claimed_at TIMESTAMP,
submitted_at TIMESTAMP,
completed_at TIMESTAMP,
is_final BOOLEAN NOT NULL DEFAULT FALSE, -- true once APPROVED; no further review expected
ai_model VARCHAR(50),
reject_reason TEXT,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
-- Task-pool queries: tenant + status (UNCLAIMED pool / SUBMITTED review queue).
CREATE INDEX IF NOT EXISTS idx_annotation_task_company_status
ON annotation_task (company_id, status);
CREATE INDEX IF NOT EXISTS idx_annotation_task_source_id
ON annotation_task (source_id);
-- "My tasks" lookups per annotator.
CREATE INDEX IF NOT EXISTS idx_annotation_task_claimed_by
ON annotation_task (claimed_by);
-- ============================================================
-- 5. annotation_result (annotation output, JSONB)
-- ============================================================
-- Exactly one result row per task (uk_annotation_result_task_id);
-- result_json is always replaced wholesale, never patched in place.
CREATE TABLE IF NOT EXISTS annotation_result (
id BIGSERIAL NOT NULL,
task_id BIGINT NOT NULL REFERENCES annotation_task(id),
company_id BIGINT NOT NULL REFERENCES sys_company(id),
result_json JSONB NOT NULL DEFAULT '[]'::jsonb, -- whole-document replace semantics
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
CONSTRAINT pk_annotation_result PRIMARY KEY (id),
CONSTRAINT uk_annotation_result_task_id UNIQUE (task_id)
);
-- No separate index on task_id: the UNIQUE constraint above already
-- creates a unique index on that column, so the former
-- idx_annotation_result_task_id was a redundant duplicate and is dropped.
CREATE INDEX IF NOT EXISTS idx_annotation_result_company_id
ON annotation_result (company_id);
-- ============================================================
-- 6. training_dataset (training samples)
-- The export_batch_id FK is added later, after export_batch exists,
-- to avoid a forward reference (see section 7).
-- ============================================================
-- One GLM-format QA sample per row, produced by the QA-generation stage
-- and exported in batches once APPROVED.
CREATE TABLE IF NOT EXISTS training_dataset (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
task_id BIGINT NOT NULL REFERENCES annotation_task(id),
source_id BIGINT NOT NULL REFERENCES source_data(id),
sample_type VARCHAR(20) NOT NULL, -- TEXT / IMAGE / VIDEO_FRAME
glm_format_json JSONB NOT NULL, -- GLM fine-tune format
status VARCHAR(20) NOT NULL DEFAULT 'PENDING_REVIEW',
-- PENDING_REVIEW / APPROVED / REJECTED
export_batch_id BIGINT, -- filled on export; FK added below (section 7)
exported_at TIMESTAMP,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
-- Export screens filter by tenant + status (APPROVED samples).
CREATE INDEX IF NOT EXISTS idx_training_dataset_company_status
ON training_dataset (company_id, status);
CREATE INDEX IF NOT EXISTS idx_training_dataset_task_id
ON training_dataset (task_id);
-- ============================================================
-- 7. export_batch (export batch)
-- ============================================================
-- batch_uuid names the exported JSONL object in RustFS
-- (finetune-export/export/{batchUuid}.jsonl), so it must be unique.
CREATE TABLE IF NOT EXISTS export_batch (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
batch_uuid UUID NOT NULL DEFAULT gen_random_uuid(),
sample_count INT NOT NULL DEFAULT 0,
dataset_file_path VARCHAR(500), -- RustFS path of the exported JSONL
glm_job_id VARCHAR(100), -- GLM fine-tune job ID
finetune_status VARCHAR(20) NOT NULL DEFAULT 'NOT_STARTED',
-- NOT_STARTED / RUNNING / COMPLETED / FAILED
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
CONSTRAINT uk_export_batch_uuid UNIQUE (batch_uuid)
);
CREATE INDEX IF NOT EXISTS idx_export_batch_company_id
ON export_batch (company_id);
-- Late-added FK: training_dataset.export_batch_id -> export_batch.id.
-- Guarded by a DO block because ALTER TABLE ... ADD CONSTRAINT has no
-- IF NOT EXISTS form; without the guard a re-run of this script would
-- fail here even though every other statement is idempotent.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'fk_training_dataset_export_batch'
          AND conrelid = 'training_dataset'::regclass
    ) THEN
        ALTER TABLE training_dataset
            ADD CONSTRAINT fk_training_dataset_export_batch
            FOREIGN KEY (export_batch_id) REFERENCES export_batch(id)
            NOT VALID; -- allow pre-existing NULL rows without revalidation
    END IF;
END $$;
-- ============================================================
-- 8. sys_config (system configuration)
-- ============================================================
-- Two-level configuration: rows with company_id IS NULL are global
-- defaults; a company-level row with the same key overrides the global.
CREATE TABLE IF NOT EXISTS sys_config (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT REFERENCES sys_company(id), -- NULL = global default
config_key VARCHAR(100) NOT NULL,
config_value TEXT NOT NULL,
description VARCHAR(255),
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
-- Company-level uniqueness: one override per (company, key).
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_company_key
ON sys_config (company_id, config_key)
WHERE company_id IS NOT NULL;
-- Global uniqueness: one default row per key.
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_global_key
ON sys_config (config_key)
WHERE company_id IS NULL;
-- No extra non-unique (company_id, config_key) index: the two partial
-- unique indexes above already cover both lookup paths
-- (company_id = ? AND config_key = ?  /  company_id IS NULL AND
-- config_key = ?), so the former idx_sys_config_company_key duplicate
-- has been dropped.
-- ============================================================
-- 9. sys_operation_log (audit log, append-only)
-- ============================================================
-- Written as an independent operation by the audit aspect; the
-- application layer must never UPDATE or DELETE rows in this table.
CREATE TABLE IF NOT EXISTS sys_operation_log (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
operator_id BIGINT REFERENCES sys_user(id),
operation_type VARCHAR(50) NOT NULL, -- e.g. EXTRACTION_APPROVE / USER_LOGIN
target_id BIGINT,
target_type VARCHAR(50),
detail JSONB,
result VARCHAR(10), -- SUCCESS / FAILURE
error_message TEXT,
operated_at TIMESTAMP NOT NULL DEFAULT NOW()
-- No updated_at: append-only table, rows are never updated
);
-- Time-ranged audit queries per tenant.
CREATE INDEX IF NOT EXISTS idx_sys_operation_log_company_operated_at
ON sys_operation_log (company_id, operated_at);
CREATE INDEX IF NOT EXISTS idx_sys_operation_log_operator_id
ON sys_operation_log (operator_id);
-- ============================================================
-- 10. annotation_task_history (task status history, append-only)
-- ============================================================
-- One row per task status transition (from_status -> to_status).
-- from_status is nullable — presumably for the task's initial state;
-- confirm against the service layer's insertHistory calls.
CREATE TABLE IF NOT EXISTS annotation_task_history (
id BIGSERIAL PRIMARY KEY,
task_id BIGINT NOT NULL REFERENCES annotation_task(id),
company_id BIGINT NOT NULL REFERENCES sys_company(id),
from_status VARCHAR(20),
to_status VARCHAR(20) NOT NULL,
operator_id BIGINT REFERENCES sys_user(id),
operator_role VARCHAR(20),
comment TEXT,
created_at TIMESTAMP NOT NULL DEFAULT NOW()
-- No updated_at: append-only table, rows are never updated
);
CREATE INDEX IF NOT EXISTS idx_annotation_task_history_task_id
ON annotation_task_history (task_id);
CREATE INDEX IF NOT EXISTS idx_annotation_task_history_company_id
ON annotation_task_history (company_id);
-- ============================================================
-- 11. video_process_job (async video preprocessing job)
-- ============================================================
-- Tracks frame-extraction / video-to-text jobs with retry bookkeeping
-- (retry_count vs max_retries); success callbacks from the AI service
-- must be handled idempotently by the service layer.
CREATE TABLE IF NOT EXISTS video_process_job (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
source_id BIGINT NOT NULL REFERENCES source_data(id),
job_type VARCHAR(30) NOT NULL, -- FRAME_EXTRACT / VIDEO_TO_TEXT
status VARCHAR(20) NOT NULL DEFAULT 'PENDING',
-- PENDING / RUNNING / SUCCESS / FAILED / RETRYING
params JSONB, -- e.g. {"frameInterval": 30, "mode": "FRAME"}
output_path VARCHAR(500), -- RustFS output path once the job completes
retry_count INT NOT NULL DEFAULT 0,
max_retries INT NOT NULL DEFAULT 3,
error_message TEXT,
started_at TIMESTAMP,
completed_at TIMESTAMP,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_video_process_job_company_id
ON video_process_job (company_id);
CREATE INDEX IF NOT EXISTS idx_video_process_job_source_id
ON video_process_job (source_id);
-- NOTE(review): other tables index (company_id, status) for
-- tenant-scoped queries; a composite index may serve better than this
-- single-column one — confirm against actual query patterns.
CREATE INDEX IF NOT EXISTS idx_video_process_job_status
ON video_process_job (status);
-- ============================================================
-- Seed data
-- ============================================================
-- 1. Demo company. company_name and company_code both carry UNIQUE
-- constraints, so ON CONFLICT DO NOTHING keeps this re-runnable.
INSERT INTO sys_company (company_name, company_code, status)
VALUES ('演示公司', 'DEMO', 'ACTIVE')
ON CONFLICT DO NOTHING;
-- 2. Seed users (BCrypt hashes, strength=10). Demo credentials:
-- admin / admin123
-- reviewer01 / review123
-- annotator01 / annot123
-- uploader01 / upload123
-- CROSS JOIN + WHERE pins the rows to the DEMO company created above;
-- the conflict target (company_id, username) makes re-runs no-ops.
INSERT INTO sys_user (company_id, username, password_hash, real_name, role, status)
SELECT
c.id,
u.username,
u.password_hash,
u.real_name,
u.role,
'ACTIVE'
FROM sys_company c
CROSS JOIN (VALUES
('admin',
'$2a$10$B8iR5z43URiNPm.eut3JvufIPBuvGx5ZZmqyUqE1A1WdbZppX5bmi',
'管理员',
'ADMIN'),
('reviewer01',
'$2a$10$euOJZRfUtYNW7WHpfW1Ciee5b3rjkYFe3yQHT/uCQWrYVc0XQcukm',
'审核员01',
'REVIEWER'),
('annotator01',
'$2a$10$8UKwHPNASauKMTrqosR0Reg1X1gkFzFlGa/HBwNLXUELaj4e/zcqu',
'标注员01',
'ANNOTATOR'),
('uploader01',
'$2a$10$o2d7jsT31vyxIJHUo50mUefoZLLvGqft97zaL9OQCjRxn9ie1H/1O',
'上传员01',
'UPLOADER')
) AS u(username, password_hash, real_name, role)
WHERE c.company_code = 'DEMO'
ON CONFLICT (company_id, username) DO NOTHING;
-- 3. Global system configuration (company_id = NULL rows are the global
-- defaults; company-level overrides are written at runtime).
-- The target-less ON CONFLICT DO NOTHING is arbitrated by the partial
-- unique indexes declared above.
INSERT INTO sys_config (company_id, config_key, config_value, description)
VALUES
(NULL, 'token_ttl_seconds', '7200',
'会话凭证有效期(秒)'),
(NULL, 'model_default', 'glm-4',
'AI 辅助默认模型'),
(NULL, 'video_frame_interval', '30',
'视频帧提取间隔(帧数)'),
(NULL, 'prompt_extract_text',
'请提取以下文本中的主语-谓语-宾语三元组以JSON数组格式返回每个元素包含subject、predicate、object、sourceText、startOffset、endOffset字段。',
'文本三元组提取 Prompt 模板'),
(NULL, 'prompt_extract_image',
'请提取图片中的实体关系四元组以JSON数组格式返回每个元素包含subject、relation、object、modifier、confidence字段。',
'图片四元组提取 Prompt 模板'),
(NULL, 'prompt_qa_gen_text',
'根据以下文本三元组生成高质量问答对以JSON数组格式返回每个元素包含question、answer、difficulty字段。',
'文本问答生成 Prompt 模板'),
(NULL, 'prompt_qa_gen_image',
'根据以下图片四元组生成高质量问答对以JSON数组格式返回每个元素包含question、answer、imageRef、difficulty字段。',
'图片问答生成 Prompt 模板')
ON CONFLICT DO NOTHING;

View File

@@ -0,0 +1,12 @@
package com.label;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Spring Boot entry point for the label_backend service.
 */
@SpringBootApplication
public class LabelBackendApplication {
public static void main(String[] args) {
// Equivalent to SpringApplication.run(...): build the application
// object explicitly, then launch it with the CLI arguments.
new SpringApplication(LabelBackendApplication.class).run(args);
}
}

View File

@@ -0,0 +1,149 @@
package com.label.common.ai;
import lombok.Builder;
import lombok.Data;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.web.client.RestClient;
import jakarta.annotation.PostConstruct;
import java.util.List;
import java.util.Map;
@Component
public class AiServiceClient {
@Value("${ai-service.base-url}")
private String baseUrl;
@Value("${ai-service.timeout:30000}")
private int timeoutMs;
private RestClient restClient;
@PostConstruct
public void init() {
restClient = RestClient.builder()
.baseUrl(baseUrl)
.build();
}
// DTO classes
@Data
@Builder
public static class ExtractionRequest {
private Long sourceId;
private String filePath;
private String bucket;
private String model;
private String prompt;
}
@Data
public static class ExtractionResponse {
private List<Map<String, Object>> items; // triple/quadruple items
private String rawOutput;
}
@Data
@Builder
public static class VideoProcessRequest {
private Long sourceId;
private String filePath;
private String bucket;
private Map<String, Object> params; // frameInterval, mode etc.
}
@Data
public static class QaGenResponse {
private List<Map<String, Object>> qaPairs;
}
@Data
@Builder
public static class FinetuneRequest {
private String datasetPath; // RustFS path to JSONL file
private String model;
private Long batchId;
}
@Data
public static class FinetuneResponse {
private String jobId;
private String status;
}
@Data
public static class FinetuneStatusResponse {
private String jobId;
private String status; // PENDING/RUNNING/COMPLETED/FAILED
private Integer progress; // 0-100
private String errorMessage;
}
// The 8 endpoints:
public ExtractionResponse extractText(ExtractionRequest request) {
return restClient.post()
.uri("/extract/text")
.body(request)
.retrieve()
.body(ExtractionResponse.class);
}
public ExtractionResponse extractImage(ExtractionRequest request) {
return restClient.post()
.uri("/extract/image")
.body(request)
.retrieve()
.body(ExtractionResponse.class);
}
public void extractFrames(VideoProcessRequest request) {
restClient.post()
.uri("/video/extract-frames")
.body(request)
.retrieve()
.toBodilessEntity();
}
public void videoToText(VideoProcessRequest request) {
restClient.post()
.uri("/video/to-text")
.body(request)
.retrieve()
.toBodilessEntity();
}
public QaGenResponse genTextQa(ExtractionRequest request) {
return restClient.post()
.uri("/qa/gen-text")
.body(request)
.retrieve()
.body(QaGenResponse.class);
}
public QaGenResponse genImageQa(ExtractionRequest request) {
return restClient.post()
.uri("/qa/gen-image")
.body(request)
.retrieve()
.body(QaGenResponse.class);
}
public FinetuneResponse startFinetune(FinetuneRequest request) {
return restClient.post()
.uri("/finetune/start")
.body(request)
.retrieve()
.body(FinetuneResponse.class);
}
public FinetuneStatusResponse getFinetuneStatus(String jobId) {
return restClient.get()
.uri("/finetune/status/{jobId}", jobId)
.retrieve()
.body(FinetuneStatusResponse.class);
}
}

View File

@@ -0,0 +1,75 @@
package com.label.common.aop;
import com.label.common.context.CompanyContext;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.aspectj.lang.ProceedingJoinPoint;
import org.aspectj.lang.annotation.Around;
import org.aspectj.lang.annotation.Aspect;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Component;
/**
 * AOP aspect for audit logging.
 *
 * KEY DESIGN DECISIONS:
 * 1. Uses JdbcTemplate directly (not MyBatis Mapper) to bypass TenantLineInnerInterceptor
 *    — operation logs need to capture company_id explicitly, not via thread-local injection
 * 2. Written in finally block — audit log is written regardless of business method success/failure
 * 3. Audit failures are logged as ERROR but NEVER rethrown — business transactions must not be
 *    affected by audit failures
 * 4. Captures result of business method to log SUCCESS or FAILURE
 */
@Slf4j
@Aspect
@Component
@RequiredArgsConstructor
public class AuditAspect {

    private final JdbcTemplate jdbcTemplate;

    /**
     * Wraps any method annotated with {@link OperationLog}: proceeds with the
     * business call, records SUCCESS/FAILURE, and writes one row to
     * sys_operation_log.
     *
     * Note the control flow: the catch rethrows, so on failure the throw wins
     * over the trailing {@code return returnValue} — the finally block runs
     * in both cases before control leaves this method.
     */
    @Around("@annotation(operationLog)")
    public Object audit(ProceedingJoinPoint joinPoint, OperationLog operationLog) throws Throwable {
        Long companyId = CompanyContext.get();
        // operator_id can be obtained from SecurityContext or ThreadLocal in the future
        // For now, use null as a safe default when not available
        Long operatorId = null;
        String result = "SUCCESS";
        String errorMessage = null;
        Object returnValue = null;
        try {
            returnValue = joinPoint.proceed();
        } catch (Throwable e) {
            result = "FAILURE";
            errorMessage = e.getMessage();
            throw e; // Always rethrow business exceptions
        } finally {
            // Write audit log in finally block — runs regardless of success or failure
            // CRITICAL: Never throw from here — would swallow the original exception
            try {
                writeAuditLog(companyId, operatorId, operationLog.type(),
                        operationLog.targetType(), result, errorMessage);
            } catch (Exception auditEx) {
                // Audit failure must NOT affect business transaction
                log.error("审计日志写入失败: type={}, error={}",
                        operationLog.type(), auditEx.getMessage(), auditEx);
            }
        }
        return returnValue;
    }

    /**
     * Inserts one audit row. An empty targetType (the annotation's default)
     * is normalized to SQL NULL.
     */
    private void writeAuditLog(Long companyId, Long operatorId, String operationType,
                               String targetType, String result, String errorMessage) {
        String sql = """
                INSERT INTO sys_operation_log
                (company_id, operator_id, operation_type, target_type, result, error_message, operated_at)
                VALUES (?, ?, ?, ?, ?, ?, NOW())
                """;
        jdbcTemplate.update(sql, companyId, operatorId, operationType,
                targetType.isEmpty() ? null : targetType,
                result, errorMessage);
    }
}

View File

@@ -0,0 +1,18 @@
package com.label.common.aop;
import java.lang.annotation.*;
/**
 * Marks a method for audit logging.
 * The AuditAspect intercepts this annotation and writes to sys_operation_log.
 */
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
@Documented
public @interface OperationLog {

    /** Operation type, e.g., "EXTRACTION_APPROVE", "USER_LOGIN", "TASK_CLAIM". */
    String type();

    /**
     * Target entity type, e.g., "annotation_task", "sys_user".
     * The empty-string default is stored as SQL NULL by AuditAspect.
     */
    String targetType() default "";
}

View File

@@ -0,0 +1,57 @@
package com.label.common.config;
import com.baomidou.mybatisplus.extension.plugins.MybatisPlusInterceptor;
import com.baomidou.mybatisplus.extension.plugins.handler.TenantLineHandler;
import com.baomidou.mybatisplus.extension.plugins.inner.PaginationInnerInterceptor;
import com.baomidou.mybatisplus.extension.plugins.inner.TenantLineInnerInterceptor;
import com.label.common.context.CompanyContext;
import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.LongValue;
import net.sf.jsqlparser.expression.NullValue;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.Arrays;
import java.util.List;
/**
 * MyBatis-Plus interceptor configuration: multi-tenant row isolation plus
 * pagination support.
 */
@Configuration
public class MybatisPlusConfig {

    // Tables that do NOT need tenant isolation (either global or tenant root tables)
    private static final List<String> IGNORED_TABLES = Arrays.asList(
            "sys_company", // the tenant root table itself
            "sys_config"   // has company_id=NULL for global defaults; service handles this manually
    );

    @Bean
    public MybatisPlusInterceptor mybatisPlusInterceptor() {
        MybatisPlusInterceptor interceptor = new MybatisPlusInterceptor();
        // 1. Tenant isolation - auto-injects WHERE company_id = ?
        //    Registered BEFORE the pagination interceptor, matching the ordering
        //    recommended by MyBatis-Plus (tenant line first, pagination last).
        interceptor.addInnerInterceptor(new TenantLineInnerInterceptor(new TenantLineHandler() {
            @Override
            public Expression getTenantId() {
                Long companyId = CompanyContext.get();
                if (companyId == null) {
                    // NOTE(review): NullValue produces `company_id = NULL`, which
                    // matches no rows — queries on tenant tables outside an
                    // authenticated request silently return empty results instead
                    // of failing loudly. Confirm this fail-closed behavior is intended.
                    return new NullValue();
                }
                return new LongValue(companyId);
            }

            @Override
            public String getTenantIdColumn() {
                return "company_id";
            }

            @Override
            public boolean ignoreTable(String tableName) {
                return IGNORED_TABLES.contains(tableName);
            }
        }));
        // 2. Pagination interceptor (required for MyBatis Plus Page queries)
        interceptor.addInnerInterceptor(new PaginationInnerInterceptor());
        return interceptor;
    }
}

View File

@@ -0,0 +1,24 @@
package com.label.common.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.redis.connection.RedisConnectionFactory;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.data.redis.serializer.StringRedisSerializer;
/**
 * Redis template configuration.
 */
@Configuration
public class RedisConfig {

    /**
     * String-only RedisTemplate: keys, values, hash keys and hash values are
     * all serialized with StringRedisSerializer so entries remain readable in
     * redis-cli and interoperable with other clients.
     */
    @Bean
    public RedisTemplate<String, String> redisTemplate(RedisConnectionFactory connectionFactory) {
        StringRedisSerializer stringSerializer = new StringRedisSerializer();
        RedisTemplate<String, String> stringTemplate = new RedisTemplate<>();
        stringTemplate.setConnectionFactory(connectionFactory);
        stringTemplate.setKeySerializer(stringSerializer);
        stringTemplate.setHashKeySerializer(stringSerializer);
        stringTemplate.setValueSerializer(stringSerializer);
        stringTemplate.setHashValueSerializer(stringSerializer);
        stringTemplate.afterPropertiesSet();
        return stringTemplate;
    }
}

View File

@@ -0,0 +1,21 @@
package com.label.common.context;
/**
 * Thread-local holder for the current tenant (company) id.
 * Bound per request and must be cleared before the worker thread is reused.
 */
public class CompanyContext {

    /** Per-thread tenant id; unset when no authenticated request is bound. */
    private static final ThreadLocal<Long> HOLDER = new ThreadLocal<>();

    private CompanyContext() { // Static utility — never instantiated
        throw new UnsupportedOperationException("Utility class");
    }

    /** Binds the given company id to the current thread. */
    public static void set(Long companyId) {
        HOLDER.set(companyId);
    }

    /** @return the company id bound to this thread, or null when none is set. */
    public static Long get() {
        return HOLDER.get();
    }

    /** Unbinds the id; remove() (not set(null)) so pooled threads do not leak the slot. */
    public static void clear() {
        HOLDER.remove();
    }
}

View File

@@ -0,0 +1,22 @@
package com.label.common.exception;
import lombok.Getter;
import org.springframework.http.HttpStatus;
/**
 * Business-rule violation carrying a machine-readable code and the HTTP
 * status the API layer should respond with.
 */
@Getter
public class BusinessException extends RuntimeException {

    // Machine-readable error code (mirrors ResultCode constant names).
    private final String code;
    // HTTP status used by GlobalExceptionHandler for the response.
    private final HttpStatus httpStatus;

    /** Creates a business error defaulting to 400 Bad Request. */
    public BusinessException(String code, String message) {
        this(code, message, HttpStatus.BAD_REQUEST);
    }

    /** Creates a business error with an explicit HTTP status. */
    public BusinessException(String code, String message, HttpStatus httpStatus) {
        super(message);
        this.code = code;
        this.httpStatus = httpStatus;
    }
}

View File

@@ -0,0 +1,28 @@
package com.label.common.exception;
import com.label.common.result.Result;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.RestControllerAdvice;
/**
 * Global REST exception mapping: business errors keep their declared status
 * and code; anything unexpected becomes an opaque 500.
 */
@Slf4j
@RestControllerAdvice
public class GlobalExceptionHandler {

    /** Maps a BusinessException to its own HTTP status with code/message payload. */
    @ExceptionHandler(BusinessException.class)
    public ResponseEntity<Result<?>> handleBusinessException(BusinessException e) {
        log.warn("业务异常: code={}, message={}", e.getCode(), e.getMessage());
        Result<?> body = Result.failure(e.getCode(), e.getMessage());
        return ResponseEntity.status(e.getHttpStatus()).body(body);
    }

    /** Catch-all: logs the full stack trace, hides details from the client. */
    @ExceptionHandler(Exception.class)
    public ResponseEntity<Result<?>> handleException(Exception e) {
        log.error("系统异常", e);
        Result<?> body = Result.failure("INTERNAL_ERROR", "系统内部错误");
        return ResponseEntity.internalServerError().body(body);
    }
}

View File

@@ -0,0 +1,25 @@
package com.label.common.redis;
/**
 * Centralized Redis key naming conventions.
 * All keys follow the pattern: prefix:{id}
 */
public final class RedisKeyManager {

    private static final String TOKEN_PREFIX = "token:";
    private static final String USER_PERM_PREFIX = "user:perm:";
    private static final String TASK_CLAIM_PREFIX = "task:claim:";

    private RedisKeyManager() {}

    /** Session token key: token:{uuid} */
    public static String tokenKey(String uuid) {
        return TOKEN_PREFIX + uuid;
    }

    /** User permission cache key: user:perm:{userId} */
    public static String userPermKey(Long userId) {
        return USER_PERM_PREFIX + userId;
    }

    /** Task claim distributed lock key: task:claim:{taskId} */
    public static String taskClaimKey(Long taskId) {
        return TASK_CLAIM_PREFIX + taskId;
    }
}

View File

@@ -0,0 +1,61 @@
package com.label.common.redis;
import lombok.RequiredArgsConstructor;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.stereotype.Service;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * Thin facade over the string RedisTemplate exposing the value and hash
 * operations the application needs.
 */
@Service
@RequiredArgsConstructor
public class RedisService {

    private final RedisTemplate<String, String> redisTemplate;

    // ---- String operations ----

    /** Stores {@code value} under {@code key} with a TTL in seconds. */
    public void set(String key, String value, long ttlSeconds) {
        var values = redisTemplate.opsForValue();
        values.set(key, value, ttlSeconds, TimeUnit.SECONDS);
    }

    /** @return the stored string, or null when the key does not exist. */
    public String get(String key) {
        return redisTemplate.opsForValue().get(key);
    }

    /** Removes the key (no-op when absent). */
    public void delete(String key) {
        redisTemplate.delete(key);
    }

    /** @return true when the key currently exists. */
    public boolean exists(String key) {
        Boolean present = redisTemplate.hasKey(key);
        return Boolean.TRUE.equals(present);
    }

    /** Set if absent (NX). Returns true if key was set (lock acquired). */
    public boolean setIfAbsent(String key, String value, long ttlSeconds) {
        var acquired = redisTemplate.opsForValue()
                .setIfAbsent(key, value, ttlSeconds, TimeUnit.SECONDS);
        return Boolean.TRUE.equals(acquired);
    }

    /** Refresh TTL on an existing key (sliding expiration). */
    public void expire(String key, long ttlSeconds) {
        redisTemplate.expire(key, ttlSeconds, TimeUnit.SECONDS);
    }

    // ---- Hash operations (token storage: token:{uuid} → Hash) ----

    /** Writes all hash entries, then applies a TTL (two round trips, not atomic). */
    public void hSetAll(String key, Map<String, String> entries, long ttlSeconds) {
        var hash = redisTemplate.opsForHash();
        hash.putAll(key, entries);
        redisTemplate.expire(key, ttlSeconds, TimeUnit.SECONDS);
    }

    /** @return all entries of the hash at {@code key}. */
    public Map<Object, Object> hGetAll(String key) {
        return redisTemplate.opsForHash().entries(key);
    }

    /** @return one hash field as a string, or null when the field is missing. */
    public String hGet(String key, String field) {
        Object raw = redisTemplate.opsForHash().get(key, field);
        return raw == null ? null : raw.toString();
    }
}

View File

@@ -0,0 +1,22 @@
package com.label.common.result;
import lombok.Data;
import java.util.List;
/**
 * Pagination envelope: one page of rows plus paging metadata.
 */
@Data
public class PageResult<T> {

    // Rows on the current page.
    private List<T> items;
    // Total row count across all pages.
    private long total;
    // Page number that was requested.
    private int page;
    // Requested page size.
    private int pageSize;

    /** Builds a fully-populated page envelope. */
    public static <T> PageResult<T> of(List<T> items, long total, int page, int pageSize) {
        PageResult<T> result = new PageResult<>();
        result.setItems(items);
        result.setTotal(total);
        result.setPageSize(pageSize);
        result.setPage(page);
        return result;
    }
}

View File

@@ -0,0 +1,37 @@
package com.label.common.result;
import lombok.Data;
/**
 * Uniform API response envelope: a code, an optional payload, and an
 * optional human-readable message.
 */
@Data
public class Result<T> {

    private String code;
    private T data;
    private String message;

    /** Success response carrying a payload. */
    public static <T> Result<T> success(T data) {
        Result<T> r = new Result<>();
        r.setCode(ResultCode.SUCCESS.name());
        r.setData(data);
        return r;
    }

    /** Success response with no payload. */
    public static <T> Result<T> success() {
        return success(null);
    }

    /** Failure keyed by a well-known ResultCode constant. */
    public static <T> Result<T> failure(ResultCode code, String message) {
        return failure(code.name(), message);
    }

    /** Failure with a free-form code string. */
    public static <T> Result<T> failure(String code, String message) {
        Result<T> r = new Result<>();
        r.setCode(code);
        r.setMessage(message);
        return r;
    }
}

View File

@@ -0,0 +1,19 @@
package com.label.common.result;
/**
 * Well-known codes carried in {@code Result.code}.
 * Serialized via {@link Enum#name()}, so the constant names are part of the
 * API contract — do not rename.
 */
public enum ResultCode {
    SUCCESS,                  // 2xx - request succeeded
    FAILURE,                  // generic failure
    UNAUTHORIZED,             // 401 - no valid token
    FORBIDDEN,                // 403 - insufficient role
    NOT_FOUND,                // 404
    CONFLICT,                 // 409
    INVALID_STATE,            // 409 state machine violation
    TASK_CLAIMED,             // 409 task already claimed
    SELF_REVIEW_FORBIDDEN,    // 403 self-review prevention
    UNKNOWN_CONFIG_KEY,       // 400 unknown config key
    INVALID_SAMPLES,          // 400 invalid export samples
    EMPTY_SAMPLES,            // 400 empty sample list
    FINETUNE_ALREADY_STARTED, // 409 fine-tune already started
    INVALID_STATE_TRANSITION, // 409 invalid state machine transition
    INTERNAL_ERROR            // 500
}

View File

@@ -0,0 +1,26 @@
package com.label.common.shiro;
import org.apache.shiro.authc.AuthenticationToken;
/**
 * Shiro AuthenticationToken wrapper for Bearer token strings.
 * Credentials are the raw token string; the principal is the associated
 * {@link TokenPrincipal}.
 */
public class BearerToken implements AuthenticationToken {

    // Raw bearer token value (serves as the credentials).
    private final String token;
    // Session data attached to this token.
    private final TokenPrincipal principal;

    public BearerToken(String token, TokenPrincipal principal) {
        this.token = token;
        this.principal = principal;
    }

    /** @return the {@link TokenPrincipal} carried by this token. */
    @Override
    public Object getPrincipal() {
        return principal;
    }

    /** @return the raw bearer token string. */
    @Override
    public Object getCredentials() {
        return token;
    }
}

View File

@@ -0,0 +1,71 @@
package com.label.common.shiro;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.label.common.redis.RedisService;
import org.apache.shiro.mgt.SecurityManager;
import org.apache.shiro.realm.Realm;
import org.apache.shiro.spring.web.ShiroFilterFactoryBean;
import org.apache.shiro.web.mgt.DefaultWebSecurityManager;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import jakarta.servlet.Filter;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
* Shiro security configuration.
*
* Filter chain:
* /api/auth/login → anon (no auth required)
* /api/auth/logout → tokenFilter
* /api/** → tokenFilter (all other API endpoints require auth)
* /actuator/** → anon (health check)
* /** → anon (default)
*
* NOTE: spring.mvc.pathmatch.matching-strategy=ant_path_matcher MUST be set
* in application.yml for Shiro to work correctly with Spring Boot 3.
*/
@Configuration
public class ShiroConfig {

    /** Realm performing role-based authorization from Redis-cached data. */
    @Bean
    public UserRealm userRealm(RedisService redisService) {
        return new UserRealm(redisService);
    }

    /** Web security manager wired with the single UserRealm. */
    @Bean
    public SecurityManager securityManager(UserRealm userRealm) {
        DefaultWebSecurityManager webSecurityManager = new DefaultWebSecurityManager();
        webSecurityManager.setRealms(List.of(userRealm));
        return webSecurityManager;
    }

    /** Bearer-token authentication filter, registered by name below. */
    @Bean
    public TokenFilter tokenFilter(RedisService redisService, ObjectMapper objectMapper) {
        return new TokenFilter(redisService, objectMapper);
    }

    @Bean
    public ShiroFilterFactoryBean shiroFilterFactoryBean(SecurityManager securityManager,
                                                         TokenFilter tokenFilter) {
        ShiroFilterFactoryBean shiroFilter = new ShiroFilterFactoryBean();
        shiroFilter.setSecurityManager(securityManager);

        // Custom filter registration — the chain below references "tokenFilter" by name.
        Map<String, Filter> namedFilters = new LinkedHashMap<>();
        namedFilters.put("tokenFilter", tokenFilter);
        shiroFilter.setFilters(namedFilters);

        // First match wins (LinkedHashMap preserves insertion order): login and
        // actuator stay anonymous; everything else under /api/** needs a token.
        Map<String, String> chain = new LinkedHashMap<>();
        chain.put("/api/auth/login", "anon");
        chain.put("/actuator/**", "anon");
        chain.put("/api/**", "tokenFilter");
        chain.put("/**", "anon");
        shiroFilter.setFilterChainDefinitionMap(chain);
        return shiroFilter;
    }
}

View File

@@ -0,0 +1,95 @@
package com.label.common.shiro;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.label.common.context.CompanyContext;
import com.label.common.redis.RedisKeyManager;
import com.label.common.redis.RedisService;
import com.label.common.result.Result;
import jakarta.servlet.FilterChain;
import jakarta.servlet.ServletException;
import jakarta.servlet.ServletRequest;
import jakarta.servlet.ServletResponse;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.servlet.http.HttpServletResponse;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.shiro.web.filter.PathMatchingFilter;
import org.springframework.http.MediaType;
import java.io.IOException;
import java.util.Map;
/**
 * Shiro filter: parses "Authorization: Bearer {uuid}", validates against Redis,
 * injects CompanyContext and Shiro subject principals.
 *
 * KEY DESIGN:
 * - CompanyContext.clear() MUST be called in finally block to prevent thread pool leakage
 * - Token lookup is from Redis Hash token:{uuid} → {userId, role, companyId, username}
 * - 401 on missing/invalid token; filter continues for valid token
 */
@Slf4j
@RequiredArgsConstructor
public class TokenFilter extends PathMatchingFilter {

    private final RedisService redisService;
    private final ObjectMapper objectMapper;

    /**
     * Pre-handle hook: returns true (continue the chain) only for a valid
     * bearer token; otherwise writes a 401 JSON body and returns false.
     */
    @Override
    protected boolean onPreHandle(ServletRequest request, ServletResponse response, Object mappedValue) throws Exception {
        HttpServletRequest req = (HttpServletRequest) request;
        HttpServletResponse resp = (HttpServletResponse) response;
        String authHeader = req.getHeader("Authorization");
        if (authHeader == null || !authHeader.startsWith("Bearer ")) {
            writeUnauthorized(resp, "缺少或无效的认证令牌");
            return false;
        }
        // Strip the 7-char "Bearer " prefix to obtain the token uuid.
        String token = authHeader.substring(7).trim();
        String tokenKey = RedisKeyManager.tokenKey(token);
        Map<Object, Object> tokenData = redisService.hGetAll(tokenKey);
        if (tokenData == null || tokenData.isEmpty()) {
            writeUnauthorized(resp, "令牌已过期或不存在");
            return false;
        }
        try {
            // Any missing hash field NPEs on .toString() here and is handled
            // by the catch below as a malformed-token 401.
            Long userId = Long.parseLong(tokenData.get("userId").toString());
            String role = tokenData.get("role").toString();
            Long companyId = Long.parseLong(tokenData.get("companyId").toString());
            String username = tokenData.get("username").toString();
            // Inject company context (must be cleared in finally)
            CompanyContext.set(companyId);
            // Bind Shiro subject with token principal
            TokenPrincipal principal = new TokenPrincipal(userId, role, companyId, username, token);
            request.setAttribute("__token_principal__", principal);
            return true;
        } catch (Exception e) {
            log.error("解析 Token 数据失败: {}", e.getMessage());
            writeUnauthorized(resp, "令牌数据格式错误");
            return false;
        }
    }

    /**
     * Wraps the whole filter invocation so the tenant ThreadLocal is cleared
     * even when downstream processing throws.
     */
    @Override
    public void doFilterInternal(ServletRequest request, ServletResponse response, FilterChain chain)
            throws ServletException, IOException {
        try {
            super.doFilterInternal(request, response, chain);
        } finally {
            // CRITICAL: Always clear ThreadLocal to prevent leakage in thread pool
            CompanyContext.clear();
        }
    }

    /** Writes a 401 response with a JSON Result body (UTF-8). */
    private void writeUnauthorized(HttpServletResponse resp, String message) throws IOException {
        resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED);
        resp.setContentType(MediaType.APPLICATION_JSON_VALUE + ";charset=UTF-8");
        resp.getWriter().write(objectMapper.writeValueAsString(Result.failure("UNAUTHORIZED", message)));
    }
}

View File

@@ -0,0 +1,18 @@
package com.label.common.shiro;
import lombok.AllArgsConstructor;
import lombok.Getter;
import java.io.Serializable;
/**
 * Shiro principal carrying the authenticated user's session data.
 * Immutable (all fields final); populated by TokenFilter from the Redis
 * token hash.
 */
@Getter
@AllArgsConstructor
public class TokenPrincipal implements Serializable {
    // Authenticated user's id.
    private final Long userId;
    // Role name, e.g. ADMIN/REVIEWER/ANNOTATOR/UPLOADER (see UserRealm).
    private final String role;
    // Tenant id; mirrored into CompanyContext by TokenFilter.
    private final Long companyId;
    // Login name of the authenticated user.
    private final String username;
    // Raw bearer token value this principal was resolved from.
    private final String token;
}

View File

@@ -0,0 +1,87 @@
package com.label.common.shiro;
import com.label.common.redis.RedisKeyManager;
import com.label.common.redis.RedisService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.shiro.authc.*;
import org.apache.shiro.authz.AuthorizationInfo;
import org.apache.shiro.authz.SimpleAuthorizationInfo;
import org.apache.shiro.realm.AuthorizingRealm;
import org.apache.shiro.subject.PrincipalCollection;
/**
 * Shiro Realm for role-based authorization using token-based authentication.
 *
 * Role hierarchy (addInheritedRoles):
 *   ADMIN ⊃ REVIEWER ⊃ ANNOTATOR ⊃ UPLOADER
 *
 * Permission lookup order:
 *   1. Redis user:perm:{userId} (TTL 5 min)
 *   2. If miss: use role from TokenPrincipal
 */
@Slf4j
@RequiredArgsConstructor
public class UserRealm extends AuthorizingRealm {

    // Role cache TTL in seconds (5 minutes).
    private static final long PERM_CACHE_TTL = 300L; // 5 minutes

    private final RedisService redisService;

    /** This realm only handles {@link BearerToken} instances. */
    @Override
    public boolean supports(AuthenticationToken token) {
        return token instanceof BearerToken;
    }

    @Override
    protected AuthenticationInfo doGetAuthenticationInfo(AuthenticationToken token) throws AuthenticationException {
        // Token validation is done in TokenFilter; this realm only handles authorization
        // For authentication, we trust the token that was validated by TokenFilter
        return new SimpleAuthenticationInfo(token.getPrincipal(), token.getCredentials(), getName());
    }

    /**
     * Resolves the subject's role (cache-first) and expands it with all
     * inherited roles so hasRole() checks on lower roles succeed.
     */
    @Override
    protected AuthorizationInfo doGetAuthorizationInfo(PrincipalCollection principals) {
        TokenPrincipal principal = (TokenPrincipal) principals.getPrimaryPrincipal();
        if (principal == null) {
            // No principal → empty authorization info (no roles granted).
            return new SimpleAuthorizationInfo();
        }
        String role = getRoleFromCacheOrPrincipal(principal);
        SimpleAuthorizationInfo info = new SimpleAuthorizationInfo();
        info.addRole(role);
        addInheritedRoles(info, role);
        return info;
    }

    /**
     * Looks up the role in Redis (user:perm:{userId}); on a miss, falls back
     * to the role carried by the token principal and repopulates the cache.
     */
    private String getRoleFromCacheOrPrincipal(TokenPrincipal principal) {
        String permKey = RedisKeyManager.userPermKey(principal.getUserId());
        String cachedRole = redisService.get(permKey);
        if (cachedRole != null && !cachedRole.isEmpty()) {
            return cachedRole;
        }
        // Cache miss: use role from token, then refresh cache
        String role = principal.getRole();
        redisService.set(permKey, role, PERM_CACHE_TTL);
        return role;
    }

    /**
     * ADMIN inherits all roles: ADMIN ⊃ REVIEWER ⊃ ANNOTATOR ⊃ UPLOADER
     * Implemented via deliberate switch fall-through: each case adds the role
     * one level below it and falls into the next case.
     */
    private void addInheritedRoles(SimpleAuthorizationInfo info, String role) {
        switch (role) {
            case "ADMIN":
                info.addRole("REVIEWER");
                // fall through
            case "REVIEWER":
                info.addRole("ANNOTATOR");
                // fall through
            case "ANNOTATOR":
                info.addRole("UPLOADER");
                break;
            default:
                break;
        }
    }
}

View File

@@ -0,0 +1,14 @@
package com.label.common.statemachine;
import java.util.Map;
import java.util.Set;
public enum DatasetStatus {
PENDING_REVIEW, APPROVED, REJECTED;
public static final Map<DatasetStatus, Set<DatasetStatus>> TRANSITIONS = Map.of(
PENDING_REVIEW, Set.of(APPROVED, REJECTED),
REJECTED, Set.of(PENDING_REVIEW) // 重新提交审核
// APPROVED: terminal state
);
}

View File

@@ -0,0 +1,15 @@
package com.label.common.statemachine;
import java.util.Map;
import java.util.Set;
public enum SourceStatus {
PENDING, PREPROCESSING, EXTRACTING, QA_REVIEW, APPROVED;
public static final Map<SourceStatus, Set<SourceStatus>> TRANSITIONS = Map.of(
PENDING, Set.of(EXTRACTING, PREPROCESSING),
PREPROCESSING, Set.of(PENDING),
EXTRACTING, Set.of(QA_REVIEW),
QA_REVIEW, Set.of(APPROVED)
);
}

View File

@@ -0,0 +1,36 @@
package com.label.common.statemachine;
import com.label.common.exception.BusinessException;
import org.springframework.http.HttpStatus;
import java.util.Map;
import java.util.Set;
/**
 * Generic state machine validator.
 * Validates state transitions against a predefined transitions map.
 */
public final class StateValidator {

    private StateValidator() {}

    /**
     * Assert that a state transition from {@code current} to {@code next} is valid.
     *
     * @param transitions the allowed transitions map
     * @param current     the current state
     * @param next        the desired next state
     * @param <S>         the state type (enum)
     * @throws BusinessException with code INVALID_STATE_TRANSITION if transition not allowed
     */
    public static <S> void assertTransition(Map<S, Set<S>> transitions, S current, S next) {
        // A state with no entry is terminal: no outgoing transitions allowed.
        Set<S> allowed = transitions.getOrDefault(current, Set.of());
        if (allowed.contains(next)) {
            return;
        }
        throw new BusinessException(
                "INVALID_STATE_TRANSITION",
                String.format("不允许的状态转换: %s → %s", current, next),
                HttpStatus.CONFLICT
        );
    }
}

View File

@@ -0,0 +1,16 @@
package com.label.common.statemachine;
import java.util.Map;
import java.util.Set;
public enum TaskStatus {
UNCLAIMED, IN_PROGRESS, SUBMITTED, APPROVED, REJECTED;
public static final Map<TaskStatus, Set<TaskStatus>> TRANSITIONS = Map.of(
UNCLAIMED, Set.of(IN_PROGRESS),
IN_PROGRESS, Set.of(SUBMITTED, UNCLAIMED, IN_PROGRESS), // IN_PROGRESS->IN_PROGRESS for ADMIN reassign
SUBMITTED, Set.of(APPROVED, REJECTED),
REJECTED, Set.of(IN_PROGRESS)
// APPROVED: terminal state, no outgoing transitions
);
}

View File

@@ -0,0 +1,20 @@
package com.label.common.statemachine;
import java.util.Map;
import java.util.Set;
public enum VideoJobStatus {
PENDING, RUNNING, SUCCESS, FAILED, RETRYING;
/**
* Automatic state machine transitions.
* Note: FAILED → PENDING is a manual ADMIN operation, handled separately in VideoProcessService.reset().
*/
public static final Map<VideoJobStatus, Set<VideoJobStatus>> TRANSITIONS = Map.of(
PENDING, Set.of(RUNNING),
RUNNING, Set.of(SUCCESS, FAILED, RETRYING),
RETRYING, Set.of(RUNNING, FAILED)
// SUCCESS: terminal state
// FAILED → PENDING: manual ADMIN reset, NOT in this automatic transitions map
);
}

View File

@@ -0,0 +1,118 @@
package com.label.common.storage;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.*;
import software.amazon.awssdk.services.s3.presigner.S3Presigner;
import software.amazon.awssdk.services.s3.presigner.model.GetObjectPresignRequest;
import jakarta.annotation.PostConstruct;
import java.io.InputStream;
import java.net.URI;
import java.time.Duration;
@Slf4j
@Component
public class RustFsClient {
@Value("${rustfs.endpoint}")
private String endpoint;
@Value("${rustfs.access-key}")
private String accessKey;
@Value("${rustfs.secret-key}")
private String secretKey;
private S3Client s3Client;
private S3Presigner presigner;
@PostConstruct
public void init() {
var credentials = StaticCredentialsProvider.create(
AwsBasicCredentials.create(accessKey, secretKey));
s3Client = S3Client.builder()
.endpointOverride(URI.create(endpoint))
.credentialsProvider(credentials)
.region(Region.US_EAST_1)
.forcePathStyle(true) // Required for MinIO/RustFS
.build();
presigner = S3Presigner.builder()
.endpointOverride(URI.create(endpoint))
.credentialsProvider(credentials)
.region(Region.US_EAST_1)
.build();
}
/**
* Upload file to RustFS.
* @param bucket bucket name
* @param key object key (path)
* @param inputStream file content
* @param contentLength file size in bytes
* @param contentType MIME type
*/
public void upload(String bucket, String key, InputStream inputStream,
long contentLength, String contentType) {
// Ensure bucket exists
ensureBucketExists(bucket);
s3Client.putObject(
PutObjectRequest.builder()
.bucket(bucket)
.key(key)
.contentType(contentType)
.contentLength(contentLength)
.build(),
RequestBody.fromInputStream(inputStream, contentLength)
);
}
/**
* Download file from RustFS.
*/
public InputStream download(String bucket, String key) {
return s3Client.getObject(
GetObjectRequest.builder().bucket(bucket).key(key).build()
);
}
/**
* Delete file from RustFS.
*/
public void delete(String bucket, String key) {
s3Client.deleteObject(
DeleteObjectRequest.builder().bucket(bucket).key(key).build()
);
}
/**
* Generate a presigned URL for temporary read access.
* @param expirationMinutes URL validity in minutes
*/
public String getPresignedUrl(String bucket, String key, int expirationMinutes) {
var presignRequest = GetObjectPresignRequest.builder()
.signatureDuration(Duration.ofMinutes(expirationMinutes))
.getObjectRequest(GetObjectRequest.builder().bucket(bucket).key(key).build())
.build();
return presigner.presignGetObject(presignRequest).url().toString();
}
private void ensureBucketExists(String bucket) {
try {
s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build());
} catch (NoSuchBucketException e) {
s3Client.createBucket(CreateBucketRequest.builder().bucket(bucket).build());
log.info("Created bucket: {}", bucket);
}
}
}

View File

@@ -0,0 +1,68 @@
server:
  port: 8080

spring:
  datasource:
    url: ${SPRING_DATASOURCE_URL:jdbc:postgresql://localhost:5432/label_db}
    username: ${SPRING_DATASOURCE_USERNAME:label}
    password: ${SPRING_DATASOURCE_PASSWORD:label_password}
    driver-class-name: org.postgresql.Driver
    hikari:
      maximum-pool-size: 20
      minimum-idle: 5
      connection-timeout: 30000  # ms
  data:
    redis:
      host: ${SPRING_DATA_REDIS_HOST:localhost}
      port: ${SPRING_DATA_REDIS_PORT:6379}
      password: ${SPRING_DATA_REDIS_PASSWORD:redis_password}
      timeout: 5000ms
      lettuce:
        pool:
          max-active: 8
          max-idle: 8
          min-idle: 0
  jackson:
    # Omit null fields from JSON responses; ISO-8601 dates instead of epoch numbers.
    default-property-inclusion: non_null
    serialization:
      write-dates-as-timestamps: false
  mvc:
    pathmatch:
      matching-strategy: ant_path_matcher # required for Shiro compatibility with Spring Boot 3

mybatis-plus:
  mapper-locations: classpath*:mapper/**/*.xml
  type-aliases-package: com.label.module
  configuration:
    map-underscore-to-camel-case: true
    log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl
  global-config:
    db-config:
      id-type: auto

# S3-compatible object storage (RustFS/MinIO)
rustfs:
  endpoint: ${RUSTFS_ENDPOINT:http://localhost:9000}
  access-key: ${RUSTFS_ACCESS_KEY:minioadmin}
  secret-key: ${RUSTFS_SECRET_KEY:minioadmin}
  region: us-east-1

# Python AI side-car service (consumed by AiServiceClient)
ai-service:
  base-url: ${AI_SERVICE_BASE_URL:http://localhost:8000}
  timeout: 30000 # milliseconds

shiro:
  loginUrl: /api/auth/login
  successUrl: /
  unauthorizedUrl: /api/auth/unauthorized
  sessionManager:
    sessionIdCookieEnabled: false # REST API does not use cookie-based sessions
    sessionIdUrlRewritingEnabled: false

logging:
  level:
    com.label: DEBUG
    org.apache.shiro: INFO
    com.baomidou.mybatisplus: INFO

View File

@@ -0,0 +1,87 @@
package com.label;
import org.junit.jupiter.api.BeforeEach;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.web.server.LocalServerPort;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.test.context.DynamicPropertyRegistry;
import org.springframework.test.context.DynamicPropertySource;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.containers.PostgreSQLContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
import org.testcontainers.utility.DockerImageName;
import org.testcontainers.utility.MountableFile;
/**
 * Base class for all integration tests.
 *
 * Starts real PostgreSQL 16 and Redis 7 containers (shared across test class instances).
 * Executes the init SQL script to initialize schema and seed data.
 *
 * DESIGN:
 * - @Container with static fields → containers are shared across test methods (faster)
 * - @DynamicPropertySource → overrides datasource/redis properties at runtime
 * - @BeforeEach cleanData() → truncates business tables (not sys_company/sys_user) between tests
 */
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Testcontainers
public abstract class AbstractIntegrationTest {

    // Random HTTP port assigned by Spring Boot for this test run.
    @LocalServerPort
    protected int port;

    @Autowired
    protected JdbcTemplate jdbcTemplate;

    // @SuppressWarnings("resource"): container lifecycle is managed by the
    // Testcontainers JUnit extension, not by try-with-resources.
    // The init script is copied into the Postgres entrypoint directory so the
    // schema and seed data are created on first container start.
    // NOTE(review): loaded from classpath resource "db/init.sql" — confirm this
    // is kept in sync with the repository's canonical init.sql.
    @SuppressWarnings("resource")
    @Container
    protected static final PostgreSQLContainer<?> postgres =
            new PostgreSQLContainer<>(DockerImageName.parse("postgres:16-alpine"))
                    .withDatabaseName("label_db")
                    .withUsername("label")
                    .withPassword("label_password")
                    .withCopyFileToContainer(
                            MountableFile.forClasspathResource("db/init.sql"),
                            "/docker-entrypoint-initdb.d/init.sql");

    // Redis secured with a password so tests also exercise authenticated access.
    @SuppressWarnings("resource")
    @Container
    protected static final GenericContainer<?> redis =
            new GenericContainer<>(DockerImageName.parse("redis:7-alpine"))
                    .withExposedPorts(6379)
                    .withCommand("redis-server", "--requirepass", "test_redis_password");

    /**
     * Points the Spring context at the dynamically-mapped container ports.
     * Runs before the context is created; containers are already started by then.
     */
    @DynamicPropertySource
    static void configureProperties(DynamicPropertyRegistry registry) {
        registry.add("spring.datasource.url", postgres::getJdbcUrl);
        registry.add("spring.datasource.username", postgres::getUsername);
        registry.add("spring.datasource.password", postgres::getPassword);
        registry.add("spring.data.redis.host", redis::getHost);
        registry.add("spring.data.redis.port", () -> redis.getMappedPort(6379));
        registry.add("spring.data.redis.password", () -> "test_redis_password");
    }

    /**
     * Clean only business data between tests to keep schema intact.
     * Keep sys_company and sys_user since init.sql seeds them.
     * Global sys_config rows are re-inserted because the TRUNCATE removes them.
     */
    @BeforeEach
    void cleanData() {
        jdbcTemplate.execute("TRUNCATE TABLE video_process_job, annotation_task_history, " +
                "sys_operation_log, sys_config, export_batch, training_dataset, " +
                "annotation_result, annotation_task, source_data RESTART IDENTITY CASCADE");
        // Re-insert global sys_config entries that were truncated
        jdbcTemplate.execute("INSERT INTO sys_config (company_id, config_key, config_value) VALUES " +
                "(NULL, 'token_ttl_seconds', '7200'), " +
                "(NULL, 'model_default', 'glm-4'), " +
                "(NULL, 'video_frame_interval', '30') " +
                "ON CONFLICT DO NOTHING");
    }

    /** Helper: get base URL for REST calls */
    protected String baseUrl(String path) {
        return "http://localhost:" + port + path;
    }
}

View File

@@ -0,0 +1,7 @@
package com.label;
import org.springframework.boot.test.context.SpringBootTest;
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
class LabelBackendApplicationTests {
}

View File

@@ -0,0 +1,265 @@
package com.label.unit;
import com.label.common.exception.BusinessException;
import com.label.common.statemachine.*;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.*;
/**
* Unit tests for all state machine enums and StateValidator.
* No Spring context needed - pure unit tests.
*/
@DisplayName("状态机单元测试")
class StateMachineTest {
// ===== SourceStatus =====
@Nested
@DisplayName("SourceStatus 状态机")
class SourceStatusTest {
@Test
@DisplayName("合法转换PENDING → EXTRACTING文本/图片直接提取)")
void pendingToExtracting() {
assertThatCode(() ->
StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.PENDING, SourceStatus.EXTRACTING)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换PENDING → PREPROCESSING视频上传")
void pendingToPreprocessing() {
assertThatCode(() ->
StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.PENDING, SourceStatus.PREPROCESSING)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换PREPROCESSING → PENDING视频预处理完成")
void preprocessingToPending() {
assertThatCode(() ->
StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.PREPROCESSING, SourceStatus.PENDING)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换EXTRACTING → QA_REVIEW提取审批通过")
void extractingToQaReview() {
assertThatCode(() ->
StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.EXTRACTING, SourceStatus.QA_REVIEW)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换QA_REVIEW → APPROVEDQA 审批通过)")
void qaReviewToApproved() {
assertThatCode(() ->
StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.QA_REVIEW, SourceStatus.APPROVED)
).doesNotThrowAnyException();
}
@Test
@DisplayName("非法转换APPROVED → PENDING 抛出异常")
void approvedToPendingFails() {
assertThatThrownBy(() ->
StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.APPROVED, SourceStatus.PENDING)
).isInstanceOf(BusinessException.class)
.extracting("code").isEqualTo("INVALID_STATE_TRANSITION");
}
@Test
@DisplayName("非法转换PENDING → APPROVED跳过中间状态抛出异常")
void pendingToApprovedFails() {
assertThatThrownBy(() ->
StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.PENDING, SourceStatus.APPROVED)
).isInstanceOf(BusinessException.class)
.extracting("code").isEqualTo("INVALID_STATE_TRANSITION");
}
}
// ===== TaskStatus =====
@Nested
@DisplayName("TaskStatus 状态机")
class TaskStatusTest {
@Test
@DisplayName("合法转换UNCLAIMED → IN_PROGRESS领取")
void unclaimedToInProgress() {
assertThatCode(() ->
StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.UNCLAIMED, TaskStatus.IN_PROGRESS)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换IN_PROGRESS → SUBMITTED提交")
void inProgressToSubmitted() {
assertThatCode(() ->
StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.IN_PROGRESS, TaskStatus.SUBMITTED)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换IN_PROGRESS → UNCLAIMED放弃")
void inProgressToUnclaimed() {
assertThatCode(() ->
StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.IN_PROGRESS, TaskStatus.UNCLAIMED)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换IN_PROGRESS → IN_PROGRESSADMIN 强制转移,持有人变更)")
void inProgressToInProgress() {
assertThatCode(() ->
StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.IN_PROGRESS, TaskStatus.IN_PROGRESS)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换SUBMITTED → APPROVED审批通过")
void submittedToApproved() {
assertThatCode(() ->
StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.SUBMITTED, TaskStatus.APPROVED)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换SUBMITTED → REJECTED审批驳回")
void submittedToRejected() {
assertThatCode(() ->
StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.SUBMITTED, TaskStatus.REJECTED)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换REJECTED → IN_PROGRESS标注员重领")
void rejectedToInProgress() {
assertThatCode(() ->
StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.REJECTED, TaskStatus.IN_PROGRESS)
).doesNotThrowAnyException();
}
@Test
@DisplayName("非法转换APPROVED → IN_PROGRESS 抛出异常")
void approvedToInProgressFails() {
assertThatThrownBy(() ->
StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.APPROVED, TaskStatus.IN_PROGRESS)
).isInstanceOf(BusinessException.class)
.extracting("code").isEqualTo("INVALID_STATE_TRANSITION");
}
@Test
@DisplayName("非法转换UNCLAIMED → SUBMITTED跳过 IN_PROGRESS抛出异常")
void unclaimedToSubmittedFails() {
assertThatThrownBy(() ->
StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.UNCLAIMED, TaskStatus.SUBMITTED)
).isInstanceOf(BusinessException.class)
.extracting("code").isEqualTo("INVALID_STATE_TRANSITION");
}
}
// ===== DatasetStatus =====
@Nested
@DisplayName("DatasetStatus 状态机")
class DatasetStatusTest {
@Test
@DisplayName("合法转换PENDING_REVIEW → APPROVED")
void pendingReviewToApproved() {
assertThatCode(() ->
StateValidator.assertTransition(DatasetStatus.TRANSITIONS, DatasetStatus.PENDING_REVIEW, DatasetStatus.APPROVED)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换PENDING_REVIEW → REJECTED")
void pendingReviewToRejected() {
assertThatCode(() ->
StateValidator.assertTransition(DatasetStatus.TRANSITIONS, DatasetStatus.PENDING_REVIEW, DatasetStatus.REJECTED)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换REJECTED → PENDING_REVIEW重新提交")
void rejectedToPendingReview() {
assertThatCode(() ->
StateValidator.assertTransition(DatasetStatus.TRANSITIONS, DatasetStatus.REJECTED, DatasetStatus.PENDING_REVIEW)
).doesNotThrowAnyException();
}
@Test
@DisplayName("非法转换APPROVED → REJECTED 抛出异常")
void approvedToRejectedFails() {
assertThatThrownBy(() ->
StateValidator.assertTransition(DatasetStatus.TRANSITIONS, DatasetStatus.APPROVED, DatasetStatus.REJECTED)
).isInstanceOf(BusinessException.class)
.extracting("code").isEqualTo("INVALID_STATE_TRANSITION");
}
}
// ===== VideoJobStatus =====
@Nested
@DisplayName("VideoJobStatus 状态机")
class VideoJobStatusTest {
@Test
@DisplayName("合法转换PENDING → RUNNING")
void pendingToRunning() {
assertThatCode(() ->
StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.PENDING, VideoJobStatus.RUNNING)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换RUNNING → SUCCESS")
void runningToSuccess() {
assertThatCode(() ->
StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.RUNNING, VideoJobStatus.SUCCESS)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换RUNNING → RETRYING失败且未超重试次数")
void runningToRetrying() {
assertThatCode(() ->
StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.RUNNING, VideoJobStatus.RETRYING)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换RUNNING → FAILED失败且超过最大重试")
void runningToFailed() {
assertThatCode(() ->
StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.RUNNING, VideoJobStatus.FAILED)
).doesNotThrowAnyException();
}
@Test
@DisplayName("合法转换RETRYING → RUNNINGAI 重试)")
void retryingToRunning() {
assertThatCode(() ->
StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.RETRYING, VideoJobStatus.RUNNING)
).doesNotThrowAnyException();
}
@Test
@DisplayName("非法转换FAILED → PENDING 不在状态机内ADMIN 手动触发,不走 StateValidator")
void failedToPendingNotInStateMachine() {
// FAILED → PENDING is intentionally NOT in TRANSITIONS (ADMIN manual reset via special API)
assertThatThrownBy(() ->
StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.FAILED, VideoJobStatus.PENDING)
).isInstanceOf(BusinessException.class)
.extracting("code").isEqualTo("INVALID_STATE_TRANSITION");
}
@Test
@DisplayName("非法转换SUCCESS → RUNNING 抛出异常")
void successToRunningFails() {
assertThatThrownBy(() ->
StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.SUCCESS, VideoJobStatus.RUNNING)
).isInstanceOf(BusinessException.class)
.extracting("code").isEqualTo("INVALID_STATE_TRANSITION");
}
}
}

View File

View File

@@ -0,0 +1,332 @@
-- label_backend init.sql
-- PostgreSQL 14+
-- Creates all 11 tables in dependency order:
--   sys_company → sys_user → source_data → annotation_task → annotation_result
--   → training_dataset → export_batch → sys_config → sys_operation_log
--   → annotation_task_history → video_process_job
-- Includes all indexes and initial seed data.
-- The whole script is idempotent (IF NOT EXISTS / ON CONFLICT DO NOTHING).
-- ============================================================
-- Extensions
-- ============================================================
-- pgcrypto provides gen_random_uuid(), used by export_batch.batch_uuid.
CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- ============================================================
-- 1. sys_company (tenant)
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_company (
    id BIGSERIAL PRIMARY KEY,
    company_name VARCHAR(100) NOT NULL,
    company_code VARCHAR(50) NOT NULL,
    status VARCHAR(10) NOT NULL DEFAULT 'ACTIVE', -- ACTIVE / DISABLED
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
    CONSTRAINT uk_sys_company_name UNIQUE (company_name),
    CONSTRAINT uk_sys_company_code UNIQUE (company_code)
);
-- ============================================================
-- 2. sys_user (users)
-- Username is unique per company, not globally.
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_user (
    id BIGSERIAL PRIMARY KEY,
    company_id BIGINT NOT NULL REFERENCES sys_company(id),
    username VARCHAR(50) NOT NULL,
    password_hash VARCHAR(255) NOT NULL, -- BCrypt, strength >= 10
    real_name VARCHAR(50),
    role VARCHAR(20) NOT NULL, -- UPLOADER / ANNOTATOR / REVIEWER / ADMIN
    status VARCHAR(10) NOT NULL DEFAULT 'ACTIVE', -- ACTIVE / DISABLED
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
    CONSTRAINT uk_sys_user_company_username UNIQUE (company_id, username)
);
CREATE INDEX IF NOT EXISTS idx_sys_user_company_id
    ON sys_user (company_id);
-- ============================================================
-- 3. source_data (uploaded source material)
-- Self-referencing parent_source_id links derived rows (video frames /
-- text fragments) back to their original upload.
-- ============================================================
CREATE TABLE IF NOT EXISTS source_data (
    id BIGSERIAL PRIMARY KEY,
    company_id BIGINT NOT NULL REFERENCES sys_company(id),
    uploader_id BIGINT REFERENCES sys_user(id),
    data_type VARCHAR(20) NOT NULL, -- TEXT / IMAGE / VIDEO
    file_path VARCHAR(500) NOT NULL, -- RustFS object path
    file_name VARCHAR(255) NOT NULL,
    file_size BIGINT,
    bucket_name VARCHAR(100) NOT NULL,
    parent_source_id BIGINT REFERENCES source_data(id), -- video frame / text fragment parent
    status VARCHAR(20) NOT NULL DEFAULT 'PENDING',
    -- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED
    reject_reason TEXT, -- reserved (no REJECTED state in the current state machine)
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_source_data_company_id
    ON source_data (company_id);
CREATE INDEX IF NOT EXISTS idx_source_data_company_status
    ON source_data (company_id, status);
CREATE INDEX IF NOT EXISTS idx_source_data_parent_source_id
    ON source_data (parent_source_id);
-- ============================================================
-- 4. annotation_task (annotation tasks)
-- ============================================================
CREATE TABLE IF NOT EXISTS annotation_task (
    id BIGSERIAL PRIMARY KEY,
    company_id BIGINT NOT NULL REFERENCES sys_company(id),
    source_id BIGINT NOT NULL REFERENCES source_data(id),
    task_type VARCHAR(30) NOT NULL, -- EXTRACTION / QA_GENERATION
    status VARCHAR(20) NOT NULL DEFAULT 'UNCLAIMED',
    -- UNCLAIMED / IN_PROGRESS / SUBMITTED / APPROVED / REJECTED
    claimed_by BIGINT REFERENCES sys_user(id),
    claimed_at TIMESTAMP,
    submitted_at TIMESTAMP,
    completed_at TIMESTAMP,
    is_final BOOLEAN NOT NULL DEFAULT FALSE, -- true once APPROVED and no further review is needed
    ai_model VARCHAR(50),
    reject_reason TEXT,
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_annotation_task_company_status
    ON annotation_task (company_id, status);
CREATE INDEX IF NOT EXISTS idx_annotation_task_source_id
    ON annotation_task (source_id);
CREATE INDEX IF NOT EXISTS idx_annotation_task_claimed_by
    ON annotation_task (claimed_by);
-- ============================================================
-- 5. annotation_result (annotation results, JSONB)
-- One row per task (UNIQUE on task_id); result_json is replaced wholesale.
-- ============================================================
CREATE TABLE IF NOT EXISTS annotation_result (
    id BIGSERIAL NOT NULL,
    task_id BIGINT NOT NULL REFERENCES annotation_task(id),
    company_id BIGINT NOT NULL REFERENCES sys_company(id),
    result_json JSONB NOT NULL DEFAULT '[]'::jsonb, -- whole-document replace semantics
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
    CONSTRAINT pk_annotation_result PRIMARY KEY (id),
    CONSTRAINT uk_annotation_result_task_id UNIQUE (task_id)
);
-- No separate index on task_id: the UNIQUE constraint above already creates a
-- unique btree index on task_id, so a second index would only add write overhead.
CREATE INDEX IF NOT EXISTS idx_annotation_result_company_id
    ON annotation_result (company_id);
-- ============================================================
-- 6. training_dataset (training samples)
-- The export_batch_id FK is added after export_batch is created (see below),
-- because export_batch depends on nothing here but is created later in the file.
-- ============================================================
CREATE TABLE IF NOT EXISTS training_dataset (
    id BIGSERIAL PRIMARY KEY,
    company_id BIGINT NOT NULL REFERENCES sys_company(id),
    task_id BIGINT NOT NULL REFERENCES annotation_task(id),
    source_id BIGINT NOT NULL REFERENCES source_data(id),
    sample_type VARCHAR(20) NOT NULL, -- TEXT / IMAGE / VIDEO_FRAME
    glm_format_json JSONB NOT NULL, -- GLM fine-tune format
    status VARCHAR(20) NOT NULL DEFAULT 'PENDING_REVIEW',
    -- PENDING_REVIEW / APPROVED / REJECTED
    export_batch_id BIGINT, -- filled on export; FK added below
    exported_at TIMESTAMP,
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_training_dataset_company_status
    ON training_dataset (company_id, status);
CREATE INDEX IF NOT EXISTS idx_training_dataset_task_id
    ON training_dataset (task_id);
-- ============================================================
-- 7. export_batch (export batches)
-- ============================================================
CREATE TABLE IF NOT EXISTS export_batch (
    id BIGSERIAL PRIMARY KEY,
    company_id BIGINT NOT NULL REFERENCES sys_company(id),
    batch_uuid UUID NOT NULL DEFAULT gen_random_uuid(),
    sample_count INT NOT NULL DEFAULT 0,
    dataset_file_path VARCHAR(500), -- RustFS path of the exported JSONL
    glm_job_id VARCHAR(100), -- GLM fine-tune job ID
    finetune_status VARCHAR(20) NOT NULL DEFAULT 'NOT_STARTED',
    -- NOT_STARTED / RUNNING / COMPLETED / FAILED
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_export_batch_company_id
    ON export_batch (company_id);
-- Backfill the training_dataset.export_batch_id FK now that export_batch exists.
-- NOTE: ALTER TABLE ... ADD CONSTRAINT has no IF NOT EXISTS; rerunning the
-- script will error here if the constraint already exists.
ALTER TABLE training_dataset
    ADD CONSTRAINT fk_training_dataset_export_batch
    FOREIGN KEY (export_batch_id) REFERENCES export_batch(id)
    NOT VALID; -- skip validation of existing rows (NULLs allowed)
-- ============================================================
-- 8. sys_config (system configuration)
-- company_id NULL = global default; non-NULL = per-company override.
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_config (
    id BIGSERIAL PRIMARY KEY,
    company_id BIGINT REFERENCES sys_company(id), -- NULL = global default
    config_key VARCHAR(100) NOT NULL,
    config_value TEXT NOT NULL,
    description VARCHAR(255),
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
-- Per-company config uniqueness
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_company_key
    ON sys_config (company_id, config_key)
    WHERE company_id IS NOT NULL;
-- Global config uniqueness
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_global_key
    ON sys_config (config_key)
    WHERE company_id IS NULL;
-- No extra plain (company_id, config_key) index: lookups with company_id = ?
-- imply company_id IS NOT NULL and use the first partial unique index; lookups
-- with company_id IS NULL use the second. A third index would be redundant.
-- ============================================================
-- 9. sys_operation_log (operation audit log, append-only)
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_operation_log (
    id BIGSERIAL PRIMARY KEY,
    company_id BIGINT NOT NULL REFERENCES sys_company(id),
    operator_id BIGINT REFERENCES sys_user(id),
    operation_type VARCHAR(50) NOT NULL, -- e.g. EXTRACTION_APPROVE / USER_LOGIN
    target_id BIGINT,
    target_type VARCHAR(50),
    detail JSONB,
    result VARCHAR(10), -- SUCCESS / FAILURE
    error_message TEXT,
    operated_at TIMESTAMP NOT NULL DEFAULT NOW()
    -- no updated_at: append-only table, rows are never updated
);
CREATE INDEX IF NOT EXISTS idx_sys_operation_log_company_operated_at
    ON sys_operation_log (company_id, operated_at);
CREATE INDEX IF NOT EXISTS idx_sys_operation_log_operator_id
    ON sys_operation_log (operator_id);
-- ============================================================
-- 10. annotation_task_history (task status transitions, append-only)
-- ============================================================
CREATE TABLE IF NOT EXISTS annotation_task_history (
    id BIGSERIAL PRIMARY KEY,
    task_id BIGINT NOT NULL REFERENCES annotation_task(id),
    company_id BIGINT NOT NULL REFERENCES sys_company(id),
    from_status VARCHAR(20), -- NULL for the initial transition
    to_status VARCHAR(20) NOT NULL,
    operator_id BIGINT REFERENCES sys_user(id),
    operator_role VARCHAR(20),
    comment TEXT,
    created_at TIMESTAMP NOT NULL DEFAULT NOW()
    -- no updated_at: append-only table, rows are never updated
);
CREATE INDEX IF NOT EXISTS idx_annotation_task_history_task_id
    ON annotation_task_history (task_id);
CREATE INDEX IF NOT EXISTS idx_annotation_task_history_company_id
    ON annotation_task_history (company_id);
-- ============================================================
-- 11. video_process_job (video processing jobs)
-- ============================================================
CREATE TABLE IF NOT EXISTS video_process_job (
    id BIGSERIAL PRIMARY KEY,
    company_id BIGINT NOT NULL REFERENCES sys_company(id),
    source_id BIGINT NOT NULL REFERENCES source_data(id),
    job_type VARCHAR(30) NOT NULL, -- FRAME_EXTRACT / VIDEO_TO_TEXT
    status VARCHAR(20) NOT NULL DEFAULT 'PENDING',
    -- PENDING / RUNNING / SUCCESS / FAILED / RETRYING
    params JSONB, -- e.g. {"frameInterval": 30, "mode": "FRAME"}
    output_path VARCHAR(500), -- RustFS output path once completed
    retry_count INT NOT NULL DEFAULT 0,
    max_retries INT NOT NULL DEFAULT 3,
    error_message TEXT,
    started_at TIMESTAMP,
    completed_at TIMESTAMP,
    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_video_process_job_company_id
    ON video_process_job (company_id);
CREATE INDEX IF NOT EXISTS idx_video_process_job_source_id
    ON video_process_job (source_id);
CREATE INDEX IF NOT EXISTS idx_video_process_job_status
    ON video_process_job (status);
-- ============================================================
-- Seed data
-- ============================================================
-- 1. Demo company
INSERT INTO sys_company (company_name, company_code, status)
VALUES ('演示公司', 'DEMO', 'ACTIVE')
ON CONFLICT DO NOTHING;
-- 2. Initial users (BCrypt, strength=10)
-- NOTE: demo credentials for local development only — rotate in any real deployment.
--   admin      / admin123
--   reviewer01 / review123
--   annotator01/ annot123
--   uploader01 / upload123
INSERT INTO sys_user (company_id, username, password_hash, real_name, role, status)
SELECT
    c.id,
    u.username,
    u.password_hash,
    u.real_name,
    u.role,
    'ACTIVE'
FROM sys_company c
CROSS JOIN (VALUES
    ('admin',
     '$2a$10$B8iR5z43URiNPm.eut3JvufIPBuvGx5ZZmqyUqE1A1WdbZppX5bmi',
     '管理员',
     'ADMIN'),
    ('reviewer01',
     '$2a$10$euOJZRfUtYNW7WHpfW1Ciee5b3rjkYFe3yQHT/uCQWrYVc0XQcukm',
     '审核员01',
     'REVIEWER'),
    ('annotator01',
     '$2a$10$8UKwHPNASauKMTrqosR0Reg1X1gkFzFlGa/HBwNLXUELaj4e/zcqu',
     '标注员01',
     'ANNOTATOR'),
    ('uploader01',
     '$2a$10$o2d7jsT31vyxIJHUo50mUefoZLLvGqft97zaL9OQCjRxn9ie1H/1O',
     '上传员01',
     'UPLOADER')
) AS u(username, password_hash, real_name, role)
WHERE c.company_code = 'DEMO'
ON CONFLICT (company_id, username) DO NOTHING;
-- 3. Global system configuration (company_id NULL = applies to all tenants)
INSERT INTO sys_config (company_id, config_key, config_value, description)
VALUES
    (NULL, 'token_ttl_seconds', '7200',
     '会话凭证有效期(秒)'),
    (NULL, 'model_default', 'glm-4',
     'AI 辅助默认模型'),
    (NULL, 'video_frame_interval', '30',
     '视频帧提取间隔(帧数)'),
    (NULL, 'prompt_extract_text',
     '请提取以下文本中的主语-谓语-宾语三元组以JSON数组格式返回每个元素包含subject、predicate、object、sourceText、startOffset、endOffset字段。',
     '文本三元组提取 Prompt 模板'),
    (NULL, 'prompt_extract_image',
     '请提取图片中的实体关系四元组以JSON数组格式返回每个元素包含subject、relation、object、modifier、confidence字段。',
     '图片四元组提取 Prompt 模板'),
    (NULL, 'prompt_qa_gen_text',
     '根据以下文本三元组生成高质量问答对以JSON数组格式返回每个元素包含question、answer、difficulty字段。',
     '文本问答生成 Prompt 模板'),
    (NULL, 'prompt_qa_gen_image',
     '根据以下图片四元组生成高质量问答对以JSON数组格式返回每个元素包含question、answer、imageRef、difficulty字段。',
     '图片问答生成 Prompt 模板')
ON CONFLICT DO NOTHING;