feat(plan): 生成 label_backend 完整实施规划文档
Phase 0:research.md(10项技术决策,无需澄清项) Phase 1:data-model.md(11张表+Redis结构),contracts/(8个模块API契约),quickstart.md(Docker Compose启动+流水线验证) plan.md:宪章11条全部通过,项目结构确认
This commit is contained in:
@@ -208,7 +208,7 @@ CREATE TABLE source_data (
|
||||
bucket_name VARCHAR(100) NOT NULL,
|
||||
parent_source_id BIGINT REFERENCES source_data(id), -- 视频转文本时指向原视频
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'PENDING',
|
||||
-- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED / REJECTED
|
||||
-- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED(无 REJECTED 状态,QA 驳回作用于 annotation_task)
|
||||
reject_reason TEXT,
|
||||
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
|
||||
@@ -961,10 +961,12 @@ public void unclaim(Long taskId) {
|
||||
| 方法 | 路径 | 最低权限 | 说明 |
|
||||
|------|------|----------|------|
|
||||
| POST | `/api/tasks` | ADMIN | 为指定 source 创建 EXTRACTION 任务 |
|
||||
| GET | `/api/tasks/pool` | ANNOTATOR | 查看可领取任务列表(按角色过滤,分页) |
|
||||
| POST | `/api/tasks/{id}/claim` | ANNOTATOR | 领取任务(争抢式) |
|
||||
| GET | `/api/tasks/pool` | ANNOTATOR | 查看任务池(按角色过滤)。ANNOTATOR 只看到 UNCLAIMED 状态的 EXTRACTION 类型任务;REVIEWER 只看到 SUBMITTED 状态的任务(即审批队列,与 pending-review 等价);两者均分页,不可无界查询 |
|
||||
| POST | `/api/tasks/{id}/claim` | ANNOTATOR | 领取任务(争抢式,Redis SET NX + DB 乐观锁) |
|
||||
| POST | `/api/tasks/{id}/unclaim` | ANNOTATOR | 放弃任务,退回任务池 |
|
||||
| GET | `/api/tasks/mine` | ANNOTATOR | 查询我领取的任务列表(分页) |
|
||||
| GET | `/api/tasks/mine` | ANNOTATOR | 查询我领取的任务列表(包含 IN_PROGRESS、SUBMITTED、REJECTED 状态,分页) |
|
||||
| POST | `/api/tasks/{id}/reclaim` | ANNOTATOR | 重领被驳回的任务(task.status 必须为 REJECTED 且 claimedBy = 当前用户),状态流转 REJECTED → IN_PROGRESS |
|
||||
| GET | `/api/tasks/pending-review` | REVIEWER | 查看待我审批的任务列表(status = SUBMITTED,分页);REVIEWER 的专属审批入口 |
|
||||
| GET | `/api/tasks/{id}` | ANNOTATOR | 查看任务详情 |
|
||||
| GET | `/api/tasks` | ADMIN | 查询全部任务(支持过滤,分页) |
|
||||
| PUT | `/api/tasks/{id}/reassign` | ADMIN | 强制转移任务归属 |
|
||||
@@ -996,11 +998,16 @@ public void updateResult(Long taskId, String resultJsonStr) {
|
||||
annotationResultMapper.updateResultJson(taskId, resultJsonStr, CompanyContext.get());
|
||||
}
|
||||
|
||||
// 审批通过——级联触发,必须在同一事务内完成
|
||||
// 审批通过——两阶段:事务内完成同步步骤,事务提交后异步触发 QA 生成
|
||||
@Transactional
|
||||
@OperationLog(type = "EXTRACTION_APPROVE")
|
||||
public void approve(Long taskId) {
|
||||
AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED");
|
||||
|
||||
// 自审校验:提交者不能审批自己的任务
|
||||
if (task.getClaimedBy().equals(getCurrentUserId()))
|
||||
throw new BusinessException("SELF_REVIEW_FORBIDDEN", "不允许审批自己提交的任务");
|
||||
|
||||
AnnotationResult result = annotationResultMapper.selectByTaskId(taskId);
|
||||
|
||||
// 1. annotation_result.is_final = true
|
||||
@@ -1016,22 +1023,55 @@ public void approve(Long taskId) {
|
||||
// 3. 写入任务历史
|
||||
insertHistory(taskId, "SUBMITTED", "APPROVED", getCurrentUserId(), null);
|
||||
|
||||
// 4. 调用 AI 生成候选问答对
|
||||
String promptKey = "IMAGE".equals(getSourceType(task)) ? "prompt_qa_gen_image" : "prompt_qa_gen_text";
|
||||
// 4. 发布领域事件,事务提交后异步执行 QA 生成(步骤 5-8)
|
||||
// 注:AI HTTP 调用禁止在 @Transactional 内同步执行——会占用数据库连接直至 AI 响应,
|
||||
// 且 AI 失败会错误地回滚已完成的审批。
|
||||
// 使用 @TransactionalEventListener(phase = AFTER_COMMIT) 保证先提交再触发。
|
||||
eventPublisher.publishEvent(new ExtractionApprovedEvent(taskId, task.getSourceId(),
|
||||
getSourceType(task), CompanyContext.get()));
|
||||
}
|
||||
|
||||
// 驳回——状态回退,标注员可重领
|
||||
@Transactional
|
||||
@OperationLog(type = "EXTRACTION_REJECT")
|
||||
public void reject(Long taskId, String reason) {
|
||||
AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED");
|
||||
|
||||
// 自审校验
|
||||
if (task.getClaimedBy().equals(getCurrentUserId()))
|
||||
throw new BusinessException("SELF_REVIEW_FORBIDDEN", "不允许驳回自己提交的任务");
|
||||
|
||||
StateValidator.assertTransition(TaskStatus.SUBMITTED, TaskStatus.REJECTED, TaskStatus.TRANSITIONS);
|
||||
task.setStatus("REJECTED");
|
||||
taskMapper.updateById(task);
|
||||
insertHistory(taskId, "SUBMITTED", "REJECTED", getCurrentUserId(), reason);
|
||||
// source_data.status 保持 EXTRACTING 不变,待标注员重新提交后再推进
|
||||
}
|
||||
|
||||
// ExtractionApprovedEventListener(@TransactionalEventListener,独立事务)
|
||||
// 负责 5-8 步:AI 调用 → 写 training_dataset → 创建 QA 任务 → 更新 source_data
|
||||
@TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT)
|
||||
@Transactional(propagation = Propagation.REQUIRES_NEW)
|
||||
public void onExtractionApproved(ExtractionApprovedEvent event) {
|
||||
AnnotationTask task = taskMapper.selectById(event.getTaskId());
|
||||
AnnotationResult result = annotationResultMapper.selectByTaskId(event.getTaskId());
|
||||
|
||||
// 5. 调用 AI 生成候选问答对(审批事务已提交,此处运行在 REQUIRES_NEW 独立事务中,失败不会回滚审批结果)
|
||||
String promptKey = "IMAGE".equals(event.getSourceType()) ? "prompt_qa_gen_image" : "prompt_qa_gen_text";
|
||||
String promptTemplate = sysConfigService.get(promptKey);
|
||||
QaGenResponse qaResponse = generateQa(task, result, promptTemplate);
|
||||
|
||||
// 5. 将候选问答对写入 training_dataset(PENDING_REVIEW)
|
||||
// 6. 将候选问答对写入 training_dataset(PENDING_REVIEW)
|
||||
List<TrainingDataset> samples = buildTrainingSamples(task, result, qaResponse);
|
||||
trainingDatasetMapper.batchInsert(samples);
|
||||
|
||||
// 6. 创建 QA_GENERATION 阶段任务(UNCLAIMED)
|
||||
// 7. 创建 QA_GENERATION 阶段任务(UNCLAIMED)
|
||||
AnnotationTask qaTask = buildQaTask(task);
|
||||
taskMapper.insert(qaTask);
|
||||
insertHistory(qaTask.getId(), null, "UNCLAIMED", getCurrentUserId(), null);
|
||||
insertHistory(qaTask.getId(), null, "UNCLAIMED", task.getClaimedBy(), null);
|
||||
|
||||
// 7. source_data.status → QA_REVIEW
|
||||
sourceDataMapper.updateStatus(task.getSourceId(), "QA_REVIEW", CompanyContext.get());
|
||||
// 8. source_data.status → QA_REVIEW
|
||||
sourceDataMapper.updateStatus(event.getSourceId(), "QA_REVIEW", event.getCompanyId());
|
||||
}
|
||||
```
|
||||
|
||||
@@ -1057,21 +1097,48 @@ public void approve(Long taskId) {
|
||||
@Transactional
|
||||
@OperationLog(type = "QA_APPROVE")
|
||||
public void approve(Long taskId) {
|
||||
// 1. training_dataset.status → APPROVED
|
||||
// 1. 先校验任务合法性(必须在任何 DB 写入之前执行,避免校验失败时数据已被修改)
|
||||
AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED");
|
||||
|
||||
// 自审校验:提交者不能审批自己的任务
|
||||
if (task.getClaimedBy().equals(getCurrentUserId()))
|
||||
throw new BusinessException("SELF_REVIEW_FORBIDDEN", "不允许审批自己提交的任务");
|
||||
|
||||
// 2. training_dataset.status → APPROVED
|
||||
trainingDatasetMapper.approveByTaskId(taskId, getCurrentUserId(), CompanyContext.get());
|
||||
|
||||
// 2. annotation_task.status → APPROVED
|
||||
AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED");
|
||||
// 3. annotation_task.status → APPROVED
|
||||
StateValidator.assertTransition(TaskStatus.SUBMITTED, TaskStatus.APPROVED, TaskStatus.TRANSITIONS);
|
||||
task.setStatus("APPROVED");
|
||||
task.setCompletedAt(LocalDateTime.now());
|
||||
taskMapper.updateById(task);
|
||||
|
||||
// 3. source_data.status → APPROVED(整条流水线完成)
|
||||
// 4. source_data.status → APPROVED(整条流水线完成)
|
||||
sourceDataMapper.updateStatus(task.getSourceId(), "APPROVED", CompanyContext.get());
|
||||
|
||||
// 4. 写入任务历史
|
||||
// 5. 写入任务历史
|
||||
insertHistory(taskId, "SUBMITTED", "APPROVED", getCurrentUserId(), null);
|
||||
}
|
||||
|
||||
// 驳回问答对——任务状态置为 REJECTED(标注员 reclaim 后回到 IN_PROGRESS),training_dataset 删除候选记录
|
||||
@Transactional
|
||||
@OperationLog(type = "QA_REJECT")
|
||||
public void reject(Long taskId, String reason) {
|
||||
AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED");
|
||||
|
||||
// 自审校验
|
||||
if (task.getClaimedBy().equals(getCurrentUserId()))
|
||||
throw new BusinessException("SELF_REVIEW_FORBIDDEN", "不允许驳回自己提交的任务");
|
||||
|
||||
// 删除本次生成的候选问答对(PENDING_REVIEW 状态),待标注员修改后重新提交
|
||||
trainingDatasetMapper.deleteByTaskId(taskId, CompanyContext.get());
|
||||
|
||||
StateValidator.assertTransition(TaskStatus.SUBMITTED, TaskStatus.REJECTED, TaskStatus.TRANSITIONS);
|
||||
task.setStatus("REJECTED");
|
||||
taskMapper.updateById(task);
|
||||
insertHistory(taskId, "SUBMITTED", "REJECTED", getCurrentUserId(), reason);
|
||||
// source_data.status 保持 QA_REVIEW 不变
|
||||
}
|
||||
```
|
||||
|
||||
**接口清单:**
|
||||
@@ -1241,14 +1308,15 @@ public final class StateValidator {
|
||||
|
||||
```java
|
||||
public enum SourceStatus {
|
||||
PENDING, PREPROCESSING, EXTRACTING, QA_REVIEW, APPROVED, REJECTED;
|
||||
PENDING, PREPROCESSING, EXTRACTING, QA_REVIEW, APPROVED;
|
||||
// 注:source_data 无 REJECTED 状态。QA 阶段驳回的是 annotation_task(→ REJECTED),
|
||||
// 不改变 source_data.status(保持 QA_REVIEW);重新提交后 source_data 随任务推进。
|
||||
|
||||
public static final Map<SourceStatus, Set<SourceStatus>> TRANSITIONS = Map.of(
|
||||
PENDING, Set.of(EXTRACTING, PREPROCESSING),
|
||||
PREPROCESSING, Set.of(PENDING),
|
||||
EXTRACTING, Set.of(QA_REVIEW),
|
||||
QA_REVIEW, Set.of(APPROVED, REJECTED),
|
||||
REJECTED, Set.of(EXTRACTING) // 驳回后可重提
|
||||
QA_REVIEW, Set.of(APPROVED)
|
||||
);
|
||||
}
|
||||
```
|
||||
@@ -1263,7 +1331,7 @@ public enum TaskStatus {
|
||||
UNCLAIMED, Set.of(IN_PROGRESS),
|
||||
IN_PROGRESS, Set.of(SUBMITTED, UNCLAIMED, IN_PROGRESS),
|
||||
// IN_PROGRESS → IN_PROGRESS 用于 ADMIN 强制转移(持有人变更,状态不变)
|
||||
SUBMITTED, Set.oAPPROVED, REJECTED),
|
||||
SUBMITTED, Set.of(APPROVED, REJECTED),
|
||||
REJECTED, Set.of(IN_PROGRESS) // 驳回后重拾
|
||||
);
|
||||
}
|
||||
|
||||
148
specs/001-label-backend-spec/contracts/auth.md
Normal file
148
specs/001-label-backend-spec/contracts/auth.md
Normal file
@@ -0,0 +1,148 @@
|
||||
# API 契约:认证与用户管理
|
||||
|
||||
**统一响应格式**:
|
||||
- 成功:`{"code": "SUCCESS", "data": {...}}`
|
||||
- 成功(无数据):`{"code": "SUCCESS", "data": null}`
|
||||
- 失败:`{"code": "ERROR_CODE", "message": "描述"}`
|
||||
- 分页成功:`{"code": "SUCCESS", "data": {"items": [...], "total": 100, "page": 1, "pageSize": 20}}`
|
||||
|
||||
---
|
||||
|
||||
## POST /api/auth/login
|
||||
|
||||
**权限**: 匿名
|
||||
**描述**: 用户登录,返回会话凭证
|
||||
|
||||
**请求体**:
|
||||
```json
|
||||
{
|
||||
"companyCode": "COMPANY_A",
|
||||
"username": "zhangsan",
|
||||
"password": "plaintext_password"
|
||||
}
|
||||
```
|
||||
|
||||
**成功响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"token": "550e8400-e29b-41d4-a716-446655440000",
|
||||
"userId": 1,
|
||||
"username": "zhangsan",
|
||||
"role": "ANNOTATOR",
|
||||
"expiresIn": 7200
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**失败响应**:
|
||||
- `401` `USER_NOT_FOUND`: 用户名或密码错误(不区分哪个错误,防止枚举)
|
||||
- `403` `USER_DISABLED`: 账号已禁用
|
||||
|
||||
---
|
||||
|
||||
## POST /api/auth/logout
|
||||
|
||||
**权限**: 已登录(Bearer Token)
|
||||
**描述**: 退出登录,立即删除 Redis 会话
|
||||
|
||||
**请求头**: `Authorization: Bearer {token}`
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
|
||||
---
|
||||
|
||||
## GET /api/auth/me
|
||||
|
||||
**权限**: 已登录
|
||||
**描述**: 获取当前用户信息
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"id": 1,
|
||||
"username": "zhangsan",
|
||||
"realName": "张三",
|
||||
"role": "ANNOTATOR",
|
||||
"companyId": 10,
|
||||
"companyName": "测试公司"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## GET /api/users
|
||||
|
||||
**权限**: ADMIN
|
||||
**描述**: 分页查询本公司用户列表
|
||||
|
||||
**查询参数**: `page`(默认 1)、`pageSize`(默认 20,最大 100)、`role`(可选过滤)、`status`(可选过滤)
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"items": [
|
||||
{"id": 1, "username": "zhangsan", "realName": "张三", "role": "ANNOTATOR", "status": "ACTIVE"}
|
||||
],
|
||||
"total": 50,
|
||||
"page": 1,
|
||||
"pageSize": 20
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## POST /api/users
|
||||
|
||||
**权限**: ADMIN
|
||||
**描述**: 创建用户
|
||||
|
||||
**请求体**:
|
||||
```json
|
||||
{
|
||||
"username": "lisi",
|
||||
"password": "initial_password",
|
||||
"realName": "李四",
|
||||
"role": "ANNOTATOR"
|
||||
}
|
||||
```
|
||||
|
||||
**响应** `201`: `{"code": "SUCCESS", "data": {"id": 2, "username": "lisi", ...}}`
|
||||
**失败**: `409` `USERNAME_EXISTS`: 用户名已存在
|
||||
|
||||
---
|
||||
|
||||
## PUT /api/users/{id}
|
||||
|
||||
**权限**: ADMIN
|
||||
**描述**: 更新用户基本信息
|
||||
|
||||
**请求体**: `{"realName": "新姓名"}`
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
|
||||
---
|
||||
|
||||
## PUT /api/users/{id}/status
|
||||
|
||||
**权限**: ADMIN
|
||||
**描述**: 启用或禁用账号,立即驱逐权限缓存
|
||||
|
||||
**请求体**: `{"status": "DISABLED"}`
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
|
||||
---
|
||||
|
||||
## PUT /api/users/{id}/role
|
||||
|
||||
**权限**: ADMIN
|
||||
**描述**: 变更用户角色,立即驱逐权限缓存
|
||||
|
||||
**请求体**: `{"role": "REVIEWER"}`
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**: `400` `INVALID_ROLE`: 角色值不合法
|
||||
53
specs/001-label-backend-spec/contracts/config.md
Normal file
53
specs/001-label-backend-spec/contracts/config.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# API 契约:系统配置
|
||||
|
||||
*所有接口需要 ADMIN 权限*
|
||||
|
||||
---
|
||||
|
||||
## GET /api/config
|
||||
|
||||
**描述**: 获取所有配置项(公司级配置 + 全局默认配置合并,公司级优先)
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"items": [
|
||||
{
|
||||
"configKey": "prompt_extract_text",
|
||||
"configValue": "请提取以下文本中的主语-谓语-宾语三元组...",
|
||||
"description": "文本三元组提取 Prompt 模板",
|
||||
"scope": "GLOBAL",
|
||||
"updatedAt": "2026-04-09T00:00:00"
|
||||
},
|
||||
{
|
||||
"configKey": "model_default",
|
||||
"configValue": "glm-4-turbo",
|
||||
"description": "默认 AI 辅助模型",
|
||||
"scope": "COMPANY",
|
||||
"updatedAt": "2026-04-09T09:00:00"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`scope` 字段:`GLOBAL`(来自全局默认)、`COMPANY`(来自公司级覆盖)
|
||||
|
||||
---
|
||||
|
||||
## PUT /api/config/{key}
|
||||
|
||||
**描述**: 更新单项配置(若公司级配置不存在则创建;若存在则覆盖)
|
||||
|
||||
**请求体**:
|
||||
```json
|
||||
{
|
||||
"configValue": "glm-4-turbo",
|
||||
"description": "升级到 GLM-4-Turbo 模型"
|
||||
}
|
||||
```
|
||||
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**: `400` `UNKNOWN_CONFIG_KEY`: 未知的配置键(防止拼写错误创建无效配置)
|
||||
113
specs/001-label-backend-spec/contracts/export.md
Normal file
113
specs/001-label-backend-spec/contracts/export.md
Normal file
@@ -0,0 +1,113 @@
|
||||
# API 契约:训练数据导出与微调
|
||||
|
||||
*所有接口需要 ADMIN 权限*
|
||||
|
||||
---
|
||||
|
||||
## GET /api/training/samples
|
||||
|
||||
**描述**: 分页查询已审批、可导出的训练样本
|
||||
|
||||
**查询参数**: `page`、`pageSize`、`sampleType`(TEXT / IMAGE / VIDEO_FRAME,可选)、`exported`(true/false,可选)
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"items": [
|
||||
{
|
||||
"id": 1001,
|
||||
"sampleType": "TEXT",
|
||||
"status": "APPROVED",
|
||||
"exportBatchId": null,
|
||||
"sourceId": 50,
|
||||
"createdAt": "2026-04-09T12:00:00"
|
||||
}
|
||||
],
|
||||
"total": 500,
|
||||
"page": 1,
|
||||
"pageSize": 20
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## POST /api/export/batch
|
||||
|
||||
**描述**: 创建导出批次,合并选定样本为 JSONL 并上传 RustFS
|
||||
|
||||
**请求体**:
|
||||
```json
|
||||
{
|
||||
"sampleIds": [1001, 1002, 1003]
|
||||
}
|
||||
```
|
||||
|
||||
**成功响应** `201`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"id": 10,
|
||||
"batchUuid": "550e8400-e29b-41d4-a716-446655440000",
|
||||
"sampleCount": 3,
|
||||
"datasetFilePath": "export/550e8400.jsonl",
|
||||
"finetuneStatus": "NOT_STARTED"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**失败**:
|
||||
- `400` `INVALID_SAMPLES`: 部分样本不处于 APPROVED 状态
|
||||
- `400` `EMPTY_SAMPLES`: sampleIds 为空
|
||||
|
||||
---
|
||||
|
||||
## POST /api/export/{batchId}/finetune
|
||||
|
||||
**描述**: 向 GLM AI 服务提交微调任务
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"glmJobId": "glm-finetune-abc123",
|
||||
"finetuneStatus": "RUNNING"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**失败**: `409` `FINETUNE_ALREADY_STARTED`: 微调任务已提交
|
||||
|
||||
---
|
||||
|
||||
## GET /api/export/{batchId}/status
|
||||
|
||||
**描述**: 查询微调任务状态(向 AI 服务实时查询)
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"batchId": 10,
|
||||
"glmJobId": "glm-finetune-abc123",
|
||||
"finetuneStatus": "RUNNING",
|
||||
"progress": 45,
|
||||
"errorMessage": null
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## GET /api/export/list
|
||||
|
||||
**描述**: 分页查询所有导出批次
|
||||
|
||||
**查询参数**: `page`、`pageSize`
|
||||
|
||||
**响应** `200`: 批次列表(含 finetuneStatus、sampleCount、createdAt 等字段)
|
||||
97
specs/001-label-backend-spec/contracts/extraction.md
Normal file
97
specs/001-label-backend-spec/contracts/extraction.md
Normal file
@@ -0,0 +1,97 @@
|
||||
# API 契约:提取阶段标注工作台
|
||||
|
||||
---
|
||||
|
||||
## GET /api/extraction/{taskId}
|
||||
|
||||
**权限**: ANNOTATOR(且为任务持有者)
|
||||
**描述**: 获取当前提取结果(含 AI 预标注候选,供人工编辑)
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"taskId": 101,
|
||||
"sourceType": "TEXT",
|
||||
"sourceFilePath": "text/202604/50.txt",
|
||||
"isFinal": false,
|
||||
"resultJson": {
|
||||
"items": [
|
||||
{
|
||||
"subject": "北京",
|
||||
"predicate": "是...首都",
|
||||
"object": "中国",
|
||||
"sourceText": "北京是中国的首都",
|
||||
"startOffset": 0,
|
||||
"endOffset": 8
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## PUT /api/extraction/{taskId}
|
||||
|
||||
**权限**: ANNOTATOR(且为任务持有者)
|
||||
**描述**: 更新提取结果(**整体 JSONB 覆盖,PUT 语义,禁止局部 PATCH**)
|
||||
|
||||
**请求体**:
|
||||
```json
|
||||
{
|
||||
"items": [
|
||||
{
|
||||
"subject": "北京",
|
||||
"predicate": "是...首都",
|
||||
"object": "中国",
|
||||
"sourceText": "北京是中国的首都",
|
||||
"startOffset": 0,
|
||||
"endOffset": 8
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**: `400` `INVALID_JSON`: 提交的 JSON 格式不合法
|
||||
|
||||
---
|
||||
|
||||
## POST /api/extraction/{taskId}/submit
|
||||
|
||||
**权限**: ANNOTATOR(且为任务持有者)
|
||||
**描述**: 提交提取结果,任务状态 IN_PROGRESS → SUBMITTED,进入审批队列
|
||||
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**: `409` `INVALID_STATE`: 任务当前状态不允许提交
|
||||
|
||||
---
|
||||
|
||||
## POST /api/extraction/{taskId}/approve
|
||||
|
||||
**权限**: REVIEWER
|
||||
**描述**: 审批通过。**两阶段操作**:
|
||||
1. 同步(同一事务):`annotation_result.is_final = true`,任务状态 SUBMITTED → APPROVED,写任务历史
|
||||
2. 异步(事务提交后):AI 生成候选问答对 → 写 training_dataset → 创建 QA_GENERATION 任务 → source_data 状态推进
|
||||
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**:
|
||||
- `403` `SELF_REVIEW_FORBIDDEN`: 不允许审批自己提交的任务
|
||||
- `409` `INVALID_STATE`: 任务状态不为 SUBMITTED
|
||||
|
||||
---
|
||||
|
||||
## POST /api/extraction/{taskId}/reject
|
||||
|
||||
**权限**: REVIEWER
|
||||
**描述**: 驳回提取结果,任务状态 SUBMITTED → REJECTED,标注员可重领
|
||||
|
||||
**请求体**: `{"reason": "三元组边界不准确,请重新标注"}`
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**:
|
||||
- `403` `SELF_REVIEW_FORBIDDEN`: 不允许驳回自己提交的任务
|
||||
- `409` `INVALID_STATE`: 任务状态不为 SUBMITTED
|
||||
- `400` `REASON_REQUIRED`: 驳回原因不能为空
|
||||
83
specs/001-label-backend-spec/contracts/qa.md
Normal file
83
specs/001-label-backend-spec/contracts/qa.md
Normal file
@@ -0,0 +1,83 @@
|
||||
# API 契约:问答生成阶段
|
||||
|
||||
---
|
||||
|
||||
## GET /api/qa/{taskId}
|
||||
|
||||
**权限**: ANNOTATOR(且为任务持有者)
|
||||
**描述**: 获取候选问答对列表(由提取阶段审批触发 AI 生成)
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"taskId": 202,
|
||||
"sourceType": "TEXT",
|
||||
"items": [
|
||||
{
|
||||
"id": 1001,
|
||||
"question": "北京是哪个国家的首都?",
|
||||
"answer": "中国",
|
||||
"status": "PENDING_REVIEW"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## PUT /api/qa/{taskId}
|
||||
|
||||
**权限**: ANNOTATOR(且为任务持有者)
|
||||
**描述**: 修改问答对(**整体覆盖,PUT 语义**,每次提交包含完整 items 数组)
|
||||
|
||||
**请求体**:
|
||||
```json
|
||||
{
|
||||
"items": [
|
||||
{
|
||||
"question": "北京是哪个国家的首都?",
|
||||
"answer": "中国。北京自1949年起成为中华人民共和国的首都。"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
|
||||
---
|
||||
|
||||
## POST /api/qa/{taskId}/submit
|
||||
|
||||
**权限**: ANNOTATOR(且为任务持有者)
|
||||
**描述**: 提交问答对,任务状态 IN_PROGRESS → SUBMITTED
|
||||
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**: `409` `INVALID_STATE`: 任务当前状态不允许提交
|
||||
|
||||
---
|
||||
|
||||
## POST /api/qa/{taskId}/approve
|
||||
|
||||
**权限**: REVIEWER
|
||||
**描述**: 审批通过。同一事务中:先校验任务 → training_dataset 状态 → 任务状态 SUBMITTED → APPROVED → source_data 状态 → 写任务历史
|
||||
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**:
|
||||
- `403` `SELF_REVIEW_FORBIDDEN`: 不允许审批自己提交的任务
|
||||
- `409` `INVALID_STATE`: 任务状态不为 SUBMITTED
|
||||
|
||||
---
|
||||
|
||||
## POST /api/qa/{taskId}/reject
|
||||
|
||||
**权限**: REVIEWER
|
||||
**描述**: 驳回问答对,删除候选记录,任务状态 SUBMITTED → REJECTED
|
||||
|
||||
**请求体**: `{"reason": "问题描述不准确,请修改"}`
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**:
|
||||
- `403` `SELF_REVIEW_FORBIDDEN`: 不允许驳回自己提交的任务
|
||||
- `400` `REASON_REQUIRED`: 驳回原因不能为空
|
||||
96
specs/001-label-backend-spec/contracts/source.md
Normal file
96
specs/001-label-backend-spec/contracts/source.md
Normal file
@@ -0,0 +1,96 @@
|
||||
# API 契约:资料管理
|
||||
|
||||
---
|
||||
|
||||
## POST /api/source/upload
|
||||
|
||||
**权限**: UPLOADER
|
||||
**描述**: 上传文件,创建 source_data 记录,文件字节流写入 RustFS
|
||||
|
||||
**请求**: `multipart/form-data`,字段:`file`(必填)、`dataType`(TEXT / IMAGE / VIDEO)
|
||||
|
||||
**响应** `201`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"id": 50,
|
||||
"fileName": "document.txt",
|
||||
"dataType": "TEXT",
|
||||
"fileSize": 204800,
|
||||
"status": "PENDING",
|
||||
"createdAt": "2026-04-09T10:00:00"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**失败**:
|
||||
- `400` `INVALID_TYPE`: 不支持的资料类型
|
||||
- `400` `FILE_EMPTY`: 文件为空
|
||||
|
||||
---
|
||||
|
||||
## GET /api/source/list
|
||||
|
||||
**权限**: UPLOADER
|
||||
**描述**: 分页查询资料列表。UPLOADER 只见自己上传的资料;ADMIN 见本公司全部资料
|
||||
|
||||
**查询参数**: `page`(默认 1)、`pageSize`(默认 20)、`dataType`(可选)、`status`(可选)
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"items": [
|
||||
{
|
||||
"id": 50,
|
||||
"fileName": "document.txt",
|
||||
"dataType": "TEXT",
|
||||
"status": "PENDING",
|
||||
"uploaderId": 1,
|
||||
"createdAt": "2026-04-09T10:00:00"
|
||||
}
|
||||
],
|
||||
"total": 120,
|
||||
"page": 1,
|
||||
"pageSize": 20
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## GET /api/source/{id}
|
||||
|
||||
**权限**: UPLOADER
|
||||
**描述**: 查看资料详情,含 RustFS 预签名临时下载链接(有效期 15 分钟)
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"id": 50,
|
||||
"dataType": "TEXT",
|
||||
"fileName": "document.txt",
|
||||
"fileSize": 204800,
|
||||
"status": "EXTRACTING",
|
||||
"presignedUrl": "https://rustfs.example.com/...",
|
||||
"parentSourceId": null,
|
||||
"createdAt": "2026-04-09T10:00:00"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## DELETE /api/source/{id}
|
||||
|
||||
**权限**: ADMIN
|
||||
**描述**: 删除资料(同时删除 RustFS 文件及元数据)
|
||||
|
||||
**前置条件**: 资料状态为 PENDING(不允许删除已进入流水线的资料)
|
||||
|
||||
**响应** `204`: 无响应体
|
||||
**失败**: `409` `SOURCE_IN_PIPELINE`: 资料已进入标注流程,不可删除
|
||||
150
specs/001-label-backend-spec/contracts/tasks.md
Normal file
150
specs/001-label-backend-spec/contracts/tasks.md
Normal file
@@ -0,0 +1,150 @@
|
||||
# API 契约:任务管理
|
||||
|
||||
---
|
||||
|
||||
## GET /api/tasks/pool
|
||||
|
||||
**权限**: ANNOTATOR
|
||||
**描述**: 查看可领取任务池。角色过滤规则:
|
||||
- ANNOTATOR:仅返回 EXTRACTION 阶段、status=UNCLAIMED 的任务
|
||||
- REVIEWER/ADMIN:仅返回 SUBMITTED 状态(待审批队列)的任务
|
||||
|
||||
**查询参数**: `page`(默认 1)、`pageSize`(默认 20)
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"items": [
|
||||
{
|
||||
"id": 101,
|
||||
"sourceId": 50,
|
||||
"sourceType": "TEXT",
|
||||
"phase": "EXTRACTION",
|
||||
"status": "UNCLAIMED",
|
||||
"createdAt": "2026-04-09T10:00:00"
|
||||
}
|
||||
],
|
||||
"total": 30,
|
||||
"page": 1,
|
||||
"pageSize": 20
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## GET /api/tasks/pending-review
|
||||
|
||||
**权限**: REVIEWER
|
||||
**描述**: REVIEWER 专属审批入口,查看 status=SUBMITTED 的任务列表
|
||||
|
||||
**查询参数**: `page`、`pageSize`、`phase`(可选,EXTRACTION / QA_GENERATION)
|
||||
|
||||
**响应**: 同 `/api/tasks/pool` 结构
|
||||
|
||||
---
|
||||
|
||||
## POST /api/tasks/{id}/claim
|
||||
|
||||
**权限**: ANNOTATOR
|
||||
**描述**: 领取任务(双重并发保障:Redis SET NX + DB 乐观锁)
|
||||
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**:
|
||||
- `409` `TASK_CLAIMED`: 任务已被他人领取
|
||||
- `404` `TASK_NOT_FOUND`: 任务不存在
|
||||
|
||||
---
|
||||
|
||||
## POST /api/tasks/{id}/unclaim
|
||||
|
||||
**权限**: ANNOTATOR(且为任务持有者)
|
||||
**描述**: 放弃任务,退回任务池(status: IN_PROGRESS → UNCLAIMED)
|
||||
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**: `403` `NOT_TASK_OWNER`: 非任务持有者
|
||||
|
||||
---
|
||||
|
||||
## GET /api/tasks/mine
|
||||
|
||||
**权限**: ANNOTATOR
|
||||
**描述**: 查询当前用户领取的任务(含 IN_PROGRESS、SUBMITTED、REJECTED 三种状态)
|
||||
|
||||
**查询参数**: `page`、`pageSize`、`status`(可选过滤)
|
||||
|
||||
**响应**: 同任务列表结构,含 `rejectReason` 字段(REJECTED 状态时非空)
|
||||
|
||||
---
|
||||
|
||||
## POST /api/tasks/{id}/reclaim
|
||||
|
||||
**权限**: ANNOTATOR
|
||||
**描述**: 重领被驳回的任务(status 必须为 REJECTED 且 claimedBy = 当前用户,流转 REJECTED → IN_PROGRESS)
|
||||
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**:
|
||||
- `403` `NOT_TASK_OWNER`: 非原持有者
|
||||
- `409` `INVALID_STATE`: 任务状态不为 REJECTED
|
||||
|
||||
---
|
||||
|
||||
## GET /api/tasks/{id}
|
||||
|
||||
**权限**: ANNOTATOR
|
||||
**描述**: 查看任务详情(含驳回原因、历史记录摘要)
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"id": 101,
|
||||
"sourceId": 50,
|
||||
"phase": "EXTRACTION",
|
||||
"status": "IN_PROGRESS",
|
||||
"claimedBy": 1,
|
||||
"claimedAt": "2026-04-09T10:05:00",
|
||||
"rejectReason": null,
|
||||
"historyCount": 2
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## GET /api/tasks
|
||||
|
||||
**权限**: ADMIN
|
||||
**描述**: 查询全部任务(支持过滤,分页)
|
||||
|
||||
**查询参数**: `page`、`pageSize`、`phase`、`status`、`claimedBy`、`sourceId`
|
||||
|
||||
---
|
||||
|
||||
## PUT /api/tasks/{id}/reassign
|
||||
|
||||
**权限**: ADMIN
|
||||
**描述**: 强制转移任务归属(status 保持 IN_PROGRESS,仅 claimedBy 变更)
|
||||
|
||||
**请求体**: `{"newOwnerId": 5, "reason": "原持有者长期未操作"}`
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
|
||||
---
|
||||
|
||||
## POST /api/tasks
|
||||
|
||||
**权限**: ADMIN
|
||||
**描述**: 为指定资料创建 EXTRACTION 任务
|
||||
|
||||
**请求体**:
|
||||
```json
|
||||
{
|
||||
"sourceId": 50,
|
||||
"taskType": "AI_ASSISTED",
|
||||
"aiModel": "glm-4"
|
||||
}
|
||||
```
|
||||
**响应** `201`: `{"code": "SUCCESS", "data": {"id": 101, ...}}`
|
||||
87
specs/001-label-backend-spec/contracts/video.md
Normal file
87
specs/001-label-backend-spec/contracts/video.md
Normal file
@@ -0,0 +1,87 @@
|
||||
# API 契约:视频处理
|
||||
|
||||
---
|
||||
|
||||
## POST /api/video/process
|
||||
|
||||
**权限**: ADMIN
|
||||
**描述**: 为已上传的视频资料创建异步处理任务
|
||||
|
||||
**请求体**:
|
||||
```json
|
||||
{
|
||||
"sourceId": 50,
|
||||
"jobType": "FRAME_EXTRACT",
|
||||
"params": {
|
||||
"frameInterval": 30,
|
||||
"mode": "FRAME"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
jobType 可选值:`FRAME_EXTRACT`(帧提取)、`VIDEO_TO_TEXT`(片段转文字)
|
||||
|
||||
**响应** `201`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"jobId": 200,
|
||||
"sourceId": 50,
|
||||
"jobType": "FRAME_EXTRACT",
|
||||
"status": "PENDING"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## GET /api/video/jobs/{jobId}
|
||||
|
||||
**权限**: ADMIN
|
||||
**描述**: 查询视频处理任务状态
|
||||
|
||||
**响应** `200`:
|
||||
```json
|
||||
{
|
||||
"code": "SUCCESS",
|
||||
"data": {
|
||||
"id": 200,
|
||||
"status": "RUNNING",
|
||||
"processedUnits": 15,
|
||||
"totalUnits": 50,
|
||||
"retryCount": 0,
|
||||
"errorMessage": null,
|
||||
"startedAt": "2026-04-09T10:05:00"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## POST /api/video/jobs/{jobId}/reset
|
||||
|
||||
**权限**: ADMIN
|
||||
**描述**: 手动重置 FAILED 状态的任务为 PENDING,允许重新触发(FAILED → PENDING 不在自动状态机中)
|
||||
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
**失败**: `409` `INVALID_STATE`: 任务状态不为 FAILED
|
||||
|
||||
---
|
||||
|
||||
## POST /api/video/callback(内部接口)
|
||||
|
||||
**权限**: AI 服务内部调用(IP 白名单 / 服务密钥)
|
||||
**描述**: AI 服务回调,通知视频处理结果(幂等:重复成功回调静默忽略)
|
||||
|
||||
**请求体**:
|
||||
```json
|
||||
{
|
||||
"jobId": 200,
|
||||
"success": true,
|
||||
"outputPath": "frames/50/",
|
||||
"errorMessage": null
|
||||
}
|
||||
```
|
||||
|
||||
**响应** `200`: `{"code": "SUCCESS", "data": null}`
|
||||
355
specs/001-label-backend-spec/data-model.md
Normal file
355
specs/001-label-backend-spec/data-model.md
Normal file
@@ -0,0 +1,355 @@
|
||||
# 数据模型:label_backend
|
||||
|
||||
**日期**: 2026-04-09
|
||||
**分支**: `001-label-backend-spec`
|
||||
|
||||
---
|
||||
|
||||
## 实体关系概览
|
||||
|
||||
```
|
||||
sys_company ─┬─ sys_user (company_id FK)
|
||||
├─ source_data (company_id FK)
|
||||
│ └─ source_data (parent_source_id 自引用,视频溯源链)
|
||||
├─ annotation_task (company_id FK)
|
||||
│ ├─ annotation_result (task_id FK)
|
||||
│ └─ annotation_task_history (task_id FK)
|
||||
├─ training_dataset (company_id FK)
|
||||
├─ export_batch (company_id FK)
|
||||
├─ sys_config (company_id FK,可为 NULL 表示全局默认)
|
||||
├─ sys_operation_log (company_id FK)
|
||||
└─ video_process_job (company_id FK)
|
||||
```
|
||||
|
||||
**多租户规则**:除 `sys_company` 本身外,所有业务表均包含 `company_id NOT NULL`。查询时由 `TenantLineInnerInterceptor` 自动注入 `WHERE company_id = ?`。唯一例外:`sys_config` 允许 `company_id = NULL` 表示全局默认配置。
|
||||
|
||||
---
|
||||
|
||||
## 实体详情
|
||||
|
||||
### 1. sys_company — 公司(租户)
|
||||
|
||||
| 字段 | 类型 | 约束 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | BIGSERIAL | PK | 自增主键 |
|
||||
| company_name | VARCHAR(100) | NOT NULL UNIQUE | 公司名称 |
|
||||
| company_code | VARCHAR(50) | NOT NULL UNIQUE | 公司编码 |
|
||||
| status | VARCHAR(10) | NOT NULL DEFAULT 'ACTIVE' | ACTIVE / DISABLED |
|
||||
| created_at | TIMESTAMP | NOT NULL DEFAULT NOW() | |
|
||||
| updated_at | TIMESTAMP | NOT NULL DEFAULT NOW() | |
|
||||
|
||||
**状态**: 无状态机(仅 ACTIVE/DISABLED 标志)
|
||||
|
||||
---
|
||||
|
||||
### 2. sys_user — 用户
|
||||
|
||||
| 字段 | 类型 | 约束 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | BIGSERIAL | PK | |
|
||||
| company_id | BIGINT | NOT NULL FK→sys_company | 租户隔离键 |
|
||||
| username | VARCHAR(50) | NOT NULL | 同公司内唯一 |
|
||||
| password_hash | VARCHAR(255) | NOT NULL | BCrypt 强度≥10,禁止序列化到响应 |
|
||||
| real_name | VARCHAR(50) | — | |
|
||||
| role | VARCHAR(20) | NOT NULL | UPLOADER / ANNOTATOR / REVIEWER / ADMIN |
|
||||
| status | VARCHAR(10) | NOT NULL DEFAULT 'ACTIVE' | ACTIVE / DISABLED |
|
||||
| created_at / updated_at | TIMESTAMP | NOT NULL | |
|
||||
|
||||
**约束**: `UNIQUE(company_id, username)`
|
||||
**索引**: `(company_id)`
|
||||
**角色继承**: ADMIN ⊃ REVIEWER ⊃ ANNOTATOR ⊃ UPLOADER(由 Shiro Realm 的 addInheritedRoles() 实现)
|
||||
|
||||
---
|
||||
|
||||
### 3. source_data — 原始资料
|
||||
|
||||
| 字段 | 类型 | 约束 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | BIGSERIAL | PK | |
|
||||
| company_id | BIGINT | NOT NULL FK→sys_company | |
|
||||
| uploader_id | BIGINT | FK→sys_user | |
|
||||
| data_type | VARCHAR(20) | NOT NULL | TEXT / IMAGE / VIDEO |
|
||||
| file_path | VARCHAR(500) | NOT NULL | RustFS 对象路径 |
|
||||
| file_name | VARCHAR(255) | NOT NULL | 原始文件名 |
|
||||
| file_size | BIGINT | — | 字节数 |
|
||||
| bucket_name | VARCHAR(100) | NOT NULL | RustFS 桶名 |
|
||||
| parent_source_id | BIGINT | FK→source_data | 视频片段转文本时指向原视频 |
|
||||
| status | VARCHAR(20) | NOT NULL DEFAULT 'PENDING' | 见状态机 |
|
||||
| reject_reason | TEXT | — | 保留字段(当前无 REJECTED 状态) |
|
||||
| created_at / updated_at | TIMESTAMP | NOT NULL | |
|
||||
|
||||
**索引**: `(company_id)`、`(company_id, status)`、`(parent_source_id)`
|
||||
|
||||
**状态机**:
|
||||
```
|
||||
PENDING → EXTRACTING(直接上传的文本/图片)
|
||||
PENDING → PREPROCESSING(视频上传后)
|
||||
PREPROCESSING → PENDING(视频预处理完成后进入标注流程)
|
||||
EXTRACTING → QA_REVIEW(提取任务审批通过后)
|
||||
QA_REVIEW → APPROVED(QA 任务审批通过后,整条流水线完成)
|
||||
```
|
||||
|
||||
*注:source_data 无 REJECTED 状态。QA 阶段驳回作用于 annotation_task(→REJECTED),source_data 保持 QA_REVIEW 不变。*
|
||||
|
||||
---
|
||||
|
||||
### 4. annotation_task — 标注任务
|
||||
|
||||
| 字段 | 类型 | 约束 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | BIGSERIAL | PK | |
|
||||
| company_id | BIGINT | NOT NULL FK→sys_company | |
|
||||
| source_id | BIGINT | NOT NULL FK→source_data | |
|
||||
| phase | VARCHAR(20) | NOT NULL | EXTRACTION / QA_GENERATION |
|
||||
| task_type | VARCHAR(20) | NOT NULL | AI_ASSISTED / MANUAL |
|
||||
| ai_model | VARCHAR(50) | — | 使用的 AI 模型 |
|
||||
| video_unit_type | VARCHAR(20) | — | FRAME(视频帧模式)/ NULL |
|
||||
| video_unit_info | JSONB | — | `{frame_index, time_sec, frame_path}` |
|
||||
| claimed_by | BIGINT | FK→sys_user | 当前持有者 |
|
||||
| claimed_at | TIMESTAMP | — | |
|
||||
| status | VARCHAR(20) | NOT NULL DEFAULT 'UNCLAIMED' | 见状态机 |
|
||||
| reject_reason | TEXT | — | 驳回原因 |
|
||||
| submitted_at | TIMESTAMP | — | |
|
||||
| completed_at | TIMESTAMP | — | |
|
||||
| created_at / updated_at | TIMESTAMP | NOT NULL | |
|
||||
|
||||
**索引**: `(company_id)`、`(company_id, phase, status)`(任务池查询)、`(claimed_by, status)`(我的任务)
|
||||
|
||||
**状态机**:
|
||||
```
|
||||
UNCLAIMED → IN_PROGRESS(领取)
|
||||
IN_PROGRESS → SUBMITTED(提交)
|
||||
IN_PROGRESS → UNCLAIMED(放弃)
|
||||
IN_PROGRESS → IN_PROGRESS(ADMIN 强制转移,持有人变更,状态不变)
|
||||
SUBMITTED → APPROVED(审批通过)
|
||||
SUBMITTED → REJECTED(审批驳回)
|
||||
REJECTED → IN_PROGRESS(标注员重领)
|
||||
```
|
||||
|
||||
**并发控制**: 领取时双重保障:① Redis `SET task:claim:{taskId} {userId} NX EX 30`(仅当 Key 不存在时写入,TTL 30s);② DB `UPDATE ... WHERE status='UNCLAIMED'`,影响行数为 0 时返回错误
|
||||
|
||||
---
|
||||
|
||||
### 5. annotation_result — 标注结果(提取阶段)
|
||||
|
||||
| 字段 | 类型 | 约束 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | BIGSERIAL | PK | |
|
||||
| company_id | BIGINT | NOT NULL FK→sys_company | |
|
||||
| task_id | BIGINT | NOT NULL FK→annotation_task | |
|
||||
| result_json | JSONB | NOT NULL | 整体覆盖,禁止局部 PATCH |
|
||||
| is_final | BOOLEAN | NOT NULL DEFAULT FALSE | 审批通过后置 TRUE |
|
||||
| submitted_by | BIGINT | FK→sys_user | |
|
||||
| created_at / updated_at | TIMESTAMP | NOT NULL | |
|
||||
|
||||
**result_json 结构**(文本三元组示例):
|
||||
```json
|
||||
{
|
||||
"items": [
|
||||
{
|
||||
"subject": "北京",
|
||||
"predicate": "是...首都",
|
||||
"object": "中国",
|
||||
"source_text": "北京是中国的首都",
|
||||
"start_offset": 0,
|
||||
"end_offset": 8
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**result_json 结构**(图片四元组示例):
|
||||
```json
|
||||
{
|
||||
"items": [
|
||||
{
|
||||
"subject": "猫",
|
||||
"relation": "坐在",
|
||||
"object": "椅子",
|
||||
"modifier": "白色的",
|
||||
"bbox": [100, 200, 300, 400],
|
||||
"crop_path": "crops/123/0.jpg"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**索引**: `(task_id)`、`(company_id, is_final)`
|
||||
|
||||
---
|
||||
|
||||
### 6. training_dataset — 训练样本
|
||||
|
||||
| 字段 | 类型 | 约束 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | BIGSERIAL | PK | |
|
||||
| company_id | BIGINT | NOT NULL FK→sys_company | |
|
||||
| task_id | BIGINT | NOT NULL FK→annotation_task | |
|
||||
| source_id | BIGINT | NOT NULL FK→source_data | |
|
||||
| extraction_result_id | BIGINT | NOT NULL FK→annotation_result | |
|
||||
| sample_type | VARCHAR(20) | NOT NULL | TEXT / IMAGE / VIDEO_FRAME |
|
||||
| glm_format_json | JSONB | NOT NULL | GLM 微调格式 |
|
||||
| export_batch_id | VARCHAR(50) | — | NULL 表示未导出 |
|
||||
| status | VARCHAR(20) | NOT NULL DEFAULT 'PENDING_REVIEW' | 见状态机 |
|
||||
| reject_reason | TEXT | — | |
|
||||
| reviewed_by | BIGINT | FK→sys_user | |
|
||||
| exported_at | TIMESTAMP | — | |
|
||||
| created_at / updated_at | TIMESTAMP | NOT NULL | |
|
||||
|
||||
**状态机**:
|
||||
```
|
||||
PENDING_REVIEW → APPROVED(QA 审批通过)
|
||||
PENDING_REVIEW → REJECTED(QA 审批驳回)
|
||||
REJECTED → PENDING_REVIEW(标注员修改后重提)
|
||||
```
|
||||
|
||||
**glm_format_json 结构**:
|
||||
```json
|
||||
{
|
||||
"conversations": [
|
||||
{"role": "user", "content": "..."},
|
||||
{"role": "assistant", "content": "..."}
|
||||
],
|
||||
"source_type": "TEXT"
|
||||
}
|
||||
```
|
||||
|
||||
**索引**: `(company_id)`、`(company_id, status)`、`(export_batch_id)`
|
||||
|
||||
---
|
||||
|
||||
### 7. export_batch — 导出批次
|
||||
|
||||
| 字段 | 类型 | 约束 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | BIGSERIAL | PK | |
|
||||
| company_id | BIGINT | NOT NULL FK→sys_company | |
|
||||
| batch_uuid | VARCHAR(50) | NOT NULL UNIQUE | 批次标识符 |
|
||||
| dataset_file_path | VARCHAR(500) | — | RustFS JSONL 路径 |
|
||||
| sample_count | INT | NOT NULL DEFAULT 0 | |
|
||||
| glm_job_id | VARCHAR(100) | — | 微调任务 ID |
|
||||
| finetune_status | VARCHAR(20) | NOT NULL DEFAULT 'NOT_STARTED' | 见状态 |
|
||||
| error_message | TEXT | — | |
|
||||
| created_by | BIGINT | FK→sys_user | |
|
||||
| created_at / updated_at | TIMESTAMP | NOT NULL | |
|
||||
|
||||
**finetune_status 值**: NOT_STARTED / RUNNING / SUCCESS / FAILED
|
||||
|
||||
**索引**: `(company_id)`
|
||||
|
||||
---
|
||||
|
||||
### 8. sys_config — 系统配置
|
||||
|
||||
| 字段 | 类型 | 约束 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | BIGSERIAL | PK | |
|
||||
| company_id | BIGINT | FK→sys_company,可 NULL | NULL = 全局默认配置 |
|
||||
| config_key | VARCHAR(100) | NOT NULL | |
|
||||
| config_value | TEXT | NOT NULL | |
|
||||
| description | TEXT | — | |
|
||||
| updated_by | BIGINT | FK→sys_user | |
|
||||
| updated_at | TIMESTAMP | NOT NULL | |
|
||||
|
||||
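**约束**: `UNIQUE(company_id, config_key)`。注意:PostgreSQL 的 UNIQUE 约束默认将 NULL 视为互不相等,因此该约束无法阻止同一 `config_key` 出现多条 `company_id IS NULL` 的全局配置;需另建部分唯一索引(`CREATE UNIQUE INDEX ... ON sys_config (config_key) WHERE company_id IS NULL`)保证全局默认配置唯一。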
|
||||
**查询规则**: 先按 `(companyId, configKey)` 查;未命中则按 `(NULL, configKey)` 查全局默认。
|
||||
|
||||
**预置全局配置键**:
|
||||
- `prompt_extract_text`、`prompt_extract_image`、`prompt_video_to_text`
|
||||
- `prompt_qa_gen_text`、`prompt_qa_gen_image`
|
||||
- `model_default`(默认:`glm-4`)
|
||||
- `video_frame_interval`(默认:`30`)
|
||||
- `token_ttl_seconds`(默认:`7200`)
|
||||
- `glm_api_base_url`
|
||||
|
||||
---
|
||||
|
||||
### 9. sys_operation_log — 操作审计日志
|
||||
|
||||
| 字段 | 类型 | 约束 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | BIGSERIAL | PK | |
|
||||
| company_id | BIGINT | FK→sys_company | |
|
||||
| operator_id | BIGINT | FK→sys_user | 登录失败时可为 NULL |
|
||||
| operator_name | VARCHAR(50) | NOT NULL | **操作时用户名快照**(不随改名变化) |
|
||||
| operation_type | VARCHAR(50) | NOT NULL | 见枚举列表 |
|
||||
| target_type | VARCHAR(30) | — | |
|
||||
| target_id | BIGINT | — | |
|
||||
| detail | JSONB | — | 补充信息 |
|
||||
| ip_address | VARCHAR(50) | — | |
|
||||
| result | VARCHAR(10) | NOT NULL | SUCCESS / FAIL |
|
||||
| error_message | TEXT | — | |
|
||||
| created_at | TIMESTAMP | NOT NULL DEFAULT NOW() | 分区键 |
|
||||
|
||||
**只追加**:应用层禁止 UPDATE/DELETE,建议 DB 层添加触发器强制执行
|
||||
**分区**:按 `created_at` Range 分区,以月为单位(`sys_operation_log_YYYY_MM`)。注意:PostgreSQL 要求分区表的主键/唯一约束必须包含全部分区键列,因此实际 DDL 中主键须声明为 `(id, created_at)`,而非单独的 `id`。
|
||||
|
||||
**operation_type 枚举**:
|
||||
`USER_LOGIN`、`USER_LOGOUT`、`USER_CREATE`、`USER_UPDATE`、`USER_DISABLE`、`USER_ROLE_CHANGE`、`SOURCE_UPLOAD`、`SOURCE_DELETE`、`TASK_CREATE`、`TASK_CLAIM`、`TASK_UNCLAIM`、`TASK_SUBMIT`、`EXTRACTION_APPROVE`、`EXTRACTION_REJECT`、`QA_APPROVE`、`QA_REJECT`、`TASK_REASSIGN`、`EXPORT_CREATE`、`FINETUNE_START`、`CONFIG_UPDATE`、`VIDEO_JOB_RESET`
|
||||
|
||||
---
|
||||
|
||||
### 10. annotation_task_history — 任务流转历史
|
||||
|
||||
| 字段 | 类型 | 约束 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | BIGSERIAL | PK | |
|
||||
| company_id | BIGINT | NOT NULL FK→sys_company | |
|
||||
| task_id | BIGINT | NOT NULL FK→annotation_task | |
|
||||
| from_status | VARCHAR(20) | — | 任务初建时为 NULL |
|
||||
| to_status | VARCHAR(20) | NOT NULL | |
|
||||
| operator_id | BIGINT | NOT NULL FK→sys_user | |
|
||||
| operator_role | VARCHAR(20) | NOT NULL | **操作时角色快照** |
|
||||
| note | TEXT | — | 驳回原因、转移说明等 |
|
||||
| created_at | TIMESTAMP | NOT NULL | |
|
||||
|
||||
**只追加**:每次 annotation_task.status 变更时同步插入,与业务操作在同一事务中
|
||||
**索引**: `(task_id)`
|
||||
|
||||
---
|
||||
|
||||
### 11. video_process_job — 视频异步处理任务
|
||||
|
||||
| 字段 | 类型 | 约束 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | BIGSERIAL | PK | |
|
||||
| company_id | BIGINT | NOT NULL FK→sys_company | |
|
||||
| source_id | BIGINT | NOT NULL FK→source_data | |
|
||||
| job_type | VARCHAR(20) | NOT NULL | FRAME_EXTRACT / VIDEO_TO_TEXT |
|
||||
| status | VARCHAR(20) | NOT NULL DEFAULT 'PENDING' | 见状态机 |
|
||||
| params | JSONB | NOT NULL | 处理参数 |
|
||||
| total_units | INT | — | 总帧数/片段数 |
|
||||
| processed_units | INT | NOT NULL DEFAULT 0 | |
|
||||
| output_path | VARCHAR(500) | — | |
|
||||
| retry_count | INT | NOT NULL DEFAULT 0 | |
|
||||
| max_retries | INT | NOT NULL DEFAULT 3 | |
|
||||
| error_message | TEXT | — | |
|
||||
| started_at / completed_at | TIMESTAMP | — | |
|
||||
| created_at / updated_at | TIMESTAMP | NOT NULL | |
|
||||
|
||||
**状态机**:
|
||||
```
|
||||
PENDING → RUNNING
|
||||
RUNNING → SUCCESS(处理成功)
|
||||
RUNNING → RETRYING(失败且 retry_count < max_retries)
|
||||
RUNNING → FAILED(失败且 retry_count >= max_retries)
|
||||
RETRYING → RUNNING(AI 服务自动重试)
|
||||
RETRYING → FAILED(超过最大重试次数)
|
||||
```
|
||||
*FAILED → PENDING:由 ADMIN 手动触发接口,不在状态机自动流转中*
|
||||
|
||||
**幂等规则**: 回调时若 `status == SUCCESS` 则静默忽略,不执行任何 DB 写入
|
||||
|
||||
**索引**: `(source_id)`、`(status)`
|
||||
|
||||
---
|
||||
|
||||
## Redis 数据结构
|
||||
|
||||
| Key 模式 | 类型 | TTL | 内容 |
|
||||
|---------|------|-----|------|
|
||||
| `token:{uuid}` | Hash | 2h(滑动) | `{userId, role, companyId, username}` |
|
||||
| `user:perm:{userId}` | String | 5min | 用户角色字符串 |
|
||||
| `task:claim:{taskId}` | String | 30s | 持有者 userId |
|
||||
|
||||
*禁止在上述三类命名空间之外自造 Key 用于认证、权限或锁目的。*
|
||||
137
specs/001-label-backend-spec/plan.md
Normal file
137
specs/001-label-backend-spec/plan.md
Normal file
@@ -0,0 +1,137 @@
|
||||
# 实施计划:label_backend 知识图谱智能标注平台
|
||||
|
||||
**分支**: `001-label-backend-spec` | **日期**: 2026-04-09 | **规格说明**: [spec.md](spec.md)
|
||||
**输入**: 功能规格说明 `/specs/001-label-backend-spec/spec.md`
|
||||
|
||||
---
|
||||
|
||||
## 摘要
|
||||
|
||||
构建面向多租户的知识图谱智能标注平台后端服务,驱动**文本线**(三元组提取 → 问答对生成 → 训练样本)和**图片线**(四元组提取 → 问答对生成 → 训练样本)两条流水线。视频作为预处理入口异步汇入两条流水线。系统基于 Spring Boot 3 + Apache Shiro + MyBatis Plus + PostgreSQL + Redis + RustFS 构建,通过 HTTP 调用 Python FastAPI AI 服务完成 AI 辅助标注和问答生成能力。
|
||||
|
||||
---
|
||||
|
||||
## 技术上下文
|
||||
|
||||
**语言/版本**: Java 17(LTS)
|
||||
**主要依赖**: Spring Boot ≥ 3.0.x、Apache Shiro ≥ 1.13.x、MyBatis Plus ≥ 3.5.x、Spring Data Redis
|
||||
**存储**: PostgreSQL ≥ 14(主库)、Redis ≥ 6.x(会话/权限缓存/分布式锁)、RustFS(S3 兼容对象存储)
|
||||
**测试**: JUnit 5 + Testcontainers(真实 PostgreSQL + Redis 实例)、Spring Boot Test
|
||||
**目标平台**: Linux 服务器,Docker Compose 容器化部署
|
||||
**项目类型**: Web Service(REST API)
|
||||
**性能目标**: 任务领取并发下有且仅有一人成功;权限变更延迟 < 1 秒生效
|
||||
**约束**: 禁止 JWT;禁止 Spring Security;禁止文件字节流存入数据库;AI HTTP 调用禁止在 @Transactional 内同步执行;所有列表接口强制分页
|
||||
**规模**: 多租户(多公司),每公司独立数据空间;11 张核心业务表
|
||||
|
||||
---
|
||||
|
||||
## 宪章合规检查
|
||||
|
||||
*门控:Phase 0 研究前必须通过。Phase 1 设计后重检。*
|
||||
|
||||
| # | 宪章原则 | 状态 | 说明 |
|
||||
|---|---------|------|------|
|
||||
| 1 | 环境约束(JDK 17、SB 3、Shiro、MyBatis Plus) | ✅ 通过 | pom.xml 中版本约束与宪章完全对齐;无 Spring Security 引入 |
|
||||
| 2 | 多租户数据隔离(company_id + ThreadLocal) | ✅ 通过 | TenantLineInnerInterceptor 自动注入;CompanyContext 在 finally 块清理 |
|
||||
| 3 | BCrypt 密码 + UUID Token + 禁 JWT | ✅ 通过 | AuthService 使用 BCrypt ≥ 10;UUID v4 Token 存 Redis;无 JWT 库 |
|
||||
| 4 | 分级 RBAC + 权限注解 + 角色变更驱逐缓存 | ✅ 通过 | @RequiresRoles 声明权限;updateRole() 立即删 user:perm:{userId} |
|
||||
| 5 | 双流水线 + 级联触发 + parent_source_id 溯源 | ✅ 通过 | 仅文本线/图片线;审批通过用 @TransactionalEventListener 触发 QA |
|
||||
| 6 | 状态机完整性(StateValidator) | ✅ 通过 | 所有状态变更经 StateValidator.assertTransition();禁止绕过 Mapper 直写 |
|
||||
| 7 | 任务争抢双重保障(Redis SET NX + DB 乐观锁) | ✅ 通过 | task:claim:{taskId} TTL 30s + WHERE status='UNCLAIMED' |
|
||||
| 8 | 异步视频处理幂等 + 重试上限 + FAILED 手动重置 | ✅ 通过 | SUCCESS 回调静默忽略;retry_count ≥ max_retries → FAILED |
|
||||
| 9 | 只追加审计日志 + AOP 切面 + 审计失败不回滚业务 | ✅ 通过 | @OperationLog AOP;sys_operation_log 无 UPDATE/DELETE;异常仅 error 日志 |
|
||||
| 10 | RESTful URL + 统一响应格式 + 强制分页 | ✅ 通过 | Result<T> 包装;无动词路径;PageResult<T> 分页 |
|
||||
| 11 | YAGNI:业务在 Service,Controller 只处理 HTTP | ✅ 通过 | 分层明确;无预测性抽象层 |
|
||||
|
||||
**门控结果:全部通过,可进入 Phase 0。**
|
||||
|
||||
---
|
||||
|
||||
## 项目结构
|
||||
|
||||
### 规格说明文档(本功能)
|
||||
|
||||
```text
|
||||
specs/001-label-backend-spec/
|
||||
├── plan.md # 本文件(/speckit.plan 输出)
|
||||
├── research.md # Phase 0 输出
|
||||
├── data-model.md # Phase 1 输出
|
||||
├── quickstart.md # Phase 1 输出
|
||||
├── contracts/ # Phase 1 输出(REST API 契约)
|
||||
│ ├── auth.md
|
||||
│ ├── source.md
|
||||
│ ├── tasks.md
|
||||
│ ├── extraction.md
|
||||
│ ├── qa.md
|
||||
│ ├── export.md
|
||||
│ ├── config.md
|
||||
│ └── video.md
|
||||
└── tasks.md # Phase 2 输出(/speckit.tasks 命令创建,非本命令)
|
||||
```
|
||||
|
||||
### 源代码(仓库根目录)
|
||||
|
||||
```text
|
||||
src/
|
||||
└── main/
|
||||
└── java/com/label/
|
||||
├── LabelBackendApplication.java
|
||||
├── common/
|
||||
│ ├── result/ # Result<T>、ResultCode、PageResult<T>
|
||||
│ ├── exception/ # BusinessException、GlobalExceptionHandler
|
||||
│ ├── context/ # CompanyContext(ThreadLocal)
|
||||
│ ├── shiro/ # TokenFilter、UserRealm、ShiroConfig
|
||||
│ ├── redis/ # RedisKeyManager、RedisService
|
||||
│ ├── aop/ # AuditAspect、@OperationLog 注解
|
||||
│ ├── storage/ # RustFsClient(S3 兼容封装)
|
||||
│ ├── ai/ # AiServiceClient(RestClient 封装 8 个端点)
|
||||
│ └── statemachine/ # StateValidator、各状态枚举
|
||||
└── module/
|
||||
├── user/ # AuthController、UserController、AuthService、UserService
|
||||
├── source/ # SourceController、SourceService
|
||||
├── task/ # TaskController、TaskService、TaskClaimService
|
||||
├── annotation/ # ExtractionController、QaController、ExtractionService、QaService
|
||||
├── export/ # ExportController、ExportService、FinetuneService
|
||||
├── config/ # SysConfigController、SysConfigService
|
||||
└── video/ # VideoController、VideoProcessService
|
||||
|
||||
src/
|
||||
└── test/
|
||||
└── java/com/label/
|
||||
├── integration/ # Testcontainers(真实 PG + Redis)集成测试
|
||||
│ ├── AuthIntegrationTest.java
|
||||
│ ├── TaskClaimConcurrencyTest.java
|
||||
│ ├── VideoCallbackIdempotencyTest.java
|
||||
│ ├── MultiTenantIsolationTest.java
|
||||
│ └── ShiroFilterIntegrationTest.java
|
||||
└── unit/ # 纯单元测试(状态机、业务逻辑)
|
||||
└── StateMachineTest.java
|
||||
|
||||
sql/
|
||||
└── init.sql # 全部 DDL(11 张表,按依赖顺序执行)
|
||||
|
||||
docker-compose.yml # postgres、redis、rustfs、backend、ai-service、frontend
|
||||
Dockerfile # eclipse-temurin:17-jre-alpine
|
||||
pom.xml
|
||||
```
|
||||
|
||||
**结构决策**:单一后端服务(Web Service),无前端代码。标准 Maven 项目布局,源代码在 `src/main/java/com/label/`,测试在 `src/test/java/com/label/`,按 `common/` + `module/` 两层分包。
|
||||
|
||||
---
|
||||
|
||||
## 复杂度追踪
|
||||
|
||||
> 宪章检查无违规,本节留空。
|
||||
|
||||
---
|
||||
|
||||
## GSTACK REVIEW REPORT
|
||||
|
||||
| Review | Trigger | Why | Runs | Status | Findings |
|
||||
|--------|---------|-----|------|--------|----------|
|
||||
| CEO Review | `/plan-ceo-review` | Scope & strategy | 0 | — | — |
|
||||
| Codex Review | `/codex review` | Independent 2nd opinion | 0 | — | — |
|
||||
| Eng Review | `/plan-eng-review` | Architecture & tests (required) | 0 | — | — |
|
||||
| Design Review | `/plan-design-review` | UI/UX gaps | 0 | — | — |
|
||||
|
||||
**VERDICT:** NO REVIEWS YET — run `/autoplan` for full review pipeline, or individual reviews above.
|
||||
179
specs/001-label-backend-spec/quickstart.md
Normal file
179
specs/001-label-backend-spec/quickstart.md
Normal file
@@ -0,0 +1,179 @@
|
||||
# 快速启动指南:label_backend
|
||||
|
||||
**日期**: 2026-04-09
|
||||
**分支**: `001-label-backend-spec`
|
||||
|
||||
---
|
||||
|
||||
## 前置条件
|
||||
|
||||
- Docker Desktop ≥ 4.x(含 Docker Compose v2)
|
||||
- JDK 17(本地开发时)
|
||||
- Maven ≥ 3.8(本地开发时)
|
||||
|
||||
---
|
||||
|
||||
## 一、使用 Docker Compose 启动完整环境
|
||||
|
||||
```bash
|
||||
# 克隆仓库
|
||||
git clone <repo-url>
|
||||
cd label_backend
|
||||
|
||||
# 启动所有服务(PostgreSQL + Redis + RustFS + AI Service + Backend + Frontend)
|
||||
docker compose up -d
|
||||
|
||||
# 查看后端启动日志
|
||||
docker compose logs -f backend
|
||||
|
||||
# 检查健康状态
|
||||
docker compose ps
|
||||
```
|
||||
|
||||
**服务端口**:
|
||||
| 服务 | 端口 |
|
||||
|------|------|
|
||||
| 前端(Nginx) | http://localhost:80 |
|
||||
| 后端 REST API | http://localhost:8080 |
|
||||
| AI 服务(FastAPI) | http://localhost:8000 |
|
||||
| PostgreSQL | localhost:5432 |
|
||||
| Redis | localhost:6379 |
|
||||
| RustFS S3 API | http://localhost:9000 |
|
||||
| RustFS Web 控制台 | http://localhost:9001 |
|
||||
|
||||
---
|
||||
|
||||
## 二、初始化数据库
|
||||
|
||||
数据库 DDL 通过 `./sql/init.sql` 在 PostgreSQL 容器**首次初始化**(数据目录为空)时自动执行(挂载到 `docker-entrypoint-initdb.d`);数据卷已存在时不会重复执行。
|
||||
|
||||
若需手动执行:
|
||||
```bash
|
||||
docker compose exec postgres psql -U label -d label_db -f /docker-entrypoint-initdb.d/init.sql
|
||||
```
|
||||
|
||||
**初始账号**(由 `init.sql` 中的 INSERT 语句创建):
|
||||
| 用户名 | 密码 | 角色 | 公司 |
|
||||
|--------|------|------|------|
|
||||
| admin | admin123 | ADMIN | 演示公司 |
|
||||
| reviewer01 | review123 | REVIEWER | 演示公司 |
|
||||
| annotator01 | annot123 | ANNOTATOR | 演示公司 |
|
||||
| uploader01 | upload123 | UPLOADER | 演示公司 |
|
||||
|
||||
---
|
||||
|
||||
## 三、本地开发模式(不使用 Docker)
|
||||
|
||||
```bash
|
||||
# 启动依赖服务(仅 PostgreSQL + Redis + RustFS,不启动后端)
|
||||
docker compose up -d postgres redis rustfs
|
||||
|
||||
# 设置环境变量
|
||||
export SPRING_DATASOURCE_URL=jdbc:postgresql://localhost:5432/label_db
|
||||
export SPRING_DATASOURCE_USERNAME=label
|
||||
export SPRING_DATASOURCE_PASSWORD=label_password
|
||||
export SPRING_REDIS_HOST=localhost
|
||||
export SPRING_REDIS_PORT=6379
|
||||
export SPRING_REDIS_PASSWORD=redis_password
|
||||
export RUSTFS_ENDPOINT=http://localhost:9000
|
||||
export RUSTFS_ACCESS_KEY=minioadmin
|
||||
export RUSTFS_SECRET_KEY=minioadmin
|
||||
export AI_SERVICE_BASE_URL=http://localhost:8000
|
||||
|
||||
# 编译并启动
|
||||
mvn clean spring-boot:run
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 四、验证安装
|
||||
|
||||
```bash
|
||||
# 1. 登录(获取 Token)
|
||||
curl -X POST http://localhost:8080/api/auth/login \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"companyCode":"DEMO","username":"admin","password":"admin123"}'
|
||||
|
||||
# 期望响应:{"code":"SUCCESS","data":{"token":"...","role":"ADMIN",...}}
|
||||
|
||||
# 2. 使用 Token 访问受保护接口(将 {TOKEN} 替换为上一步返回的 token)
|
||||
curl http://localhost:8080/api/auth/me \
|
||||
-H "Authorization: Bearer {TOKEN}"
|
||||
|
||||
# 期望响应:{"code":"SUCCESS","data":{"username":"admin","role":"ADMIN",...}}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 五、运行测试
|
||||
|
||||
```bash
|
||||
# 运行所有测试(Testcontainers 会自动启动真实 PG + Redis 容器)
|
||||
mvn test
|
||||
|
||||
# 运行特定测试(并发任务领取)
|
||||
mvn test -Dtest=TaskClaimConcurrencyTest
|
||||
|
||||
# 运行集成测试套件
|
||||
mvn test -Dtest='*IntegrationTest'
|
||||
```
|
||||
|
||||
**注意**: Testcontainers 需要本地 Docker 可用。首次运行会拉取 PostgreSQL 和 Redis 镜像(约 200MB)。
|
||||
|
||||
---
|
||||
|
||||
## 六、关键配置项说明
|
||||
|
||||
配置文件位于 `src/main/resources/application.yml`。以下配置项可在运行时通过 `PUT /api/config/{key}` 接口(ADMIN 权限)动态调整,无需重启服务:
|
||||
|
||||
| 配置键 | 说明 | 默认值 |
|
||||
|--------|------|--------|
|
||||
| `token_ttl_seconds` | 会话凭证有效期(秒) | 7200(2小时) |
|
||||
| `model_default` | AI 辅助默认模型 | glm-4 |
|
||||
| `video_frame_interval` | 视频帧提取间隔(帧数) | 30 |
|
||||
| `prompt_extract_text` | 文本三元组提取 Prompt | 见 init.sql |
|
||||
| `prompt_extract_image` | 图片四元组提取 Prompt | 见 init.sql |
|
||||
| `prompt_qa_gen_text` | 文本问答生成 Prompt | 见 init.sql |
|
||||
| `prompt_qa_gen_image` | 图片问答生成 Prompt | 见 init.sql |
|
||||
|
||||
---
|
||||
|
||||
## 七、标注流水线快速验证
|
||||
|
||||
```bash
|
||||
TOKEN="your-admin-token"
|
||||
|
||||
# 步骤 1:上传文本资料
|
||||
curl -X POST http://localhost:8080/api/source/upload \
|
||||
-H "Authorization: Bearer $TOKEN" \
|
||||
-F "file=@sample.txt" -F "dataType=TEXT"
|
||||
|
||||
# 步骤 2:为资料创建提取任务(sourceId 从上一步响应中获取)
|
||||
curl -X POST http://localhost:8080/api/tasks \
|
||||
-H "Authorization: Bearer $TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"sourceId": 1, "taskType": "AI_ASSISTED", "aiModel": "glm-4"}'
|
||||
|
||||
# 步骤 3:标注员领取任务(使用 annotator01 的 Token)
|
||||
ANNOTATOR_TOKEN="annotator-token"
|
||||
curl -X POST http://localhost:8080/api/tasks/1/claim \
|
||||
-H "Authorization: Bearer $ANNOTATOR_TOKEN"
|
||||
|
||||
# 步骤 4:获取 AI 预标注结果
|
||||
curl http://localhost:8080/api/extraction/1 \
|
||||
-H "Authorization: Bearer $ANNOTATOR_TOKEN"
|
||||
|
||||
# 步骤 5:提交标注结果
|
||||
curl -X PUT http://localhost:8080/api/extraction/1 \
|
||||
-H "Authorization: Bearer $ANNOTATOR_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"items":[{"subject":"北京","predicate":"是首都","object":"中国","sourceText":"北京是中国的首都","startOffset":0,"endOffset":8}]}'
|
||||
|
||||
curl -X POST http://localhost:8080/api/extraction/1/submit \
|
||||
-H "Authorization: Bearer $ANNOTATOR_TOKEN"
|
||||
|
||||
# 步骤 6:审批员审批通过(使用 reviewer01 的 Token)
|
||||
REVIEWER_TOKEN="reviewer-token"
|
||||
curl -X POST http://localhost:8080/api/extraction/1/approve \
|
||||
-H "Authorization: Bearer $REVIEWER_TOKEN"
|
||||
```
|
||||
150
specs/001-label-backend-spec/research.md
Normal file
150
specs/001-label-backend-spec/research.md
Normal file
@@ -0,0 +1,150 @@
|
||||
# Phase 0 研究报告:label_backend
|
||||
|
||||
**日期**: 2026-04-09
|
||||
**分支**: `001-label-backend-spec`
|
||||
|
||||
---
|
||||
|
||||
## 技术决策汇总
|
||||
|
||||
所有技术选型均由宪章强制约束,无需评估备选方案。本报告记录关键设计决策的理由,供后续实施参考。
|
||||
|
||||
---
|
||||
|
||||
## 决策 1:认证机制
|
||||
|
||||
**决策**: UUID v4 Token 存储于 Redis,滑动过期,禁止 JWT
|
||||
|
||||
**理由**:
|
||||
- JWT 自包含令牌无法按需吊销,无法满足"管理员禁用账号立即生效"的安全要求
|
||||
- UUID Token 在 Redis 中可精确控制生命周期:退出登录或禁用账号时同步删除 Key,下一次请求立即失效
|
||||
- 滑动过期(每次有效请求重置 TTL)确保活跃用户不被意外踢出
|
||||
|
||||
**备选方案放弃理由**:
|
||||
- JWT:无法即时吊销,存在安全窗口
|
||||
- Session Cookie:在无状态 REST API 架构中不适用
|
||||
- OAuth2:过度设计,当前场景无第三方授权需求
|
||||
|
||||
---
|
||||
|
||||
## 决策 2:多租户隔离机制
|
||||
|
||||
**决策**: MyBatis Plus `TenantLineInnerInterceptor` + `ThreadLocal CompanyContext`
|
||||
|
||||
**理由**:
|
||||
- `TenantLineInnerInterceptor` 在 SQL 拦截器层自动在每条查询的 WHERE 子句中注入 `company_id`,覆盖范围广且无需逐方法手动添加条件
|
||||
- ThreadLocal 存储当前请求的 `companyId`,由 Shiro TokenFilter 在解析 Token 时从 Redis 会话数据注入,确保 companyId 来自服务端权威来源而非客户端参数
|
||||
- `finally` 块强制清理 ThreadLocal,防止线程池复用时数据串漏
|
||||
|
||||
**备选方案放弃理由**:
|
||||
- 行级安全(RLS):PostgreSQL 原生支持,但与 MyBatis Plus 集成复杂,且宪章已指定 ThreadLocal 方案
|
||||
- 逐方法手动添加 WHERE:容易遗漏,维护成本高
|
||||
|
||||
---
|
||||
|
||||
## 决策 3:任务并发领取控制
|
||||
|
||||
**决策**: Redis `SET NX`(分布式锁)+ 数据库乐观约束(`WHERE status = 'UNCLAIMED'`)双重保障
|
||||
|
||||
**理由**:
|
||||
- 单纯使用数据库乐观锁在高并发下存在写放大问题(大量 UPDATE 竞争)
|
||||
- 单纯使用 Redis 锁若锁过期后 DB 写入失败可能导致数据不一致
|
||||
- 双重保障:Redis 锁(TTL 30s)快速拦截大部分并发请求,减少数据库压力;DB 乐观约束作为最终一致性兜底
|
||||
|
||||
**Key 命名**: `task:claim:{taskId}`(TTL 30s,与宪章 Redis Key 规范一致)
|
||||
|
||||
---
|
||||
|
||||
## 决策 4:审批触发 QA 任务的异步解耦
|
||||
|
||||
**决策**: Spring `@TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT)` + `@Transactional(propagation = Propagation.REQUIRES_NEW)`
|
||||
|
||||
**理由**:
|
||||
- 提取阶段审批通过后需调用 AI HTTP 生成候选问答对,该 HTTP 调用延迟不确定(秒级到分钟级)
|
||||
- 若在 `@Transactional` 内同步调用,数据库连接被长时间占用,且 AI 失败会错误地回滚已完成的审批操作
|
||||
- `AFTER_COMMIT` 保证业务审批先提交再触发事件,避免事务回滚导致的幽灵任务
|
||||
- `REQUIRES_NEW` 为 QA 生成开启独立事务,AI 失败仅影响 QA 任务创建,不影响审批结果
|
||||
|
||||
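**事件流**: `approve()` → publish `ExtractionApprovedEvent` → 事务提交 → `onExtractionApproved()` 异步执行(AI 调用 + 创建 QA 任务)。注意:`@TransactionalEventListener` 默认在发布事件的线程上同步执行,要实现异步需同时标注 `@Async`(并启用 `@EnableAsync`),否则审批请求仍会阻塞到 AI 调用返回。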
|
||||
|
||||
---
|
||||
|
||||
## 决策 5:标注结果存储语义
|
||||
|
||||
**决策**: JSONB 整体覆盖(PUT 语义),禁止局部 PATCH
|
||||
|
||||
**理由**:
|
||||
- 三元组/四元组条目具有强关联性(主语-谓语-宾语作为整体,或主体-关系-客体-修饰词作为整体),局部更新易导致不一致
|
||||
- 整体替换简化服务端逻辑,前端每次提交完整 items 数组,服务端直接执行 UPDATE `result_json = ?`
|
||||
- 避免局部追加导致的索引层数据不一致(如删除某条目后残留旧数据)
|
||||
|
||||
---
|
||||
|
||||
## 决策 6:审计日志事务边界
|
||||
|
||||
**决策**: 审计日志写入不要求与业务操作在同一事务,AOP `finally` 块中独立写入
|
||||
|
||||
**理由**:
|
||||
- 审计写入失败不应回滚业务操作(用户的标注/审批结果比审计日志更重要)
|
||||
- `@Around` 通知在业务方法执行完成(commit 或 rollback)后捕获最终 `result`,可记录准确的成功/失败状态
|
||||
- 审计失败仅 error 级别日志 + 告警,不影响用户体验
|
||||
|
||||
---
|
||||
|
||||
## 决策 7:视频预处理幂等回调
|
||||
|
||||
**决策**: 回调处理时检查 `video_process_job.status`,已为 `SUCCESS` 则静默忽略
|
||||
|
||||
**理由**:
|
||||
- AI 服务可能因网络抖动对同一 jobId 发起多次成功回调
|
||||
- 幂等检查确保第一次成功回调创建标注任务,后续重复回调无任何副作用
|
||||
- 检查粒度:`status == SUCCESS` 即返回,不进行任何 DB 写入
|
||||
|
||||
---
|
||||
|
||||
## 决策 8:对象存储路径规范
|
||||
|
||||
**决策**: RustFS(S3 兼容),文件字节流禁止入库,路径按资源类型分桶分目录
|
||||
|
||||
**路径规范**:
|
||||
|
||||
| 资源 | 桶 | 路径格式 |
|
||||
|------|-----|---------|
|
||||
| 文本文件 | `source-data` | `text/{yyyyMM}/{source_id}.txt` |
|
||||
| 图片 | `source-data` | `image/{yyyyMM}/{source_id}.jpg` |
|
||||
| 视频 | `source-data` | `video/{yyyyMM}/{source_id}.mp4` |
|
||||
| 视频帧 | `source-data` | `frames/{source_id}/{frame_index}.jpg` |
|
||||
| 视频转文本 | `source-data` | `video-text/{parent_source_id}/{timestamp}.txt` |
|
||||
| bbox 裁剪图 | `source-data` | `crops/{task_id}/{item_index}.jpg` |
|
||||
| 导出 JSONL | `finetune-export` | `export/{batchUuid}.jsonl` |
|
||||
|
||||
---
|
||||
|
||||
## 决策 9:测试策略
|
||||
|
||||
**决策**: 集成测试使用 Testcontainers(真实 PG + Redis),不允许 Mock 数据库
|
||||
|
||||
**必须覆盖的测试场景**:
|
||||
|
||||
1. **并发任务领取**:10 线程同时争抢同一任务,验证恰好 1 人成功(Redis + DB 双重锁)
|
||||
2. **视频回调幂等**:同一 jobId 两次成功回调,验证只创建 1 个 annotation_task
|
||||
3. **状态机越界拒绝**:非法状态转换(如 APPROVED → IN_PROGRESS)抛出 BusinessException
|
||||
4. **多租户隔离**:公司 A 身份访问公司 B 资源,验证被拒绝
|
||||
5. **Shiro 过滤器链**:无 Token → 401;Token 有效但角色不足 → 403
|
||||
|
||||
---
|
||||
|
||||
## 无需澄清事项汇总
|
||||
|
||||
| 项目 | 状态 | 来源 |
|
||||
|------|------|------|
|
||||
| 认证方案 | ✅ 已确定(UUID Token) | 宪章原则三 |
|
||||
| 数据库选型 | ✅ 已确定(PostgreSQL) | 宪章原则一 |
|
||||
| ORM | ✅ 已确定(MyBatis Plus) | 宪章原则一 |
|
||||
| 缓存/锁 | ✅ 已确定(Redis) | 宪章原则一 |
|
||||
| 对象存储 | ✅ 已确定(RustFS S3) | 宪章原则一 |
|
||||
| AI 集成方式 | ✅ 已确定(HTTP RestClient) | 宪章原则一 |
|
||||
| 多租户隔离 | ✅ 已确定(ThreadLocal + Interceptor) | 宪章原则二 |
|
||||
| 并发控制 | ✅ 已确定(双重锁) | 宪章原则七 |
|
||||
| 审批事务边界 | ✅ 已确定(@TransactionalEventListener) | 宪章原则五 |
|
||||
| 测试策略 | ✅ 已确定(Testcontainers) | 宪章开发工作流 |
|
||||
Reference in New Issue
Block a user