diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..1f495ed --- /dev/null +++ b/.dockerignore @@ -0,0 +1,7 @@ +.git +.claude +specs +docs +target +*.md +.gitignore diff --git a/.gitignore b/.gitignore index d4a7c3c..0f47a07 100644 --- a/.gitignore +++ b/.gitignore @@ -1,37 +1,32 @@ -target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst -target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst -target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst -target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst -target/surefire-reports/2026-04-09T13-02-42_141.dumpstream -target/surefire-reports/2026-04-09T13-05-35_797.dumpstream -target/surefire-reports/2026-04-09T13-10-00_741.dumpstream -target/surefire-reports/2026-04-09T13-12-36_692.dumpstream -target/surefire-reports/2026-04-09T13-12-48_346.dumpstream -.specify/init-options.json -.specify/integration.json -.specify/integrations/claude.manifest.json -.specify/integrations/speckit.manifest.json -.specify/integrations/claude/scripts/update-context.ps1 -.specify/integrations/claude/scripts/update-context.sh -.specify/memory/constitution.md -.specify/scripts/powershell/check-prerequisites.ps1 -.specify/scripts/powershell/common.ps1 -.specify/scripts/powershell/create-new-feature.ps1 -.specify/scripts/powershell/setup-plan.ps1 -.specify/scripts/powershell/update-agent-context.ps1 -.specify/templates/agent-file-template.md -.specify/templates/checklist-template.md -.specify/templates/constitution-template.md -.specify/templates/plan-template.md -.specify/templates/spec-template.md -.specify/templates/tasks-template.md -.claude/settings.local.json -.claude/skills/speckit-analyze/SKILL.md -.claude/skills/speckit-checklist/SKILL.md -.claude/skills/speckit-clarify/SKILL.md -.claude/skills/speckit-constitution/SKILL.md -.claude/skills/speckit-implement/SKILL.md 
-.claude/skills/speckit-plan/SKILL.md -.claude/skills/speckit-specify/SKILL.md -.claude/skills/speckit-tasks/SKILL.md -.claude/skills/speckit-taskstoissues/SKILL.md +# ========================================== +# 1. Maven/Java 构建产物 (一键忽略整个目录) +# ========================================== +target/ +*.class +*.jar +*.war +*.ear + +# ========================================== +# 2. IDE 配置文件 +# ========================================== +.idea/ +.vscode/ +*.iml +*.ipr +*.iws + +# ========================================== +# 3. 项目特定工具目录 (根据你的文件列表) +# ========================================== +# 忽略 Specify 工具生成的所有配置和脚本 +.specify/ + +# 忽略 Claude 本地设置和技能文件 +.claude/ + +# ========================================== +# 4. 操作系统文件 +# ========================================== +.DS_Store +Thumbs.db \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..091f3bc --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,3 @@ +# language + 请始终使用简体中文与我对话,并保持回答专业、简洁。 + diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..c8aa788 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +# Build stage: uses Maven + JDK 17 (Alpine) to compile and package the application. +FROM maven:3.9-eclipse-temurin-17-alpine AS builder +WORKDIR /app + +# Copy pom.xml first to leverage Docker layer caching for dependency downloads. +COPY pom.xml . +RUN mvn dependency:go-offline -q + +# Copy source and build the fat JAR, skipping tests. +COPY src ./src +RUN mvn clean package -DskipTests -q + +# Runtime stage: slim JRE-only image for a smaller production footprint. 
+FROM eclipse-temurin:17-jre-alpine +WORKDIR /app +COPY --from=builder /app/target/*.jar app.jar +EXPOSE 8080 +ENTRYPOINT ["java", "-jar", "app.jar"] diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..823bb22 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,96 @@ +version: "3.9" + +services: + postgres: + image: postgres:16-alpine + environment: + POSTGRES_DB: label_db + POSTGRES_USER: label + POSTGRES_PASSWORD: label_password + ports: + - "5432:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + - ./sql/init.sql:/docker-entrypoint-initdb.d/init.sql + healthcheck: + test: ["CMD-SHELL", "pg_isready -U label -d label_db"] + interval: 10s + timeout: 5s + retries: 5 + + redis: + image: redis:7-alpine + command: redis-server --requirepass redis_password + ports: + - "6379:6379" + healthcheck: + test: ["CMD", "redis-cli", "-a", "redis_password", "ping"] + interval: 10s + timeout: 5s + retries: 5 + + # RustFS is an S3-compatible object storage service. + # Using MinIO as a drop-in S3 API substitute for development/testing. + # Replace with the actual RustFS image in production environments. + rustfs: + image: minio/minio:latest + command: server /data --console-address ":9001" + environment: + MINIO_ROOT_USER: minioadmin + MINIO_ROOT_PASSWORD: minioadmin + ports: + - "9000:9000" + - "9001:9001" + volumes: + - rustfs_data:/data + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 10s + timeout: 5s + retries: 5 + + backend: + build: . 
+ ports: + - "8080:8080" + environment: + SPRING_DATASOURCE_URL: jdbc:postgresql://postgres:5432/label_db + SPRING_DATASOURCE_USERNAME: label + SPRING_DATASOURCE_PASSWORD: label_password + SPRING_DATA_REDIS_HOST: redis + SPRING_DATA_REDIS_PORT: 6379 + SPRING_DATA_REDIS_PASSWORD: redis_password + RUSTFS_ENDPOINT: http://rustfs:9000 + RUSTFS_ACCESS_KEY: minioadmin + RUSTFS_SECRET_KEY: minioadmin + AI_SERVICE_BASE_URL: http://ai-service:8000 + depends_on: + postgres: + condition: service_healthy + redis: + condition: service_healthy + rustfs: + condition: service_healthy + healthcheck: + test: ["CMD-SHELL", "wget -q --spider http://localhost:8080/actuator/health 2>/dev/null || exit 1"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + + # Placeholder AI service — replace with the actual FastAPI image in production. + ai-service: + image: python:3.11-slim + command: ["python3", "-m", "http.server", "8000"] + ports: + - "8000:8000" + + # Placeholder frontend — replace with the actual Nginx + static build in production. 
+ frontend: + image: nginx:alpine + ports: + - "80:80" + +volumes: + postgres_data: + rustfs_data: diff --git a/docs/superpowers/specs/2026-04-09-label-backend-design.md b/docs/superpowers/specs/2026-04-09-label-backend-design.md index be4f016..e7e6ca2 100644 --- a/docs/superpowers/specs/2026-04-09-label-backend-design.md +++ b/docs/superpowers/specs/2026-04-09-label-backend-design.md @@ -208,7 +208,7 @@ CREATE TABLE source_data ( bucket_name VARCHAR(100) NOT NULL, parent_source_id BIGINT REFERENCES source_data(id), -- 视频转文本时指向原视频 status VARCHAR(20) NOT NULL DEFAULT 'PENDING', - -- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED / REJECTED + -- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED(无 REJECTED 状态,QA 驳回作用于 annotation_task) reject_reason TEXT, created_at TIMESTAMP NOT NULL DEFAULT NOW(), updated_at TIMESTAMP NOT NULL DEFAULT NOW() @@ -961,10 +961,12 @@ public void unclaim(Long taskId) { | 方法 | 路径 | 最低权限 | 说明 | |------|------|----------|------| | POST | `/api/tasks` | ADMIN | 为指定 source 创建 EXTRACTION 任务 | -| GET | `/api/tasks/pool` | ANNOTATOR | 查看可领取任务列表(按角色过滤,分页) | -| POST | `/api/tasks/{id}/claim` | ANNOTATOR | 领取任务(争抢式) | +| GET | `/api/tasks/pool` | ANNOTATOR | 查看可领取任务池(UNCLAIMED 状态)。ANNOTATOR 只看到 EXTRACTION 类型;REVIEWER 只看到 SUBMITTED 状态(即审批队列,与 pending-review 等价);两者均分页,不可无界查询 | +| POST | `/api/tasks/{id}/claim` | ANNOTATOR | 领取任务(争抢式,Redis SET NX + DB 乐观锁) | | POST | `/api/tasks/{id}/unclaim` | ANNOTATOR | 放弃任务,退回任务池 | -| GET | `/api/tasks/mine` | ANNOTATOR | 查询我领取的任务列表(分页) | +| GET | `/api/tasks/mine` | ANNOTATOR | 查询我领取的任务列表(包含 IN_PROGRESS、SUBMITTED、REJECTED 状态,分页) | +| POST | `/api/tasks/{id}/reclaim` | ANNOTATOR | 重领被驳回的任务(task.status 必须为 REJECTED 且 claimedBy = 当前用户),状态流转 REJECTED → IN_PROGRESS | +| GET | `/api/tasks/pending-review` | REVIEWER | 查看待我审批的任务列表(status = SUBMITTED,分页);REVIEWER 的专属审批入口 | | GET | `/api/tasks/{id}` | ANNOTATOR | 查看任务详情 | | GET | `/api/tasks` | ADMIN | 查询全部任务(支持过滤,分页) | | PUT | 
`/api/tasks/{id}/reassign` | ADMIN | 强制转移任务归属 | @@ -996,11 +998,16 @@ public void updateResult(Long taskId, String resultJsonStr) { annotationResultMapper.updateResultJson(taskId, resultJsonStr, CompanyContext.get()); } -// 审批通过——级联触发,必须在同一事务内完成 +// 审批通过——两阶段:事务内完成同步步骤,事务提交后异步触发 QA 生成 @Transactional @OperationLog(type = "EXTRACTION_APPROVE") public void approve(Long taskId) { AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED"); + + // 自审校验:提交者不能审批自己的任务 + if (task.getClaimedBy().equals(getCurrentUserId())) + throw new BusinessException("SELF_REVIEW_FORBIDDEN", "不允许审批自己提交的任务"); + AnnotationResult result = annotationResultMapper.selectByTaskId(taskId); // 1. annotation_result.is_final = true @@ -1016,22 +1023,55 @@ public void approve(Long taskId) { // 3. 写入任务历史 insertHistory(taskId, "SUBMITTED", "APPROVED", getCurrentUserId(), null); - // 4. 调用 AI 生成候选问答对 - String promptKey = "IMAGE".equals(getSourceType(task)) ? "prompt_qa_gen_image" : "prompt_qa_gen_text"; + // 4. 发布领域事件,事务提交后异步执行 QA 生成(步骤 5-7) + // 注:AI HTTP 调用禁止在 @Transactional 内同步执行——会占用数据库连接直至 AI 响应, + // 且 AI 失败会错误地回滚已完成的审批。 + // 使用 @TransactionalEventListener(phase = AFTER_COMMIT) 保证先提交再触发。 + eventPublisher.publishEvent(new ExtractionApprovedEvent(taskId, task.getSourceId(), + getSourceType(task), CompanyContext.get())); +} + +// 驳回——状态回退,标注员可重领 +@Transactional +@OperationLog(type = "EXTRACTION_REJECT") +public void reject(Long taskId, String reason) { + AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED"); + + // 自审校验 + if (task.getClaimedBy().equals(getCurrentUserId())) + throw new BusinessException("SELF_REVIEW_FORBIDDEN", "不允许驳回自己提交的任务"); + + StateValidator.assertTransition(TaskStatus.SUBMITTED, TaskStatus.REJECTED, TaskStatus.TRANSITIONS); + task.setStatus("REJECTED"); + taskMapper.updateById(task); + insertHistory(taskId, "SUBMITTED", "REJECTED", getCurrentUserId(), reason); + // source_data.status 保持 EXTRACTING 不变,待标注员重新提交后再推进 +} + +// 
ExtractionApprovedEventListener(@TransactionalEventListener,独立事务) +// 负责 5-7 步:AI 调用 → 写 training_dataset → 创建 QA 任务 → 更新 source_data +@TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT) +@Transactional(propagation = Propagation.REQUIRES_NEW) +public void onExtractionApproved(ExtractionApprovedEvent event) { + AnnotationTask task = taskMapper.selectById(event.getTaskId()); + AnnotationResult result = annotationResultMapper.selectByTaskId(event.getTaskId()); + + // 5. 调用 AI 生成候选问答对(在事务外执行,失败不影响审批结果) + String promptKey = "IMAGE".equals(event.getSourceType()) ? "prompt_qa_gen_image" : "prompt_qa_gen_text"; String promptTemplate = sysConfigService.get(promptKey); QaGenResponse qaResponse = generateQa(task, result, promptTemplate); - // 5. 将候选问答对写入 training_dataset(PENDING_REVIEW) + // 6. 将候选问答对写入 training_dataset(PENDING_REVIEW) List samples = buildTrainingSamples(task, result, qaResponse); trainingDatasetMapper.batchInsert(samples); - // 6. 创建 QA_GENERATION 阶段任务(UNCLAIMED) + // 7. 创建 QA_GENERATION 阶段任务(UNCLAIMED) AnnotationTask qaTask = buildQaTask(task); taskMapper.insert(qaTask); - insertHistory(qaTask.getId(), null, "UNCLAIMED", getCurrentUserId(), null); + insertHistory(qaTask.getId(), null, "UNCLAIMED", task.getClaimedBy(), null); - // 7. source_data.status → QA_REVIEW - sourceDataMapper.updateStatus(task.getSourceId(), "QA_REVIEW", CompanyContext.get()); + // 8. source_data.status → QA_REVIEW + sourceDataMapper.updateStatus(event.getSourceId(), "QA_REVIEW", event.getCompanyId()); } ``` @@ -1057,21 +1097,48 @@ public void approve(Long taskId) { @Transactional @OperationLog(type = "QA_APPROVE") public void approve(Long taskId) { - // 1. training_dataset.status → APPROVED + // 1. 先校验任务合法性(必须在任何 DB 写入之前执行,避免校验失败时数据已被修改) + AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED"); + + // 自审校验:提交者不能审批自己的任务 + if (task.getClaimedBy().equals(getCurrentUserId())) + throw new BusinessException("SELF_REVIEW_FORBIDDEN", "不允许审批自己提交的任务"); + + // 2. 
training_dataset.status → APPROVED trainingDatasetMapper.approveByTaskId(taskId, getCurrentUserId(), CompanyContext.get()); - // 2. annotation_task.status → APPROVED - AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED"); + // 3. annotation_task.status → APPROVED + StateValidator.assertTransition(TaskStatus.SUBMITTED, TaskStatus.APPROVED, TaskStatus.TRANSITIONS); task.setStatus("APPROVED"); task.setCompletedAt(LocalDateTime.now()); taskMapper.updateById(task); - // 3. source_data.status → APPROVED(整条流水线完成) + // 4. source_data.status → APPROVED(整条流水线完成) sourceDataMapper.updateStatus(task.getSourceId(), "APPROVED", CompanyContext.get()); - // 4. 写入任务历史 + // 5. 写入任务历史 insertHistory(taskId, "SUBMITTED", "APPROVED", getCurrentUserId(), null); } + +// 驳回问答对——任务退回 IN_PROGRESS,training_dataset 删除候选记录 +@Transactional +@OperationLog(type = "QA_REJECT") +public void reject(Long taskId, String reason) { + AnnotationTask task = validateAndGetTask(taskId, "SUBMITTED"); + + // 自审校验 + if (task.getClaimedBy().equals(getCurrentUserId())) + throw new BusinessException("SELF_REVIEW_FORBIDDEN", "不允许驳回自己提交的任务"); + + // 删除本次生成的候选问答对(PENDING_REVIEW 状态),待标注员修改后重新提交 + trainingDatasetMapper.deleteByTaskId(taskId, CompanyContext.get()); + + StateValidator.assertTransition(TaskStatus.SUBMITTED, TaskStatus.REJECTED, TaskStatus.TRANSITIONS); + task.setStatus("REJECTED"); + taskMapper.updateById(task); + insertHistory(taskId, "SUBMITTED", "REJECTED", getCurrentUserId(), reason); + // source_data.status 保持 QA_REVIEW 不变 +} ``` **接口清单:** @@ -1241,14 +1308,15 @@ public final class StateValidator { ```java public enum SourceStatus { - PENDING, PREPROCESSING, EXTRACTING, QA_REVIEW, APPROVED, REJECTED; + PENDING, PREPROCESSING, EXTRACTING, QA_REVIEW, APPROVED; + // 注:source_data 无 REJECTED 状态。QA 阶段驳回的是 annotation_task(→ REJECTED), + // 不改变 source_data.status(保持 QA_REVIEW);重新提交后 source_data 随任务推进。 public static final Map> TRANSITIONS = Map.of( PENDING, Set.of(EXTRACTING, PREPROCESSING), 
PREPROCESSING, Set.of(PENDING), EXTRACTING, Set.of(QA_REVIEW), - QA_REVIEW, Set.of(APPROVED, REJECTED), - REJECTED, Set.of(EXTRACTING) // 驳回后可重提 + QA_REVIEW, Set.of(APPROVED) ); } ``` @@ -1263,7 +1331,7 @@ public enum TaskStatus { UNCLAIMED, Set.of(IN_PROGRESS), IN_PROGRESS, Set.of(SUBMITTED, UNCLAIMED, IN_PROGRESS), // IN_PROGRESS → IN_PROGRESS 用于 ADMIN 强制转移(持有人变更,状态不变) - SUBMITTED, Set.oAPPROVED, REJECTED), + SUBMITTED, Set.of(APPROVED, REJECTED), REJECTED, Set.of(IN_PROGRESS) // 驳回后重拾 ); } diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..86420a7 --- /dev/null +++ b/pom.xml @@ -0,0 +1,163 @@ + + + 4.0.0 + + + org.springframework.boot + spring-boot-starter-parent + 3.2.5 + + + + com.label + label-backend + 1.0.0-SNAPSHOT + jar + + + 17 + UTF-8 + + + + + + + software.amazon.awssdk + bom + 2.26.31 + pom + import + + + + org.testcontainers + testcontainers-bom + 1.20.1 + pom + import + + + + + + + + org.springframework.boot + spring-boot-starter-web + + + + + org.springframework.boot + spring-boot-starter-actuator + + + + + org.springframework.boot + spring-boot-starter-data-redis + + + + + org.springframework.boot + spring-boot-starter-aop + + + + + org.postgresql + postgresql + runtime + + + + + com.baomidou + mybatis-plus-boot-starter + 3.5.9 + + + + + com.baomidou + mybatis-plus-jsqlparser + 3.5.9 + + + + + org.apache.shiro + shiro-spring-boot-web-starter + 1.13.0 + + + + + software.amazon.awssdk + s3 + + + + + software.amazon.awssdk + sts + + + + + org.springframework.security + spring-security-crypto + + + + + org.projectlombok + lombok + true + + + + + org.springframework.boot + spring-boot-starter-test + test + + + + + org.testcontainers + postgresql + test + + + + + org.testcontainers + junit-jupiter + test + + + + + + + org.springframework.boot + spring-boot-maven-plugin + + + + org.projectlombok + lombok + + + + + + + + \ No newline at end of file diff --git a/specs/001-label-backend-spec/checklists/requirements.md 
b/specs/001-label-backend-spec/checklists/requirements.md new file mode 100644 index 0000000..407b074 --- /dev/null +++ b/specs/001-label-backend-spec/checklists/requirements.md @@ -0,0 +1,34 @@ +# 规格质量检查清单:label_backend 知识图谱智能标注平台 + +**用途**: 在进入规划阶段前验证规格说明的完整性和质量 +**创建日期**: 2026-04-09 +**功能**: [查看规格说明](../spec.md) + +## 内容质量 + +- [x] 无实现细节(无编程语言、框架、API 引用) +- [x] 聚焦用户价值和业务需求 +- [x] 面向非技术干系人编写 +- [x] 所有必填章节均已完成 + +## 需求完整性 + +- [x] 无 [NEEDS CLARIFICATION] 标记残留 +- [x] 需求可测试且无歧义 +- [x] 成功标准可度量 +- [x] 成功标准与技术无关(无实现细节) +- [x] 所有验收场景均已定义 +- [x] 已识别边界情况 +- [x] 范围边界清晰 +- [x] 已识别依赖和假设 + +## 功能就绪性 + +- [x] 所有功能性需求均有明确验收标准 +- [x] 用户场景覆盖主流程(认证、上传、标注、审批、导出) +- [x] 功能满足成功标准中定义的可度量结果 +- [x] 无实现细节渗入规格说明 + +## 备注 + +所有检查项均通过。规格说明已就绪,可进行 `/speckit.plan` 规划阶段。 diff --git a/specs/001-label-backend-spec/contracts/auth.md b/specs/001-label-backend-spec/contracts/auth.md new file mode 100644 index 0000000..47b1fc8 --- /dev/null +++ b/specs/001-label-backend-spec/contracts/auth.md @@ -0,0 +1,148 @@ +# API 契约:认证与用户管理 + +**统一响应格式**: +- 成功:`{"code": "SUCCESS", "data": {...}}` +- 成功(无数据):`{"code": "SUCCESS", "data": null}` +- 失败:`{"code": "ERROR_CODE", "message": "描述"}` +- 分页成功:`{"code": "SUCCESS", "data": {"items": [...], "total": 100, "page": 1, "pageSize": 20}}` + +--- + +## POST /api/auth/login + +**权限**: 匿名 +**描述**: 用户登录,返回会话凭证 + +**请求体**: +```json +{ + "companyCode": "COMPANY_A", + "username": "zhangsan", + "password": "plaintext_password" +} +``` + +**成功响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "token": "550e8400-e29b-41d4-a716-446655440000", + "userId": 1, + "username": "zhangsan", + "role": "ANNOTATOR", + "expiresIn": 7200 + } +} +``` + +**失败响应**: +- `401` `USER_NOT_FOUND`: 用户名或密码错误(不区分哪个错误,防止枚举) +- `403` `USER_DISABLED`: 账号已禁用 + +--- + +## POST /api/auth/logout + +**权限**: 已登录(Bearer Token) +**描述**: 退出登录,立即删除 Redis 会话 + +**请求头**: `Authorization: Bearer {token}` +**响应** `200`: `{"code": "SUCCESS", "data": null}` + +--- + +## GET /api/auth/me + +**权限**: 已登录 +**描述**: 
获取当前用户信息 + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "id": 1, + "username": "zhangsan", + "realName": "张三", + "role": "ANNOTATOR", + "companyId": 10, + "companyName": "测试公司" + } +} +``` + +--- + +## GET /api/users + +**权限**: ADMIN +**描述**: 分页查询本公司用户列表 + +**查询参数**: `page`(默认 1)、`pageSize`(默认 20,最大 100)、`role`(可选过滤)、`status`(可选过滤) + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "items": [ + {"id": 1, "username": "zhangsan", "realName": "张三", "role": "ANNOTATOR", "status": "ACTIVE"} + ], + "total": 50, + "page": 1, + "pageSize": 20 + } +} +``` + +--- + +## POST /api/users + +**权限**: ADMIN +**描述**: 创建用户 + +**请求体**: +```json +{ + "username": "lisi", + "password": "initial_password", + "realName": "李四", + "role": "ANNOTATOR" +} +``` + +**响应** `201`: `{"code": "SUCCESS", "data": {"id": 2, "username": "lisi", ...}}` +**失败**: `409` `USERNAME_EXISTS`: 用户名已存在 + +--- + +## PUT /api/users/{id} + +**权限**: ADMIN +**描述**: 更新用户基本信息 + +**请求体**: `{"realName": "新姓名"}` +**响应** `200`: `{"code": "SUCCESS", "data": null}` + +--- + +## PUT /api/users/{id}/status + +**权限**: ADMIN +**描述**: 启用或禁用账号,立即驱逐权限缓存 + +**请求体**: `{"status": "DISABLED"}` +**响应** `200`: `{"code": "SUCCESS", "data": null}` + +--- + +## PUT /api/users/{id}/role + +**权限**: ADMIN +**描述**: 变更用户角色,立即驱逐权限缓存 + +**请求体**: `{"role": "REVIEWER"}` +**响应** `200`: `{"code": "SUCCESS", "data": null}` +**失败**: `400` `INVALID_ROLE`: 角色值不合法 diff --git a/specs/001-label-backend-spec/contracts/config.md b/specs/001-label-backend-spec/contracts/config.md new file mode 100644 index 0000000..66c3aff --- /dev/null +++ b/specs/001-label-backend-spec/contracts/config.md @@ -0,0 +1,53 @@ +# API 契约:系统配置 + +*所有接口需要 ADMIN 权限* + +--- + +## GET /api/config + +**描述**: 获取所有配置项(公司级配置 + 全局默认配置合并,公司级优先) + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "items": [ + { + "configKey": "prompt_extract_text", + "configValue": "请提取以下文本中的主语-谓语-宾语三元组...", + "description": "文本三元组提取 Prompt 模板", + "scope": "GLOBAL", + 
"updatedAt": "2026-04-09T00:00:00" + }, + { + "configKey": "model_default", + "configValue": "glm-4-turbo", + "description": "默认 AI 辅助模型", + "scope": "COMPANY", + "updatedAt": "2026-04-09T09:00:00" + } + ] + } +} +``` + +`scope` 字段:`GLOBAL`(来自全局默认)、`COMPANY`(来自公司级覆盖) + +--- + +## PUT /api/config/{key} + +**描述**: 更新单项配置(若公司级配置不存在则创建;若存在则覆盖) + +**请求体**: +```json +{ + "configValue": "glm-4-turbo", + "description": "升级到 GLM-4-Turbo 模型" +} +``` + +**响应** `200`: `{"code": "SUCCESS", "data": null}` +**失败**: `400` `UNKNOWN_CONFIG_KEY`: 未知的配置键(防止拼写错误创建无效配置) diff --git a/specs/001-label-backend-spec/contracts/export.md b/specs/001-label-backend-spec/contracts/export.md new file mode 100644 index 0000000..20a93d5 --- /dev/null +++ b/specs/001-label-backend-spec/contracts/export.md @@ -0,0 +1,113 @@ +# API 契约:训练数据导出与微调 + +*所有接口需要 ADMIN 权限* + +--- + +## GET /api/training/samples + +**描述**: 分页查询已审批、可导出的训练样本 + +**查询参数**: `page`、`pageSize`、`sampleType`(TEXT / IMAGE / VIDEO_FRAME,可选)、`exported`(true/false,可选) + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "items": [ + { + "id": 1001, + "sampleType": "TEXT", + "status": "APPROVED", + "exportBatchId": null, + "sourceId": 50, + "createdAt": "2026-04-09T12:00:00" + } + ], + "total": 500, + "page": 1, + "pageSize": 20 + } +} +``` + +--- + +## POST /api/export/batch + +**描述**: 创建导出批次,合并选定样本为 JSONL 并上传 RustFS + +**请求体**: +```json +{ + "sampleIds": [1001, 1002, 1003] +} +``` + +**成功响应** `201`: +```json +{ + "code": "SUCCESS", + "data": { + "id": 10, + "batchUuid": "550e8400-e29b-41d4-a716-446655440000", + "sampleCount": 3, + "datasetFilePath": "export/550e8400.jsonl", + "finetuneStatus": "NOT_STARTED" + } +} +``` + +**失败**: +- `400` `INVALID_SAMPLES`: 部分样本不处于 APPROVED 状态 +- `400` `EMPTY_SAMPLES`: sampleIds 为空 + +--- + +## POST /api/export/{batchId}/finetune + +**描述**: 向 GLM AI 服务提交微调任务 + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "glmJobId": "glm-finetune-abc123", + "finetuneStatus": "RUNNING" + } +} 
+``` + +**失败**: `409` `FINETUNE_ALREADY_STARTED`: 微调任务已提交 + +--- + +## GET /api/export/{batchId}/status + +**描述**: 查询微调任务状态(向 AI 服务实时查询) + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "batchId": 10, + "glmJobId": "glm-finetune-abc123", + "finetuneStatus": "RUNNING", + "progress": 45, + "errorMessage": null + } +} +``` + +--- + +## GET /api/export/list + +**描述**: 分页查询所有导出批次 + +**查询参数**: `page`、`pageSize` + +**响应** `200`: 批次列表(含 finetuneStatus、sampleCount、createdAt 等字段) diff --git a/specs/001-label-backend-spec/contracts/extraction.md b/specs/001-label-backend-spec/contracts/extraction.md new file mode 100644 index 0000000..5c4461f --- /dev/null +++ b/specs/001-label-backend-spec/contracts/extraction.md @@ -0,0 +1,97 @@ +# API 契约:提取阶段标注工作台 + +--- + +## GET /api/extraction/{taskId} + +**权限**: ANNOTATOR(且为任务持有者) +**描述**: 获取当前提取结果(含 AI 预标注候选,供人工编辑) + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "taskId": 101, + "sourceType": "TEXT", + "sourceFilePath": "text/202604/50.txt", + "isFinal": false, + "resultJson": { + "items": [ + { + "subject": "北京", + "predicate": "是...首都", + "object": "中国", + "sourceText": "北京是中国的首都", + "startOffset": 0, + "endOffset": 8 + } + ] + } + } +} +``` + +--- + +## PUT /api/extraction/{taskId} + +**权限**: ANNOTATOR(且为任务持有者) +**描述**: 更新提取结果(**整体 JSONB 覆盖,PUT 语义,禁止局部 PATCH**) + +**请求体**: +```json +{ + "items": [ + { + "subject": "北京", + "predicate": "是...首都", + "object": "中国", + "sourceText": "北京是中国的首都", + "startOffset": 0, + "endOffset": 8 + } + ] +} +``` + +**响应** `200`: `{"code": "SUCCESS", "data": null}` +**失败**: `400` `INVALID_JSON`: 提交的 JSON 格式不合法 + +--- + +## POST /api/extraction/{taskId}/submit + +**权限**: ANNOTATOR(且为任务持有者) +**描述**: 提交提取结果,任务状态 IN_PROGRESS → SUBMITTED,进入审批队列 + +**响应** `200`: `{"code": "SUCCESS", "data": null}` +**失败**: `409` `INVALID_STATE`: 任务当前状态不允许提交 + +--- + +## POST /api/extraction/{taskId}/approve + +**权限**: REVIEWER +**描述**: 审批通过。**两阶段操作**: +1. 
同步(同一事务):`annotation_result.is_final = true`,任务状态 SUBMITTED → APPROVED,写任务历史 +2. 异步(事务提交后):AI 生成候选问答对 → 写 training_dataset → 创建 QA_GENERATION 任务 → source_data 状态推进 + +**响应** `200`: `{"code": "SUCCESS", "data": null}` +**失败**: +- `403` `SELF_REVIEW_FORBIDDEN`: 不允许审批自己提交的任务 +- `409` `INVALID_STATE`: 任务状态不为 SUBMITTED + +--- + +## POST /api/extraction/{taskId}/reject + +**权限**: REVIEWER +**描述**: 驳回提取结果,任务状态 SUBMITTED → REJECTED,标注员可重领 + +**请求体**: `{"reason": "三元组边界不准确,请重新标注"}` +**响应** `200`: `{"code": "SUCCESS", "data": null}` +**失败**: +- `403` `SELF_REVIEW_FORBIDDEN`: 不允许驳回自己提交的任务 +- `409` `INVALID_STATE`: 任务状态不为 SUBMITTED +- `400` `REASON_REQUIRED`: 驳回原因不能为空 diff --git a/specs/001-label-backend-spec/contracts/qa.md b/specs/001-label-backend-spec/contracts/qa.md new file mode 100644 index 0000000..f113c5f --- /dev/null +++ b/specs/001-label-backend-spec/contracts/qa.md @@ -0,0 +1,83 @@ +# API 契约:问答生成阶段 + +--- + +## GET /api/qa/{taskId} + +**权限**: ANNOTATOR(且为任务持有者) +**描述**: 获取候选问答对列表(由提取阶段审批触发 AI 生成) + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "taskId": 202, + "sourceType": "TEXT", + "items": [ + { + "id": 1001, + "question": "北京是哪个国家的首都?", + "answer": "中国", + "status": "PENDING_REVIEW" + } + ] + } +} +``` + +--- + +## PUT /api/qa/{taskId} + +**权限**: ANNOTATOR(且为任务持有者) +**描述**: 修改问答对(**整体覆盖,PUT 语义**,每次提交包含完整 items 数组) + +**请求体**: +```json +{ + "items": [ + { + "question": "北京是哪个国家的首都?", + "answer": "中国。北京自1949年起成为中华人民共和国的首都。" + } + ] +} +``` + +**响应** `200`: `{"code": "SUCCESS", "data": null}` + +--- + +## POST /api/qa/{taskId}/submit + +**权限**: ANNOTATOR(且为任务持有者) +**描述**: 提交问答对,任务状态 IN_PROGRESS → SUBMITTED + +**响应** `200`: `{"code": "SUCCESS", "data": null}` +**失败**: `409` `INVALID_STATE`: 任务当前状态不允许提交 + +--- + +## POST /api/qa/{taskId}/approve + +**权限**: REVIEWER +**描述**: 审批通过。同一事务中:先校验任务 → training_dataset 状态 → 任务状态 SUBMITTED → APPROVED → source_data 状态 → 写任务历史 + +**响应** `200`: `{"code": "SUCCESS", "data": null}` +**失败**: +- `403` 
`SELF_REVIEW_FORBIDDEN`: 不允许审批自己提交的任务 +- `409` `INVALID_STATE`: 任务状态不为 SUBMITTED + +--- + +## POST /api/qa/{taskId}/reject + +**权限**: REVIEWER +**描述**: 驳回问答对,删除候选记录,任务状态 SUBMITTED → REJECTED + +**请求体**: `{"reason": "问题描述不准确,请修改"}` +**响应** `200`: `{"code": "SUCCESS", "data": null}` +**失败**: +- `403` `SELF_REVIEW_FORBIDDEN`: 不允许驳回自己提交的任务 +- `400` `REASON_REQUIRED`: 驳回原因不能为空 diff --git a/specs/001-label-backend-spec/contracts/source.md b/specs/001-label-backend-spec/contracts/source.md new file mode 100644 index 0000000..4647fbe --- /dev/null +++ b/specs/001-label-backend-spec/contracts/source.md @@ -0,0 +1,96 @@ +# API 契约:资料管理 + +--- + +## POST /api/source/upload + +**权限**: UPLOADER +**描述**: 上传文件,创建 source_data 记录,文件字节流写入 RustFS + +**请求**: `multipart/form-data`,字段:`file`(必填)、`dataType`(TEXT / IMAGE / VIDEO) + +**响应** `201`: +```json +{ + "code": "SUCCESS", + "data": { + "id": 50, + "fileName": "document.txt", + "dataType": "TEXT", + "fileSize": 204800, + "status": "PENDING", + "createdAt": "2026-04-09T10:00:00" + } +} +``` + +**失败**: +- `400` `INVALID_TYPE`: 不支持的资料类型 +- `400` `FILE_EMPTY`: 文件为空 + +--- + +## GET /api/source/list + +**权限**: UPLOADER +**描述**: 分页查询资料列表。UPLOADER 只见自己上传的资料;ADMIN 见本公司全部资料 + +**查询参数**: `page`(默认 1)、`pageSize`(默认 20)、`dataType`(可选)、`status`(可选) + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "items": [ + { + "id": 50, + "fileName": "document.txt", + "dataType": "TEXT", + "status": "PENDING", + "uploaderId": 1, + "createdAt": "2026-04-09T10:00:00" + } + ], + "total": 120, + "page": 1, + "pageSize": 20 + } +} +``` + +--- + +## GET /api/source/{id} + +**权限**: UPLOADER +**描述**: 查看资料详情,含 RustFS 预签名临时下载链接(有效期 15 分钟) + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "id": 50, + "dataType": "TEXT", + "fileName": "document.txt", + "fileSize": 204800, + "status": "EXTRACTING", + "presignedUrl": "https://rustfs.example.com/...", + "parentSourceId": null, + "createdAt": "2026-04-09T10:00:00" + } +} +``` + +--- + +## DELETE 
/api/source/{id} + +**权限**: ADMIN +**描述**: 删除资料(同时删除 RustFS 文件及元数据) + +**前置条件**: 资料状态为 PENDING(不允许删除已进入流水线的资料) + +**响应** `204`: 无响应体 +**失败**: `409` `SOURCE_IN_PIPELINE`: 资料已进入标注流程,不可删除 diff --git a/specs/001-label-backend-spec/contracts/tasks.md b/specs/001-label-backend-spec/contracts/tasks.md new file mode 100644 index 0000000..033fee8 --- /dev/null +++ b/specs/001-label-backend-spec/contracts/tasks.md @@ -0,0 +1,150 @@ +# API 契约:任务管理 + +--- + +## GET /api/tasks/pool + +**权限**: ANNOTATOR +**描述**: 查看可领取任务池。角色过滤规则: +- ANNOTATOR:仅返回 EXTRACTION 阶段、status=UNCLAIMED 的任务 +- REVIEWER/ADMIN:仅返回 SUBMITTED 状态(待审批队列)的任务 + +**查询参数**: `page`(默认 1)、`pageSize`(默认 20) + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "items": [ + { + "id": 101, + "sourceId": 50, + "sourceType": "TEXT", + "phase": "EXTRACTION", + "status": "UNCLAIMED", + "createdAt": "2026-04-09T10:00:00" + } + ], + "total": 30, + "page": 1, + "pageSize": 20 + } +} +``` + +--- + +## GET /api/tasks/pending-review + +**权限**: REVIEWER +**描述**: REVIEWER 专属审批入口,查看 status=SUBMITTED 的任务列表 + +**查询参数**: `page`、`pageSize`、`phase`(可选,EXTRACTION / QA_GENERATION) + +**响应**: 同 `/api/tasks/pool` 结构 + +--- + +## POST /api/tasks/{id}/claim + +**权限**: ANNOTATOR +**描述**: 领取任务(双重并发保障:Redis SET NX + DB 乐观约束) + +**响应** `200`: `{"code": "SUCCESS", "data": null}` +**失败**: +- `409` `TASK_CLAIMED`: 任务已被他人领取 +- `404` `TASK_NOT_FOUND`: 任务不存在 + +--- + +## POST /api/tasks/{id}/unclaim + +**权限**: ANNOTATOR(且为任务持有者) +**描述**: 放弃任务,退回任务池(status: IN_PROGRESS → UNCLAIMED) + +**响应** `200`: `{"code": "SUCCESS", "data": null}` +**失败**: `403` `NOT_TASK_OWNER`: 非任务持有者 + +--- + +## GET /api/tasks/mine + +**权限**: ANNOTATOR +**描述**: 查询当前用户领取的任务(含 IN_PROGRESS、SUBMITTED、REJECTED 三种状态) + +**查询参数**: `page`、`pageSize`、`status`(可选过滤) + +**响应**: 同任务列表结构,含 `rejectReason` 字段(REJECTED 状态时非空) + +--- + +## POST /api/tasks/{id}/reclaim + +**权限**: ANNOTATOR +**描述**: 重领被驳回的任务(status 必须为 REJECTED 且 claimedBy = 当前用户,流转 REJECTED → IN_PROGRESS) + +**响应** `200`: 
`{"code": "SUCCESS", "data": null}` +**失败**: +- `403` `NOT_TASK_OWNER`: 非原持有者 +- `409` `INVALID_STATE`: 任务状态不为 REJECTED + +--- + +## GET /api/tasks/{id} + +**权限**: ANNOTATOR +**描述**: 查看任务详情(含驳回原因、历史记录摘要) + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "id": 101, + "sourceId": 50, + "phase": "EXTRACTION", + "status": "IN_PROGRESS", + "claimedBy": 1, + "claimedAt": "2026-04-09T10:05:00", + "rejectReason": null, + "historyCount": 2 + } +} +``` + +--- + +## GET /api/tasks + +**权限**: ADMIN +**描述**: 查询全部任务(支持过滤,分页) + +**查询参数**: `page`、`pageSize`、`phase`、`status`、`claimedBy`、`sourceId` + +--- + +## PUT /api/tasks/{id}/reassign + +**权限**: ADMIN +**描述**: 强制转移任务归属(status 保持 IN_PROGRESS,仅 claimedBy 变更) + +**请求体**: `{"newOwnerId": 5, "reason": "原持有者长期未操作"}` +**响应** `200`: `{"code": "SUCCESS", "data": null}` + +--- + +## POST /api/tasks + +**权限**: ADMIN +**描述**: 为指定资料创建 EXTRACTION 任务 + +**请求体**: +```json +{ + "sourceId": 50, + "taskType": "AI_ASSISTED", + "aiModel": "glm-4" +} +``` +**响应** `201`: `{"code": "SUCCESS", "data": {"id": 101, ...}}` diff --git a/specs/001-label-backend-spec/contracts/video.md b/specs/001-label-backend-spec/contracts/video.md new file mode 100644 index 0000000..8906c80 --- /dev/null +++ b/specs/001-label-backend-spec/contracts/video.md @@ -0,0 +1,87 @@ +# API 契约:视频处理 + +--- + +## POST /api/video/process + +**权限**: ADMIN +**描述**: 为已上传的视频资料创建异步处理任务 + +**请求体**: +```json +{ + "sourceId": 50, + "jobType": "FRAME_EXTRACT", + "params": { + "frameInterval": 30, + "mode": "FRAME" + } +} +``` + +jobType 可选值:`FRAME_EXTRACT`(帧提取)、`VIDEO_TO_TEXT`(片段转文字) + +**响应** `201`: +```json +{ + "code": "SUCCESS", + "data": { + "jobId": 200, + "sourceId": 50, + "jobType": "FRAME_EXTRACT", + "status": "PENDING" + } +} +``` + +--- + +## GET /api/video/jobs/{jobId} + +**权限**: ADMIN +**描述**: 查询视频处理任务状态 + +**响应** `200`: +```json +{ + "code": "SUCCESS", + "data": { + "id": 200, + "status": "RUNNING", + "processedUnits": 15, + "totalUnits": 50, + "retryCount": 0, + 
"errorMessage": null, + "startedAt": "2026-04-09T10:05:00" + } +} +``` + +--- + +## POST /api/video/jobs/{jobId}/reset + +**权限**: ADMIN +**描述**: 手动重置 FAILED 状态的任务为 PENDING,允许重新触发(FAILED → PENDING 不在自动状态机中) + +**响应** `200`: `{"code": "SUCCESS", "data": null}` +**失败**: `409` `INVALID_STATE`: 任务状态不为 FAILED + +--- + +## POST /api/video/callback(内部接口) + +**权限**: AI 服务内部调用(IP 白名单 / 服务密钥) +**描述**: AI 服务回调,通知视频处理结果(幂等:重复成功回调静默忽略) + +**请求体**: +```json +{ + "jobId": 200, + "success": true, + "outputPath": "frames/50/", + "errorMessage": null +} +``` + +**响应** `200`: `{"code": "SUCCESS", "data": null}` diff --git a/specs/001-label-backend-spec/data-model.md b/specs/001-label-backend-spec/data-model.md new file mode 100644 index 0000000..eb5ac9c --- /dev/null +++ b/specs/001-label-backend-spec/data-model.md @@ -0,0 +1,355 @@ +# 数据模型:label_backend + +**日期**: 2026-04-09 +**分支**: `001-label-backend-spec` + +--- + +## 实体关系概览 + +``` +sys_company ─┬─ sys_user (company_id FK) + ├─ source_data (company_id FK) + │ └─ source_data (parent_source_id 自引用,视频溯源链) + ├─ annotation_task (company_id FK) + │ ├─ annotation_result (task_id FK) + │ └─ annotation_task_history (task_id FK) + ├─ training_dataset (company_id FK) + ├─ export_batch (company_id FK) + ├─ sys_config (company_id FK,可为 NULL 表示全局默认) + ├─ sys_operation_log (company_id FK) + └─ video_process_job (company_id FK) +``` + +**多租户规则**:除 `sys_company` 本身外,所有业务表均包含 `company_id NOT NULL`。查询时由 `TenantLineInnerInterceptor` 自动注入 `WHERE company_id = ?`。唯一例外:`sys_config` 允许 `company_id = NULL` 表示全局默认配置。 + +--- + +## 实体详情 + +### 1. 
sys_company — 公司(租户) + +| 字段 | 类型 | 约束 | 说明 | +|------|------|------|------| +| id | BIGSERIAL | PK | 自增主键 | +| company_name | VARCHAR(100) | NOT NULL UNIQUE | 公司名称 | +| company_code | VARCHAR(50) | NOT NULL UNIQUE | 公司编码 | +| status | VARCHAR(10) | NOT NULL DEFAULT 'ACTIVE' | ACTIVE / DISABLED | +| created_at | TIMESTAMP | NOT NULL DEFAULT NOW() | | +| updated_at | TIMESTAMP | NOT NULL DEFAULT NOW() | | + +**状态**: 无状态机(仅 ACTIVE/DISABLED 标志) + +--- + +### 2. sys_user — 用户 + +| 字段 | 类型 | 约束 | 说明 | +|------|------|------|------| +| id | BIGSERIAL | PK | | +| company_id | BIGINT | NOT NULL FK→sys_company | 租户隔离键 | +| username | VARCHAR(50) | NOT NULL | 同公司内唯一 | +| password_hash | VARCHAR(255) | NOT NULL | BCrypt 强度≥10,禁止序列化到响应 | +| real_name | VARCHAR(50) | — | | +| role | VARCHAR(20) | NOT NULL | UPLOADER / ANNOTATOR / REVIEWER / ADMIN | +| status | VARCHAR(10) | NOT NULL DEFAULT 'ACTIVE' | ACTIVE / DISABLED | +| created_at / updated_at | TIMESTAMP | NOT NULL | | + +**约束**: `UNIQUE(company_id, username)` +**索引**: `(company_id)` +**角色继承**: ADMIN ⊃ REVIEWER ⊃ ANNOTATOR ⊃ UPLOADER(由 Shiro Realm 的 addInheritedRoles() 实现) + +--- + +### 3. 
source_data — 原始资料 + +| 字段 | 类型 | 约束 | 说明 | +|------|------|------|------| +| id | BIGSERIAL | PK | | +| company_id | BIGINT | NOT NULL FK→sys_company | | +| uploader_id | BIGINT | FK→sys_user | | +| data_type | VARCHAR(20) | NOT NULL | TEXT / IMAGE / VIDEO | +| file_path | VARCHAR(500) | NOT NULL | RustFS 对象路径 | +| file_name | VARCHAR(255) | NOT NULL | 原始文件名 | +| file_size | BIGINT | — | 字节数 | +| bucket_name | VARCHAR(100) | NOT NULL | RustFS 桶名 | +| parent_source_id | BIGINT | FK→source_data | 视频片段转文本时指向原视频 | +| status | VARCHAR(20) | NOT NULL DEFAULT 'PENDING' | 见状态机 | +| reject_reason | TEXT | — | 保留字段(当前无 REJECTED 状态) | +| created_at / updated_at | TIMESTAMP | NOT NULL | | + +**索引**: `(company_id)`、`(company_id, status)`、`(parent_source_id)` + +**状态机**: +``` +PENDING → EXTRACTING(直接上传的文本/图片) +PENDING → PREPROCESSING(视频上传后) +PREPROCESSING → PENDING(视频预处理完成后进入标注流程) +EXTRACTING → QA_REVIEW(提取任务审批通过后) +QA_REVIEW → APPROVED(QA 任务审批通过后,整条流水线完成) +``` + +*注:source_data 无 REJECTED 状态。QA 阶段驳回作用于 annotation_task(→REJECTED),source_data 保持 QA_REVIEW 不变。* + +--- + +### 4. 
annotation_task — 标注任务 + +| 字段 | 类型 | 约束 | 说明 | +|------|------|------|------| +| id | BIGSERIAL | PK | | +| company_id | BIGINT | NOT NULL FK→sys_company | | +| source_id | BIGINT | NOT NULL FK→source_data | | +| phase | VARCHAR(20) | NOT NULL | EXTRACTION / QA_GENERATION | +| task_type | VARCHAR(20) | NOT NULL | AI_ASSISTED / MANUAL | +| ai_model | VARCHAR(50) | — | 使用的 AI 模型 | +| video_unit_type | VARCHAR(20) | — | FRAME(视频帧模式)/ NULL | +| video_unit_info | JSONB | — | `{frame_index, time_sec, frame_path}` | +| claimed_by | BIGINT | FK→sys_user | 当前持有者 | +| claimed_at | TIMESTAMP | — | | +| status | VARCHAR(20) | NOT NULL DEFAULT 'UNCLAIMED' | 见状态机 | +| reject_reason | TEXT | — | 驳回原因 | +| submitted_at | TIMESTAMP | — | | +| completed_at | TIMESTAMP | — | | +| created_at / updated_at | TIMESTAMP | NOT NULL | | + +**索引**: `(company_id)`、`(company_id, phase, status)`(任务池查询)、`(claimed_by, status)`(我的任务) + +**状态机**: +``` +UNCLAIMED → IN_PROGRESS(领取) +IN_PROGRESS → SUBMITTED(提交) +IN_PROGRESS → UNCLAIMED(放弃) +IN_PROGRESS → IN_PROGRESS(ADMIN 强制转移,持有人变更,状态不变) +SUBMITTED → APPROVED(审批通过) +SUBMITTED → REJECTED(审批驳回) +REJECTED → IN_PROGRESS(标注员重领) +``` + +**并发控制**: 领取时双重保障:① Redis `SET NX task:claim:{taskId}` TTL 30s;② DB `UPDATE ... WHERE status='UNCLAIMED'` 影响行数为 0 时返回错误 + +--- + +### 5. 
annotation_result — 标注结果(提取阶段) + +| 字段 | 类型 | 约束 | 说明 | +|------|------|------|------| +| id | BIGSERIAL | PK | | +| company_id | BIGINT | NOT NULL FK→sys_company | | +| task_id | BIGINT | NOT NULL FK→annotation_task | | +| result_json | JSONB | NOT NULL | 整体覆盖,禁止局部 PATCH | +| is_final | BOOLEAN | NOT NULL DEFAULT FALSE | 审批通过后置 TRUE | +| submitted_by | BIGINT | FK→sys_user | | +| created_at / updated_at | TIMESTAMP | NOT NULL | | + +**result_json 结构**(文本三元组示例): +```json +{ + "items": [ + { + "subject": "北京", + "predicate": "是...首都", + "object": "中国", + "source_text": "北京是中国的首都", + "start_offset": 0, + "end_offset": 8 + } + ] +} +``` + +**result_json 结构**(图片四元组示例): +```json +{ + "items": [ + { + "subject": "猫", + "relation": "坐在", + "object": "椅子", + "modifier": "白色的", + "bbox": [100, 200, 300, 400], + "crop_path": "crops/123/0.jpg" + } + ] +} +``` + +**索引**: `(task_id)`、`(company_id, is_final)` + +--- + +### 6. training_dataset — 训练样本 + +| 字段 | 类型 | 约束 | 说明 | +|------|------|------|------| +| id | BIGSERIAL | PK | | +| company_id | BIGINT | NOT NULL FK→sys_company | | +| task_id | BIGINT | NOT NULL FK→annotation_task | | +| source_id | BIGINT | NOT NULL FK→source_data | | +| extraction_result_id | BIGINT | NOT NULL FK→annotation_result | | +| sample_type | VARCHAR(20) | NOT NULL | TEXT / IMAGE / VIDEO_FRAME | +| glm_format_json | JSONB | NOT NULL | GLM 微调格式 | +| export_batch_id | VARCHAR(50) | — | NULL 表示未导出 | +| status | VARCHAR(20) | NOT NULL DEFAULT 'PENDING_REVIEW' | 见状态机 | +| reject_reason | TEXT | — | | +| reviewed_by | BIGINT | FK→sys_user | | +| exported_at | TIMESTAMP | — | | +| created_at / updated_at | TIMESTAMP | NOT NULL | | + +**状态机**: +``` +PENDING_REVIEW → APPROVED(QA 审批通过) +PENDING_REVIEW → REJECTED(QA 审批驳回) +REJECTED → PENDING_REVIEW(标注员修改后重提) +``` + +**glm_format_json 结构**: +```json +{ + "conversations": [ + {"role": "user", "content": "..."}, + {"role": "assistant", "content": "..."} + ], + "source_type": "TEXT" +} +``` + +**索引**: 
`(company_id)`、`(company_id, status)`、`(export_batch_id)` + +--- + +### 7. export_batch — 导出批次 + +| 字段 | 类型 | 约束 | 说明 | +|------|------|------|------| +| id | BIGSERIAL | PK | | +| company_id | BIGINT | NOT NULL FK→sys_company | | +| batch_uuid | VARCHAR(50) | NOT NULL UNIQUE | 批次标识符 | +| dataset_file_path | VARCHAR(500) | — | RustFS JSONL 路径 | +| sample_count | INT | NOT NULL DEFAULT 0 | | +| glm_job_id | VARCHAR(100) | — | 微调任务 ID | +| finetune_status | VARCHAR(20) | NOT NULL DEFAULT 'NOT_STARTED' | 见状态 | +| error_message | TEXT | — | | +| created_by | BIGINT | FK→sys_user | | +| created_at / updated_at | TIMESTAMP | NOT NULL | | + +**finetune_status 值**: NOT_STARTED / RUNNING / SUCCESS / FAILED + +**索引**: `(company_id)` + +--- + +### 8. sys_config — 系统配置 + +| 字段 | 类型 | 约束 | 说明 | +|------|------|------|------| +| id | BIGSERIAL | PK | | +| company_id | BIGINT | FK→sys_company,可 NULL | NULL = 全局默认配置 | +| config_key | VARCHAR(100) | NOT NULL | | +| config_value | TEXT | NOT NULL | | +| description | TEXT | — | | +| updated_by | BIGINT | FK→sys_user | | +| updated_at | TIMESTAMP | NOT NULL | | + +**约束**: `UNIQUE(company_id, config_key)` +**查询规则**: 先按 `(companyId, configKey)` 查;未命中则按 `(NULL, configKey)` 查全局默认。 + +**预置全局配置键**: +- `prompt_extract_text`、`prompt_extract_image`、`prompt_video_to_text` +- `prompt_qa_gen_text`、`prompt_qa_gen_image` +- `model_default`(默认:`glm-4`) +- `video_frame_interval`(默认:`30`) +- `token_ttl_seconds`(默认:`7200`) +- `glm_api_base_url` + +--- + +### 9. 
sys_operation_log — 操作审计日志 + +| 字段 | 类型 | 约束 | 说明 | +|------|------|------|------| +| id | BIGSERIAL | PK | | +| company_id | BIGINT | FK→sys_company | | +| operator_id | BIGINT | FK→sys_user | 登录失败时可为 NULL | +| operator_name | VARCHAR(50) | NOT NULL | **操作时用户名快照**(不随改名变化) | +| operation_type | VARCHAR(50) | NOT NULL | 见枚举列表 | +| target_type | VARCHAR(30) | — | | +| target_id | BIGINT | — | | +| detail | JSONB | — | 补充信息 | +| ip_address | VARCHAR(50) | — | | +| result | VARCHAR(10) | NOT NULL | SUCCESS / FAIL | +| error_message | TEXT | — | | +| created_at | TIMESTAMP | NOT NULL DEFAULT NOW() | 分区键 | + +**只追加**:应用层禁止 UPDATE/DELETE,建议 DB 层添加触发器强制执行 +**分区**:按 `created_at` Range 分区,以月为单位(`sys_operation_log_YYYY_MM`) + +**operation_type 枚举**: +`USER_LOGIN`、`USER_LOGOUT`、`USER_CREATE`、`USER_UPDATE`、`USER_DISABLE`、`USER_ROLE_CHANGE`、`SOURCE_UPLOAD`、`SOURCE_DELETE`、`TASK_CREATE`、`TASK_CLAIM`、`TASK_UNCLAIM`、`TASK_SUBMIT`、`EXTRACTION_APPROVE`、`EXTRACTION_REJECT`、`QA_APPROVE`、`QA_REJECT`、`TASK_REASSIGN`、`EXPORT_CREATE`、`FINETUNE_START`、`CONFIG_UPDATE`、`VIDEO_JOB_RESET` + +--- + +### 10. annotation_task_history — 任务流转历史 + +| 字段 | 类型 | 约束 | 说明 | +|------|------|------|------| +| id | BIGSERIAL | PK | | +| company_id | BIGINT | NOT NULL FK→sys_company | | +| task_id | BIGINT | NOT NULL FK→annotation_task | | +| from_status | VARCHAR(20) | — | 任务初建时为 NULL | +| to_status | VARCHAR(20) | NOT NULL | | +| operator_id | BIGINT | NOT NULL FK→sys_user | | +| operator_role | VARCHAR(20) | NOT NULL | **操作时角色快照** | +| note | TEXT | — | 驳回原因、转移说明等 | +| created_at | TIMESTAMP | NOT NULL | | + +**只追加**:每次 annotation_task.status 变更时同步插入,与业务操作在同一事务中 +**索引**: `(task_id)` + +--- + +### 11. 
video_process_job — 视频异步处理任务 + +| 字段 | 类型 | 约束 | 说明 | +|------|------|------|------| +| id | BIGSERIAL | PK | | +| company_id | BIGINT | NOT NULL FK→sys_company | | +| source_id | BIGINT | NOT NULL FK→source_data | | +| job_type | VARCHAR(20) | NOT NULL | FRAME_EXTRACT / VIDEO_TO_TEXT | +| status | VARCHAR(20) | NOT NULL DEFAULT 'PENDING' | 见状态机 | +| params | JSONB | NOT NULL | 处理参数 | +| total_units | INT | — | 总帧数/片段数 | +| processed_units | INT | NOT NULL DEFAULT 0 | | +| output_path | VARCHAR(500) | — | | +| retry_count | INT | NOT NULL DEFAULT 0 | | +| max_retries | INT | NOT NULL DEFAULT 3 | | +| error_message | TEXT | — | | +| started_at / completed_at | TIMESTAMP | — | | +| created_at / updated_at | TIMESTAMP | NOT NULL | | + +**状态机**: +``` +PENDING → RUNNING +RUNNING → SUCCESS(处理成功) +RUNNING → RETRYING(失败且 retry_count < max_retries) +RUNNING → FAILED(失败且 retry_count >= max_retries) +RETRYING → RUNNING(AI 服务自动重试) +RETRYING → FAILED(超过最大重试次数) +``` +*FAILED → PENDING:由 ADMIN 手动触发接口,不在状态机自动流转中* + +**幂等规则**: 回调时若 `status == SUCCESS` 则静默忽略,不执行任何 DB 写入 + +**索引**: `(source_id)`、`(status)` + +--- + +## Redis 数据结构 + +| Key 模式 | 类型 | TTL | 内容 | +|---------|------|-----|------| +| `token:{uuid}` | Hash | 2h(滑动) | `{userId, role, companyId, username}` | +| `user:perm:{userId}` | String | 5min | 用户角色字符串 | +| `task:claim:{taskId}` | String | 30s | 持有者 userId | + +*禁止在上述三类命名空间之外自造 Key 用于认证、权限或锁目的。* diff --git a/specs/001-label-backend-spec/plan.md b/specs/001-label-backend-spec/plan.md new file mode 100644 index 0000000..548ae4d --- /dev/null +++ b/specs/001-label-backend-spec/plan.md @@ -0,0 +1,137 @@ +# 实施计划:label_backend 知识图谱智能标注平台 + +**分支**: `001-label-backend-spec` | **日期**: 2026-04-09 | **规格说明**: [spec.md](spec.md) +**输入**: 功能规格说明 `/specs/001-label-backend-spec/spec.md` + +--- + +## 摘要 + +构建面向多租户的知识图谱智能标注平台后端服务,驱动**文本线**(三元组提取 → 问答对生成 → 训练样本)和**图片线**(四元组提取 → 问答对生成 → 训练样本)两条流水线。视频作为预处理入口异步汇入两条流水线。系统基于 Spring Boot 3 + Apache Shiro + MyBatis Plus + PostgreSQL + Redis + 
RustFS 构建,通过 HTTP 调用 Python FastAPI AI 服务提供 AI 辅助标注和问答生成能力。
+
+---
+
+## 技术上下文
+
+**语言/版本**: Java 17(LTS)
+**主要依赖**: Spring Boot ≥ 3.0.x、Apache Shiro ≥ 1.13.x、MyBatis Plus ≥ 3.5.x、Spring Data Redis
+**存储**: PostgreSQL ≥ 14(主库)、Redis ≥ 6.x(会话/权限缓存/分布式锁)、RustFS(S3 兼容对象存储)
+**测试**: JUnit 5 + Testcontainers(真实 PostgreSQL + Redis 实例)、Spring Boot Test
+**目标平台**: Linux 服务器,Docker Compose 容器化部署
+**项目类型**: Web Service(REST API)
+**性能目标**: 任务领取并发下有且仅有一人成功;权限变更延迟 < 1 秒生效
+**约束**: 禁止 JWT;禁止 Spring Security;禁止文件字节流存入数据库;AI HTTP 调用禁止在 @Transactional 内同步执行;所有列表接口强制分页
+**规模**: 多租户(多公司),每公司独立数据空间;11 张核心业务表
+
+---
+
+## 宪章合规检查
+
+*门控:Phase 0 研究前必须通过。Phase 1 设计后重检。*
+
+| # | 宪章原则 | 状态 | 说明 |
+|---|---------|------|------|
+| 1 | 环境约束(JDK 17、SB 3、Shiro、MyBatis Plus) | ✅ 通过 | pom.xml 中版本约束与宪章完全对齐;无 Spring Security 引入 |
+| 2 | 多租户数据隔离(company_id + ThreadLocal) | ✅ 通过 | TenantLineInnerInterceptor 自动注入;CompanyContext 在 finally 块清理 |
+| 3 | BCrypt 密码 + UUID Token + 禁 JWT | ✅ 通过 | AuthService 使用 BCrypt ≥ 10;UUID v4 Token 存 Redis;无 JWT 库 |
+| 4 | 分级 RBAC + 权限注解 + 角色变更驱逐缓存 | ✅ 通过 | @RequiresRoles 声明权限;updateRole() 立即删 user:perm:{userId} |
+| 5 | 双流水线 + 级联触发 + parent_source_id 溯源 | ✅ 通过 | 仅文本线/图片线;审批通过用 @TransactionalEventListener 触发 QA |
+| 6 | 状态机完整性(StateValidator) | ✅ 通过 | 所有状态变更经 StateValidator.assertTransition();禁止绕过 Mapper 直写 |
+| 7 | 任务争抢双重保障(Redis SET NX + DB 乐观锁) | ✅ 通过 | task:claim:{taskId} TTL 30s + WHERE status='UNCLAIMED' |
+| 8 | 异步视频处理幂等 + 重试上限 + FAILED 手动重置 | ✅ 通过 | SUCCESS 回调静默忽略;retry_count ≥ max_retries → FAILED |
+| 9 | 只追加审计日志 + AOP 切面 + 审计失败不回滚业务 | ✅ 通过 | @OperationLog AOP;sys_operation_log 无 UPDATE/DELETE;异常仅 error 日志 |
+| 10 | RESTful URL + 统一响应格式 + 强制分页 | ✅ 通过 | Result 包装;无动词路径;PageResult 分页 |
+| 11 | YAGNI:业务在 Service,Controller 只处理 HTTP | ✅ 通过 | 分层明确;无预测性抽象层 |
+
+**门控结果:全部通过,可进入 Phase 0。**
+
+---
+
+## 项目结构
+
+### 规格说明文档(本功能)
+
+```text
+specs/001-label-backend-spec/
+├── plan.md # 本文件(/speckit.plan 输出)
+├── research.md # Phase 0 输出
+├── data-model.md # Phase 1 输出
+├── quickstart.md # Phase 1 输出 +├── contracts/ # Phase 1 输出(REST API 契约) +│ ├── auth.md +│ ├── source.md +│ ├── tasks.md +│ ├── extraction.md +│ ├── qa.md +│ ├── export.md +│ ├── config.md +│ └── video.md +└── tasks.md # Phase 2 输出(/speckit.tasks 命令创建,非本命令) +``` + +### 源代码(仓库根目录) + +```text +src/ +└── main/ + └── java/com/label/ + ├── LabelBackendApplication.java + ├── common/ + │ ├── result/ # Result、ResultCode、PageResult + │ ├── exception/ # BusinessException、GlobalExceptionHandler + │ ├── context/ # CompanyContext(ThreadLocal) + │ ├── shiro/ # TokenFilter、UserRealm、ShiroConfig + │ ├── redis/ # RedisKeyManager、RedisService + │ ├── aop/ # AuditAspect、@OperationLog 注解 + │ ├── storage/ # RustFsClient(S3 兼容封装) + │ ├── ai/ # AiServiceClient(RestClient 封装 8 个端点) + │ └── statemachine/ # StateValidator、各状态枚举 + └── module/ + ├── user/ # AuthController、UserController、AuthService、UserService + ├── source/ # SourceController、SourceService + ├── task/ # TaskController、TaskService、TaskClaimService + ├── annotation/ # ExtractionController、QaController、ExtractionService、QaService + ├── export/ # ExportController、ExportService、FinetuneService + ├── config/ # SysConfigController、SysConfigService + └── video/ # VideoController、VideoProcessService + +src/ +└── test/ + └── java/com/label/ + ├── integration/ # Testcontainers(真实 PG + Redis)集成测试 + │ ├── AuthIntegrationTest.java + │ ├── TaskClaimConcurrencyTest.java + │ ├── VideoCallbackIdempotencyTest.java + │ ├── MultiTenantIsolationTest.java + │ └── ShiroFilterIntegrationTest.java + └── unit/ # 纯单元测试(状态机、业务逻辑) + └── StateMachineTest.java + +sql/ +└── init.sql # 全部 DDL(11 张表,按依赖顺序执行) + +docker-compose.yml # postgres、redis、rustfs、backend、ai-service、frontend +Dockerfile # eclipse-temurin:17-jre-alpine +pom.xml +``` + +**结构决策**:单一后端服务(Web Service),无前端代码。标准 Maven 项目布局,源代码在 `src/main/java/com/label/`,测试在 `src/test/java/com/label/`,按 `common/` + `module/` 两层分包。 + +--- + +## 复杂度追踪 + +> 宪章检查无违规,本节留空。 + +--- + +## GSTACK REVIEW REPORT + +| 
Review | Trigger | Why | Runs | Status | Findings |
+|--------|---------|-----|------|--------|----------|
+| CEO Review | `/plan-ceo-review` | Scope & strategy | 0 | — | — |
+| Codex Review | `/codex review` | Independent 2nd opinion | 0 | — | — |
+| Eng Review | `/plan-eng-review` | Architecture & tests (required) | 0 | — | — |
+| Design Review | `/plan-design-review` | UI/UX gaps | 0 | — | — |
+
+**VERDICT:** NO REVIEWS YET — run `/autoplan` for full review pipeline, or individual reviews above.
diff --git a/specs/001-label-backend-spec/quickstart.md b/specs/001-label-backend-spec/quickstart.md
new file mode 100644
index 0000000..952a590
--- /dev/null
+++ b/specs/001-label-backend-spec/quickstart.md
@@ -0,0 +1,179 @@
+# 快速启动指南:label_backend
+
+**日期**: 2026-04-09
+**分支**: `001-label-backend-spec`
+
+---
+
+## 前置条件
+
+- Docker Desktop ≥ 4.x(含 Docker Compose v2)
+- JDK 17(本地开发时)
+- Maven ≥ 3.8(本地开发时)
+
+---
+
+## 一、使用 Docker Compose 启动完整环境
+
+```bash
+# 克隆仓库
+git clone <repository-url>
+cd label_backend
+
+# 启动所有服务(PostgreSQL + Redis + RustFS + AI Service + Backend + Frontend)
+docker compose up -d
+
+# 查看后端启动日志
+docker compose logs -f backend
+
+# 检查健康状态
+docker compose ps
+```
+
+**服务端口**:
+| 服务 | 端口 |
+|------|------|
+| 前端(Nginx) | http://localhost:80 |
+| 后端 REST API | http://localhost:8080 |
+| AI 服务(FastAPI) | http://localhost:8000 |
+| PostgreSQL | localhost:5432 |
+| Redis | localhost:6379 |
+| RustFS S3 API | http://localhost:9000 |
+| RustFS Web 控制台 | http://localhost:9001 |
+
+---
+
+## 二、初始化数据库
+
+数据库 DDL 通过 `./sql/init.sql` 在 PostgreSQL 容器启动时自动执行(`docker-entrypoint-initdb.d`)。
+
+若需手动执行:
+```bash
+docker compose exec postgres psql -U label -d label_db -f /docker-entrypoint-initdb.d/init.sql
+```
+
+**初始账号**(由 `init.sql` 中的 INSERT 语句创建):
+| 用户名 | 密码 | 角色 | 公司 |
+|--------|------|------|------|
+| admin | admin123 | ADMIN | 演示公司 |
+| reviewer01 | review123 | REVIEWER | 演示公司 |
+| annotator01 | annot123 | ANNOTATOR | 演示公司 |
+| uploader01 | upload123 | UPLOADER | 演示公司 |
+
+--- + +## 三、本地开发模式(不使用 Docker) + +```bash +# 启动依赖服务(仅 PostgreSQL + Redis + RustFS,不启动后端) +docker compose up -d postgres redis rustfs + +# 设置环境变量 +export SPRING_DATASOURCE_URL=jdbc:postgresql://localhost:5432/label_db +export SPRING_DATASOURCE_USERNAME=label +export SPRING_DATASOURCE_PASSWORD=label_password +export SPRING_REDIS_HOST=localhost +export SPRING_REDIS_PORT=6379 +export SPRING_REDIS_PASSWORD=redis_password +export RUSTFS_ENDPOINT=http://localhost:9000 +export RUSTFS_ACCESS_KEY=minioadmin +export RUSTFS_SECRET_KEY=minioadmin +export AI_SERVICE_BASE_URL=http://localhost:8000 + +# 编译并启动 +mvn clean spring-boot:run +``` + +--- + +## 四、验证安装 + +```bash +# 1. 登录(获取 Token) +curl -X POST http://localhost:8080/api/auth/login \ + -H "Content-Type: application/json" \ + -d '{"companyCode":"DEMO","username":"admin","password":"admin123"}' + +# 期望响应:{"code":"SUCCESS","data":{"token":"...","role":"ADMIN",...}} + +# 2. 使用 Token 访问受保护接口(将 {TOKEN} 替换为上一步返回的 token) +curl http://localhost:8080/api/auth/me \ + -H "Authorization: Bearer {TOKEN}" + +# 期望响应:{"code":"SUCCESS","data":{"username":"admin","role":"ADMIN",...}} +``` + +--- + +## 五、运行测试 + +```bash +# 运行所有测试(Testcontainers 会自动启动真实 PG + Redis 容器) +mvn test + +# 运行特定测试(并发任务领取) +mvn test -Dtest=TaskClaimConcurrencyTest + +# 运行集成测试套件 +mvn test -Dtest=*IntegrationTest +``` + +**注意**: Testcontainers 需要本地 Docker 可用。首次运行会拉取 PostgreSQL 和 Redis 镜像(约 200MB)。 + +--- + +## 六、关键配置项说明 + +配置文件位于 `src/main/resources/application.yml`。以下配置项可在运行时通过 `PUT /api/config/{key}` 接口(ADMIN 权限)动态调整,无需重启服务: + +| 配置键 | 说明 | 默认值 | +|--------|------|--------| +| `token_ttl_seconds` | 会话凭证有效期(秒) | 7200(2小时) | +| `model_default` | AI 辅助默认模型 | glm-4 | +| `video_frame_interval` | 视频帧提取间隔(帧数) | 30 | +| `prompt_extract_text` | 文本三元组提取 Prompt | 见 init.sql | +| `prompt_extract_image` | 图片四元组提取 Prompt | 见 init.sql | +| `prompt_qa_gen_text` | 文本问答生成 Prompt | 见 init.sql | +| `prompt_qa_gen_image` | 图片问答生成 Prompt | 见 init.sql | + +--- + +## 七、标注流水线快速验证 + +```bash 
+TOKEN="your-admin-token" + +# 步骤 1:上传文本资料 +curl -X POST http://localhost:8080/api/source/upload \ + -H "Authorization: Bearer $TOKEN" \ + -F "file=@sample.txt" -F "dataType=TEXT" + +# 步骤 2:为资料创建提取任务(sourceId 从上一步响应中获取) +curl -X POST http://localhost:8080/api/tasks \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"sourceId": 1, "taskType": "AI_ASSISTED", "aiModel": "glm-4"}' + +# 步骤 3:标注员领取任务(使用 annotator01 的 Token) +ANNOTATOR_TOKEN="annotator-token" +curl -X POST http://localhost:8080/api/tasks/1/claim \ + -H "Authorization: Bearer $ANNOTATOR_TOKEN" + +# 步骤 4:获取 AI 预标注结果 +curl http://localhost:8080/api/extraction/1 \ + -H "Authorization: Bearer $ANNOTATOR_TOKEN" + +# 步骤 5:提交标注结果 +curl -X PUT http://localhost:8080/api/extraction/1 \ + -H "Authorization: Bearer $ANNOTATOR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"items":[{"subject":"北京","predicate":"是首都","object":"中国","sourceText":"北京是中国的首都","startOffset":0,"endOffset":8}]}' + +curl -X POST http://localhost:8080/api/extraction/1/submit \ + -H "Authorization: Bearer $ANNOTATOR_TOKEN" + +# 步骤 6:审批员审批通过(使用 reviewer01 的 Token) +REVIEWER_TOKEN="reviewer-token" +curl -X POST http://localhost:8080/api/extraction/1/approve \ + -H "Authorization: Bearer $REVIEWER_TOKEN" +``` diff --git a/specs/001-label-backend-spec/research.md b/specs/001-label-backend-spec/research.md new file mode 100644 index 0000000..cd0ae81 --- /dev/null +++ b/specs/001-label-backend-spec/research.md @@ -0,0 +1,150 @@ +# Phase 0 研究报告:label_backend + +**日期**: 2026-04-09 +**分支**: `001-label-backend-spec` + +--- + +## 技术决策汇总 + +所有技术选型均由宪章强制约束,无需评估备选方案。本报告记录关键设计决策的理由,供后续实施参考。 + +--- + +## 决策 1:认证机制 + +**决策**: UUID v4 Token 存储于 Redis,滑动过期,禁止 JWT + +**理由**: +- JWT 自包含令牌无法按需吊销,无法满足"管理员禁用账号立即生效"的安全要求 +- UUID Token 在 Redis 中可精确控制生命周期:退出登录或禁用账号时同步删除 Key,下一次请求立即失效 +- 滑动过期(每次有效请求重置 TTL)确保活跃用户不被意外踢出 + +**备选方案放弃理由**: +- JWT:无法即时吊销,存在安全窗口 +- Session Cookie:在无状态 REST API 架构中不适用 +- OAuth2:过度设计,当前场景无第三方授权需求 + +--- + 
+## 决策 2:多租户隔离机制 + +**决策**: MyBatis Plus `TenantLineInnerInterceptor` + `ThreadLocal CompanyContext` + +**理由**: +- `TenantLineInnerInterceptor` 在 SQL 拦截器层自动在每条查询的 WHERE 子句中注入 `company_id`,覆盖范围广且无需逐方法手动添加条件 +- ThreadLocal 存储当前请求的 `companyId`,由 Shiro TokenFilter 在解析 Token 时从 Redis 会话数据注入,确保 companyId 来自服务端权威来源而非客户端参数 +- `finally` 块强制清理 ThreadLocal,防止线程池复用时数据串漏 + +**备选方案放弃理由**: +- 行级安全(RLS):PostgreSQL 原生支持,但与 MyBatis Plus 集成复杂,且宪章已指定 ThreadLocal 方案 +- 逐方法手动添加 WHERE:容易遗漏,维护成本高 + +--- + +## 决策 3:任务并发领取控制 + +**决策**: Redis `SET NX`(分布式锁)+ 数据库乐观约束(`WHERE status = 'UNCLAIMED'`)双重保障 + +**理由**: +- 单纯使用数据库乐观锁在高并发下存在写放大问题(大量 UPDATE 竞争) +- 单纯使用 Redis 锁若锁过期后 DB 写入失败可能导致数据不一致 +- 双重保障:Redis 锁(TTL 30s)快速拦截大部分并发请求,减少数据库压力;DB 乐观约束作为最终一致性兜底 + +**Key 命名**: `task:claim:{taskId}`(TTL 30s,与宪章 Redis Key 规范一致) + +--- + +## 决策 4:审批触发 QA 任务的异步解耦 + +**决策**: Spring `@TransactionalEventListener(phase = AFTER_COMMIT)` + `@Transactional(REQUIRES_NEW)` + +**理由**: +- 提取阶段审批通过后需调用 AI HTTP 生成候选问答对,该 HTTP 调用延迟不确定(秒级到分钟级) +- 若在 `@Transactional` 内同步调用,数据库连接被长时间占用,且 AI 失败会错误地回滚已完成的审批操作 +- `AFTER_COMMIT` 保证业务审批先提交再触发事件,避免事务回滚导致的幽灵任务 +- `REQUIRES_NEW` 为 QA 生成开启独立事务,AI 失败仅影响 QA 任务创建,不影响审批结果 + +**事件流**: `approve()` → publish `ExtractionApprovedEvent` → 事务提交 → `onExtractionApproved()` 异步执行(AI 调用 + 创建 QA 任务) + +--- + +## 决策 5:标注结果存储语义 + +**决策**: JSONB 整体覆盖(PUT 语义),禁止局部 PATCH + +**理由**: +- 三元组/四元组条目具有强关联性(主语-谓语-宾语作为整体,或主体-关系-客体-修饰词作为整体),局部更新易导致不一致 +- 整体替换简化服务端逻辑,前端每次提交完整 items 数组,服务端直接执行 UPDATE `result_json = ?` +- 避免局部追加导致的索引层数据不一致(如删除某条目后残留旧数据) + +--- + +## 决策 6:审计日志事务边界 + +**决策**: 审计日志写入不要求与业务操作在同一事务,AOP `finally` 块中独立写入 + +**理由**: +- 审计写入失败不应回滚业务操作(用户的标注/审批结果比审计日志更重要) +- `@Around` 通知在业务方法执行完成(commit 或 rollback)后捕获最终 `result`,可记录准确的成功/失败状态 +- 审计失败仅 error 级别日志 + 告警,不影响用户体验 + +--- + +## 决策 7:视频预处理幂等回调 + +**决策**: 回调处理时检查 `video_process_job.status`,已为 `SUCCESS` 则静默忽略 + +**理由**: +- AI 服务可能因网络抖动对同一 jobId 发起多次成功回调 +- 幂等检查确保第一次成功回调创建标注任务,后续重复回调无任何副作用 +- 检查粒度:`status == SUCCESS` 即返回,不进行任何 DB 写入 + +--- + +## 决策 
8:对象存储路径规范 + +**决策**: RustFS(S3 兼容),文件字节流禁止入库,路径按资源类型分桶分目录 + +**路径规范**: + +| 资源 | 桶 | 路径格式 | +|------|-----|---------| +| 文本文件 | `source-data` | `text/{yyyyMM}/{source_id}.txt` | +| 图片 | `source-data` | `image/{yyyyMM}/{source_id}.jpg` | +| 视频 | `source-data` | `video/{yyyyMM}/{source_id}.mp4` | +| 视频帧 | `source-data` | `frames/{source_id}/{frame_index}.jpg` | +| 视频转文本 | `source-data` | `video-text/{parent_source_id}/{timestamp}.txt` | +| bbox 裁剪图 | `source-data` | `crops/{task_id}/{item_index}.jpg` | +| 导出 JSONL | `finetune-export` | `export/{batchUuid}.jsonl` | + +--- + +## 决策 9:测试策略 + +**决策**: 集成测试使用 Testcontainers(真实 PG + Redis),不允许 Mock 数据库 + +**必须覆盖的测试场景**: + +1. **并发任务领取**:10 线程同时争抢同一任务,验证恰好 1 人成功(Redis + DB 双重锁) +2. **视频回调幂等**:同一 jobId 两次成功回调,验证只创建 1 个 annotation_task +3. **状态机越界拒绝**:非法状态转换(如 APPROVED → IN_PROGRESS)抛出 BusinessException +4. **多租户隔离**:公司 A 身份访问公司 B 资源,验证被拒绝 +5. **Shiro 过滤器链**:无 Token → 401;Token 有效但角色不足 → 403 + +--- + +## 无需澄清事项汇总 + +| 项目 | 状态 | 来源 | +|------|------|------| +| 认证方案 | ✅ 已确定(UUID Token) | 宪章原则三 | +| 数据库选型 | ✅ 已确定(PostgreSQL) | 宪章原则一 | +| ORM | ✅ 已确定(MyBatis Plus) | 宪章原则一 | +| 缓存/锁 | ✅ 已确定(Redis) | 宪章原则一 | +| 对象存储 | ✅ 已确定(RustFS S3) | 宪章原则一 | +| AI 集成方式 | ✅ 已确定(HTTP RestClient) | 宪章原则一 | +| 多租户隔离 | ✅ 已确定(ThreadLocal + Interceptor) | 宪章原则二 | +| 并发控制 | ✅ 已确定(双重锁) | 宪章原则七 | +| 审批事务边界 | ✅ 已确定(@TransactionalEventListener) | 宪章原则五 | +| 测试策略 | ✅ 已确定(Testcontainers) | 宪章开发工作流 | diff --git a/specs/001-label-backend-spec/spec.md b/specs/001-label-backend-spec/spec.md new file mode 100644 index 0000000..d8efb9a --- /dev/null +++ b/specs/001-label-backend-spec/spec.md @@ -0,0 +1,273 @@ +# 功能规格说明:label_backend 知识图谱智能标注平台 + +**功能分支**: `001-label-backend-spec` +**创建日期**: 2026-04-09 +**状态**: 草稿 +**输入**: 根据文档 docs/superpowers/specs/2026-04-09-label-backend-design.md 生成需求规格文档 + +--- + +## 用户场景与测试 *(必填)* + +### 用户故事 1 - 用户登录与身份认证 (优先级: P1) + +公司员工使用用户名和密码登录平台,获取会话凭证后访问受权限保护的功能。会话在持续活跃时保持有效,用户主动退出或管理员禁用账号后会话立即失效。 + +**优先级理由**: 
认证是所有其他功能的前提,无法登录则所有功能均不可用。 + +**独立测试**: 可独立通过以下方式测试:用正确凭证登录,携带返回凭证请求受保护接口,验证正常访问;携带错误凭证或过期凭证,验证被拒绝。 + +**验收场景**: + +1. **给定** 用户持有效用户名和密码,**当** 提交登录请求,**则** 系统返回会话凭证,且该凭证可用于后续请求 +2. **给定** 用户已登录并持有效凭证,**当** 发起正常业务请求,**则** 会话有效期自动延长 +3. **给定** 用户主动退出登录,**当** 使用旧凭证访问任意受保护接口,**则** 系统立即拒绝,返回未授权响应 +4. **给定** 管理员禁用某用户账号,**当** 被禁用用户使用现有凭证访问接口,**则** 系统立即拒绝,不设任何宽限期 +5. **给定** 用户使用错误密码,**当** 提交登录请求,**则** 系统返回认证失败,不泄露用户是否存在 + +--- + +### 用户故事 2 - 原始资料上传 (优先级: P1) + +上传员将文本文件、图片或视频上传至平台,系统存储文件并记录元数据,视频文件额外触发异步预处理流程(帧提取或转文字)。 + +**优先级理由**: 资料上传是整条标注流水线的起点,没有资料则无法产生任何标注任务。 + +**独立测试**: 可独立测试:上传一个文本文件或图片,验证系统成功接收并记录;上传视频,验证系统创建预处理任务并开始异步处理。 + +**验收场景**: + +1. **给定** 上传员已登录,**当** 上传一个文本文件,**则** 系统保存文件并创建资料记录,状态为"待提取" +2. **给定** 上传员已登录,**当** 上传一张图片,**则** 系统保存图片并创建资料记录,状态为"待提取" +3. **给定** 上传员已登录,**当** 上传一个视频文件,**则** 系统保存视频,创建预处理任务,资料状态变为"预处理中" +4. **给定** 视频预处理成功完成,**当** AI 服务回调成功,**则** 每帧(帧模式)或每段转译文本(片段模式)均作为独立资料进入标注队列,原视频状态变为"已完成" +5. **给定** 视频预处理因 AI 服务故障失败且已达最大重试次数,**当** 回调失败,**则** 任务标记为失败,管理员可查阅错误信息并手动重新触发 + +--- + +### 用户故事 3 - 提取阶段标注(EXTRACTION) (优先级: P1) + +标注员从任务池中领取一个提取任务,借助 AI 辅助预标注对文本资料完成三元组标注或对图片资料完成四元组标注,提交后由审批员审核。 + +**优先级理由**: 提取阶段是双流水线的第一个生产阶段,直接产出结构化知识。 + +**独立测试**: 可独立测试:标注员领取任务,修改 AI 预标注结果,提交后验证任务进入"待审批"状态;同一任务被多人同时尝试领取,验证只有一人成功。 + +**验收场景**: + +1. **给定** 存在未被领取的提取任务,**当** 标注员请求领取,**则** 任务归属到该标注员,状态变为"进行中" +2. **给定** 同一任务被 10 名标注员同时争抢,**当** 所有人同时发起领取请求,**则** 恰好一名标注员领取成功,其余人收到"任务已被他人领取"响应 +3. **给定** 标注员已领取任务,**当** 请求 AI 辅助预标注,**则** 系统调用 AI 服务返回结构化候选结果(不直接提交,供人工编辑) +4. **给定** 标注员完成人工编辑,**当** 提交标注结果,**则** 任务状态变为"已提交",进入审批队列 +5. **给定** 标注员领取任务后决定放弃,**当** 放弃任务,**则** 任务回到任务池,可被其他标注员重新领取 + +--- + +### 用户故事 4 - 提取阶段审批 (优先级: P1) + +审批员查看提交的提取标注结果,选择通过或驳回。审批通过后系统自动创建问答生成任务;驳回时需填写驳回原因,标注员可重新领取该任务修改后再次提交。 + +**优先级理由**: 审批控制标注质量,是推进流水线到下一阶段的门控节点。 + +**独立测试**: 可独立测试:审批通过一个提取任务,验证系统自动创建 QA 生成任务;驳回一个任务,验证标注员可重领并修改。 + +**验收场景**: + +1. **给定** 审批员进入待审批队列,**当** 查看列表,**则** 只看到状态为"已提交"的任务 +2. **给定** 审批员查看某提取任务的标注结果,**当** 点击通过,**则** 标注结果标记为最终版,系统自动创建对应的问答生成任务并置于任务池中 +3. 
**给定** 审批员本人提交了某提取任务,**当** 该审批员尝试审批自己提交的任务,**则** 系统拒绝,提示不允许自审 +4. **给定** 审批员认为标注结果不合格,**当** 附带驳回原因并驳回,**则** 任务状态变为"已驳回",标注员可在我的任务列表中看到该任务及原因 +5. **给定** 标注员查看被驳回的任务,**当** 重新领取并修改后提交,**则** 任务重新进入审批队列 + +--- + +### 用户故事 5 - 问答生成阶段标注与审批(QA_GENERATION) (优先级: P2) + +标注员领取问答生成任务,在 AI 候选问答对基础上完成人工编辑,提交后由审批员审批。审批通过即写入训练样本库;驳回则退回标注员修改。 + +**优先级理由**: QA 阶段是流水线的最后生产阶段,直接决定训练样本质量。 + +**独立测试**: 可独立测试:领取 QA 任务,修改候选问答对并提交;审批员通过后,验证训练样本库中出现对应记录。 + +**验收场景**: + +1. **给定** 存在由提取阶段审批通过自动创建的问答生成任务,**当** 标注员进入任务池,**则** 可以看到并领取该任务 +2. **给定** 标注员已领取问答生成任务,**当** 整体提交修改后的问答对列表,**则** 任务进入审批队列(每次提交均为完整列表替换,不允许部分追加) +3. **给定** 审批员通过问答生成任务,**当** 审批完成,**则** 对应训练样本状态变为"已审批",整条资料流水线标记为完成 +4. **给定** 审批员驳回问答生成任务,**当** 驳回完成,**则** 候选问答对记录被清除,标注员可重领任务重新生成 + +--- + +### 用户故事 6 - 训练数据导出与微调提交 (优先级: P2) + +管理员从已审批的训练样本中选择一批次,导出为 GLM 微调格式的 JSONL 文件,并可选择一键提交至 GLM 微调服务。 + +**优先级理由**: 导出是将标注成果转化为 AI 训练价值的最终步骤。 + +**独立测试**: 可独立测试:选择若干已审批样本创建导出批次,验证生成 JSONL 文件;将批次提交微调服务,验证可查询到微调任务状态。 + +**验收场景**: + +1. **给定** 管理员查看样本库,**当** 筛选已审批样本,**则** 只返回状态为"已审批"的样本(分页,不可无界查询) +2. **给定** 管理员选择若干已审批样本,**当** 创建导出批次,**则** 系统生成 JSONL 文件并存储,返回批次标识;若任意样本不处于已审批状态则整批失败 +3. **给定** 导出批次已创建,**当** 管理员提交微调任务,**则** 系统向 AI 服务发起微调请求,记录微调任务标识,状态变为"进行中" +4. **给定** 微调任务已提交,**当** 管理员查询状态,**则** 返回最新的微调进度信息 + +--- + +### 用户故事 7 - 用户与权限管理 (优先级: P2) + +管理员管理本公司用户,包括创建用户、分配角色、启用/禁用账号。角色变更和账号禁用在保存后立即生效,无延迟窗口。 + +**优先级理由**: 人员和权限管理是平台运营的基础管控能力。 + +**独立测试**: 可独立测试:创建一个标注员角色用户,验证该用户可以领取任务但无法执行审批;将其角色升为审批员,立即验证可以审批;禁用该用户,验证其现有会话立即失效。 + +**验收场景**: + +1. **给定** 管理员创建一个新用户并分配角色,**当** 新用户登录,**则** 该用户拥有该角色对应的权限(高级角色自动包含低级角色权限) +2. **给定** 管理员将用户角色从标注员升为审批员,**当** 角色变更保存后,**则** 该用户无需重新登录即可使用审批功能 +3. **给定** 管理员禁用某用户账号,**当** 被禁用用户下次发起请求,**则** 系统立即返回拒绝响应,不设过渡期 +4. **给定** 管理员查询用户列表,**当** 获取结果,**则** 仅返回本公司用户,不可看到其他公司用户数据 + +--- + +### 用户故事 8 - 系统配置管理 (优先级: P3) + +管理员维护 AI Prompt 模板、模型参数、Token 有效期等系统配置项,支持公司级配置覆盖全局默认值。 + +**优先级理由**: 配置管理是运营支撑能力,可在系统运行后按需调整,不影响核心标注流程。 + +**独立测试**: 可独立测试:修改某公司的 Prompt 模板配置,验证该公司后续标注使用新模板,其他公司仍使用全局默认值。 + +**验收场景**: + +1. 
**给定** 管理员查看配置列表,**当** 获取结果,**则** 同时展示本公司专属配置和全局默认配置,公司专属配置对同一 Key 优先 +2. **给定** 管理员更新某配置项,**当** 保存成功,**则** 后续相关操作立即使用新配置值 +3. **给定** 某配置项仅有全局默认值无公司级覆盖,**当** 系统查询该配置,**则** 返回全局默认值 + +--- + +### 边界情况 + +- 标注员领取任务后长时间未操作——管理员可强制转移任务给其他标注员(状态保持"进行中",持有人变更) +- 视频预处理回调因网络抖动发生重复投递——系统对同一任务的重复成功回调静默忽略,不重复创建标注任务 +- 某租户上传量极大时的无界查询——所有列表接口强制分页,无法绕过分页限制获取全量数据 +- 审批员同时兼任标注员角色时尝试自审——系统按提交者身份校验,自审请求被拒绝 +- 跨公司数据访问尝试——每次数据查询自动注入当前用户所属公司标识,无法通过参数篡改访问其他公司数据 +- 操作日志写入失败——审计写入失败不影响业务操作,仅记录错误并触发告警 +- 同一账号在多设备登录——每次登录生成独立会话凭证,互不影响;退出某设备仅使该设备凭证失效 + +--- + +## 需求说明 *(必填)* + +### 功能性需求 + +**认证与会话** + +- **FR-001**: 系统必须支持基于用户名和密码的登录认证,验证通过后返回会话凭证 +- **FR-002**: 系统必须在每次有效请求时自动延长会话有效期(滑动过期) +- **FR-003**: 系统必须支持主动退出登录,退出后凭证立即失效 +- **FR-004**: 系统必须在管理员禁用账号后立即使该账号所有有效凭证失效,不设任何宽限期 +- **FR-005**: 系统必须拒绝无凭证或过期凭证的请求,返回未授权响应 + +**访问控制** + +- **FR-006**: 系统必须实现四级角色体系:上传员 ⊂ 标注员 ⊂ 审批员 ⊂ 管理员,高级角色自动继承低级角色权限 +- **FR-007**: 系统必须在接口层声明每个接口所需的最低角色,角色不足时拒绝访问 +- **FR-008**: 系统必须在角色变更保存后立即生效,无需等待会话自然过期 + +**多租户数据隔离** + +- **FR-009**: 系统必须保证每个公司的数据完全隔离,任何查询均只返回当前用户所属公司的数据 +- **FR-010**: 系统必须禁止调用方通过请求参数指定公司标识来访问其他公司数据;公司标识必须从服务端会话中获取 +- **FR-011**: 全局系统配置对所有公司可见,公司级配置对同一配置项优先覆盖全局值 + +**资料管理** + +- **FR-012**: 系统必须支持文本、图片、视频三种原始资料的上传,文件内容存储至对象存储服务,数据库只保存元数据和存储路径 +- **FR-013**: 视频上传后必须触发异步预处理任务,不阻塞上传响应 +- **FR-014**: 系统必须支持视频帧提取模式(每帧作为独立图片进入图片标注流水线)和视频片段转文本模式(派生文本资料进入文本标注流水线) +- **FR-015**: 视频片段转文本产生的派生资料必须记录对原始视频资料的引用,可追溯来源 + +**任务管理** + +- **FR-016**: 系统必须支持并发安全的任务领取机制,确保同一任务不会被两名标注员同时持有 +- **FR-017**: 系统必须支持任务放弃(退回任务池)和管理员强制转移任务归属 +- **FR-018**: 每次任务状态变更必须记录历史快照(含操作人、操作时间、驳回原因等),不可修改或删除历史记录 +- **FR-019**: 所有任务列表接口必须强制分页,不允许无界查询 + +**提取阶段标注工作台** + +- **FR-020**: 系统必须调用 AI 服务生成候选提取结果供标注员参考编辑,不直接写入最终结果 +- **FR-021**: 标注员提交的提取结果以整体替换方式存储,禁止局部追加修改 +- **FR-022**: 审批员审批通过时,系统必须在同一操作中将提取结果标记为最终版并自动创建问答生成任务,该级联操作不得由前端发起独立请求触发 +- **FR-023**: 系统必须拒绝提交者本人审批或驳回自己提交的任务(禁止自审) +- **FR-024**: 审批驳回时,标注员必须可以看到被驳回任务及驳回原因,并可重新领取修改后再次提交 + +**问答生成阶段** + +- **FR-025**: 问答生成任务的标注结果采用整体替换,每次提交包含完整问答对列表 +- **FR-026**: 
问答生成阶段审批通过时,对应训练样本必须写入训练样本库,资料状态标记为"已完成" +- **FR-027**: 问答生成阶段审批驳回时,候选问答对记录必须被清除,标注员可重领任务重新生成 + +**训练数据导出** + +- **FR-028**: 系统必须支持将已审批的训练样本批量导出为 GLM 微调格式,每条样本一行 +- **FR-029**: 导出时若任意选定样本不处于已审批状态,整批导出请求必须失败 +- **FR-030**: 系统必须支持将导出批次提交至外部 AI 微调服务,并可追踪微调任务进度 + +**审计日志** + +- **FR-031**: 系统必须对所有状态变更操作自动记录审计日志,包含操作人姓名快照、操作类型、结果、IP 地址等信息 +- **FR-032**: 审计日志只追加不修改,禁止对审计记录执行更新或删除 +- **FR-033**: 审计日志写入失败不得导致业务操作失败或回滚 + +**视频异步处理** + +- **FR-034**: 视频预处理任务必须支持自动重试,达到最大重试次数后置为失败状态,需管理员手动重新触发 +- **FR-035**: AI 服务对同一视频处理任务的重复成功回调必须被幂等处理,不得重复创建标注任务 + +### 核心实体 + +- **公司(Company)**: 多租户根节点,每个公司拥有独立的用户、资料和任务数据空间 +- **用户(User)**: 属于某公司,拥有角色(上传员/标注员/审批员/管理员),通过会话凭证访问系统 +- **原始资料(SourceData)**: 待标注的文件(文本/图片/视频),拥有状态流转(待处理→提取中→QA审核中→已完成);视频派生资料通过父资料引用保留溯源链 +- **标注任务(AnnotationTask)**: 标注工作单元,分提取阶段和问答生成阶段,拥有领取、提交、审批、驳回完整生命周期 +- **标注结果(AnnotationResult)**: 提取阶段的结构化输出(三元组或四元组),以整体 JSON 存储 +- **训练样本(TrainingDataset)**: 经审批的问答对,GLM 微调格式,待导出 +- **导出批次(ExportBatch)**: 一批训练样本的导出记录,关联外部微调任务标识 +- **视频处理任务(VideoProcessJob)**: 视频预处理的异步任务跟踪,包含重试计数和最终输出路径 +- **系统配置(SysConfig)**: 配置键值对,分全局默认和公司级两层,公司级优先 + +--- + +## 成功标准 *(必填)* + +### 可度量结果 + +- **SC-001**: 同一标注任务被多人同时争抢时,有且仅有一人领取成功,其余人立即收到明确的"已被领取"响应,成功率 100%,无数据竞争导致的双重持有 +- **SC-002**: 管理员禁用账号或变更角色后,该账号的权限变更在下一次请求时立即生效(延迟小于 1 秒) +- **SC-003**: 提取阶段审批通过时,问答生成任务在同一次操作中自动出现在任务池,无需任何人工干预步骤 +- **SC-004**: 视频预处理回调的重复投递(同一任务多次成功回调)不产生重复标注任务,幂等处理成功率 100% +- **SC-005**: 跨公司数据访问尝试 100% 被系统拒绝,无任何数据泄露至非所属租户 +- **SC-006**: 审计日志对所有状态变更操作的覆盖率达到 100%,审计写入失败不影响业务成功率 +- **SC-007**: 所有列表接口在数据量增长时保持稳定响应,用户无法绕过分页限制一次性获取不受限制数量的记录 +- **SC-008**: 标注员完成一次任务领取→标注→提交的完整操作流程(不含 AI 辅助预标注等待时间)可在 5 分钟内完成 +- **SC-009**: 从资料上传到训练样本进入样本库的完整流水线(含两次人工标注和两次审批)中,每个节点的操作人、时间、结果均可查询追溯 + +--- + +## 假设与前提 + +- 系统服务于多个公司,每家公司的用户、资料和标注数据完全独立,不存在跨公司协作场景 +- 每位用户在同一时刻只属于一家公司,不存在用户跨公司兼职的场景 +- 视频预处理(帧提取、转文字)由外部 AI 服务异步完成,后端只负责触发和回调处理 +- 微调结果的质量评估不在本平台范围内,平台只负责提交微调任务并查询状态 +- 前端应用已独立开发,本规格仅覆盖后端 API 能力 +- 所有文件二进制内容存储在兼容 S3 协议的对象存储服务中,不存入关系型数据库 +- 生产环境使用容器化部署,后端服务、数据库、缓存、对象存储均为独立容器 +- AI 服务通过 HTTP 
提供结构化的提取和问答生成能力,后端不内嵌 AI 模型 +- 标注流水线中一条资料同一时间只有一个活跃的提取任务或问答生成任务,不支持并行多版本标注 +- 审计日志的长期归档(超过月分区范围)由数据库运维团队负责,不在本系统范围内 diff --git a/specs/001-label-backend-spec/tasks.md b/specs/001-label-backend-spec/tasks.md new file mode 100644 index 0000000..7954237 --- /dev/null +++ b/specs/001-label-backend-spec/tasks.md @@ -0,0 +1,310 @@ +# 任务清单:label_backend 知识图谱智能标注平台 + +**输入**: `/specs/001-label-backend-spec/` 全部设计文档 +**前置条件**: plan.md ✅ | spec.md ✅ | research.md ✅ | data-model.md ✅ | contracts/ ✅ | quickstart.md ✅ + +## 格式说明 + +- **[P]**: 可并行执行(不同文件,无未完成任务的依赖) +- **[USn]**: 对应 spec.md 中的用户故事编号 +- 每条任务包含精确的文件路径 + +--- + +## Phase 1: 项目初始化 + +**目标**: 创建 Maven 项目骨架、基础配置和 Docker 环境 + +- [ ] T001 创建 Maven 项目骨架(`com.label` GroupId,`label-backend` ArtifactId,Java 17 编译目标) +- [ ] T002 配置 `pom.xml`(Spring Boot 3、Apache Shiro 1.13.x、MyBatis Plus 3.5.x、Spring Data Redis、AWS S3 SDK v2、Testcontainers、Lombok) +- [ ] T003 [P] 创建 `sql/init.sql`(按依赖顺序建全部 11 张表:sys_company → sys_user → source_data → annotation_task → annotation_result → training_dataset → export_batch → sys_config → sys_operation_log → annotation_task_history → video_process_job;含所有索引和初始配置数据) +- [ ] T004 [P] 创建 `docker-compose.yml`(postgres、redis、rustfs、backend、ai-service、frontend 六个服务,含健康检查)和后端 `Dockerfile`(eclipse-temurin:17-jre-alpine) +- [ ] T005 创建 `src/main/resources/application.yml`(数据源、Redis、RustFS、AI 服务 base-url、Shiro 相关配置项) + +**检查点**: Maven 编译通过(`mvn compile`),Docker Compose `up -d` 全部服务健康 + +--- + +## Phase 2: 公共基础设施(阻塞性前置条件) + +**目标**: 所有业务模块依赖的公共组件。**必须全部完成后用户故事阶段才能开始** + +**⚠️ 重要**: 此阶段未完成前任何用户故事均不可开始实现 + +- [ ] T006 创建 `Result`、`ResultCode`、`PageResult` — `src/main/java/com/label/common/result/`(统一响应格式:`{"code":"SUCCESS","data":{...}}`) +- [ ] T007 [P] 创建 `BusinessException`(含 `code`、`message`、`httpStatus`)和 `GlobalExceptionHandler`(`@RestControllerAdvice`)— `src/main/java/com/label/common/exception/` +- [ ] T008 [P] 创建 `CompanyContext`(ThreadLocal,含 `set/get/clear` 三个方法,clear 必须在 finally 块调用)— 
`src/main/java/com/label/common/context/CompanyContext.java` +- [ ] T009 创建 `RedisKeyManager`(三个静态方法:`tokenKey`、`userPermKey`、`taskClaimKey`)和 `RedisService` — `src/main/java/com/label/common/redis/` +- [ ] T010 创建 MyBatis Plus 配置类 `MybatisPlusConfig`,注册 `TenantLineInnerInterceptor`(从 `CompanyContext` 获取 `companyId` 自动注入 WHERE 子句;`sys_company`、`sys_config` 加入忽略表列表)— `src/main/java/com/label/common/config/MybatisPlusConfig.java` +- [ ] T011 创建 `StateValidator`(`assertTransition` 泛型方法,违规时抛出 `BusinessException("INVALID_STATE_TRANSITION",...)`)— `src/main/java/com/label/common/statemachine/StateValidator.java` +- [ ] T012 [P] 创建 `SourceStatus` 枚举(PENDING/PREPROCESSING/EXTRACTING/QA_REVIEW/APPROVED,含 TRANSITIONS Map)— `src/main/java/com/label/common/statemachine/SourceStatus.java` +- [ ] T013 [P] 创建 `TaskStatus` 枚举(UNCLAIMED/IN_PROGRESS/SUBMITTED/APPROVED/REJECTED,含 TRANSITIONS Map,含 IN_PROGRESS→IN_PROGRESS 用于 ADMIN 强制转移)— `src/main/java/com/label/common/statemachine/TaskStatus.java` +- [ ] T014 [P] 创建 `DatasetStatus` 枚举(PENDING_REVIEW/APPROVED/REJECTED,含 TRANSITIONS Map)— `src/main/java/com/label/common/statemachine/DatasetStatus.java` +- [ ] T015 [P] 创建 `VideoJobStatus` 枚举(PENDING/RUNNING/SUCCESS/FAILED/RETRYING,含 TRANSITIONS Map,注释说明 FAILED→PENDING 由 ADMIN 手动触发)— `src/main/java/com/label/common/statemachine/VideoJobStatus.java` +- [ ] T016 创建 `@OperationLog` 注解(`type` 和 `targetType` 两个属性,`@Around` 级别)— `src/main/java/com/label/common/aop/OperationLog.java` +- [ ] T017 创建 `AuditAspect`(`@Around("@annotation(operationLog)")`,在 finally 块以独立操作写入 `sys_operation_log`;审计写入失败只记录 error 日志,禁止抛出异常回滚业务)— `src/main/java/com/label/common/aop/AuditAspect.java` +- [ ] T018 [P] 创建 `RustFsClient`(AWS S3 SDK v2 封装,endpoint 指向 RustFS;实现 `upload`、`download`、`delete`、`getPresignedUrl`)— `src/main/java/com/label/common/storage/RustFsClient.java` +- [ ] T019 [P] 创建 `AiServiceClient`(`RestClient` 封装,8 
个端点:`extractText`、`extractImage`、`extractFrames`、`videoToText`、`genTextQa`、`genImageQa`、`startFinetune`、`getFinetuneStatus`)— `src/main/java/com/label/common/ai/AiServiceClient.java` +- [ ] T020 创建 Shiro 三件套:`TokenFilter`(解析 `Authorization: Bearer {uuid}`,查 Redis `token:{uuid}`,注入 `CompanyContext`,请求结束 finally 清理 ThreadLocal)、`UserRealm`(先查 Redis `user:perm:{userId}` TTL 5min,未命中查 PG;含 `addInheritedRoles`)、`ShiroConfig`(过滤器链:`/api/auth/login` → `anon`,`/api/**` → `tokenFilter`)— `src/main/java/com/label/common/shiro/` +- [ ] T021 创建 `AbstractIntegrationTest`(Testcontainers,启动真实 PostgreSQL + Redis 容器,执行 sql/init.sql,注入测试用的公司和用户数据)— `src/test/java/com/label/AbstractIntegrationTest.java` +- [ ] T022 集成测试:`ShiroFilterIntegrationTest`(无 Token → 401;有效 Token 但角色不足 → 403;有效 Token 且角色满足 → 200)— `src/test/java/com/label/integration/ShiroFilterIntegrationTest.java` +- [ ] T023 单元测试:`StateMachineTest`(验证所有枚举的合法转换通过;非法转换抛出 `BusinessException("INVALID_STATE_TRANSITION")`)— `src/test/java/com/label/unit/StateMachineTest.java` + +**检查点**: 基础设施就绪,所有 Phase 3+ 的用户故事可并行开始 + +--- + +## Phase 3: 用户故事 1 — 用户登录与身份认证(优先级: P1)🎯 MVP + +**目标**: 用户可以用用户名和密码登录,获得会话凭证,使用凭证访问受保护接口,退出后凭证立即失效 + +**独立测试**: 登录 → 获取 Token → 访问 `/api/auth/me` 返回用户信息 → 退出 → 再次访问返回 401 + +- [ ] T024 [P] [US1] 创建 `SysCompany` 实体(MyBatis Plus `@TableName`)和 `SysCompanyMapper` — `src/main/java/com/label/module/user/entity/SysCompany.java` + `mapper/SysCompanyMapper.java` +- [ ] T025 [P] [US1] 创建 `SysUser` 实体(`passwordHash` 字段加 `@JsonIgnore`)和 `SysUserMapper`(含 `selectByCompanyAndUsername` 方法)— `src/main/java/com/label/module/user/entity/SysUser.java` + `mapper/SysUserMapper.java` +- [ ] T026 [US1] 实现 `AuthService`:`login()`(BCrypt 校验密码 → UUID v4 Token → Redis Hash 存储 userId/role/companyId/username → 设置 TTL = `token_ttl_seconds` 配置值);`logout()`(删除 Redis Token Key)— `src/main/java/com/label/module/user/service/AuthService.java` +- [ ] T027 [US1] 实现 `AuthController`:`POST /api/auth/login`(`anon`,调用 
`AuthService.login()`)、`POST /api/auth/logout`(已登录)、`GET /api/auth/me`(返回当前用户信息);所有响应用 `Result` 包装 — `src/main/java/com/label/module/user/controller/AuthController.java` +- [ ] T028 [US1] 集成测试:正确密码登录返回 Token;Token 有效时 `/api/auth/me` 返回 200;主动退出后再访问返回 401;错误密码登录返回 401 — `src/test/java/com/label/integration/AuthIntegrationTest.java` + +**检查点**: US1 独立可测试 — 登录/退出流程完整可用 + +--- + +## Phase 4: 用户故事 2 — 原始资料上传(优先级: P1) + +**目标**: 上传员可以上传文本/图片/视频,查询自己的资料列表;管理员可查看全公司资料 + +**独立测试**: 上传文本文件 → 列表查到 → 详情含预签名 URL → 管理员可删除 + +- [ ] T029 [P] [US2] 创建 `SourceData` 实体(含 `parentSourceId` 自引用字段)和 `SourceDataMapper`(含 `updateStatus` 方法)— `src/main/java/com/label/module/source/entity/SourceData.java` + `mapper/SourceDataMapper.java` +- [ ] T030 [US2] 实现 `SourceService`:`upload()`(先 insert 获取 ID → 构造路径 → 上传 RustFS → 更新 filePath);`list()`(UPLOADER 按 `uploaderId` 过滤,ADMIN 不过滤,强制分页);`findById()`(含 15 分钟预签名 URL);`delete()`(仅 PENDING 状态可删,同步删 RustFS 文件)— `src/main/java/com/label/module/source/service/SourceService.java` +- [ ] T031 [US2] 实现 `SourceController`(`POST /api/source/upload`、`GET /api/source/list`、`GET /api/source/{id}`、`DELETE /api/source/{id}`;`@RequiresRoles` 注解声明权限;所有响应 `Result` 包装)— `src/main/java/com/label/module/source/controller/SourceController.java` +- [ ] T032 [US2] 集成测试:UPLOADER 上传文本/图片 → 列表仅返回自己的资料;ADMIN 查看列表返回全部;上传视频 → source_data 状态为 PENDING(视频预处理 Phase 9 覆盖);已进入流水线的资料删除返回 409 — `src/test/java/com/label/integration/SourceIntegrationTest.java` + +**检查点**: US2 独立可测试 — 上传/查询/删除流程完整可用 + +--- + +## Phase 5: 用户故事 3+4 — 提取阶段标注与审批(优先级: P1) + +**目标**: 标注员可以领取任务(并发安全)、AI 辅助预标注、编辑并提交;审批员可以通过(自动触发 QA 任务)或驳回(标注员可重领) + +**独立测试**: 创建任务 → 标注员领取 → AI 预标注 → 提交 → 审批通过 → QA 任务自动出现在任务池 + +### 实体与数据层 + +- [ ] T033 [P] [US3] 创建 `AnnotationTask` 实体 + `AnnotationTaskMapper`(含 `claimTask(taskId, userId, companyId)` 方法,SQL:`UPDATE ... SET status='IN_PROGRESS', claimed_by=?, claimed_at=NOW() WHERE id=? 
AND status='UNCLAIMED' AND company_id=?`,返回影响行数)— `src/main/java/com/label/module/task/entity/AnnotationTask.java` + `mapper/AnnotationTaskMapper.java` +- [ ] T034 [P] [US3] 创建 `AnnotationTaskHistory` 实体 + `TaskHistoryMapper` — `src/main/java/com/label/module/task/entity/AnnotationTaskHistory.java` + `mapper/TaskHistoryMapper.java` +- [ ] T035 [P] [US3] 创建 `AnnotationResult` 实体 + `AnnotationResultMapper`(含 `updateResultJson` 整体覆盖方法和 `selectByTaskId` 方法)— `src/main/java/com/label/module/annotation/entity/AnnotationResult.java` + `mapper/AnnotationResultMapper.java` + +### 任务管理服务与控制器 + +- [ ] T036 [US3] 实现 `TaskClaimService.claim()`(① Redis `SET NX task:claim:{taskId}` TTL 30s,失败抛 `TASK_CLAIMED`;② DB `claimTask()` 影响行数为 0 时抛 `TASK_CLAIMED`;③ `insertHistory(UNCLAIMED→IN_PROGRESS)`)和 `unclaim()`(StateValidator + 清 Redis 锁 + 历史)和 `reclaim()`(校验 REJECTED + claimedBy = 当前用户 + REJECTED→IN_PROGRESS + 历史)— `src/main/java/com/label/module/task/service/TaskClaimService.java` +- [ ] T037 [US3] 实现 `TaskService`(`createTask`、`getPool`(按角色过滤:ANNOTATOR→UNCLAIMED/EXTRACTION;REVIEWER→SUBMITTED)、`getMine`(含 IN_PROGRESS/SUBMITTED/REJECTED)、`getPendingReview`(SUBMITTED,分页)、`getById`、`reassign`(ADMIN,仅更新 claimedBy + 历史))— `src/main/java/com/label/module/task/service/TaskService.java` +- [ ] T038 [US3] 实现 `TaskController`(10 个端点:`POST /api/tasks`、`GET /api/tasks/pool`、`POST /api/tasks/{id}/claim`、`POST /api/tasks/{id}/unclaim`、`GET /api/tasks/mine`、`POST /api/tasks/{id}/reclaim`、`GET /api/tasks/pending-review`、`GET /api/tasks/{id}`、`GET /api/tasks`、`PUT /api/tasks/{id}/reassign`)— `src/main/java/com/label/module/task/controller/TaskController.java` + +### 提取标注服务与控制器 + +- [ ] T039 [US3] 实现 `ExtractionService.aiPreAnnotate()`(调用 `AiServiceClient.extractText/extractImage`,写入 `annotation_result`)和 `updateResult()`(整体覆盖 `result_json`,校验 JSON 格式)— `src/main/java/com/label/module/annotation/service/ExtractionService.java` +- [ ] T040 [US3] 实现 
`ExtractionService.submit()`(`@Transactional`:IN_PROGRESS→SUBMITTED + `submitted_at` + insertHistory)— `src/main/java/com/label/module/annotation/service/ExtractionService.java` +- [ ] T041 [US4] 创建 `ExtractionApprovedEvent`(携带 `taskId`、`sourceId`、`sourceType`、`companyId`)— `src/main/java/com/label/module/annotation/event/ExtractionApprovedEvent.java` +- [ ] T042 [US4] 实现 `ExtractionService.approve()`(`@Transactional`:① 自审校验;② `is_final=true`;③ SUBMITTED→APPROVED + `completedAt` + 历史;④ `publishEvent(ExtractionApprovedEvent)`;AI 调用禁止在此事务内执行)— `src/main/java/com/label/module/annotation/service/ExtractionService.java` +- [ ] T043 [US4] 实现 `ExtractionApprovedEventListener`(`@TransactionalEventListener(AFTER_COMMIT)` + `@Transactional(REQUIRES_NEW)`:调用 AI 生成候选问答对 → 写 `training_dataset`(PENDING_REVIEW)→ 创建 QA_GENERATION 任务(UNCLAIMED)→ `source_data` 状态→ QA_REVIEW)— `src/main/java/com/label/module/annotation/service/ExtractionApprovedEventListener.java` +- [ ] T044 [US4] 实现 `ExtractionService.reject()`(`@Transactional`:① 自审校验;② StateValidator;③ SUBMITTED→REJECTED + 历史)— `src/main/java/com/label/module/annotation/service/ExtractionService.java` +- [ ] T045 [US4] 实现 `ExtractionController`(5 个端点:`GET /api/extraction/{taskId}`、`PUT /api/extraction/{taskId}`、`POST /api/extraction/{taskId}/submit`、`POST /api/extraction/{taskId}/approve`、`POST /api/extraction/{taskId}/reject`)— `src/main/java/com/label/module/annotation/controller/ExtractionController.java` + +### 集成测试 + +- [ ] T046 [US3] 并发集成测试:10 个线程同时争抢同一 UNCLAIMED 任务,验证恰好 1 人成功、其余均收到 `TASK_CLAIMED` 错误、DB 中 `claimed_by` 唯一 — `src/test/java/com/label/integration/TaskClaimConcurrencyTest.java` +- [ ] T047 [US4] 集成测试:审批通过 → QA 任务自动出现在任务池;自审返回 `SELF_REVIEW_FORBIDDEN` 403;驳回后标注员可重领并再次提交 — `src/test/java/com/label/integration/ExtractionApprovalIntegrationTest.java` + +**检查点**: US3+US4 独立可测试 — 完整提取流水线(领取→标注→提交→审批→QA任务自动创建)可用 + +--- + +## Phase 6: 用户故事 5 — 问答生成阶段标注与审批(优先级: P2) + +**目标**: 标注员领取 QA 任务、修改候选问答对并提交;审批员通过后训练样本入库,整条流水线完成 + 
+**独立测试**: 领取 QA 任务 → 修改问答对 → 提交 → 审批通过 → training_dataset 状态 APPROVED,source_data 状态 APPROVED + +- [ ] T048 [P] [US5] 创建 `TrainingDataset` 实体 + `TrainingDatasetMapper`(含 `approveByTaskId`、`deleteByTaskId` 方法)— `src/main/java/com/label/module/annotation/entity/TrainingDataset.java` + `mapper/TrainingDatasetMapper.java` +- [ ] T049 [US5] 实现 `QaService.updateResult()`(整体覆盖问答对 JSONB)和 `submit()`(`@Transactional`:IN_PROGRESS→SUBMITTED + 历史)— `src/main/java/com/label/module/annotation/service/QaService.java` +- [ ] T050 [US5] 实现 `QaService.approve()`(`@Transactional`:① `validateAndGetTask` 先于一切 DB 写入;② 自审校验;③ `training_dataset` → APPROVED;④ `annotation_task` → APPROVED + 历史;⑤ `source_data` → APPROVED)— `src/main/java/com/label/module/annotation/service/QaService.java` +- [ ] T051 [US5] 实现 `QaService.reject()`(`@Transactional`:① 自审校验;② `deleteByTaskId` 清除候选问答对;③ SUBMITTED→REJECTED + 历史;④ `source_data` 保持 QA_REVIEW 不变)— `src/main/java/com/label/module/annotation/service/QaService.java` +- [ ] T052 [US5] 实现 `QaController`(5 个端点:`GET /api/qa/{taskId}`、`PUT /api/qa/{taskId}`、`POST /api/qa/{taskId}/submit`、`POST /api/qa/{taskId}/approve`、`POST /api/qa/{taskId}/reject`)— `src/main/java/com/label/module/annotation/controller/QaController.java` +- [ ] T053 [US5] 集成测试:QA 审批通过 → `training_dataset.status = APPROVED`,`source_data.status = APPROVED`;QA 驳回 → 候选记录被删除,标注员可重领 — `src/test/java/com/label/integration/QaApprovalIntegrationTest.java` + +**检查点**: US5 独立可测试 — 完整 QA 流水线可用,training_dataset 产出验证通过 + +--- + +## Phase 7: 用户故事 6 — 训练数据导出与微调提交(优先级: P2) + +**目标**: 管理员将已审批样本批量导出为 JSONL,并可提交 GLM 微调任务 + +**独立测试**: 选取已审批样本 → 创建批次 → RustFS 中存在 JSONL 文件 → 提交微调 → 可查询状态 + +- [ ] T054 [P] [US6] 创建 `ExportBatch` 实体 + `ExportBatchMapper` — `src/main/java/com/label/module/export/entity/ExportBatch.java` + `mapper/ExportBatchMapper.java` +- [ ] T055 [US6] 实现 `ExportService.createBatch()`(`@Transactional`:① 校验全部样本为 APPROVED;② 生成 JSONL(每行一个 `glm_format_json`);③ 上传 RustFS 
`finetune-export/export/{batchUuid}.jsonl`;④ 批量更新 `export_batch_id`/`exported_at`;⑤ 插入 `export_batch` 记录)— `src/main/java/com/label/module/export/service/ExportService.java` +- [ ] T056 [US6] 实现 `FinetuneService`:`trigger()`(调用 `AiServiceClient.startFinetune()`,更新 `glm_job_id` 和 `finetune_status = RUNNING`)和 `getStatus()`(调用 `AiServiceClient.getFinetuneStatus()`)— `src/main/java/com/label/module/export/service/FinetuneService.java` +- [ ] T057 [US6] 实现 `ExportController`(`GET /api/training/samples`、`POST /api/export/batch`、`POST /api/export/{batchId}/finetune`、`GET /api/export/{batchId}/status`、`GET /api/export/list`;全部 `@RequiresRoles("ADMIN")`)— `src/main/java/com/label/module/export/controller/ExportController.java` +- [ ] T058 [US6] 集成测试:成功创建批次后 JSONL 文件存在于 RustFS;包含非 APPROVED 样本时返回 `INVALID_SAMPLES` 400 — `src/test/java/com/label/integration/ExportIntegrationTest.java` + +**检查点**: US6 独立可测试 — 导出批次创建和微调提交流程可用 + +--- + +## Phase 8: 用户故事 7 — 用户与权限管理(优先级: P2) + +**目标**: 管理员可以创建用户、变更角色(立即生效)、禁用账号(立即失效) + +**独立测试**: 创建标注员用户 → 验证其能领取任务 → 升为审批员 → 验证立即可以审批 → 禁用账号 → 已有 Token 立即失效 + +- [ ] T059 [US7] 实现 `UserService`:`createUser()`(BCrypt 哈希密码,强度因子 ≥ 10);`updateUser()`;`updateRole()`(DB 写入后立即 `redisTemplate.delete(userPermKey(userId))`);`updateStatus()`(禁用时删 Redis Token + 权限缓存)— `src/main/java/com/label/module/user/service/UserService.java` +- [ ] T060 [US7] 实现 `UserController`(`GET /api/users`、`POST /api/users`、`PUT /api/users/{id}`、`PUT /api/users/{id}/status`、`PUT /api/users/{id}/role`;全部 `@RequiresRoles("ADMIN")`)— `src/main/java/com/label/module/user/controller/UserController.java` +- [ ] T061 [US7] 集成测试:变更角色后权限下一次请求立即生效(无需重新登录);禁用账号后现有 Token 下一次请求立即返回 401 — `src/test/java/com/label/integration/UserManagementIntegrationTest.java` + +**检查点**: US7 独立可测试 — 用户管理和即时权限变更可用 + +--- + +## Phase 9: 用户故事 8 — 视频处理与系统配置(优先级: P3) + +**目标**: 上传视频后触发异步预处理(帧提取/转文字);AI 回调幂等处理;管理员可配置 Prompt 模板等系统参数 + +**独立测试(视频)**: 上传视频 → 创建处理任务 → 模拟成功回调 → annotation_task 出现在任务池;重复成功回调 → 任务数量不增加 
+**独立测试(配置)**: 为公司设置专属 Prompt → 验证该公司使用新值;其他公司使用全局默认 + +- [ ] T062 [P] [US8] 创建 `VideoProcessJob` 实体 + `VideoProcessJobMapper` — `src/main/java/com/label/module/video/entity/VideoProcessJob.java` + `mapper/VideoProcessJobMapper.java` +- [ ] T063 [P] [US8] 创建 `SysConfig` 实体 + `SysConfigMapper`(含 `selectByCompanyAndKey(companyId, configKey)` 方法,支持 `companyId IS NULL` 查询)— `src/main/java/com/label/module/config/entity/SysConfig.java` + `mapper/SysConfigMapper.java` +- [ ] T064 [US8] 实现 `VideoProcessService`:`createJob()`(`@Transactional`:`source_data.status → PREPROCESSING` + 插入 job + 触发 AI 异步调用);`handleCallback()`(`@Transactional`:幂等检查 status==SUCCESS 则 return;成功 → SUCCESS + `source_data.status → PENDING`;失败 → 按 retry_count 决定 RETRYING 或 FAILED);`reset()`(FAILED → PENDING)— `src/main/java/com/label/module/video/service/VideoProcessService.java` +- [ ] T065 [US8] 实现 `VideoController`(`POST /api/video/process`、`GET /api/video/jobs/{jobId}`、`POST /api/video/jobs/{jobId}/reset`、`POST /api/video/callback`(内部接口,IP 白名单或服务密钥保护))— `src/main/java/com/label/module/video/controller/VideoController.java` +- [ ] T066 [US8] 实现 `SysConfigService.get(configKey)`(先按 `(companyId, key)` 查;未命中按 `(NULL, key)` 查全局默认)和 `update(key, value)`(UPSERT:公司级配置不存在则创建,存在则覆盖)— `src/main/java/com/label/module/config/service/SysConfigService.java` +- [ ] T067 [US8] 实现 `SysConfigController`(`GET /api/config`(合并公司级 + 全局,标注 scope)、`PUT /api/config/{key}`;均 `@RequiresRoles("ADMIN")`)— `src/main/java/com/label/module/config/controller/SysConfigController.java` +- [ ] T068 [US8] 集成测试:同一 jobId 两次成功回调,`annotation_task` 记录数为 1(幂等);达最大重试次数后 status = FAILED — `src/test/java/com/label/integration/VideoCallbackIdempotencyTest.java` +- [ ] T069 [US8] 集成测试:公司级配置覆盖同 Key 的全局默认;其他公司读取全局默认 — `src/test/java/com/label/integration/SysConfigIntegrationTest.java` + +**检查点**: US8 独立可测试 — 视频处理幂等和配置管理可用 + +--- + +## Phase 10: 收尾与横切关注点 + +**目标**: 多租户隔离验证、整体合规检查、快速启动验证 + +- [ ] T070 集成测试:`MultiTenantIsolationTest`(公司 A 身份查询公司 B 
的资料/任务 → 返回空列表或 404,不泄露数据)— `src/test/java/com/label/integration/MultiTenantIsolationTest.java`
+- [ ] T071 [P] 代码审查:检查所有 Controller 方法返回值均为 `Result<T>` 或 `Result<PageResult<T>>`,无裸 POJO 或裸 List 返回
+- [ ] T072 [P] 代码审查:检查所有列表查询方法均含分页参数(`page`/`pageSize`),无 `selectAll()` 或不分页的查询
+- [ ] T073 [P] 代码审查:检查 `sys_operation_log` 相关代码,确认应用层零处 UPDATE 或 DELETE
+- [ ] T074 [P] 代码审查:检查所有 `@Transactional` 方法内无 `AiServiceClient` 的同步 HTTP 调用(审批触发 AI 必须通过 `@TransactionalEventListener`)
+- [ ] T075 运行 `quickstart.md` 端到端验证:`docker compose up -d` → 登录 → 上传文件 → 创建任务 → 领取 → 提交 → 审批通过 → 确认 QA 任务出现
+
+---
+
+## 依赖关系与执行顺序
+
+### 阶段依赖
+
+```
+Phase 1(初始化)
+  ↓
+Phase 2(基础设施)[全部完成后解锁所有用户故事]
+  ↓
+Phase 3(US1 认证) ← 可与 Phase 4/5/6/7/8/9 并行
+Phase 4(US2 上传) ← 依赖 Phase 2,独立于其他用户故事
+Phase 5(US3+4 提取) ← 依赖 Phase 2(上传已有资料的集成测试依赖 US2)
+Phase 6(US5 QA) ← 依赖 Phase 5 完成(QA 任务由提取审批自动创建)
+Phase 7(US6 导出) ← 依赖 Phase 6 完成(需要 APPROVED 的 training_dataset)
+Phase 8(US7 用户管理) ← 依赖 Phase 3(UserService 在 AuthService 基础上扩展)
+Phase 9(US8 视频+配置) ← 依赖 Phase 2,其余独立
+  ↓
+Phase 10(收尾)
+```
+
+### 用户故事间依赖
+
+- **US1(认证)**: 仅依赖 Phase 2,完全独立
+- **US2(上传)**: 仅依赖 Phase 2,完全独立
+- **US3+4(提取)**: 依赖 Phase 2;集成测试中使用已上传资料需 US2
+- **US5(QA)**: 依赖 US3+4(QA 任务来源于提取阶段审批通过的级联触发)
+- **US6(导出)**: 依赖 US5(需要 APPROVED 状态的 training_dataset)
+- **US7(用户管理)**: 依赖 US1(UserService 扩展 AuthService 的用户实体)
+- **US8(视频+配置)**: 仅依赖 Phase 2
+
+### 阶段内并行机会
+
+- Phase 2:T007-T010、T012-T015、T018-T019 均可并行(独立文件)
+- Phase 3:T024、T025 可并行(独立文件)
+- Phase 5:T033、T034、T035 可并行(独立文件)
+- Phase 9:T062、T063 可并行(独立文件)
+- Phase 10:T071-T074 全部可并行(仅代码审查,无文件修改)
+
+---
+
+## 并行执行示例
+
+### Phase 2 基础设施并行
+
+```
+同时启动:
+  任务: "创建 BusinessException、GlobalExceptionHandler — common/exception/" [T007]
+  任务: "创建 CompanyContext(ThreadLocal)— common/context/" [T008]
+  任务: "创建 RustFsClient — common/storage/" [T018]
+  任务: "创建 AiServiceClient — common/ai/" [T019]
+  任务: "创建 SourceStatus 枚举" [T012]
+  任务: "创建 TaskStatus 枚举" [T013]
+```
+
+### Phase 5 提取阶段并行
+
+```
+同时启动(实体/Mapper):
+  任务: "创建 
AnnotationTask 实体 + Mapper" [T033] + 任务: "创建 AnnotationTaskHistory 实体 + Mapper" [T034] + 任务: "创建 AnnotationResult 实体 + Mapper" [T035] +``` + +--- + +## 实施策略 + +### MVP 优先(仅用户故事 1) + +1. 完成 Phase 1(初始化) +2. 完成 Phase 2(基础设施)— **关键,阻塞所有故事** +3. 完成 Phase 3(US1 认证) +4. **停止并验证**: 登录/退出/权限校验全流程可用 +5. 可以独立部署演示认证功能 + +### 增量交付 + +1. Phase 1 + Phase 2 → 基础就绪 +2. Phase 3(US1)→ 验证 → 演示(MVP) +3. Phase 4(US2)→ 验证 → 演示(上传功能) +4. Phase 5(US3+4)→ 验证 → 演示(标注流程) +5. Phase 6(US5)→ 验证 → 演示(完整双阶段流水线) +6. Phase 7(US6)→ 验证 → 演示(训练数据产出) +7. Phase 8+9 → 验证 → 演示(完整平台) +8. Phase 10 → 收尾 + +### 多人协作策略 + +Phase 2 完成后: +- 开发者 A:Phase 3(US1 认证)+ Phase 8(US7 用户管理) +- 开发者 B:Phase 4(US2 上传)+ Phase 5(US3+4 提取) +- 开发者 C:Phase 9(US8 视频+配置) + +Phase 5 完成后: +- 开发者 A/B 合力:Phase 6(US5 QA)→ Phase 7(US6 导出) + +--- + +## 说明 + +- `[P]` 任务 = 不同文件,无依赖,可并行 +- `[USn]` 标签将任务映射到具体用户故事,便于追踪 +- 每个用户故事应独立可完成和可测试 +- 每完成一个阶段后提交 git commit +- 在每个检查点停下来独立验证该用户故事 +- 避免:模糊任务、同文件并发冲突、破坏独立性的跨故事依赖 diff --git a/sql/init.sql b/sql/init.sql new file mode 100644 index 0000000..1824039 --- /dev/null +++ b/sql/init.sql @@ -0,0 +1,332 @@ +-- label_backend init.sql +-- PostgreSQL 14+ +-- 按依赖顺序建全部 11 张表: +-- sys_company → sys_user → source_data → annotation_task → annotation_result +-- → training_dataset → export_batch → sys_config → sys_operation_log +-- → annotation_task_history → video_process_job +-- 含所有索引及初始配置数据 + +-- ============================================================ +-- 扩展 +-- ============================================================ +CREATE EXTENSION IF NOT EXISTS pgcrypto; + +-- ============================================================ +-- 1. 
sys_company(租户) +-- ============================================================ +CREATE TABLE IF NOT EXISTS sys_company ( + id BIGSERIAL PRIMARY KEY, + company_name VARCHAR(100) NOT NULL, + company_code VARCHAR(50) NOT NULL, + status VARCHAR(10) NOT NULL DEFAULT 'ACTIVE', -- ACTIVE / DISABLED + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW(), + CONSTRAINT uk_sys_company_name UNIQUE (company_name), + CONSTRAINT uk_sys_company_code UNIQUE (company_code) +); + +-- ============================================================ +-- 2. sys_user(用户) +-- ============================================================ +CREATE TABLE IF NOT EXISTS sys_user ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT NOT NULL REFERENCES sys_company(id), + username VARCHAR(50) NOT NULL, + password_hash VARCHAR(255) NOT NULL, -- BCrypt, strength >= 10 + real_name VARCHAR(50), + role VARCHAR(20) NOT NULL, -- UPLOADER / ANNOTATOR / REVIEWER / ADMIN + status VARCHAR(10) NOT NULL DEFAULT 'ACTIVE', -- ACTIVE / DISABLED + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW(), + CONSTRAINT uk_sys_user_company_username UNIQUE (company_id, username) +); + +CREATE INDEX IF NOT EXISTS idx_sys_user_company_id + ON sys_user (company_id); + +-- ============================================================ +-- 3. 
source_data(原始资料) +-- ============================================================ +CREATE TABLE IF NOT EXISTS source_data ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT NOT NULL REFERENCES sys_company(id), + uploader_id BIGINT REFERENCES sys_user(id), + data_type VARCHAR(20) NOT NULL, -- TEXT / IMAGE / VIDEO + file_path VARCHAR(500) NOT NULL, -- RustFS object path + file_name VARCHAR(255) NOT NULL, + file_size BIGINT, + bucket_name VARCHAR(100) NOT NULL, + parent_source_id BIGINT REFERENCES source_data(id), -- 视频帧 / 文本片段 + status VARCHAR(20) NOT NULL DEFAULT 'PENDING', + -- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED + reject_reason TEXT, -- 保留字段(当前无 REJECTED 状态) + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_source_data_company_id + ON source_data (company_id); +CREATE INDEX IF NOT EXISTS idx_source_data_company_status + ON source_data (company_id, status); +CREATE INDEX IF NOT EXISTS idx_source_data_parent_source_id + ON source_data (parent_source_id); + +-- ============================================================ +-- 4. 
annotation_task(标注任务)
+-- ============================================================
+CREATE TABLE IF NOT EXISTS annotation_task (
+    id              BIGSERIAL PRIMARY KEY,
+    company_id      BIGINT NOT NULL REFERENCES sys_company(id),
+    source_id       BIGINT NOT NULL REFERENCES source_data(id),
+    task_type       VARCHAR(30) NOT NULL,          -- EXTRACTION / QA_GENERATION
+    status          VARCHAR(20) NOT NULL DEFAULT 'UNCLAIMED',
+    -- UNCLAIMED / IN_PROGRESS / SUBMITTED / APPROVED / REJECTED
+    claimed_by      BIGINT REFERENCES sys_user(id),
+    claimed_at      TIMESTAMP,
+    submitted_at    TIMESTAMP,
+    completed_at    TIMESTAMP,
+    is_final        BOOLEAN NOT NULL DEFAULT FALSE, -- true 即 APPROVED 且无需再审
+    ai_model        VARCHAR(50),
+    reject_reason   TEXT,
+    created_at      TIMESTAMP NOT NULL DEFAULT NOW(),
+    updated_at      TIMESTAMP NOT NULL DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_annotation_task_company_status
+    ON annotation_task (company_id, status);
+CREATE INDEX IF NOT EXISTS idx_annotation_task_source_id
+    ON annotation_task (source_id);
+CREATE INDEX IF NOT EXISTS idx_annotation_task_claimed_by
+    ON annotation_task (claimed_by);
+
+-- ============================================================
+-- 5. annotation_result(标注结果,JSONB)
+-- ============================================================
+CREATE TABLE IF NOT EXISTS annotation_result (
+    id              BIGSERIAL NOT NULL,
+    task_id         BIGINT NOT NULL REFERENCES annotation_task(id),
+    company_id      BIGINT NOT NULL REFERENCES sys_company(id),
+    result_json     JSONB NOT NULL DEFAULT '[]'::jsonb, -- 整体替换语义
+    created_at      TIMESTAMP NOT NULL DEFAULT NOW(),
+    updated_at      TIMESTAMP NOT NULL DEFAULT NOW(),
+    CONSTRAINT pk_annotation_result PRIMARY KEY (id),
+    CONSTRAINT uk_annotation_result_task_id UNIQUE (task_id)
+);
+
+-- NOTE(review): no separate index on task_id is needed here — the
+-- uk_annotation_result_task_id UNIQUE constraint already creates one.
+CREATE INDEX IF NOT EXISTS idx_annotation_result_company_id
+    ON annotation_result (company_id);
+
+-- ============================================================
+-- 6. 
training_dataset(训练数据集) +-- export_batch_id FK 在 export_batch 建完后补加 +-- ============================================================ +CREATE TABLE IF NOT EXISTS training_dataset ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT NOT NULL REFERENCES sys_company(id), + task_id BIGINT NOT NULL REFERENCES annotation_task(id), + source_id BIGINT NOT NULL REFERENCES source_data(id), + sample_type VARCHAR(20) NOT NULL, -- TEXT / IMAGE / VIDEO_FRAME + glm_format_json JSONB NOT NULL, -- GLM fine-tune 格式 + status VARCHAR(20) NOT NULL DEFAULT 'PENDING_REVIEW', + -- PENDING_REVIEW / APPROVED / REJECTED + export_batch_id BIGINT, -- 导出后填写,FK 在下方补加 + exported_at TIMESTAMP, + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_training_dataset_company_status + ON training_dataset (company_id, status); +CREATE INDEX IF NOT EXISTS idx_training_dataset_task_id + ON training_dataset (task_id); + +-- ============================================================ +-- 7. 
export_batch(导出批次)
+-- ============================================================
+CREATE TABLE IF NOT EXISTS export_batch (
+    id                BIGSERIAL PRIMARY KEY,
+    company_id        BIGINT NOT NULL REFERENCES sys_company(id),
+    batch_uuid        UUID NOT NULL DEFAULT gen_random_uuid(),
+    sample_count      INT NOT NULL DEFAULT 0,
+    dataset_file_path VARCHAR(500),        -- 导出 JSONL 的 RustFS 路径
+    glm_job_id        VARCHAR(100),        -- GLM fine-tune 任务 ID
+    finetune_status   VARCHAR(20) NOT NULL DEFAULT 'NOT_STARTED',
+    -- NOT_STARTED / RUNNING / COMPLETED / FAILED
+    created_at        TIMESTAMP NOT NULL DEFAULT NOW(),
+    updated_at        TIMESTAMP NOT NULL DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_export_batch_company_id
+    ON export_batch (company_id);
+
+-- 补加 training_dataset.export_batch_id FK
+ALTER TABLE training_dataset
+    ADD CONSTRAINT fk_training_dataset_export_batch
+    FOREIGN KEY (export_batch_id) REFERENCES export_batch(id)
+    NOT VALID; -- 允许已有 NULL 行,不强制回溯校验
+
+-- ============================================================
+-- 8. sys_config(系统配置)
+-- ============================================================
+CREATE TABLE IF NOT EXISTS sys_config (
+    id            BIGSERIAL PRIMARY KEY,
+    company_id    BIGINT REFERENCES sys_company(id), -- NULL = 全局默认
+    config_key    VARCHAR(100) NOT NULL,
+    config_value  TEXT NOT NULL,
+    description   VARCHAR(255),
+    created_at    TIMESTAMP NOT NULL DEFAULT NOW(),
+    updated_at    TIMESTAMP NOT NULL DEFAULT NOW()
+);
+
+-- 公司级配置唯一索引
+CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_company_key
+    ON sys_config (company_id, config_key)
+    WHERE company_id IS NOT NULL;
+
+-- 全局配置唯一索引
+CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_global_key
+    ON sys_config (config_key)
+    WHERE company_id IS NULL;
+
+-- NOTE(review): no extra plain index on (company_id, config_key) is needed —
+-- the two partial UNIQUE indexes above cover both lookup paths.
+
+-- ============================================================
+-- 9. 
sys_operation_log(操作日志,仅追加) +-- ============================================================ +CREATE TABLE IF NOT EXISTS sys_operation_log ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT NOT NULL REFERENCES sys_company(id), + operator_id BIGINT REFERENCES sys_user(id), + operation_type VARCHAR(50) NOT NULL, -- 例如 EXTRACTION_APPROVE / USER_LOGIN + target_id BIGINT, + target_type VARCHAR(50), + detail JSONB, + result VARCHAR(10), -- SUCCESS / FAILURE + error_message TEXT, + operated_at TIMESTAMP NOT NULL DEFAULT NOW() + -- 无 updated_at(仅追加表,永不更新) +); + +CREATE INDEX IF NOT EXISTS idx_sys_operation_log_company_operated_at + ON sys_operation_log (company_id, operated_at); +CREATE INDEX IF NOT EXISTS idx_sys_operation_log_operator_id + ON sys_operation_log (operator_id); + +-- ============================================================ +-- 10. annotation_task_history(任务状态历史,仅追加) +-- ============================================================ +CREATE TABLE IF NOT EXISTS annotation_task_history ( + id BIGSERIAL PRIMARY KEY, + task_id BIGINT NOT NULL REFERENCES annotation_task(id), + company_id BIGINT NOT NULL REFERENCES sys_company(id), + from_status VARCHAR(20), + to_status VARCHAR(20) NOT NULL, + operator_id BIGINT REFERENCES sys_user(id), + operator_role VARCHAR(20), + comment TEXT, + created_at TIMESTAMP NOT NULL DEFAULT NOW() + -- 无 updated_at(仅追加表,永不更新) +); + +CREATE INDEX IF NOT EXISTS idx_annotation_task_history_task_id + ON annotation_task_history (task_id); +CREATE INDEX IF NOT EXISTS idx_annotation_task_history_company_id + ON annotation_task_history (company_id); + +-- ============================================================ +-- 11. 
video_process_job(视频处理作业) +-- ============================================================ +CREATE TABLE IF NOT EXISTS video_process_job ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT NOT NULL REFERENCES sys_company(id), + source_id BIGINT NOT NULL REFERENCES source_data(id), + job_type VARCHAR(30) NOT NULL, -- FRAME_EXTRACT / VIDEO_TO_TEXT + status VARCHAR(20) NOT NULL DEFAULT 'PENDING', + -- PENDING / RUNNING / SUCCESS / FAILED / RETRYING + params JSONB, -- 例如 {"frameInterval": 30, "mode": "FRAME"} + output_path VARCHAR(500), -- 完成后的 RustFS 输出路径 + retry_count INT NOT NULL DEFAULT 0, + max_retries INT NOT NULL DEFAULT 3, + error_message TEXT, + started_at TIMESTAMP, + completed_at TIMESTAMP, + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_video_process_job_company_id + ON video_process_job (company_id); +CREATE INDEX IF NOT EXISTS idx_video_process_job_source_id + ON video_process_job (source_id); +CREATE INDEX IF NOT EXISTS idx_video_process_job_status + ON video_process_job (status); + +-- ============================================================ +-- 初始数据 +-- ============================================================ + +-- 1. 演示公司 +INSERT INTO sys_company (company_name, company_code, status) +VALUES ('演示公司', 'DEMO', 'ACTIVE') +ON CONFLICT DO NOTHING; + +-- 2. 
初始用户(BCrypt strength=10) +-- admin / admin123 +-- reviewer01/ review123 +-- annotator01/annot123 +-- uploader01 / upload123 +INSERT INTO sys_user (company_id, username, password_hash, real_name, role, status) +SELECT + c.id, + u.username, + u.password_hash, + u.real_name, + u.role, + 'ACTIVE' +FROM sys_company c +CROSS JOIN (VALUES + ('admin', + '$2a$10$B8iR5z43URiNPm.eut3JvufIPBuvGx5ZZmqyUqE1A1WdbZppX5bmi', + '管理员', + 'ADMIN'), + ('reviewer01', + '$2a$10$euOJZRfUtYNW7WHpfW1Ciee5b3rjkYFe3yQHT/uCQWrYVc0XQcukm', + '审核员01', + 'REVIEWER'), + ('annotator01', + '$2a$10$8UKwHPNASauKMTrqosR0Reg1X1gkFzFlGa/HBwNLXUELaj4e/zcqu', + '标注员01', + 'ANNOTATOR'), + ('uploader01', + '$2a$10$o2d7jsT31vyxIJHUo50mUefoZLLvGqft97zaL9OQCjRxn9ie1H/1O', + '上传员01', + 'UPLOADER') +) AS u(username, password_hash, real_name, role) +WHERE c.company_code = 'DEMO' +ON CONFLICT (company_id, username) DO NOTHING; + +-- 3. 全局系统配置 +INSERT INTO sys_config (company_id, config_key, config_value, description) +VALUES + (NULL, 'token_ttl_seconds', '7200', + '会话凭证有效期(秒)'), + (NULL, 'model_default', 'glm-4', + 'AI 辅助默认模型'), + (NULL, 'video_frame_interval', '30', + '视频帧提取间隔(帧数)'), + (NULL, 'prompt_extract_text', + '请提取以下文本中的主语-谓语-宾语三元组,以JSON数组格式返回,每个元素包含subject、predicate、object、sourceText、startOffset、endOffset字段。', + '文本三元组提取 Prompt 模板'), + (NULL, 'prompt_extract_image', + '请提取图片中的实体关系四元组,以JSON数组格式返回,每个元素包含subject、relation、object、modifier、confidence字段。', + '图片四元组提取 Prompt 模板'), + (NULL, 'prompt_qa_gen_text', + '根据以下文本三元组生成高质量问答对,以JSON数组格式返回,每个元素包含question、answer、difficulty字段。', + '文本问答生成 Prompt 模板'), + (NULL, 'prompt_qa_gen_image', + '根据以下图片四元组生成高质量问答对,以JSON数组格式返回,每个元素包含question、answer、imageRef、difficulty字段。', + '图片问答生成 Prompt 模板') +ON CONFLICT DO NOTHING; diff --git a/src/main/java/com/label/LabelBackendApplication.java b/src/main/java/com/label/LabelBackendApplication.java new file mode 100644 index 0000000..a3650bd --- /dev/null +++ b/src/main/java/com/label/LabelBackendApplication.java @@ -0,0 +1,12 @@ 
+package com.label; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; + +@SpringBootApplication +public class LabelBackendApplication { + + public static void main(String[] args) { + SpringApplication.run(LabelBackendApplication.class, args); + } +} diff --git a/src/main/java/com/label/common/ai/AiServiceClient.java b/src/main/java/com/label/common/ai/AiServiceClient.java new file mode 100644 index 0000000..93da8f9 --- /dev/null +++ b/src/main/java/com/label/common/ai/AiServiceClient.java @@ -0,0 +1,149 @@ +package com.label.common.ai; + +import lombok.Builder; +import lombok.Data; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; +import org.springframework.web.client.RestClient; + +import jakarta.annotation.PostConstruct; +import java.util.List; +import java.util.Map; + +@Component +public class AiServiceClient { + + @Value("${ai-service.base-url}") + private String baseUrl; + + @Value("${ai-service.timeout:30000}") + private int timeoutMs; + + private RestClient restClient; + + @PostConstruct + public void init() { + restClient = RestClient.builder() + .baseUrl(baseUrl) + .build(); + } + + // DTO classes + + @Data + @Builder + public static class ExtractionRequest { + private Long sourceId; + private String filePath; + private String bucket; + private String model; + private String prompt; + } + + @Data + public static class ExtractionResponse { + private List> items; // triple/quadruple items + private String rawOutput; + } + + @Data + @Builder + public static class VideoProcessRequest { + private Long sourceId; + private String filePath; + private String bucket; + private Map params; // frameInterval, mode etc. 
+ } + + @Data + public static class QaGenResponse { + private List> qaPairs; + } + + @Data + @Builder + public static class FinetuneRequest { + private String datasetPath; // RustFS path to JSONL file + private String model; + private Long batchId; + } + + @Data + public static class FinetuneResponse { + private String jobId; + private String status; + } + + @Data + public static class FinetuneStatusResponse { + private String jobId; + private String status; // PENDING/RUNNING/COMPLETED/FAILED + private Integer progress; // 0-100 + private String errorMessage; + } + + // The 8 endpoints: + + public ExtractionResponse extractText(ExtractionRequest request) { + return restClient.post() + .uri("/extract/text") + .body(request) + .retrieve() + .body(ExtractionResponse.class); + } + + public ExtractionResponse extractImage(ExtractionRequest request) { + return restClient.post() + .uri("/extract/image") + .body(request) + .retrieve() + .body(ExtractionResponse.class); + } + + public void extractFrames(VideoProcessRequest request) { + restClient.post() + .uri("/video/extract-frames") + .body(request) + .retrieve() + .toBodilessEntity(); + } + + public void videoToText(VideoProcessRequest request) { + restClient.post() + .uri("/video/to-text") + .body(request) + .retrieve() + .toBodilessEntity(); + } + + public QaGenResponse genTextQa(ExtractionRequest request) { + return restClient.post() + .uri("/qa/gen-text") + .body(request) + .retrieve() + .body(QaGenResponse.class); + } + + public QaGenResponse genImageQa(ExtractionRequest request) { + return restClient.post() + .uri("/qa/gen-image") + .body(request) + .retrieve() + .body(QaGenResponse.class); + } + + public FinetuneResponse startFinetune(FinetuneRequest request) { + return restClient.post() + .uri("/finetune/start") + .body(request) + .retrieve() + .body(FinetuneResponse.class); + } + + public FinetuneStatusResponse getFinetuneStatus(String jobId) { + return restClient.get() + .uri("/finetune/status/{jobId}", jobId) 
+ .retrieve() + .body(FinetuneStatusResponse.class); + } +} diff --git a/src/main/java/com/label/common/aop/AuditAspect.java b/src/main/java/com/label/common/aop/AuditAspect.java new file mode 100644 index 0000000..a2f0b61 --- /dev/null +++ b/src/main/java/com/label/common/aop/AuditAspect.java @@ -0,0 +1,75 @@ +package com.label.common.aop; + +import com.label.common.context.CompanyContext; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.aspectj.lang.ProceedingJoinPoint; +import org.aspectj.lang.annotation.Around; +import org.aspectj.lang.annotation.Aspect; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.stereotype.Component; + +/** + * AOP aspect for audit logging. + * + * KEY DESIGN DECISIONS: + * 1. Uses JdbcTemplate directly (not MyBatis Mapper) to bypass TenantLineInnerInterceptor + * — operation logs need to capture company_id explicitly, not via thread-local injection + * 2. Written in finally block — audit log is written regardless of business method success/failure + * 3. Audit failures are logged as ERROR but NEVER rethrown — business transactions must not be + * affected by audit failures + * 4. 
Captures result of business method to log SUCCESS or FAILURE + */ +@Slf4j +@Aspect +@Component +@RequiredArgsConstructor +public class AuditAspect { + + private final JdbcTemplate jdbcTemplate; + + @Around("@annotation(operationLog)") + public Object audit(ProceedingJoinPoint joinPoint, OperationLog operationLog) throws Throwable { + Long companyId = CompanyContext.get(); + // operator_id can be obtained from SecurityContext or ThreadLocal in the future + // For now, use null as a safe default when not available + Long operatorId = null; + + String result = "SUCCESS"; + String errorMessage = null; + Object returnValue = null; + + try { + returnValue = joinPoint.proceed(); + } catch (Throwable e) { + result = "FAILURE"; + errorMessage = e.getMessage(); + throw e; // Always rethrow business exceptions + } finally { + // Write audit log in finally block — runs regardless of success or failure + // CRITICAL: Never throw from here — would swallow the original exception + try { + writeAuditLog(companyId, operatorId, operationLog.type(), + operationLog.targetType(), result, errorMessage); + } catch (Exception auditEx) { + // Audit failure must NOT affect business transaction + log.error("审计日志写入失败: type={}, error={}", + operationLog.type(), auditEx.getMessage(), auditEx); + } + } + + return returnValue; + } + + private void writeAuditLog(Long companyId, Long operatorId, String operationType, + String targetType, String result, String errorMessage) { + String sql = """ + INSERT INTO sys_operation_log + (company_id, operator_id, operation_type, target_type, result, error_message, operated_at) + VALUES (?, ?, ?, ?, ?, ?, NOW()) + """; + jdbcTemplate.update(sql, companyId, operatorId, operationType, + targetType.isEmpty() ? 
null : targetType, + result, errorMessage); + } +} diff --git a/src/main/java/com/label/common/aop/OperationLog.java b/src/main/java/com/label/common/aop/OperationLog.java new file mode 100644 index 0000000..8c96a4a --- /dev/null +++ b/src/main/java/com/label/common/aop/OperationLog.java @@ -0,0 +1,18 @@ +package com.label.common.aop; + +import java.lang.annotation.*; + +/** + * Marks a method for audit logging. + * The AuditAspect intercepts this annotation and writes to sys_operation_log. + */ +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@Documented +public @interface OperationLog { + /** Operation type, e.g., "EXTRACTION_APPROVE", "USER_LOGIN", "TASK_CLAIM" */ + String type(); + + /** Target entity type, e.g., "annotation_task", "sys_user" */ + String targetType() default ""; +} diff --git a/src/main/java/com/label/common/config/MybatisPlusConfig.java b/src/main/java/com/label/common/config/MybatisPlusConfig.java new file mode 100644 index 0000000..2b52882 --- /dev/null +++ b/src/main/java/com/label/common/config/MybatisPlusConfig.java @@ -0,0 +1,57 @@ +package com.label.common.config; + +import com.baomidou.mybatisplus.extension.plugins.MybatisPlusInterceptor; +import com.baomidou.mybatisplus.extension.plugins.handler.TenantLineHandler; +import com.baomidou.mybatisplus.extension.plugins.inner.PaginationInnerInterceptor; +import com.baomidou.mybatisplus.extension.plugins.inner.TenantLineInnerInterceptor; +import com.label.common.context.CompanyContext; +import net.sf.jsqlparser.expression.Expression; +import net.sf.jsqlparser.expression.LongValue; +import net.sf.jsqlparser.expression.NullValue; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import java.util.Arrays; +import java.util.List; + +@Configuration +public class MybatisPlusConfig { + + // Tables that do NOT need tenant isolation (either global or tenant root tables) + private static final List IGNORED_TABLES = 
Arrays.asList( + "sys_company", // the tenant root table itself + "sys_config" // has company_id=NULL for global defaults; service handles this manually + ); + + @Bean + public MybatisPlusInterceptor mybatisPlusInterceptor() { + MybatisPlusInterceptor interceptor = new MybatisPlusInterceptor(); + + // 1. Tenant isolation - auto-injects WHERE company_id = ? + interceptor.addInnerInterceptor(new TenantLineInnerInterceptor(new TenantLineHandler() { + @Override + public Expression getTenantId() { + Long companyId = CompanyContext.get(); + if (companyId == null) { + return new NullValue(); + } + return new LongValue(companyId); + } + + @Override + public String getTenantIdColumn() { + return "company_id"; + } + + @Override + public boolean ignoreTable(String tableName) { + return IGNORED_TABLES.contains(tableName); + } + })); + + // 2. Pagination interceptor (required for MyBatis Plus Page queries) + interceptor.addInnerInterceptor(new PaginationInnerInterceptor()); + + return interceptor; + } +} diff --git a/src/main/java/com/label/common/config/RedisConfig.java b/src/main/java/com/label/common/config/RedisConfig.java new file mode 100644 index 0000000..d2394b9 --- /dev/null +++ b/src/main/java/com/label/common/config/RedisConfig.java @@ -0,0 +1,24 @@ +package com.label.common.config; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.data.redis.connection.RedisConnectionFactory; +import org.springframework.data.redis.core.RedisTemplate; +import org.springframework.data.redis.serializer.StringRedisSerializer; + +@Configuration +public class RedisConfig { + + @Bean + public RedisTemplate redisTemplate(RedisConnectionFactory connectionFactory) { + RedisTemplate template = new RedisTemplate<>(); + template.setConnectionFactory(connectionFactory); + StringRedisSerializer serializer = new StringRedisSerializer(); + template.setKeySerializer(serializer); + 
template.setValueSerializer(serializer); + template.setHashKeySerializer(serializer); + template.setHashValueSerializer(serializer); + template.afterPropertiesSet(); + return template; + } +} diff --git a/src/main/java/com/label/common/context/CompanyContext.java b/src/main/java/com/label/common/context/CompanyContext.java new file mode 100644 index 0000000..1606633 --- /dev/null +++ b/src/main/java/com/label/common/context/CompanyContext.java @@ -0,0 +1,21 @@ +package com.label.common.context; + +public class CompanyContext { + private static final ThreadLocal COMPANY_ID = new ThreadLocal<>(); + + public static void set(Long companyId) { + COMPANY_ID.set(companyId); + } + + public static Long get() { + return COMPANY_ID.get(); + } + + public static void clear() { + COMPANY_ID.remove(); // Use remove() not set(null) to prevent memory leaks + } + + private CompanyContext() { // Prevent instantiation + throw new UnsupportedOperationException("Utility class"); + } +} diff --git a/src/main/java/com/label/common/exception/BusinessException.java b/src/main/java/com/label/common/exception/BusinessException.java new file mode 100644 index 0000000..1e7aede --- /dev/null +++ b/src/main/java/com/label/common/exception/BusinessException.java @@ -0,0 +1,22 @@ +package com.label.common.exception; + +import lombok.Getter; +import org.springframework.http.HttpStatus; + +@Getter +public class BusinessException extends RuntimeException { + private final String code; + private final HttpStatus httpStatus; + + public BusinessException(String code, String message) { + super(message); + this.code = code; + this.httpStatus = HttpStatus.BAD_REQUEST; + } + + public BusinessException(String code, String message, HttpStatus httpStatus) { + super(message); + this.code = code; + this.httpStatus = httpStatus; + } +} diff --git a/src/main/java/com/label/common/exception/GlobalExceptionHandler.java b/src/main/java/com/label/common/exception/GlobalExceptionHandler.java new file mode 100644 index 
0000000..327e4e4 --- /dev/null +++ b/src/main/java/com/label/common/exception/GlobalExceptionHandler.java @@ -0,0 +1,28 @@ +package com.label.common.exception; + +import com.label.common.result.Result; +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.ExceptionHandler; +import org.springframework.web.bind.annotation.RestControllerAdvice; + +@Slf4j +@RestControllerAdvice +public class GlobalExceptionHandler { + + @ExceptionHandler(BusinessException.class) + public ResponseEntity> handleBusinessException(BusinessException e) { + log.warn("业务异常: code={}, message={}", e.getCode(), e.getMessage()); + return ResponseEntity + .status(e.getHttpStatus()) + .body(Result.failure(e.getCode(), e.getMessage())); + } + + @ExceptionHandler(Exception.class) + public ResponseEntity> handleException(Exception e) { + log.error("系统异常", e); + return ResponseEntity + .internalServerError() + .body(Result.failure("INTERNAL_ERROR", "系统内部错误")); + } +} diff --git a/src/main/java/com/label/common/redis/RedisKeyManager.java b/src/main/java/com/label/common/redis/RedisKeyManager.java new file mode 100644 index 0000000..d0286b1 --- /dev/null +++ b/src/main/java/com/label/common/redis/RedisKeyManager.java @@ -0,0 +1,25 @@ +package com.label.common.redis; + +/** + * Centralized Redis key naming conventions. 
+ * All keys follow the pattern: prefix:{id} + */ +public final class RedisKeyManager { + + private RedisKeyManager() {} + + /** Session token key: token:{uuid} */ + public static String tokenKey(String uuid) { + return "token:" + uuid; + } + + /** User permission cache key: user:perm:{userId} */ + public static String userPermKey(Long userId) { + return "user:perm:" + userId; + } + + /** Task claim distributed lock key: task:claim:{taskId} */ + public static String taskClaimKey(Long taskId) { + return "task:claim:" + taskId; + } +} diff --git a/src/main/java/com/label/common/redis/RedisService.java b/src/main/java/com/label/common/redis/RedisService.java new file mode 100644 index 0000000..af305ae --- /dev/null +++ b/src/main/java/com/label/common/redis/RedisService.java @@ -0,0 +1,61 @@ +package com.label.common.redis; + +import lombok.RequiredArgsConstructor; +import org.springframework.data.redis.core.RedisTemplate; +import org.springframework.stereotype.Service; + +import java.util.Map; +import java.util.concurrent.TimeUnit; + +@Service +@RequiredArgsConstructor +public class RedisService { + + private final RedisTemplate redisTemplate; + + // String operations + + public void set(String key, String value, long ttlSeconds) { + redisTemplate.opsForValue().set(key, value, ttlSeconds, TimeUnit.SECONDS); + } + + public String get(String key) { + return redisTemplate.opsForValue().get(key); + } + + public void delete(String key) { + redisTemplate.delete(key); + } + + public boolean exists(String key) { + return Boolean.TRUE.equals(redisTemplate.hasKey(key)); + } + + /** Set if absent (NX). Returns true if key was set (lock acquired). */ + public boolean setIfAbsent(String key, String value, long ttlSeconds) { + Boolean result = redisTemplate.opsForValue() + .setIfAbsent(key, value, ttlSeconds, TimeUnit.SECONDS); + return Boolean.TRUE.equals(result); + } + + /** Refresh TTL on an existing key (sliding expiration). 
*/ + public void expire(String key, long ttlSeconds) { + redisTemplate.expire(key, ttlSeconds, TimeUnit.SECONDS); + } + + // Hash operations (for token storage: token:{uuid} → Hash) + + public void hSetAll(String key, Map entries, long ttlSeconds) { + redisTemplate.opsForHash().putAll(key, entries); + redisTemplate.expire(key, ttlSeconds, TimeUnit.SECONDS); + } + + public Map hGetAll(String key) { + return redisTemplate.opsForHash().entries(key); + } + + public String hGet(String key, String field) { + Object val = redisTemplate.opsForHash().get(key, field); + return val != null ? val.toString() : null; + } +} diff --git a/src/main/java/com/label/common/result/PageResult.java b/src/main/java/com/label/common/result/PageResult.java new file mode 100644 index 0000000..aeeeaf7 --- /dev/null +++ b/src/main/java/com/label/common/result/PageResult.java @@ -0,0 +1,22 @@ +package com.label.common.result; + +import lombok.Data; + +import java.util.List; + +@Data +public class PageResult { + private List items; + private long total; + private int page; + private int pageSize; + + public static PageResult of(List items, long total, int page, int pageSize) { + PageResult pageResult = new PageResult<>(); + pageResult.setItems(items); + pageResult.setTotal(total); + pageResult.setPage(page); + pageResult.setPageSize(pageSize); + return pageResult; + } +} diff --git a/src/main/java/com/label/common/result/Result.java b/src/main/java/com/label/common/result/Result.java new file mode 100644 index 0000000..0cae155 --- /dev/null +++ b/src/main/java/com/label/common/result/Result.java @@ -0,0 +1,37 @@ +package com.label.common.result; + +import lombok.Data; + +@Data +public class Result { + private String code; + private T data; + private String message; + + public static Result success(T data) { + Result result = new Result<>(); + result.setCode(ResultCode.SUCCESS.name()); + result.setData(data); + return result; + } + + public static Result success() { + Result result = new 
Result<>(); + result.setCode(ResultCode.SUCCESS.name()); + return result; + } + + public static Result failure(ResultCode code, String message) { + Result result = new Result<>(); + result.setCode(code.name()); + result.setMessage(message); + return result; + } + + public static Result failure(String code, String message) { + Result result = new Result<>(); + result.setCode(code); + result.setMessage(message); + return result; + } +} diff --git a/src/main/java/com/label/common/result/ResultCode.java b/src/main/java/com/label/common/result/ResultCode.java new file mode 100644 index 0000000..92ae104 --- /dev/null +++ b/src/main/java/com/label/common/result/ResultCode.java @@ -0,0 +1,19 @@ +package com.label.common.result; + +public enum ResultCode { + SUCCESS, + FAILURE, + UNAUTHORIZED, // 401 - no valid token + FORBIDDEN, // 403 - insufficient role + NOT_FOUND, // 404 + CONFLICT, // 409 + INVALID_STATE, // 409 state machine violation + TASK_CLAIMED, // 409 task already claimed + SELF_REVIEW_FORBIDDEN, // 403 self-review prevention + UNKNOWN_CONFIG_KEY, // 400 unknown config key + INVALID_SAMPLES, // 400 invalid export samples + EMPTY_SAMPLES, // 400 empty sample list + FINETUNE_ALREADY_STARTED, // 409 fine-tune already started + INVALID_STATE_TRANSITION, // 409 invalid state machine transition + INTERNAL_ERROR // 500 +} diff --git a/src/main/java/com/label/common/shiro/BearerToken.java b/src/main/java/com/label/common/shiro/BearerToken.java new file mode 100644 index 0000000..5febfc9 --- /dev/null +++ b/src/main/java/com/label/common/shiro/BearerToken.java @@ -0,0 +1,26 @@ +package com.label.common.shiro; + +import org.apache.shiro.authc.AuthenticationToken; + +/** + * Shiro AuthenticationToken wrapper for Bearer token strings. 
+ */ +public class BearerToken implements AuthenticationToken { + private final String token; + private final TokenPrincipal principal; + + public BearerToken(String token, TokenPrincipal principal) { + this.token = token; + this.principal = principal; + } + + @Override + public Object getPrincipal() { + return principal; + } + + @Override + public Object getCredentials() { + return token; + } +} diff --git a/src/main/java/com/label/common/shiro/ShiroConfig.java b/src/main/java/com/label/common/shiro/ShiroConfig.java new file mode 100644 index 0000000..b199f5d --- /dev/null +++ b/src/main/java/com/label/common/shiro/ShiroConfig.java @@ -0,0 +1,71 @@ +package com.label.common.shiro; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.label.common.redis.RedisService; +import org.apache.shiro.mgt.SecurityManager; +import org.apache.shiro.realm.Realm; +import org.apache.shiro.spring.web.ShiroFilterFactoryBean; +import org.apache.shiro.web.mgt.DefaultWebSecurityManager; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import jakarta.servlet.Filter; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Shiro security configuration. + * + * Filter chain: + * /api/auth/login → anon (no auth required) + * /api/auth/logout → tokenFilter + * /api/** → tokenFilter (all other API endpoints require auth) + * /actuator/** → anon (health check) + * /** → anon (default) + * + * NOTE: spring.mvc.pathmatch.matching-strategy=ant_path_matcher MUST be set + * in application.yml for Shiro to work correctly with Spring Boot 3. 
+ */ +@Configuration +public class ShiroConfig { + + @Bean + public UserRealm userRealm(RedisService redisService) { + return new UserRealm(redisService); + } + + @Bean + public SecurityManager securityManager(UserRealm userRealm) { + DefaultWebSecurityManager manager = new DefaultWebSecurityManager(); + manager.setRealms(List.of(userRealm)); + return manager; + } + + @Bean + public TokenFilter tokenFilter(RedisService redisService, ObjectMapper objectMapper) { + return new TokenFilter(redisService, objectMapper); + } + + @Bean + public ShiroFilterFactoryBean shiroFilterFactoryBean(SecurityManager securityManager, + TokenFilter tokenFilter) { + ShiroFilterFactoryBean factory = new ShiroFilterFactoryBean(); + factory.setSecurityManager(securityManager); + + // Register custom filters + Map filters = new LinkedHashMap<>(); + filters.put("tokenFilter", tokenFilter); + factory.setFilters(filters); + + // Filter chain definition (ORDER MATTERS - first match wins) + Map filterChainDef = new LinkedHashMap<>(); + filterChainDef.put("/api/auth/login", "anon"); + filterChainDef.put("/actuator/**", "anon"); + filterChainDef.put("/api/**", "tokenFilter"); + filterChainDef.put("/**", "anon"); + factory.setFilterChainDefinitionMap(filterChainDef); + + return factory; + } +} diff --git a/src/main/java/com/label/common/shiro/TokenFilter.java b/src/main/java/com/label/common/shiro/TokenFilter.java new file mode 100644 index 0000000..19ce508 --- /dev/null +++ b/src/main/java/com/label/common/shiro/TokenFilter.java @@ -0,0 +1,95 @@ +package com.label.common.shiro; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.label.common.context.CompanyContext; +import com.label.common.redis.RedisKeyManager; +import com.label.common.redis.RedisService; +import com.label.common.result.Result; +import jakarta.servlet.FilterChain; +import jakarta.servlet.ServletException; +import jakarta.servlet.ServletRequest; +import jakarta.servlet.ServletResponse; +import 
jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.apache.shiro.web.filter.PathMatchingFilter; +import org.springframework.http.MediaType; + +import java.io.IOException; +import java.util.Map; + +/** + * Shiro filter: parses "Authorization: Bearer {uuid}", validates against Redis, + * injects CompanyContext and Shiro subject principals. + * + * KEY DESIGN: + * - CompanyContext.clear() MUST be called in finally block to prevent thread pool leakage + * - Token lookup is from Redis Hash token:{uuid} → {userId, role, companyId, username} + * - 401 on missing/invalid token; filter continues for valid token + */ +@Slf4j +@RequiredArgsConstructor +public class TokenFilter extends PathMatchingFilter { + + private final RedisService redisService; + private final ObjectMapper objectMapper; + + @Override + protected boolean onPreHandle(ServletRequest request, ServletResponse response, Object mappedValue) throws Exception { + HttpServletRequest req = (HttpServletRequest) request; + HttpServletResponse resp = (HttpServletResponse) response; + + String authHeader = req.getHeader("Authorization"); + if (authHeader == null || !authHeader.startsWith("Bearer ")) { + writeUnauthorized(resp, "缺少或无效的认证令牌"); + return false; + } + + String token = authHeader.substring(7).trim(); + String tokenKey = RedisKeyManager.tokenKey(token); + Map tokenData = redisService.hGetAll(tokenKey); + + if (tokenData == null || tokenData.isEmpty()) { + writeUnauthorized(resp, "令牌已过期或不存在"); + return false; + } + + try { + Long userId = Long.parseLong(tokenData.get("userId").toString()); + String role = tokenData.get("role").toString(); + Long companyId = Long.parseLong(tokenData.get("companyId").toString()); + String username = tokenData.get("username").toString(); + + // Inject company context (must be cleared in finally) + CompanyContext.set(companyId); + + // Bind Shiro subject 
with token principal + TokenPrincipal principal = new TokenPrincipal(userId, role, companyId, username, token); + request.setAttribute("__token_principal__", principal); + + return true; + } catch (Exception e) { + log.error("解析 Token 数据失败: {}", e.getMessage()); + writeUnauthorized(resp, "令牌数据格式错误"); + return false; + } + } + + @Override + public void doFilterInternal(ServletRequest request, ServletResponse response, FilterChain chain) + throws ServletException, IOException { + try { + super.doFilterInternal(request, response, chain); + } finally { + // CRITICAL: Always clear ThreadLocal to prevent leakage in thread pool + CompanyContext.clear(); + } + } + + private void writeUnauthorized(HttpServletResponse resp, String message) throws IOException { + resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED); + resp.setContentType(MediaType.APPLICATION_JSON_VALUE + ";charset=UTF-8"); + resp.getWriter().write(objectMapper.writeValueAsString(Result.failure("UNAUTHORIZED", message))); + } +} diff --git a/src/main/java/com/label/common/shiro/TokenPrincipal.java b/src/main/java/com/label/common/shiro/TokenPrincipal.java new file mode 100644 index 0000000..39aa63e --- /dev/null +++ b/src/main/java/com/label/common/shiro/TokenPrincipal.java @@ -0,0 +1,18 @@ +package com.label.common.shiro; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import java.io.Serializable; + +/** + * Shiro principal carrying the authenticated user's session data. 
+ */ +@Getter +@AllArgsConstructor +public class TokenPrincipal implements Serializable { + private final Long userId; + private final String role; + private final Long companyId; + private final String username; + private final String token; +} diff --git a/src/main/java/com/label/common/shiro/UserRealm.java b/src/main/java/com/label/common/shiro/UserRealm.java new file mode 100644 index 0000000..0fb11d9 --- /dev/null +++ b/src/main/java/com/label/common/shiro/UserRealm.java @@ -0,0 +1,87 @@ +package com.label.common.shiro; + +import com.label.common.redis.RedisKeyManager; +import com.label.common.redis.RedisService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.apache.shiro.authc.*; +import org.apache.shiro.authz.AuthorizationInfo; +import org.apache.shiro.authz.SimpleAuthorizationInfo; +import org.apache.shiro.realm.AuthorizingRealm; +import org.apache.shiro.subject.PrincipalCollection; + +/** + * Shiro Realm for role-based authorization using token-based authentication. + * + * Role hierarchy (addInheritedRoles): + * ADMIN ⊃ REVIEWER ⊃ ANNOTATOR ⊃ UPLOADER + * + * Permission lookup order: + * 1. Redis user:perm:{userId} (TTL 5 min) + * 2. 
If miss: use role from TokenPrincipal + */ +@Slf4j +@RequiredArgsConstructor +public class UserRealm extends AuthorizingRealm { + + private static final long PERM_CACHE_TTL = 300L; // 5 minutes + + private final RedisService redisService; + + @Override + public boolean supports(AuthenticationToken token) { + return token instanceof BearerToken; + } + + @Override + protected AuthenticationInfo doGetAuthenticationInfo(AuthenticationToken token) throws AuthenticationException { + // Token validation is done in TokenFilter; this realm only handles authorization + // For authentication, we trust the token that was validated by TokenFilter + return new SimpleAuthenticationInfo(token.getPrincipal(), token.getCredentials(), getName()); + } + + @Override + protected AuthorizationInfo doGetAuthorizationInfo(PrincipalCollection principals) { + TokenPrincipal principal = (TokenPrincipal) principals.getPrimaryPrincipal(); + if (principal == null) { + return new SimpleAuthorizationInfo(); + } + + String role = getRoleFromCacheOrPrincipal(principal); + SimpleAuthorizationInfo info = new SimpleAuthorizationInfo(); + info.addRole(role); + addInheritedRoles(info, role); + return info; + } + + private String getRoleFromCacheOrPrincipal(TokenPrincipal principal) { + String permKey = RedisKeyManager.userPermKey(principal.getUserId()); + String cachedRole = redisService.get(permKey); + if (cachedRole != null && !cachedRole.isEmpty()) { + return cachedRole; + } + // Cache miss: use role from token, then refresh cache + String role = principal.getRole(); + redisService.set(permKey, role, PERM_CACHE_TTL); + return role; + } + + /** + * ADMIN inherits all roles: ADMIN ⊃ REVIEWER ⊃ ANNOTATOR ⊃ UPLOADER + */ + private void addInheritedRoles(SimpleAuthorizationInfo info, String role) { + switch (role) { + case "ADMIN": + info.addRole("REVIEWER"); + // fall through + case "REVIEWER": + info.addRole("ANNOTATOR"); + // fall through + case "ANNOTATOR": + info.addRole("UPLOADER"); + break; + 
default: + break; + } + } +} diff --git a/src/main/java/com/label/common/statemachine/DatasetStatus.java b/src/main/java/com/label/common/statemachine/DatasetStatus.java new file mode 100644 index 0000000..e1eca1c --- /dev/null +++ b/src/main/java/com/label/common/statemachine/DatasetStatus.java @@ -0,0 +1,14 @@ +package com.label.common.statemachine; + +import java.util.Map; +import java.util.Set; + +public enum DatasetStatus { + PENDING_REVIEW, APPROVED, REJECTED; + + public static final Map> TRANSITIONS = Map.of( + PENDING_REVIEW, Set.of(APPROVED, REJECTED), + REJECTED, Set.of(PENDING_REVIEW) // 重新提交审核 + // APPROVED: terminal state + ); +} diff --git a/src/main/java/com/label/common/statemachine/SourceStatus.java b/src/main/java/com/label/common/statemachine/SourceStatus.java new file mode 100644 index 0000000..324d673 --- /dev/null +++ b/src/main/java/com/label/common/statemachine/SourceStatus.java @@ -0,0 +1,15 @@ +package com.label.common.statemachine; + +import java.util.Map; +import java.util.Set; + +public enum SourceStatus { + PENDING, PREPROCESSING, EXTRACTING, QA_REVIEW, APPROVED; + + public static final Map> TRANSITIONS = Map.of( + PENDING, Set.of(EXTRACTING, PREPROCESSING), + PREPROCESSING, Set.of(PENDING), + EXTRACTING, Set.of(QA_REVIEW), + QA_REVIEW, Set.of(APPROVED) + ); +} diff --git a/src/main/java/com/label/common/statemachine/StateValidator.java b/src/main/java/com/label/common/statemachine/StateValidator.java new file mode 100644 index 0000000..cf4385b --- /dev/null +++ b/src/main/java/com/label/common/statemachine/StateValidator.java @@ -0,0 +1,36 @@ +package com.label.common.statemachine; + +import com.label.common.exception.BusinessException; +import org.springframework.http.HttpStatus; + +import java.util.Map; +import java.util.Set; + +/** + * Generic state machine validator. + * Validates state transitions against a predefined transitions map. 
+ */ +public final class StateValidator { + + private StateValidator() {} + + /** + * Assert that a state transition from {@code current} to {@code next} is valid. + * + * @param transitions the allowed transitions map + * @param current the current state + * @param next the desired next state + * @param the state type (enum) + * @throws BusinessException with code INVALID_STATE_TRANSITION if transition not allowed + */ + public static void assertTransition(Map> transitions, S current, S next) { + Set allowed = transitions.get(current); + if (allowed == null || !allowed.contains(next)) { + throw new BusinessException( + "INVALID_STATE_TRANSITION", + String.format("不允许的状态转换: %s → %s", current, next), + HttpStatus.CONFLICT + ); + } + } +} diff --git a/src/main/java/com/label/common/statemachine/TaskStatus.java b/src/main/java/com/label/common/statemachine/TaskStatus.java new file mode 100644 index 0000000..ccca0aa --- /dev/null +++ b/src/main/java/com/label/common/statemachine/TaskStatus.java @@ -0,0 +1,16 @@ +package com.label.common.statemachine; + +import java.util.Map; +import java.util.Set; + +public enum TaskStatus { + UNCLAIMED, IN_PROGRESS, SUBMITTED, APPROVED, REJECTED; + + public static final Map> TRANSITIONS = Map.of( + UNCLAIMED, Set.of(IN_PROGRESS), + IN_PROGRESS, Set.of(SUBMITTED, UNCLAIMED, IN_PROGRESS), // IN_PROGRESS->IN_PROGRESS for ADMIN reassign + SUBMITTED, Set.of(APPROVED, REJECTED), + REJECTED, Set.of(IN_PROGRESS) + // APPROVED: terminal state, no outgoing transitions + ); +} diff --git a/src/main/java/com/label/common/statemachine/VideoJobStatus.java b/src/main/java/com/label/common/statemachine/VideoJobStatus.java new file mode 100644 index 0000000..0af2c9d --- /dev/null +++ b/src/main/java/com/label/common/statemachine/VideoJobStatus.java @@ -0,0 +1,20 @@ +package com.label.common.statemachine; + +import java.util.Map; +import java.util.Set; + +public enum VideoJobStatus { + PENDING, RUNNING, SUCCESS, FAILED, RETRYING; + + /** + * Automatic 
state machine transitions. + * Note: FAILED → PENDING is a manual ADMIN operation, handled separately in VideoProcessService.reset(). + */ + public static final Map<VideoJobStatus, Set<VideoJobStatus>> TRANSITIONS = Map.of( + PENDING, Set.of(RUNNING), + RUNNING, Set.of(SUCCESS, FAILED, RETRYING), + RETRYING, Set.of(RUNNING, FAILED) + // SUCCESS: terminal state + // FAILED → PENDING: manual ADMIN reset, NOT in this automatic transitions map + ); +} diff --git a/src/main/java/com/label/common/storage/RustFsClient.java b/src/main/java/com/label/common/storage/RustFsClient.java new file mode 100644 index 0000000..b9bd1ae --- /dev/null +++ b/src/main/java/com/label/common/storage/RustFsClient.java @@ -0,0 +1,118 @@ +package com.label.common.storage; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.*; +import software.amazon.awssdk.services.s3.presigner.S3Presigner; +import software.amazon.awssdk.services.s3.presigner.model.GetObjectPresignRequest; + +import jakarta.annotation.PostConstruct; +import java.io.InputStream; +import java.net.URI; +import java.time.Duration; + +@Slf4j +@Component +public class RustFsClient { + + @Value("${rustfs.endpoint}") + private String endpoint; + + @Value("${rustfs.access-key}") + private String accessKey; + + @Value("${rustfs.secret-key}") + private String secretKey; + + private S3Client s3Client; + private S3Presigner presigner; + + @PostConstruct + public void init() { + var credentials = StaticCredentialsProvider.create( + AwsBasicCredentials.create(accessKey, secretKey)); + + s3Client = S3Client.builder() + 
.endpointOverride(URI.create(endpoint)) + .credentialsProvider(credentials) + .region(Region.US_EAST_1) + .forcePathStyle(true) // Required for MinIO/RustFS + .build(); + + presigner = S3Presigner.builder() + .endpointOverride(URI.create(endpoint)) + .credentialsProvider(credentials) + .region(Region.US_EAST_1) + .build(); + } + + /** + * Upload file to RustFS. + * @param bucket bucket name + * @param key object key (path) + * @param inputStream file content + * @param contentLength file size in bytes + * @param contentType MIME type + */ + public void upload(String bucket, String key, InputStream inputStream, + long contentLength, String contentType) { + // Ensure bucket exists + ensureBucketExists(bucket); + + s3Client.putObject( + PutObjectRequest.builder() + .bucket(bucket) + .key(key) + .contentType(contentType) + .contentLength(contentLength) + .build(), + RequestBody.fromInputStream(inputStream, contentLength) + ); + } + + /** + * Download file from RustFS. + */ + public InputStream download(String bucket, String key) { + return s3Client.getObject( + GetObjectRequest.builder().bucket(bucket).key(key).build() + ); + } + + /** + * Delete file from RustFS. + */ + public void delete(String bucket, String key) { + s3Client.deleteObject( + DeleteObjectRequest.builder().bucket(bucket).key(key).build() + ); + } + + /** + * Generate a presigned URL for temporary read access. 
+ * @param expirationMinutes URL validity in minutes + */ + public String getPresignedUrl(String bucket, String key, int expirationMinutes) { + var presignRequest = GetObjectPresignRequest.builder() + .signatureDuration(Duration.ofMinutes(expirationMinutes)) + .getObjectRequest(GetObjectRequest.builder().bucket(bucket).key(key).build()) + .build(); + + return presigner.presignGetObject(presignRequest).url().toString(); + } + + private void ensureBucketExists(String bucket) { + try { + s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); + } catch (NoSuchBucketException e) { + s3Client.createBucket(CreateBucketRequest.builder().bucket(bucket).build()); + log.info("Created bucket: {}", bucket); + } + } +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml new file mode 100644 index 0000000..76c49bc --- /dev/null +++ b/src/main/resources/application.yml @@ -0,0 +1,68 @@ +server: + port: 8080 + +spring: + datasource: + url: ${SPRING_DATASOURCE_URL:jdbc:postgresql://localhost:5432/label_db} + username: ${SPRING_DATASOURCE_USERNAME:label} + password: ${SPRING_DATASOURCE_PASSWORD:label_password} + driver-class-name: org.postgresql.Driver + hikari: + maximum-pool-size: 20 + minimum-idle: 5 + connection-timeout: 30000 + + data: + redis: + host: ${SPRING_DATA_REDIS_HOST:localhost} + port: ${SPRING_DATA_REDIS_PORT:6379} + password: ${SPRING_DATA_REDIS_PASSWORD:redis_password} + timeout: 5000ms + lettuce: + pool: + max-active: 8 + max-idle: 8 + min-idle: 0 + + jackson: + default-property-inclusion: non_null + serialization: + write-dates-as-timestamps: false + + mvc: + pathmatch: + matching-strategy: ant_path_matcher # Shiro 与 Spring Boot 3 兼容性需要 + +mybatis-plus: + mapper-locations: classpath*:mapper/**/*.xml + type-aliases-package: com.label.module + configuration: + map-underscore-to-camel-case: true + log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl + global-config: + db-config: + id-type: auto + +rustfs: + endpoint: 
${RUSTFS_ENDPOINT:http://localhost:9000} + access-key: ${RUSTFS_ACCESS_KEY:minioadmin} + secret-key: ${RUSTFS_SECRET_KEY:minioadmin} + region: us-east-1 + +ai-service: + base-url: ${AI_SERVICE_BASE_URL:http://localhost:8000} + timeout: 30000 # milliseconds + +shiro: + loginUrl: /api/auth/login + successUrl: / + unauthorizedUrl: /api/auth/unauthorized + sessionManager: + sessionIdCookieEnabled: false # REST API,不使用基于 Cookie 的会话 + sessionIdUrlRewritingEnabled: false + +logging: + level: + com.label: DEBUG + org.apache.shiro: INFO + com.baomidou.mybatisplus: INFO diff --git a/src/test/java/com/label/AbstractIntegrationTest.java b/src/test/java/com/label/AbstractIntegrationTest.java new file mode 100644 index 0000000..8679173 --- /dev/null +++ b/src/test/java/com/label/AbstractIntegrationTest.java @@ -0,0 +1,87 @@ +package com.label; + +import org.junit.jupiter.api.BeforeEach; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.web.server.LocalServerPort; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.test.context.DynamicPropertyRegistry; +import org.springframework.test.context.DynamicPropertySource; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.PostgreSQLContainer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.utility.DockerImageName; +import org.testcontainers.utility.MountableFile; + +/** + * Base class for all integration tests. + * + * Starts real PostgreSQL 16 and Redis 7 containers (shared across test class instances). + * Executes sql/init.sql to initialize schema and seed data. 
 + * + * DESIGN: + * - @Container with static fields → containers are shared across test methods (faster) + * - @DynamicPropertySource → overrides datasource/redis properties at runtime + * - @BeforeEach cleanData() → truncates business tables (not sys_company/sys_user) between tests + */ +@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) +@Testcontainers +public abstract class AbstractIntegrationTest { + + @LocalServerPort + protected int port; + + @Autowired + protected JdbcTemplate jdbcTemplate; + + @SuppressWarnings("resource") + @Container + protected static final PostgreSQLContainer<?> postgres = + new PostgreSQLContainer<>(DockerImageName.parse("postgres:16-alpine")) + .withDatabaseName("label_db") + .withUsername("label") + .withPassword("label_password") + .withCopyFileToContainer( + MountableFile.forClasspathResource("db/init.sql"), + "/docker-entrypoint-initdb.d/init.sql"); + + @SuppressWarnings("resource") + @Container + protected static final GenericContainer<?> redis = + new GenericContainer<>(DockerImageName.parse("redis:7-alpine")) + .withExposedPorts(6379) + .withCommand("redis-server", "--requirepass", "test_redis_password"); + + @DynamicPropertySource + static void configureProperties(DynamicPropertyRegistry registry) { + registry.add("spring.datasource.url", postgres::getJdbcUrl); + registry.add("spring.datasource.username", postgres::getUsername); + registry.add("spring.datasource.password", postgres::getPassword); + registry.add("spring.data.redis.host", redis::getHost); + registry.add("spring.data.redis.port", () -> redis.getMappedPort(6379)); + registry.add("spring.data.redis.password", () -> "test_redis_password"); + } + + /** + * Clean only business data between tests to keep schema intact. + * Keep sys_company and sys_user since init.sql seeds them. 
+ */ + @BeforeEach + void cleanData() { + jdbcTemplate.execute("TRUNCATE TABLE video_process_job, annotation_task_history, " + + "sys_operation_log, sys_config, export_batch, training_dataset, " + + "annotation_result, annotation_task, source_data RESTART IDENTITY CASCADE"); + // Re-insert global sys_config entries that were truncated + jdbcTemplate.execute("INSERT INTO sys_config (company_id, config_key, config_value) VALUES " + + "(NULL, 'token_ttl_seconds', '7200'), " + + "(NULL, 'model_default', 'glm-4'), " + + "(NULL, 'video_frame_interval', '30') " + + "ON CONFLICT DO NOTHING"); + } + + /** Helper: get base URL for REST calls */ + protected String baseUrl(String path) { + return "http://localhost:" + port + path; + } +} diff --git a/src/test/java/com/label/LabelBackendApplicationTests.java b/src/test/java/com/label/LabelBackendApplicationTests.java new file mode 100644 index 0000000..f83f216 --- /dev/null +++ b/src/test/java/com/label/LabelBackendApplicationTests.java @@ -0,0 +1,7 @@ +package com.label; + +import org.springframework.boot.test.context.SpringBootTest; + +@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE) +class LabelBackendApplicationTests { +} diff --git a/src/test/java/com/label/unit/StateMachineTest.java b/src/test/java/com/label/unit/StateMachineTest.java new file mode 100644 index 0000000..a563970 --- /dev/null +++ b/src/test/java/com/label/unit/StateMachineTest.java @@ -0,0 +1,265 @@ +package com.label.unit; + +import com.label.common.exception.BusinessException; +import com.label.common.statemachine.*; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.*; + +/** + * Unit tests for all state machine enums and StateValidator. + * No Spring context needed - pure unit tests. 
+ */ +@DisplayName("状态机单元测试") +class StateMachineTest { + + // ===== SourceStatus ===== + @Nested + @DisplayName("SourceStatus 状态机") + class SourceStatusTest { + + @Test + @DisplayName("合法转换:PENDING → EXTRACTING(文本/图片直接提取)") + void pendingToExtracting() { + assertThatCode(() -> + StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.PENDING, SourceStatus.EXTRACTING) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:PENDING → PREPROCESSING(视频上传)") + void pendingToPreprocessing() { + assertThatCode(() -> + StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.PENDING, SourceStatus.PREPROCESSING) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:PREPROCESSING → PENDING(视频预处理完成)") + void preprocessingToPending() { + assertThatCode(() -> + StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.PREPROCESSING, SourceStatus.PENDING) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:EXTRACTING → QA_REVIEW(提取审批通过)") + void extractingToQaReview() { + assertThatCode(() -> + StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.EXTRACTING, SourceStatus.QA_REVIEW) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:QA_REVIEW → APPROVED(QA 审批通过)") + void qaReviewToApproved() { + assertThatCode(() -> + StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.QA_REVIEW, SourceStatus.APPROVED) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("非法转换:APPROVED → PENDING 抛出异常") + void approvedToPendingFails() { + assertThatThrownBy(() -> + StateValidator.assertTransition(SourceStatus.TRANSITIONS, SourceStatus.APPROVED, SourceStatus.PENDING) + ).isInstanceOf(BusinessException.class) + .extracting("code").isEqualTo("INVALID_STATE_TRANSITION"); + } + + @Test + @DisplayName("非法转换:PENDING → APPROVED(跳过中间状态)抛出异常") + void pendingToApprovedFails() { + assertThatThrownBy(() -> + StateValidator.assertTransition(SourceStatus.TRANSITIONS, 
SourceStatus.PENDING, SourceStatus.APPROVED) + ).isInstanceOf(BusinessException.class) + .extracting("code").isEqualTo("INVALID_STATE_TRANSITION"); + } + } + + // ===== TaskStatus ===== + @Nested + @DisplayName("TaskStatus 状态机") + class TaskStatusTest { + + @Test + @DisplayName("合法转换:UNCLAIMED → IN_PROGRESS(领取)") + void unclaimedToInProgress() { + assertThatCode(() -> + StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.UNCLAIMED, TaskStatus.IN_PROGRESS) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:IN_PROGRESS → SUBMITTED(提交)") + void inProgressToSubmitted() { + assertThatCode(() -> + StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.IN_PROGRESS, TaskStatus.SUBMITTED) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:IN_PROGRESS → UNCLAIMED(放弃)") + void inProgressToUnclaimed() { + assertThatCode(() -> + StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.IN_PROGRESS, TaskStatus.UNCLAIMED) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:IN_PROGRESS → IN_PROGRESS(ADMIN 强制转移,持有人变更)") + void inProgressToInProgress() { + assertThatCode(() -> + StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.IN_PROGRESS, TaskStatus.IN_PROGRESS) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:SUBMITTED → APPROVED(审批通过)") + void submittedToApproved() { + assertThatCode(() -> + StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.SUBMITTED, TaskStatus.APPROVED) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:SUBMITTED → REJECTED(审批驳回)") + void submittedToRejected() { + assertThatCode(() -> + StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.SUBMITTED, TaskStatus.REJECTED) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:REJECTED → IN_PROGRESS(标注员重领)") + void rejectedToInProgress() { + assertThatCode(() -> + StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.REJECTED, 
TaskStatus.IN_PROGRESS) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("非法转换:APPROVED → IN_PROGRESS 抛出异常") + void approvedToInProgressFails() { + assertThatThrownBy(() -> + StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.APPROVED, TaskStatus.IN_PROGRESS) + ).isInstanceOf(BusinessException.class) + .extracting("code").isEqualTo("INVALID_STATE_TRANSITION"); + } + + @Test + @DisplayName("非法转换:UNCLAIMED → SUBMITTED(跳过 IN_PROGRESS)抛出异常") + void unclaimedToSubmittedFails() { + assertThatThrownBy(() -> + StateValidator.assertTransition(TaskStatus.TRANSITIONS, TaskStatus.UNCLAIMED, TaskStatus.SUBMITTED) + ).isInstanceOf(BusinessException.class) + .extracting("code").isEqualTo("INVALID_STATE_TRANSITION"); + } + } + + // ===== DatasetStatus ===== + @Nested + @DisplayName("DatasetStatus 状态机") + class DatasetStatusTest { + + @Test + @DisplayName("合法转换:PENDING_REVIEW → APPROVED") + void pendingReviewToApproved() { + assertThatCode(() -> + StateValidator.assertTransition(DatasetStatus.TRANSITIONS, DatasetStatus.PENDING_REVIEW, DatasetStatus.APPROVED) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:PENDING_REVIEW → REJECTED") + void pendingReviewToRejected() { + assertThatCode(() -> + StateValidator.assertTransition(DatasetStatus.TRANSITIONS, DatasetStatus.PENDING_REVIEW, DatasetStatus.REJECTED) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:REJECTED → PENDING_REVIEW(重新提交)") + void rejectedToPendingReview() { + assertThatCode(() -> + StateValidator.assertTransition(DatasetStatus.TRANSITIONS, DatasetStatus.REJECTED, DatasetStatus.PENDING_REVIEW) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("非法转换:APPROVED → REJECTED 抛出异常") + void approvedToRejectedFails() { + assertThatThrownBy(() -> + StateValidator.assertTransition(DatasetStatus.TRANSITIONS, DatasetStatus.APPROVED, DatasetStatus.REJECTED) + ).isInstanceOf(BusinessException.class) + .extracting("code").isEqualTo("INVALID_STATE_TRANSITION"); + } 
+ } + + // ===== VideoJobStatus ===== + @Nested + @DisplayName("VideoJobStatus 状态机") + class VideoJobStatusTest { + + @Test + @DisplayName("合法转换:PENDING → RUNNING") + void pendingToRunning() { + assertThatCode(() -> + StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.PENDING, VideoJobStatus.RUNNING) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:RUNNING → SUCCESS") + void runningToSuccess() { + assertThatCode(() -> + StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.RUNNING, VideoJobStatus.SUCCESS) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:RUNNING → RETRYING(失败且未超重试次数)") + void runningToRetrying() { + assertThatCode(() -> + StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.RUNNING, VideoJobStatus.RETRYING) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:RUNNING → FAILED(失败且超过最大重试)") + void runningToFailed() { + assertThatCode(() -> + StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.RUNNING, VideoJobStatus.FAILED) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("合法转换:RETRYING → RUNNING(AI 重试)") + void retryingToRunning() { + assertThatCode(() -> + StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.RETRYING, VideoJobStatus.RUNNING) + ).doesNotThrowAnyException(); + } + + @Test + @DisplayName("非法转换:FAILED → PENDING 不在状态机内(ADMIN 手动触发,不走 StateValidator)") + void failedToPendingNotInStateMachine() { + // FAILED → PENDING is intentionally NOT in TRANSITIONS (ADMIN manual reset via special API) + assertThatThrownBy(() -> + StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.FAILED, VideoJobStatus.PENDING) + ).isInstanceOf(BusinessException.class) + .extracting("code").isEqualTo("INVALID_STATE_TRANSITION"); + } + + @Test + @DisplayName("非法转换:SUCCESS → RUNNING 抛出异常") + void successToRunningFails() { + assertThatThrownBy(() -> + 
StateValidator.assertTransition(VideoJobStatus.TRANSITIONS, VideoJobStatus.SUCCESS, VideoJobStatus.RUNNING) + ).isInstanceOf(BusinessException.class) + .extracting("code").isEqualTo("INVALID_STATE_TRANSITION"); + } + } +} diff --git a/src/test/resources/.gitkeep b/src/test/resources/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/test/resources/db/init.sql b/src/test/resources/db/init.sql new file mode 100644 index 0000000..1824039 --- /dev/null +++ b/src/test/resources/db/init.sql @@ -0,0 +1,332 @@ +-- label_backend init.sql +-- PostgreSQL 14+ +-- 按依赖顺序建全部 11 张表: +-- sys_company → sys_user → source_data → annotation_task → annotation_result +-- → training_dataset → export_batch → sys_config → sys_operation_log +-- → annotation_task_history → video_process_job +-- 含所有索引及初始配置数据 + +-- ============================================================ +-- 扩展 +-- ============================================================ +CREATE EXTENSION IF NOT EXISTS pgcrypto; + +-- ============================================================ +-- 1. sys_company(租户) +-- ============================================================ +CREATE TABLE IF NOT EXISTS sys_company ( + id BIGSERIAL PRIMARY KEY, + company_name VARCHAR(100) NOT NULL, + company_code VARCHAR(50) NOT NULL, + status VARCHAR(10) NOT NULL DEFAULT 'ACTIVE', -- ACTIVE / DISABLED + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW(), + CONSTRAINT uk_sys_company_name UNIQUE (company_name), + CONSTRAINT uk_sys_company_code UNIQUE (company_code) +); + +-- ============================================================ +-- 2. 
sys_user(用户) +-- ============================================================ +CREATE TABLE IF NOT EXISTS sys_user ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT NOT NULL REFERENCES sys_company(id), + username VARCHAR(50) NOT NULL, + password_hash VARCHAR(255) NOT NULL, -- BCrypt, strength >= 10 + real_name VARCHAR(50), + role VARCHAR(20) NOT NULL, -- UPLOADER / ANNOTATOR / REVIEWER / ADMIN + status VARCHAR(10) NOT NULL DEFAULT 'ACTIVE', -- ACTIVE / DISABLED + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW(), + CONSTRAINT uk_sys_user_company_username UNIQUE (company_id, username) +); + +CREATE INDEX IF NOT EXISTS idx_sys_user_company_id + ON sys_user (company_id); + +-- ============================================================ +-- 3. source_data(原始资料) +-- ============================================================ +CREATE TABLE IF NOT EXISTS source_data ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT NOT NULL REFERENCES sys_company(id), + uploader_id BIGINT REFERENCES sys_user(id), + data_type VARCHAR(20) NOT NULL, -- TEXT / IMAGE / VIDEO + file_path VARCHAR(500) NOT NULL, -- RustFS object path + file_name VARCHAR(255) NOT NULL, + file_size BIGINT, + bucket_name VARCHAR(100) NOT NULL, + parent_source_id BIGINT REFERENCES source_data(id), -- 视频帧 / 文本片段 + status VARCHAR(20) NOT NULL DEFAULT 'PENDING', + -- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED + reject_reason TEXT, -- 保留字段(当前无 REJECTED 状态) + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_source_data_company_id + ON source_data (company_id); +CREATE INDEX IF NOT EXISTS idx_source_data_company_status + ON source_data (company_id, status); +CREATE INDEX IF NOT EXISTS idx_source_data_parent_source_id + ON source_data (parent_source_id); + +-- ============================================================ +-- 4. 
annotation_task(标注任务) +-- ============================================================ +CREATE TABLE IF NOT EXISTS annotation_task ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT NOT NULL REFERENCES sys_company(id), + source_id BIGINT NOT NULL REFERENCES source_data(id), + task_type VARCHAR(30) NOT NULL, -- EXTRACTION / QA_GENERATION + status VARCHAR(20) NOT NULL DEFAULT 'UNCLAIMED', + -- UNCLAIMED / IN_PROGRESS / SUBMITTED / APPROVED / REJECTED + claimed_by BIGINT REFERENCES sys_user(id), + claimed_at TIMESTAMP, + submitted_at TIMESTAMP, + completed_at TIMESTAMP, + is_final BOOLEAN NOT NULL DEFAULT FALSE, -- true 即 APPROVED 且无需再审 + ai_model VARCHAR(50), + reject_reason TEXT, + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_annotation_task_company_status + ON annotation_task (company_id, status); +CREATE INDEX IF NOT EXISTS idx_annotation_task_source_id + ON annotation_task (source_id); +CREATE INDEX IF NOT EXISTS idx_annotation_task_claimed_by + ON annotation_task (claimed_by); + +-- ============================================================ +-- 5. annotation_result(标注结果,JSONB) +-- ============================================================ +CREATE TABLE IF NOT EXISTS annotation_result ( + id BIGSERIAL NOT NULL, + task_id BIGINT NOT NULL REFERENCES annotation_task(id), + company_id BIGINT NOT NULL REFERENCES sys_company(id), + result_json JSONB NOT NULL DEFAULT '[]'::jsonb, -- 整体替换语义 + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW(), + CONSTRAINT pk_annotation_result PRIMARY KEY (id), + CONSTRAINT uk_annotation_result_task_id UNIQUE (task_id) +); + +CREATE INDEX IF NOT EXISTS idx_annotation_result_task_id + ON annotation_result (task_id); +CREATE INDEX IF NOT EXISTS idx_annotation_result_company_id + ON annotation_result (company_id); + +-- ============================================================ +-- 6. 
training_dataset(训练数据集) +-- export_batch_id FK 在 export_batch 建完后补加 +-- ============================================================ +CREATE TABLE IF NOT EXISTS training_dataset ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT NOT NULL REFERENCES sys_company(id), + task_id BIGINT NOT NULL REFERENCES annotation_task(id), + source_id BIGINT NOT NULL REFERENCES source_data(id), + sample_type VARCHAR(20) NOT NULL, -- TEXT / IMAGE / VIDEO_FRAME + glm_format_json JSONB NOT NULL, -- GLM fine-tune 格式 + status VARCHAR(20) NOT NULL DEFAULT 'PENDING_REVIEW', + -- PENDING_REVIEW / APPROVED / REJECTED + export_batch_id BIGINT, -- 导出后填写,FK 在下方补加 + exported_at TIMESTAMP, + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_training_dataset_company_status + ON training_dataset (company_id, status); +CREATE INDEX IF NOT EXISTS idx_training_dataset_task_id + ON training_dataset (task_id); + +-- ============================================================ +-- 7. 
export_batch(导出批次) +-- ============================================================ +CREATE TABLE IF NOT EXISTS export_batch ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT NOT NULL REFERENCES sys_company(id), + batch_uuid UUID NOT NULL DEFAULT gen_random_uuid(), + sample_count INT NOT NULL DEFAULT 0, + dataset_file_path VARCHAR(500), -- 导出 JSONL 的 RustFS 路径 + glm_job_id VARCHAR(100), -- GLM fine-tune 任务 ID + finetune_status VARCHAR(20) NOT NULL DEFAULT 'NOT_STARTED', + -- NOT_STARTED / RUNNING / COMPLETED / FAILED + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_export_batch_company_id + ON export_batch (company_id); + +-- 补加 training_dataset.export_batch_id FK +ALTER TABLE training_dataset + ADD CONSTRAINT fk_training_dataset_export_batch + FOREIGN KEY (export_batch_id) REFERENCES export_batch(id) + NOT VALID; -- 允许已有 NULL 行,不强制回溯校验 + +-- ============================================================ +-- 8. sys_config(系统配置) +-- ============================================================ +CREATE TABLE IF NOT EXISTS sys_config ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT REFERENCES sys_company(id), -- NULL = 全局默认 + config_key VARCHAR(100) NOT NULL, + config_value TEXT NOT NULL, + description VARCHAR(255), + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); + +-- 公司级配置唯一索引 +CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_company_key + ON sys_config (company_id, config_key) + WHERE company_id IS NOT NULL; + +-- 全局配置唯一索引 +CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_global_key + ON sys_config (config_key) + WHERE company_id IS NULL; + +CREATE INDEX IF NOT EXISTS idx_sys_config_company_key + ON sys_config (company_id, config_key); + +-- ============================================================ +-- 9. 
sys_operation_log(操作日志,仅追加) +-- ============================================================ +CREATE TABLE IF NOT EXISTS sys_operation_log ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT NOT NULL REFERENCES sys_company(id), + operator_id BIGINT REFERENCES sys_user(id), + operation_type VARCHAR(50) NOT NULL, -- 例如 EXTRACTION_APPROVE / USER_LOGIN + target_id BIGINT, + target_type VARCHAR(50), + detail JSONB, + result VARCHAR(10), -- SUCCESS / FAILURE + error_message TEXT, + operated_at TIMESTAMP NOT NULL DEFAULT NOW() + -- 无 updated_at(仅追加表,永不更新) +); + +CREATE INDEX IF NOT EXISTS idx_sys_operation_log_company_operated_at + ON sys_operation_log (company_id, operated_at); +CREATE INDEX IF NOT EXISTS idx_sys_operation_log_operator_id + ON sys_operation_log (operator_id); + +-- ============================================================ +-- 10. annotation_task_history(任务状态历史,仅追加) +-- ============================================================ +CREATE TABLE IF NOT EXISTS annotation_task_history ( + id BIGSERIAL PRIMARY KEY, + task_id BIGINT NOT NULL REFERENCES annotation_task(id), + company_id BIGINT NOT NULL REFERENCES sys_company(id), + from_status VARCHAR(20), + to_status VARCHAR(20) NOT NULL, + operator_id BIGINT REFERENCES sys_user(id), + operator_role VARCHAR(20), + comment TEXT, + created_at TIMESTAMP NOT NULL DEFAULT NOW() + -- 无 updated_at(仅追加表,永不更新) +); + +CREATE INDEX IF NOT EXISTS idx_annotation_task_history_task_id + ON annotation_task_history (task_id); +CREATE INDEX IF NOT EXISTS idx_annotation_task_history_company_id + ON annotation_task_history (company_id); + +-- ============================================================ +-- 11. 
video_process_job(视频处理作业) +-- ============================================================ +CREATE TABLE IF NOT EXISTS video_process_job ( + id BIGSERIAL PRIMARY KEY, + company_id BIGINT NOT NULL REFERENCES sys_company(id), + source_id BIGINT NOT NULL REFERENCES source_data(id), + job_type VARCHAR(30) NOT NULL, -- FRAME_EXTRACT / VIDEO_TO_TEXT + status VARCHAR(20) NOT NULL DEFAULT 'PENDING', + -- PENDING / RUNNING / SUCCESS / FAILED / RETRYING + params JSONB, -- 例如 {"frameInterval": 30, "mode": "FRAME"} + output_path VARCHAR(500), -- 完成后的 RustFS 输出路径 + retry_count INT NOT NULL DEFAULT 0, + max_retries INT NOT NULL DEFAULT 3, + error_message TEXT, + started_at TIMESTAMP, + completed_at TIMESTAMP, + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_video_process_job_company_id + ON video_process_job (company_id); +CREATE INDEX IF NOT EXISTS idx_video_process_job_source_id + ON video_process_job (source_id); +CREATE INDEX IF NOT EXISTS idx_video_process_job_status + ON video_process_job (status); + +-- ============================================================ +-- 初始数据 +-- ============================================================ + +-- 1. 演示公司 +INSERT INTO sys_company (company_name, company_code, status) +VALUES ('演示公司', 'DEMO', 'ACTIVE') +ON CONFLICT DO NOTHING; + +-- 2. 
初始用户(BCrypt strength=10) +-- admin / admin123 +-- reviewer01/ review123 +-- annotator01/annot123 +-- uploader01 / upload123 +INSERT INTO sys_user (company_id, username, password_hash, real_name, role, status) +SELECT + c.id, + u.username, + u.password_hash, + u.real_name, + u.role, + 'ACTIVE' +FROM sys_company c +CROSS JOIN (VALUES + ('admin', + '$2a$10$B8iR5z43URiNPm.eut3JvufIPBuvGx5ZZmqyUqE1A1WdbZppX5bmi', + '管理员', + 'ADMIN'), + ('reviewer01', + '$2a$10$euOJZRfUtYNW7WHpfW1Ciee5b3rjkYFe3yQHT/uCQWrYVc0XQcukm', + '审核员01', + 'REVIEWER'), + ('annotator01', + '$2a$10$8UKwHPNASauKMTrqosR0Reg1X1gkFzFlGa/HBwNLXUELaj4e/zcqu', + '标注员01', + 'ANNOTATOR'), + ('uploader01', + '$2a$10$o2d7jsT31vyxIJHUo50mUefoZLLvGqft97zaL9OQCjRxn9ie1H/1O', + '上传员01', + 'UPLOADER') +) AS u(username, password_hash, real_name, role) +WHERE c.company_code = 'DEMO' +ON CONFLICT (company_id, username) DO NOTHING; + +-- 3. 全局系统配置 +INSERT INTO sys_config (company_id, config_key, config_value, description) +VALUES + (NULL, 'token_ttl_seconds', '7200', + '会话凭证有效期(秒)'), + (NULL, 'model_default', 'glm-4', + 'AI 辅助默认模型'), + (NULL, 'video_frame_interval', '30', + '视频帧提取间隔(帧数)'), + (NULL, 'prompt_extract_text', + '请提取以下文本中的主语-谓语-宾语三元组,以JSON数组格式返回,每个元素包含subject、predicate、object、sourceText、startOffset、endOffset字段。', + '文本三元组提取 Prompt 模板'), + (NULL, 'prompt_extract_image', + '请提取图片中的实体关系四元组,以JSON数组格式返回,每个元素包含subject、relation、object、modifier、confidence字段。', + '图片四元组提取 Prompt 模板'), + (NULL, 'prompt_qa_gen_text', + '根据以下文本三元组生成高质量问答对,以JSON数组格式返回,每个元素包含question、answer、difficulty字段。', + '文本问答生成 Prompt 模板'), + (NULL, 'prompt_qa_gen_image', + '根据以下图片四元组生成高质量问答对,以JSON数组格式返回,每个元素包含question、answer、imageRef、difficulty字段。', + '图片问答生成 Prompt 模板') +ON CONFLICT DO NOTHING;