@@ -0,0 +1,332 @@
-- label_backend init.sql
-- PostgreSQL 14+
-- 按依赖顺序建全部 11 张表:
-- sys_company → sys_user → source_data → annotation_task → annotation_result
-- → training_dataset → export_batch → sys_config → sys_operation_log
-- → annotation_task_history → video_process_job
-- 含所有索引及初始配置数据
-- ============================================================
-- Extensions
-- ============================================================
-- Install pgcrypto if it is not already present; IF NOT EXISTS makes this
-- statement a no-op on repeated runs.
CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- ============================================================
-- 1. sys_company (tenant)
-- ============================================================
-- One row per tenant company. Both company_name and company_code are unique.
CREATE TABLE IF NOT EXISTS sys_company (
    id            BIGSERIAL    PRIMARY KEY,
    company_name  VARCHAR(100) NOT NULL,
    company_code  VARCHAR(50)  NOT NULL,
    -- ACTIVE / DISABLED. The default is the bare documented value with no
    -- surrounding whitespace, so equality checks against 'ACTIVE' match.
    status        VARCHAR(10)  NOT NULL DEFAULT 'ACTIVE',
    created_at    TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at    TIMESTAMP    NOT NULL DEFAULT NOW(),
    CONSTRAINT uk_sys_company_name UNIQUE (company_name),
    CONSTRAINT uk_sys_company_code UNIQUE (company_code)
);
-- ============================================================
-- 2. sys_user (user)
-- ============================================================
-- Users belong to exactly one company; usernames are unique per company.
CREATE TABLE IF NOT EXISTS sys_user (
    id             BIGSERIAL    PRIMARY KEY,
    company_id     BIGINT       NOT NULL REFERENCES sys_company (id),
    username       VARCHAR(50)  NOT NULL,
    password_hash  VARCHAR(255) NOT NULL,  -- BCrypt, strength >= 10
    real_name      VARCHAR(50),
    role           VARCHAR(20)  NOT NULL,  -- UPLOADER / ANNOTATOR / REVIEWER / ADMIN
    -- ACTIVE / DISABLED. The default is the bare documented value with no
    -- surrounding whitespace, so equality checks against 'ACTIVE' match.
    status         VARCHAR(10)  NOT NULL DEFAULT 'ACTIVE',
    created_at     TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at     TIMESTAMP    NOT NULL DEFAULT NOW(),
    CONSTRAINT uk_sys_user_company_username UNIQUE (company_id, username)
);

-- Lookup of users by tenant.
CREATE INDEX IF NOT EXISTS idx_sys_user_company_id
    ON sys_user (company_id);
-- ============================================================
-- 3. source_data (raw source material)
-- ============================================================
-- Uploaded source files. A row may point at a parent row through
-- parent_source_id (video frame / text fragment derived from another source).
CREATE TABLE IF NOT EXISTS source_data (
    id                BIGSERIAL    PRIMARY KEY,
    company_id        BIGINT       NOT NULL REFERENCES sys_company (id),
    uploader_id       BIGINT       REFERENCES sys_user (id),
    data_type         VARCHAR(20)  NOT NULL,  -- TEXT / IMAGE / VIDEO
    file_path         VARCHAR(500) NOT NULL,  -- RustFS object path
    file_name         VARCHAR(255) NOT NULL,
    file_size         BIGINT,
    bucket_name       VARCHAR(100) NOT NULL,
    parent_source_id  BIGINT       REFERENCES source_data (id),  -- video frame / text fragment
    -- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED.
    -- The default is the bare documented value with no surrounding whitespace.
    status            VARCHAR(20)  NOT NULL DEFAULT 'PENDING',
    reject_reason     TEXT,                   -- reserved column (there is currently no REJECTED status)
    created_at        TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at        TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_source_data_company_id
    ON source_data (company_id);

CREATE INDEX IF NOT EXISTS idx_source_data_company_status
    ON source_data (company_id, status);

CREATE INDEX IF NOT EXISTS idx_source_data_parent_source_id
    ON source_data (parent_source_id);
-- ============================================================
-- 4. annotation_task (annotation task)
-- ============================================================
-- One task per unit of annotation work on a source_data row.
CREATE TABLE IF NOT EXISTS annotation_task (
    id             BIGSERIAL    PRIMARY KEY,
    company_id     BIGINT       NOT NULL REFERENCES sys_company (id),
    source_id      BIGINT       NOT NULL REFERENCES source_data (id),
    task_type      VARCHAR(30)  NOT NULL,  -- EXTRACTION / QA_GENERATION
    -- UNCLAIMED / IN_PROGRESS / SUBMITTED / APPROVED / REJECTED.
    -- The default is the bare documented value with no surrounding whitespace.
    status         VARCHAR(20)  NOT NULL DEFAULT 'UNCLAIMED',
    claimed_by     BIGINT       REFERENCES sys_user (id),
    claimed_at     TIMESTAMP,
    submitted_at   TIMESTAMP,
    completed_at   TIMESTAMP,
    is_final       BOOLEAN      NOT NULL DEFAULT FALSE,  -- TRUE means APPROVED with no further review needed
    ai_model       VARCHAR(50),
    reject_reason  TEXT,
    created_at     TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at     TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_annotation_task_company_status
    ON annotation_task (company_id, status);

CREATE INDEX IF NOT EXISTS idx_annotation_task_source_id
    ON annotation_task (source_id);

CREATE INDEX IF NOT EXISTS idx_annotation_task_claimed_by
    ON annotation_task (claimed_by);
-- ============================================================
-- 5. annotation_result (annotation result, JSONB)
-- ============================================================
-- At most one result row per task (enforced by uk_annotation_result_task_id).
CREATE TABLE IF NOT EXISTS annotation_result (
    id           BIGSERIAL  NOT NULL,
    task_id      BIGINT     NOT NULL REFERENCES annotation_task (id),
    company_id   BIGINT     NOT NULL REFERENCES sys_company (id),
    -- Whole-value replacement semantics; defaults to an empty JSON array.
    -- The cast operator must be written as a single '::' token.
    result_json  JSONB      NOT NULL DEFAULT '[]'::jsonb,
    created_at   TIMESTAMP  NOT NULL DEFAULT NOW(),
    updated_at   TIMESTAMP  NOT NULL DEFAULT NOW(),
    CONSTRAINT pk_annotation_result PRIMARY KEY (id),
    CONSTRAINT uk_annotation_result_task_id UNIQUE (task_id)
);

-- No separate index on task_id: the UNIQUE constraint
-- uk_annotation_result_task_id is already backed by a unique index on that
-- column, so an additional plain index would only duplicate it.

CREATE INDEX IF NOT EXISTS idx_annotation_result_company_id
    ON annotation_result (company_id);
-- ============================================================
-- 6. training_dataset (training dataset)
--    The FK on export_batch_id is added after export_batch is created.
-- ============================================================
CREATE TABLE IF NOT EXISTS training_dataset (
    id               BIGSERIAL    PRIMARY KEY,
    company_id       BIGINT       NOT NULL REFERENCES sys_company (id),
    task_id          BIGINT       NOT NULL REFERENCES annotation_task (id),
    source_id        BIGINT       NOT NULL REFERENCES source_data (id),
    sample_type      VARCHAR(20)  NOT NULL,  -- TEXT / IMAGE / VIDEO_FRAME
    glm_format_json  JSONB        NOT NULL,  -- GLM fine-tune format
    -- PENDING_REVIEW / APPROVED / REJECTED.
    -- The default is the bare documented value with no surrounding whitespace.
    status           VARCHAR(20)  NOT NULL DEFAULT 'PENDING_REVIEW',
    export_batch_id  BIGINT,                 -- filled in after export; FK is added later in this script
    exported_at      TIMESTAMP,
    created_at       TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at       TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_training_dataset_company_status
    ON training_dataset (company_id, status);

CREATE INDEX IF NOT EXISTS idx_training_dataset_task_id
    ON training_dataset (task_id);
-- ============================================================
-- 7. export_batch (export batch)
-- ============================================================
-- One row per dataset export; batch_uuid is generated by the database.
CREATE TABLE IF NOT EXISTS export_batch (
    id                 BIGSERIAL    PRIMARY KEY,
    company_id         BIGINT       NOT NULL REFERENCES sys_company (id),
    batch_uuid         UUID         NOT NULL DEFAULT gen_random_uuid(),
    sample_count       INT          NOT NULL DEFAULT 0,
    dataset_file_path  VARCHAR(500),           -- RustFS path of the exported JSONL
    glm_job_id         VARCHAR(100),           -- GLM fine-tune job ID
    -- NOT_STARTED / RUNNING / COMPLETED / FAILED.
    -- The default is the bare documented value with no surrounding whitespace.
    finetune_status    VARCHAR(20)  NOT NULL DEFAULT 'NOT_STARTED',
    created_at         TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at         TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_export_batch_company_id
    ON export_batch (company_id);
-- Add the deferred FK training_dataset.export_batch_id -> export_batch.id.
-- ALTER TABLE ... ADD CONSTRAINT has no IF NOT EXISTS form, so the catalog is
-- checked first; this keeps the script re-runnable like the rest of the file.
-- NOT VALID only skips checking rows that already exist when the constraint is
-- added; new and updated rows are still checked. (NULL export_batch_id values
-- are always permitted by a foreign key, with or without NOT VALID.)
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'fk_training_dataset_export_batch'
          AND conrelid = 'training_dataset'::regclass
    ) THEN
        ALTER TABLE training_dataset
            ADD CONSTRAINT fk_training_dataset_export_batch
            FOREIGN KEY (export_batch_id) REFERENCES export_batch (id)
            NOT VALID;
    END IF;
END
$$;
-- ============================================================
-- 8. sys_config (system configuration)
-- ============================================================
-- Key/value settings. A NULL company_id marks a global default row;
-- a non-NULL company_id marks a company-level row.
CREATE TABLE IF NOT EXISTS sys_config (
    id            BIGSERIAL    PRIMARY KEY,
    company_id    BIGINT       REFERENCES sys_company (id),  -- NULL = global default
    config_key    VARCHAR(100) NOT NULL,
    config_value  TEXT         NOT NULL,
    description   VARCHAR(255),
    created_at    TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at    TIMESTAMP    NOT NULL DEFAULT NOW()
);

-- Company-level rows: one value per (company, key).
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_company_key
    ON sys_config (company_id, config_key)
    WHERE company_id IS NOT NULL;

-- Global rows: one value per key.
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_global_key
    ON sys_config (config_key)
    WHERE company_id IS NULL;

-- Non-partial lookup index over all rows.
CREATE INDEX IF NOT EXISTS idx_sys_config_company_key
    ON sys_config (company_id, config_key);
-- ============================================================
-- 9. sys_operation_log (operation log, append-only)
-- ============================================================
-- Intended as an append-only audit table, hence no updated_at column.
CREATE TABLE IF NOT EXISTS sys_operation_log (
    id              BIGSERIAL    PRIMARY KEY,
    company_id      BIGINT       NOT NULL REFERENCES sys_company (id),
    operator_id     BIGINT       REFERENCES sys_user (id),
    operation_type  VARCHAR(50)  NOT NULL,  -- e.g. EXTRACTION_APPROVE / USER_LOGIN
    target_id       BIGINT,
    target_type     VARCHAR(50),
    detail          JSONB,
    result          VARCHAR(10),            -- SUCCESS / FAILURE
    error_message   TEXT,
    operated_at     TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_sys_operation_log_company_operated_at
    ON sys_operation_log (company_id, operated_at);

CREATE INDEX IF NOT EXISTS idx_sys_operation_log_operator_id
    ON sys_operation_log (operator_id);
-- ============================================================
-- 10. annotation_task_history (task status history, append-only)
-- ============================================================
-- One row per status transition of a task. Intended as append-only,
-- hence no updated_at column.
CREATE TABLE IF NOT EXISTS annotation_task_history (
    id             BIGSERIAL    PRIMARY KEY,
    task_id        BIGINT       NOT NULL REFERENCES annotation_task (id),
    company_id     BIGINT       NOT NULL REFERENCES sys_company (id),
    from_status    VARCHAR(20),
    to_status      VARCHAR(20)  NOT NULL,
    operator_id    BIGINT       REFERENCES sys_user (id),
    operator_role  VARCHAR(20),
    comment        TEXT,
    created_at     TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_annotation_task_history_task_id
    ON annotation_task_history (task_id);

CREATE INDEX IF NOT EXISTS idx_annotation_task_history_company_id
    ON annotation_task_history (company_id);
-- ============================================================
-- 11. video_process_job (video processing job)
-- ============================================================
-- One row per processing job run against a source_data row.
CREATE TABLE IF NOT EXISTS video_process_job (
    id             BIGSERIAL    PRIMARY KEY,
    company_id     BIGINT       NOT NULL REFERENCES sys_company (id),
    source_id      BIGINT       NOT NULL REFERENCES source_data (id),
    job_type       VARCHAR(30)  NOT NULL,  -- FRAME_EXTRACT / VIDEO_TO_TEXT
    -- PENDING / RUNNING / SUCCESS / FAILED / RETRYING.
    -- The default is the bare documented value with no surrounding whitespace.
    status         VARCHAR(20)  NOT NULL DEFAULT 'PENDING',
    params         JSONB,                  -- e.g. {"frameInterval": 30, "mode": "FRAME"}
    output_path    VARCHAR(500),           -- RustFS output path once the job completes
    retry_count    INT          NOT NULL DEFAULT 0,
    max_retries    INT          NOT NULL DEFAULT 3,
    error_message  TEXT,
    started_at     TIMESTAMP,
    completed_at   TIMESTAMP,
    created_at     TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at     TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_video_process_job_company_id
    ON video_process_job (company_id);

CREATE INDEX IF NOT EXISTS idx_video_process_job_source_id
    ON video_process_job (source_id);

CREATE INDEX IF NOT EXISTS idx_video_process_job_status
    ON video_process_job (status);
-- ============================================================
-- Seed data
-- ============================================================
-- All literals below are written without surrounding whitespace: codes,
-- usernames, roles, status values and config keys are matched by exact
-- equality, and a BCrypt hash must begin with its "$2a$" prefix to be usable.

-- 1. Demo company.
-- ON CONFLICT DO NOTHING (no target) skips the row if either unique
-- constraint on sys_company already holds it, so re-running is harmless.
INSERT INTO sys_company (company_name, company_code, status)
VALUES ('演示公司', 'DEMO', 'ACTIVE')
ON CONFLICT DO NOTHING;

-- 2. Initial users (BCrypt strength = 10).
-- According to the original author's note the hashes correspond to:
--   admin       / admin123
--   reviewer01  / review123
--   annotator01 / annot123
--   uploader01  / upload123
-- NOTE(review): these are well-known demo credentials; rotate or remove them
-- before any non-demo deployment.
INSERT INTO sys_user (company_id, username, password_hash, real_name, role, status)
SELECT
    c.id,
    u.username,
    u.password_hash,
    u.real_name,
    u.role,
    'ACTIVE'
FROM sys_company AS c
CROSS JOIN (
    VALUES
        ('admin',
         '$2a$10$B8iR5z43URiNPm.eut3JvufIPBuvGx5ZZmqyUqE1A1WdbZppX5bmi',
         '管理员',
         'ADMIN'),
        ('reviewer01',
         '$2a$10$euOJZRfUtYNW7WHpfW1Ciee5b3rjkYFe3yQHT/uCQWrYVc0XQcukm',
         '审核员01',
         'REVIEWER'),
        ('annotator01',
         '$2a$10$8UKwHPNASauKMTrqosR0Reg1X1gkFzFlGa/HBwNLXUELaj4e/zcqu',
         '标注员01',
         'ANNOTATOR'),
        ('uploader01',
         '$2a$10$o2d7jsT31vyxIJHUo50mUefoZLLvGqft97zaL9OQCjRxn9ie1H/1O',
         '上传员01',
         'UPLOADER')
) AS u (username, password_hash, real_name, role)
-- Must match the company_code inserted above exactly.
WHERE c.company_code = 'DEMO'
ON CONFLICT (company_id, username) DO NOTHING;

-- 3. Global system configuration (company_id NULL = global default).
-- ON CONFLICT DO NOTHING without a target also covers the partial unique
-- index on global keys, so existing keys are left untouched on re-run.
INSERT INTO sys_config (company_id, config_key, config_value, description)
VALUES
    (NULL, 'token_ttl_seconds', '7200',
     '会话凭证有效期(秒)'),
    (NULL, 'model_default', 'glm-4',
     'AI 辅助默认模型'),
    (NULL, 'video_frame_interval', '30',
     '视频帧提取间隔(帧数)'),
    (NULL, 'prompt_extract_text',
     '请提取以下文本中的主语-谓语-宾语三元组, 以JSON数组格式返回, 每个元素包含subject、predicate、object、sourceText、startOffset、endOffset字段。',
     '文本三元组提取 Prompt 模板'),
    (NULL, 'prompt_extract_image',
     '请提取图片中的实体关系四元组, 以JSON数组格式返回, 每个元素包含subject、relation、object、modifier、confidence字段。',
     '图片四元组提取 Prompt 模板'),
    (NULL, 'prompt_qa_gen_text',
     '根据以下文本三元组生成高质量问答对, 以JSON数组格式返回, 每个元素包含question、answer、difficulty字段。',
     '文本问答生成 Prompt 模板'),
    (NULL, 'prompt_qa_gen_image',
     '根据以下图片四元组生成高质量问答对, 以JSON数组格式返回, 每个元素包含question、answer、imageRef、difficulty字段。',
     '图片问答生成 Prompt 模板')
ON CONFLICT DO NOTHING;