-- label_backend init.sql
-- PostgreSQL 14+
-- 按依赖顺序建全部 11 张表:
--   sys_company → sys_user → source_data → annotation_task → annotation_result
--   → training_dataset → export_batch → sys_config → sys_operation_log
--   → annotation_task_history → video_process_job
-- 含所有索引及初始配置数据

-- ============================================================
-- Extensions
-- ============================================================
-- Enable pgcrypto before any table is created.
-- NOTE(review): gen_random_uuid(), used as the default of
-- export_batch.batch_uuid, is built into PostgreSQL 13+, so pgcrypto may
-- only be needed for other functions -- confirm before removing it.
CREATE EXTENSION IF NOT EXISTS "pgcrypto";

-- ============================================================
-- 1. sys_company (tenant)
-- ============================================================
-- One row per tenant company. Both the display name and the short code
-- are unique across the whole database.
CREATE TABLE IF NOT EXISTS sys_company (
    id           BIGSERIAL    PRIMARY KEY,
    company_name VARCHAR(100) NOT NULL,
    company_code VARCHAR(50)  NOT NULL,
    -- Documented values: ACTIVE / DISABLED (not enforced by a constraint).
    status       VARCHAR(10)  NOT NULL DEFAULT 'ACTIVE',
    created_at   TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at   TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT uk_sys_company_name UNIQUE (company_name),
    CONSTRAINT uk_sys_company_code UNIQUE (company_code)
);

-- ============================================================
-- 2. sys_user (user account)
-- ============================================================
-- Accounts belong to exactly one company; a username only has to be unique
-- inside its company.
CREATE TABLE IF NOT EXISTS sys_user (
    id            BIGSERIAL    PRIMARY KEY,
    company_id    BIGINT       NOT NULL REFERENCES sys_company (id),
    username      VARCHAR(50)  NOT NULL,
    -- BCrypt hash, strength >= 10 (per the original column note).
    password_hash VARCHAR(255) NOT NULL,
    real_name     VARCHAR(50),
    -- Documented values: UPLOADER / ANNOTATOR / REVIEWER / ADMIN.
    role          VARCHAR(20)  NOT NULL,
    -- Documented values: ACTIVE / DISABLED.
    status        VARCHAR(10)  NOT NULL DEFAULT 'ACTIVE',
    created_at    TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT uk_sys_user_company_username UNIQUE (company_id, username)
);

CREATE INDEX IF NOT EXISTS idx_sys_user_company_id ON sys_user (company_id);

-- ============================================================
-- 3. source_data (uploaded source material)
-- ============================================================
-- One row per stored object. Derived items point back to their origin
-- through parent_source_id.
CREATE TABLE IF NOT EXISTS source_data (
    id               BIGSERIAL    PRIMARY KEY,
    company_id       BIGINT       NOT NULL REFERENCES sys_company (id),
    uploader_id      BIGINT       REFERENCES sys_user (id),
    -- Documented values: TEXT / IMAGE / VIDEO.
    data_type        VARCHAR(20)  NOT NULL,
    -- RustFS object path.
    file_path        VARCHAR(500) NOT NULL,
    file_name        VARCHAR(255) NOT NULL,
    file_size        BIGINT,
    bucket_name      VARCHAR(100) NOT NULL,
    -- Self-reference used for derived items (video frames / text fragments).
    parent_source_id BIGINT       REFERENCES source_data (id),
    -- Documented values:
    -- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED.
    status           VARCHAR(20)  NOT NULL DEFAULT 'PENDING',
    -- Reserved column: there is currently no REJECTED status.
    reject_reason    TEXT,
    created_at       TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at       TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_source_data_company_id ON source_data (company_id);
CREATE INDEX IF NOT EXISTS idx_source_data_company_status ON source_data (company_id, status);
CREATE INDEX IF NOT EXISTS idx_source_data_parent_source_id ON source_data (parent_source_id);

-- ============================================================
-- 4. annotation_task (annotation task)
-- ============================================================
-- A unit of annotation work attached to one source_data row, optionally
-- claimed by a user.
CREATE TABLE IF NOT EXISTS annotation_task (
    id            BIGSERIAL   PRIMARY KEY,
    company_id    BIGINT      NOT NULL REFERENCES sys_company (id),
    source_id     BIGINT      NOT NULL REFERENCES source_data (id),
    -- Documented values: EXTRACTION / QA_GENERATION.
    task_type     VARCHAR(30) NOT NULL,
    -- Documented values:
    -- UNCLAIMED / IN_PROGRESS / SUBMITTED / APPROVED / REJECTED.
    status        VARCHAR(20) NOT NULL DEFAULT 'UNCLAIMED',
    claimed_by    BIGINT      REFERENCES sys_user (id),
    claimed_at    TIMESTAMP,
    submitted_at  TIMESTAMP,
    completed_at  TIMESTAMP,
    -- TRUE means the task is APPROVED and needs no further review.
    is_final      BOOLEAN     NOT NULL DEFAULT FALSE,
    ai_model      VARCHAR(50),
    reject_reason TEXT,
    created_at    TIMESTAMP   NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMP   NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_annotation_task_company_status ON annotation_task (company_id, status);
CREATE INDEX IF NOT EXISTS idx_annotation_task_source_id ON annotation_task (source_id);
CREATE INDEX IF NOT EXISTS idx_annotation_task_claimed_by ON annotation_task (claimed_by);

-- ============================================================
-- 5. annotation_result (annotation result, JSONB)
-- ============================================================
-- At most one result row per task, enforced by uk_annotation_result_task_id.
CREATE TABLE IF NOT EXISTS annotation_result (
    id          BIGSERIAL NOT NULL,
    task_id     BIGINT    NOT NULL REFERENCES annotation_task (id),
    company_id  BIGINT    NOT NULL REFERENCES sys_company (id),
    -- Whole-document replacement semantics (per the original column note).
    result_json JSONB     NOT NULL DEFAULT '[]'::jsonb,
    created_at  TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at  TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT pk_annotation_result PRIMARY KEY (id),
    CONSTRAINT uk_annotation_result_task_id UNIQUE (task_id)
);

-- No separate index on task_id: the UNIQUE constraint above is backed by a
-- unique B-tree index on (task_id), which already serves task_id lookups.
-- A second, non-unique index on the same column would only add write and
-- storage overhead.
CREATE INDEX IF NOT EXISTS idx_annotation_result_company_id ON annotation_result (company_id);

-- ============================================================
-- 6. training_dataset (training samples)
-- ============================================================
-- export_batch_id gets its foreign key later in this script, once
-- export_batch exists.
CREATE TABLE IF NOT EXISTS training_dataset (
    id              BIGSERIAL   PRIMARY KEY,
    company_id      BIGINT      NOT NULL REFERENCES sys_company (id),
    task_id         BIGINT      NOT NULL REFERENCES annotation_task (id),
    source_id       BIGINT      NOT NULL REFERENCES source_data (id),
    -- Documented values: TEXT / IMAGE / VIDEO_FRAME.
    sample_type     VARCHAR(20) NOT NULL,
    -- Sample in GLM fine-tune format.
    glm_format_json JSONB       NOT NULL,
    -- Documented values: PENDING_REVIEW / APPROVED / REJECTED.
    status          VARCHAR(20) NOT NULL DEFAULT 'PENDING_REVIEW',
    -- Filled in after export; the FK constraint is added further down.
    export_batch_id BIGINT,
    exported_at     TIMESTAMP,
    created_at      TIMESTAMP   NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at      TIMESTAMP   NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_training_dataset_company_status ON training_dataset (company_id, status);
CREATE INDEX IF NOT EXISTS idx_training_dataset_task_id ON training_dataset (task_id);

-- ============================================================
-- 7. export_batch (export batch)
-- ============================================================
-- One row per dataset export, including the state of the fine-tune job
-- started from it.
CREATE TABLE IF NOT EXISTS export_batch (
    id                BIGSERIAL    PRIMARY KEY,
    company_id        BIGINT       NOT NULL REFERENCES sys_company (id),
    batch_uuid        UUID         NOT NULL DEFAULT gen_random_uuid(),
    sample_count      INTEGER      NOT NULL DEFAULT 0,
    -- RustFS path of the exported JSONL file.
    dataset_file_path VARCHAR(500),
    -- GLM fine-tune job ID.
    glm_job_id        VARCHAR(100),
    -- Documented values: NOT_STARTED / RUNNING / COMPLETED / FAILED.
    finetune_status   VARCHAR(20)  NOT NULL DEFAULT 'NOT_STARTED',
    created_at        TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_export_batch_company_id ON export_batch (company_id);

-- Deferred foreign key: training_dataset.export_batch_id -> export_batch.id.
-- It is added here because export_batch is created after training_dataset.
--
-- ALTER TABLE ... ADD CONSTRAINT has no IF NOT EXISTS form, so the catalog is
-- checked first. This keeps the script re-runnable, like the
-- CREATE ... IF NOT EXISTS statements around it.
--
-- NOT VALID only skips validating rows that already exist; rows inserted or
-- updated afterwards are still checked. NULL values are always accepted by a
-- foreign key, with or without NOT VALID.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'fk_training_dataset_export_batch'
          AND conrelid = 'training_dataset'::regclass
    ) THEN
        ALTER TABLE training_dataset
            ADD CONSTRAINT fk_training_dataset_export_batch
            FOREIGN KEY (export_batch_id) REFERENCES export_batch (id)
            NOT VALID;
    END IF;
END
$$;

-- ============================================================
-- 8. sys_config (system configuration)
-- ============================================================
-- Key/value settings. A row with company_id NULL is a global default;
-- a row with company_id set belongs to that company.
CREATE TABLE IF NOT EXISTS sys_config (
    id           BIGSERIAL    PRIMARY KEY,
    -- NULL = global default.
    company_id   BIGINT       REFERENCES sys_company (id),
    config_key   VARCHAR(100) NOT NULL,
    config_value TEXT         NOT NULL,
    description  VARCHAR(255),
    created_at   TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at   TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Company-level rows: one value per (company, key).
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_company_key
    ON sys_config (company_id, config_key)
    WHERE company_id IS NOT NULL;

-- Global rows: one value per key. A separate partial index is needed
-- because NULL company_id values never collide in a plain unique index.
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_global_key
    ON sys_config (config_key)
    WHERE company_id IS NULL;

CREATE INDEX IF NOT EXISTS idx_sys_config_company_key ON sys_config (company_id, config_key);

-- ============================================================
-- 9. sys_operation_log (operation log, append-only)
-- ============================================================
-- There is deliberately no updated_at column: rows are only appended and
-- never updated.
CREATE TABLE IF NOT EXISTS sys_operation_log (
    id             BIGSERIAL   PRIMARY KEY,
    company_id     BIGINT      NOT NULL REFERENCES sys_company (id),
    operator_id    BIGINT      REFERENCES sys_user (id),
    -- For example EXTRACTION_APPROVE / USER_LOGIN.
    operation_type VARCHAR(50) NOT NULL,
    target_id      BIGINT,
    target_type    VARCHAR(50),
    detail         JSONB,
    -- Documented values: SUCCESS / FAILURE.
    result         VARCHAR(10),
    error_message  TEXT,
    operated_at    TIMESTAMP   NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_sys_operation_log_company_operated_at
    ON sys_operation_log (company_id, operated_at);
CREATE INDEX IF NOT EXISTS idx_sys_operation_log_operator_id ON sys_operation_log (operator_id);

-- ============================================================
-- 10. annotation_task_history (task status history, append-only)
-- ============================================================
-- One row per status transition of a task. There is deliberately no
-- updated_at column: rows are only appended and never updated.
CREATE TABLE IF NOT EXISTS annotation_task_history (
    id            BIGSERIAL   PRIMARY KEY,
    task_id       BIGINT      NOT NULL REFERENCES annotation_task (id),
    company_id    BIGINT      NOT NULL REFERENCES sys_company (id),
    from_status   VARCHAR(20),
    to_status     VARCHAR(20) NOT NULL,
    operator_id   BIGINT      REFERENCES sys_user (id),
    operator_role VARCHAR(20),
    comment       TEXT,
    created_at    TIMESTAMP   NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_annotation_task_history_task_id ON annotation_task_history (task_id);
CREATE INDEX IF NOT EXISTS idx_annotation_task_history_company_id ON annotation_task_history (company_id);

-- ============================================================
-- 11. video_process_job (video processing job)
-- ============================================================
-- Tracks one processing job for a source_data row, including its retry
-- counters and timing.
CREATE TABLE IF NOT EXISTS video_process_job (
    id            BIGSERIAL    PRIMARY KEY,
    company_id    BIGINT       NOT NULL REFERENCES sys_company (id),
    source_id     BIGINT       NOT NULL REFERENCES source_data (id),
    -- Documented values: FRAME_EXTRACT / VIDEO_TO_TEXT.
    job_type      VARCHAR(30)  NOT NULL,
    -- Documented values: PENDING / RUNNING / SUCCESS / FAILED / RETRYING.
    status        VARCHAR(20)  NOT NULL DEFAULT 'PENDING',
    -- For example {"frameInterval": 30, "mode": "FRAME"}.
    params        JSONB,
    -- RustFS output path, available once the job has completed.
    output_path   VARCHAR(500),
    retry_count   INTEGER      NOT NULL DEFAULT 0,
    max_retries   INTEGER      NOT NULL DEFAULT 3,
    error_message TEXT,
    started_at    TIMESTAMP,
    completed_at  TIMESTAMP,
    created_at    TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_video_process_job_company_id ON video_process_job (company_id);
CREATE INDEX IF NOT EXISTS idx_video_process_job_source_id ON video_process_job (source_id);
CREATE INDEX IF NOT EXISTS idx_video_process_job_status ON video_process_job (status);

-- ============================================================
-- Seed data
-- ============================================================

-- 1. Demo company. ON CONFLICT DO NOTHING makes a re-run a no-op when the
--    name or the code already exists.
INSERT INTO sys_company (
    company_name,
    company_code,
    status
)
VALUES (
    '演示公司',
    'DEMO',
    'ACTIVE'
)
ON CONFLICT DO NOTHING;

-- 2. Initial users of the demo company (BCrypt, strength = 10).
--    Plain-text passwords as given in the original author's note:
--      admin       / admin123
--      reviewer01  / review123
--      annotator01 / annot123
--      uploader01  / upload123
--    These are well-known demo credentials; rotate them before using this
--    database for anything other than a demo.
--    No rows are inserted when no company with code 'DEMO' exists.
WITH seed_user (username, password_hash, real_name, role) AS (
    VALUES
        ('admin',
         '$2a$10$B8iR5z43URiNPm.eut3JvufIPBuvGx5ZZmqyUqE1A1WdbZppX5bmi',
         '管理员',
         'ADMIN'),
        ('reviewer01',
         '$2a$10$euOJZRfUtYNW7WHpfW1Ciee5b3rjkYFe3yQHT/uCQWrYVc0XQcukm',
         '审核员01',
         'REVIEWER'),
        ('annotator01',
         '$2a$10$8UKwHPNASauKMTrqosR0Reg1X1gkFzFlGa/HBwNLXUELaj4e/zcqu',
         '标注员01',
         'ANNOTATOR'),
        ('uploader01',
         '$2a$10$o2d7jsT31vyxIJHUo50mUefoZLLvGqft97zaL9OQCjRxn9ie1H/1O',
         '上传员01',
         'UPLOADER')
)
INSERT INTO sys_user (company_id, username, password_hash, real_name, role, status)
SELECT
    demo.id,
    s.username,
    s.password_hash,
    s.real_name,
    s.role,
    'ACTIVE'
FROM sys_company AS demo
CROSS JOIN seed_user AS s
WHERE demo.company_code = 'DEMO'
ON CONFLICT (company_id, username) DO NOTHING;

-- 3. Global system configuration (company_id is NULL for every row).
--    ON CONFLICT DO NOTHING leaves keys that already exist untouched.
INSERT INTO sys_config (company_id, config_key, config_value, description)
SELECT
    CAST(NULL AS BIGINT),
    g.config_key,
    g.config_value,
    g.description
FROM (
    VALUES
        ('token_ttl_seconds',
         '7200',
         '会话凭证有效期(秒)'),
        ('model_default',
         'glm-4',
         'AI 辅助默认模型'),
        ('video_frame_interval',
         '30',
         '视频帧提取间隔(帧数)'),
        ('prompt_extract_text',
         '请提取以下文本中的主语-谓语-宾语三元组,以JSON数组格式返回,每个元素包含subject、predicate、object、sourceText、startOffset、endOffset字段。',
         '文本三元组提取 Prompt 模板'),
        ('prompt_extract_image',
         '请提取图片中的实体关系四元组,以JSON数组格式返回,每个元素包含subject、relation、object、modifier、confidence字段。',
         '图片四元组提取 Prompt 模板'),
        ('prompt_qa_gen_text',
         '根据以下文本三元组生成高质量问答对,以JSON数组格式返回,每个元素包含question、answer、difficulty字段。',
         '文本问答生成 Prompt 模板'),
        ('prompt_qa_gen_image',
         '根据以下图片四元组生成高质量问答对,以JSON数组格式返回,每个元素包含question、answer、imageRef、difficulty字段。',
         '图片问答生成 Prompt 模板')
) AS g (config_key, config_value, description)
ON CONFLICT DO NOTHING;