Files
label_backend/sql/init.sql

333 lines
14 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- label_backend init.sql
-- PostgreSQL 14+
-- 按依赖顺序建全部 11 张表:
-- sys_company → sys_user → source_data → annotation_task → annotation_result
-- → training_dataset → export_batch → sys_config → sys_operation_log
-- → annotation_task_history → video_process_job
-- 含所有索引及初始配置数据
-- ============================================================
-- 扩展
-- ============================================================
CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- ============================================================
-- 1. sys_company租户
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_company (
id BIGSERIAL PRIMARY KEY,
company_name VARCHAR(100) NOT NULL,
company_code VARCHAR(50) NOT NULL,
status VARCHAR(10) NOT NULL DEFAULT 'ACTIVE', -- ACTIVE / DISABLED
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
CONSTRAINT uk_sys_company_name UNIQUE (company_name),
CONSTRAINT uk_sys_company_code UNIQUE (company_code)
);
-- ============================================================
-- 2. sys_user用户
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_user (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
username VARCHAR(50) NOT NULL,
password_hash VARCHAR(255) NOT NULL, -- BCrypt, strength >= 10
real_name VARCHAR(50),
role VARCHAR(20) NOT NULL, -- UPLOADER / ANNOTATOR / REVIEWER / ADMIN
status VARCHAR(10) NOT NULL DEFAULT 'ACTIVE', -- ACTIVE / DISABLED
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
CONSTRAINT uk_sys_user_company_username UNIQUE (company_id, username)
);
CREATE INDEX IF NOT EXISTS idx_sys_user_company_id
ON sys_user (company_id);
-- ============================================================
-- 3. source_data原始资料
-- ============================================================
CREATE TABLE IF NOT EXISTS source_data (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
uploader_id BIGINT REFERENCES sys_user(id),
data_type VARCHAR(20) NOT NULL, -- TEXT / IMAGE / VIDEO
file_path VARCHAR(500) NOT NULL, -- RustFS object path
file_name VARCHAR(255) NOT NULL,
file_size BIGINT,
bucket_name VARCHAR(100) NOT NULL,
parent_source_id BIGINT REFERENCES source_data(id), -- 视频帧 / 文本片段
status VARCHAR(20) NOT NULL DEFAULT 'PENDING',
-- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED
reject_reason TEXT, -- 保留字段(当前无 REJECTED 状态)
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_source_data_company_id
ON source_data (company_id);
CREATE INDEX IF NOT EXISTS idx_source_data_company_status
ON source_data (company_id, status);
CREATE INDEX IF NOT EXISTS idx_source_data_parent_source_id
ON source_data (parent_source_id);
-- ============================================================
-- 4. annotation_task标注任务
-- ============================================================
CREATE TABLE IF NOT EXISTS annotation_task (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
source_id BIGINT NOT NULL REFERENCES source_data(id),
task_type VARCHAR(30) NOT NULL, -- EXTRACTION / QA_GENERATION
status VARCHAR(20) NOT NULL DEFAULT 'UNCLAIMED',
-- UNCLAIMED / IN_PROGRESS / SUBMITTED / APPROVED / REJECTED
claimed_by BIGINT REFERENCES sys_user(id),
claimed_at TIMESTAMP,
submitted_at TIMESTAMP,
completed_at TIMESTAMP,
is_final BOOLEAN NOT NULL DEFAULT FALSE, -- true 即 APPROVED 且无需再审
ai_model VARCHAR(50),
reject_reason TEXT,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_annotation_task_company_status
ON annotation_task (company_id, status);
CREATE INDEX IF NOT EXISTS idx_annotation_task_source_id
ON annotation_task (source_id);
CREATE INDEX IF NOT EXISTS idx_annotation_task_claimed_by
ON annotation_task (claimed_by);
-- ============================================================
-- 5. annotation_result标注结果JSONB
-- ============================================================
CREATE TABLE IF NOT EXISTS annotation_result (
id BIGSERIAL NOT NULL,
task_id BIGINT NOT NULL REFERENCES annotation_task(id),
company_id BIGINT NOT NULL REFERENCES sys_company(id),
result_json JSONB NOT NULL DEFAULT '[]'::jsonb, -- 整体替换语义
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
CONSTRAINT pk_annotation_result PRIMARY KEY (id),
CONSTRAINT uk_annotation_result_task_id UNIQUE (task_id)
);
CREATE INDEX IF NOT EXISTS idx_annotation_result_task_id
ON annotation_result (task_id);
CREATE INDEX IF NOT EXISTS idx_annotation_result_company_id
ON annotation_result (company_id);
-- ============================================================
-- 6. training_dataset训练数据集
-- export_batch_id FK 在 export_batch 建完后补加
-- ============================================================
CREATE TABLE IF NOT EXISTS training_dataset (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
task_id BIGINT NOT NULL REFERENCES annotation_task(id),
source_id BIGINT NOT NULL REFERENCES source_data(id),
sample_type VARCHAR(20) NOT NULL, -- TEXT / IMAGE / VIDEO_FRAME
glm_format_json JSONB NOT NULL, -- GLM fine-tune 格式
status VARCHAR(20) NOT NULL DEFAULT 'PENDING_REVIEW',
-- PENDING_REVIEW / APPROVED / REJECTED
export_batch_id BIGINT, -- 导出后填写FK 在下方补加
exported_at TIMESTAMP,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_training_dataset_company_status
ON training_dataset (company_id, status);
CREATE INDEX IF NOT EXISTS idx_training_dataset_task_id
ON training_dataset (task_id);
-- ============================================================
-- 7. export_batch导出批次
-- ============================================================
CREATE TABLE IF NOT EXISTS export_batch (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
batch_uuid UUID NOT NULL DEFAULT gen_random_uuid(),
sample_count INT NOT NULL DEFAULT 0,
dataset_file_path VARCHAR(500), -- 导出 JSONL 的 RustFS 路径
glm_job_id VARCHAR(100), -- GLM fine-tune 任务 ID
finetune_status VARCHAR(20) NOT NULL DEFAULT 'NOT_STARTED',
-- NOT_STARTED / RUNNING / COMPLETED / FAILED
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_export_batch_company_id
ON export_batch (company_id);
-- 补加 training_dataset.export_batch_id FK
ALTER TABLE training_dataset
ADD CONSTRAINT fk_training_dataset_export_batch
FOREIGN KEY (export_batch_id) REFERENCES export_batch(id)
NOT VALID; -- 允许已有 NULL 行,不强制回溯校验
-- ============================================================
-- 8. sys_config系统配置
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_config (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT REFERENCES sys_company(id), -- NULL = 全局默认
config_key VARCHAR(100) NOT NULL,
config_value TEXT NOT NULL,
description VARCHAR(255),
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
-- 公司级配置唯一索引
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_company_key
ON sys_config (company_id, config_key)
WHERE company_id IS NOT NULL;
-- 全局配置唯一索引
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_global_key
ON sys_config (config_key)
WHERE company_id IS NULL;
CREATE INDEX IF NOT EXISTS idx_sys_config_company_key
ON sys_config (company_id, config_key);
-- ============================================================
-- 9. sys_operation_log操作日志仅追加
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_operation_log (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
operator_id BIGINT REFERENCES sys_user(id),
operation_type VARCHAR(50) NOT NULL, -- 例如 EXTRACTION_APPROVE / USER_LOGIN
target_id BIGINT,
target_type VARCHAR(50),
detail JSONB,
result VARCHAR(10), -- SUCCESS / FAILURE
error_message TEXT,
operated_at TIMESTAMP NOT NULL DEFAULT NOW()
-- 无 updated_at仅追加表永不更新
);
CREATE INDEX IF NOT EXISTS idx_sys_operation_log_company_operated_at
ON sys_operation_log (company_id, operated_at);
CREATE INDEX IF NOT EXISTS idx_sys_operation_log_operator_id
ON sys_operation_log (operator_id);
-- ============================================================
-- 10. annotation_task_history任务状态历史仅追加
-- ============================================================
CREATE TABLE IF NOT EXISTS annotation_task_history (
id BIGSERIAL PRIMARY KEY,
task_id BIGINT NOT NULL REFERENCES annotation_task(id),
company_id BIGINT NOT NULL REFERENCES sys_company(id),
from_status VARCHAR(20),
to_status VARCHAR(20) NOT NULL,
operator_id BIGINT REFERENCES sys_user(id),
operator_role VARCHAR(20),
comment TEXT,
created_at TIMESTAMP NOT NULL DEFAULT NOW()
-- 无 updated_at仅追加表永不更新
);
CREATE INDEX IF NOT EXISTS idx_annotation_task_history_task_id
ON annotation_task_history (task_id);
CREATE INDEX IF NOT EXISTS idx_annotation_task_history_company_id
ON annotation_task_history (company_id);
-- ============================================================
-- 11. video_process_job视频处理作业
-- ============================================================
CREATE TABLE IF NOT EXISTS video_process_job (
id BIGSERIAL PRIMARY KEY,
company_id BIGINT NOT NULL REFERENCES sys_company(id),
source_id BIGINT NOT NULL REFERENCES source_data(id),
job_type VARCHAR(30) NOT NULL, -- FRAME_EXTRACT / VIDEO_TO_TEXT
status VARCHAR(20) NOT NULL DEFAULT 'PENDING',
-- PENDING / RUNNING / SUCCESS / FAILED / RETRYING
params JSONB, -- 例如 {"frameInterval": 30, "mode": "FRAME"}
output_path VARCHAR(500), -- 完成后的 RustFS 输出路径
retry_count INT NOT NULL DEFAULT 0,
max_retries INT NOT NULL DEFAULT 3,
error_message TEXT,
started_at TIMESTAMP,
completed_at TIMESTAMP,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_video_process_job_company_id
ON video_process_job (company_id);
CREATE INDEX IF NOT EXISTS idx_video_process_job_source_id
ON video_process_job (source_id);
CREATE INDEX IF NOT EXISTS idx_video_process_job_status
ON video_process_job (status);
-- ============================================================
-- 初始数据
-- ============================================================
-- 1. 演示公司
INSERT INTO sys_company (company_name, company_code, status)
VALUES ('演示公司', 'DEMO', 'ACTIVE')
ON CONFLICT DO NOTHING;
-- 2. 初始用户BCrypt strength=10
-- admin / admin123
-- reviewer01/ review123
-- annotator01/annot123
-- uploader01 / upload123
INSERT INTO sys_user (company_id, username, password_hash, real_name, role, status)
SELECT
c.id,
u.username,
u.password_hash,
u.real_name,
u.role,
'ACTIVE'
FROM sys_company c
CROSS JOIN (VALUES
('admin',
'$2a$10$7EqJtq98hPqEX7fNZaFWoOe2Wo7qnrBCShP0e9r6xLuU3WnYjFWRa',
'管理员',
'ADMIN'),
('reviewer01',
'$2a$10$LbyxRjSqPiN7z0bqQlLn5eL7ZjFQFJuDFMX3/ycVdDkZ5RNY2/7Oi',
'审核员01',
'REVIEWER'),
('annotator01',
'$2a$10$Nrk2z1OdKfHE1j4Sq3JpCOP0K0i5q2jV5tICqX7c1W2YqEwPq8HHi',
'标注员01',
'ANNOTATOR'),
('uploader01',
'$2a$10$3z5q9mLw4r0F6f8v1Xe3AO2bFdG9kK7m5pN1sT8uY4wZ6dH0jI2eR',
'上传员01',
'UPLOADER')
) AS u(username, password_hash, real_name, role)
WHERE c.company_code = 'DEMO'
ON CONFLICT (company_id, username) DO NOTHING;
-- 3. 全局系统配置
INSERT INTO sys_config (company_id, config_key, config_value, description)
VALUES
(NULL, 'token_ttl_seconds', '7200',
'会话凭证有效期(秒)'),
(NULL, 'model_default', 'glm-4',
'AI 辅助默认模型'),
(NULL, 'video_frame_interval', '30',
'视频帧提取间隔(帧数)'),
(NULL, 'prompt_extract_text',
'请提取以下文本中的主语-谓语-宾语三元组以JSON数组格式返回每个元素包含subject、predicate、object、sourceText、startOffset、endOffset字段。',
'文本三元组提取 Prompt 模板'),
(NULL, 'prompt_extract_image',
'请提取图片中的实体关系四元组以JSON数组格式返回每个元素包含subject、relation、object、modifier、confidence字段。',
'图片四元组提取 Prompt 模板'),
(NULL, 'prompt_qa_gen_text',
'根据以下文本三元组生成高质量问答对以JSON数组格式返回每个元素包含question、answer、difficulty字段。',
'文本问答生成 Prompt 模板'),
(NULL, 'prompt_qa_gen_image',
'根据以下图片四元组生成高质量问答对以JSON数组格式返回每个元素包含question、answer、imageRef、difficulty字段。',
'图片问答生成 Prompt 模板')
ON CONFLICT DO NOTHING;