-- label_backend init.sql
-- Target: PostgreSQL 14+
-- Creates all 11 tables in dependency order:
--   sys_company -> sys_user -> source_data -> annotation_task -> annotation_result
--   -> training_dataset -> export_batch -> sys_config -> sys_operation_log
--   -> annotation_task_history -> video_process_job
-- Also creates every index and loads the initial configuration data.

-- ============================================================
-- Extensions
-- ============================================================
-- gen_random_uuid() is used as a column default further down in this script.
-- NOTE(review): that function is built into PostgreSQL 13+; confirm whether
-- pgcrypto is still needed for anything else before removing it.
CREATE EXTENSION IF NOT EXISTS pgcrypto;

-- ============================================================
-- 1. sys_company (tenant)
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_company (
    id           BIGSERIAL    PRIMARY KEY,
    company_name VARCHAR(100) NOT NULL
        CONSTRAINT uk_sys_company_name UNIQUE,
    company_code VARCHAR(50)  NOT NULL
        CONSTRAINT uk_sys_company_code UNIQUE,
    -- Allowed values: ACTIVE / DISABLED
    status       VARCHAR(10)  NOT NULL DEFAULT 'ACTIVE',
    created_at   TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at   TIMESTAMP    NOT NULL DEFAULT NOW()
);

-- ============================================================
-- 2. sys_user (user)
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_user (
    id            BIGSERIAL    PRIMARY KEY,
    company_id    BIGINT       NOT NULL REFERENCES sys_company (id),
    username      VARCHAR(50)  NOT NULL,
    -- BCrypt hash, strength >= 10
    password_hash VARCHAR(255) NOT NULL,
    real_name     VARCHAR(50),
    -- Allowed values: UPLOADER / ANNOTATOR / REVIEWER / ADMIN
    role          VARCHAR(20)  NOT NULL,
    -- Allowed values: ACTIVE / DISABLED
    status        VARCHAR(10)  NOT NULL DEFAULT 'ACTIVE',
    created_at    TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at    TIMESTAMP    NOT NULL DEFAULT NOW(),
    -- Usernames are unique per company, not globally.
    CONSTRAINT uk_sys_user_company_username UNIQUE (company_id, username)
);

CREATE INDEX IF NOT EXISTS idx_sys_user_company_id
    ON sys_user (company_id);
-- ============================================================
-- 3. source_data (raw source material)
-- ============================================================
CREATE TABLE IF NOT EXISTS source_data (
    id               BIGSERIAL    PRIMARY KEY,
    company_id       BIGINT       NOT NULL REFERENCES sys_company (id),
    uploader_id      BIGINT       REFERENCES sys_user (id),
    -- Allowed values: TEXT / IMAGE / VIDEO
    data_type        VARCHAR(20)  NOT NULL,
    -- RustFS object path
    file_path        VARCHAR(500) NOT NULL,
    file_name        VARCHAR(255) NOT NULL,
    file_size        BIGINT,
    bucket_name      VARCHAR(100) NOT NULL,
    -- Self-reference: set for derived rows (video frame / text fragment)
    parent_source_id BIGINT       REFERENCES source_data (id),
    -- Allowed values: PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED
    status           VARCHAR(20)  NOT NULL DEFAULT 'PENDING',
    -- Reserved field (there is currently no REJECTED status)
    reject_reason    TEXT,
    created_at       TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at       TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_source_data_company_id
    ON source_data (company_id);

CREATE INDEX IF NOT EXISTS idx_source_data_company_status
    ON source_data (company_id, status);

CREATE INDEX IF NOT EXISTS idx_source_data_parent_source_id
    ON source_data (parent_source_id);
-- ============================================================
-- 4. annotation_task (annotation task)
-- ============================================================
CREATE TABLE IF NOT EXISTS annotation_task (
    id            BIGSERIAL   PRIMARY KEY,
    company_id    BIGINT      NOT NULL REFERENCES sys_company (id),
    source_id     BIGINT      NOT NULL REFERENCES source_data (id),
    task_type     VARCHAR(30) NOT NULL,                      -- EXTRACTION / QA_GENERATION
    status        VARCHAR(20) NOT NULL DEFAULT 'UNCLAIMED',  -- UNCLAIMED / IN_PROGRESS / SUBMITTED / APPROVED / REJECTED
    claimed_by    BIGINT      REFERENCES sys_user (id),
    claimed_at    TIMESTAMP,
    submitted_at  TIMESTAMP,
    completed_at  TIMESTAMP,
    is_final      BOOLEAN     NOT NULL DEFAULT FALSE,        -- true means APPROVED with no further review required
    ai_model      VARCHAR(50),
    reject_reason TEXT,
    created_at    TIMESTAMP   NOT NULL DEFAULT NOW(),
    updated_at    TIMESTAMP   NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_annotation_task_company_status
    ON annotation_task (company_id, status);

CREATE INDEX IF NOT EXISTS idx_annotation_task_source_id
    ON annotation_task (source_id);

CREATE INDEX IF NOT EXISTS idx_annotation_task_claimed_by
    ON annotation_task (claimed_by);

-- ============================================================
-- 5. annotation_result (annotation result, JSONB)
-- ============================================================
CREATE TABLE IF NOT EXISTS annotation_result (
    id          BIGSERIAL NOT NULL,
    task_id     BIGINT    NOT NULL REFERENCES annotation_task (id),
    company_id  BIGINT    NOT NULL REFERENCES sys_company (id),
    result_json JSONB     NOT NULL DEFAULT '[]'::jsonb,  -- whole-document replace semantics
    created_at  TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at  TIMESTAMP NOT NULL DEFAULT NOW(),
    CONSTRAINT pk_annotation_result PRIMARY KEY (id),
    -- At most one result row per task. PostgreSQL backs this constraint with a
    -- unique btree index on (task_id), which also serves lookups by task_id.
    CONSTRAINT uk_annotation_result_task_id UNIQUE (task_id)
);

-- No separate index on task_id: the unique constraint above already provides
-- one, so an additional plain index would only add write and storage overhead.
CREATE INDEX IF NOT EXISTS idx_annotation_result_company_id
    ON annotation_result (company_id);
-- ============================================================
-- 6. training_dataset (training dataset)
--    The FK on export_batch_id is added after export_batch is created.
-- ============================================================
CREATE TABLE IF NOT EXISTS training_dataset (
    id              BIGSERIAL   PRIMARY KEY,
    company_id      BIGINT      NOT NULL REFERENCES sys_company (id),
    task_id         BIGINT      NOT NULL REFERENCES annotation_task (id),
    source_id       BIGINT      NOT NULL REFERENCES source_data (id),
    sample_type     VARCHAR(20) NOT NULL,                          -- TEXT / IMAGE / VIDEO_FRAME
    glm_format_json JSONB       NOT NULL,                          -- GLM fine-tune format
    status          VARCHAR(20) NOT NULL DEFAULT 'PENDING_REVIEW', -- PENDING_REVIEW / APPROVED / REJECTED
    export_batch_id BIGINT,                                        -- filled in after export; FK added below
    exported_at     TIMESTAMP,
    created_at      TIMESTAMP   NOT NULL DEFAULT NOW(),
    updated_at      TIMESTAMP   NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_training_dataset_company_status
    ON training_dataset (company_id, status);

CREATE INDEX IF NOT EXISTS idx_training_dataset_task_id
    ON training_dataset (task_id);

-- ============================================================
-- 7. export_batch (export batch)
-- ============================================================
CREATE TABLE IF NOT EXISTS export_batch (
    id                BIGSERIAL    PRIMARY KEY,
    company_id        BIGINT       NOT NULL REFERENCES sys_company (id),
    batch_uuid        UUID         NOT NULL DEFAULT gen_random_uuid(),
    sample_count      INT          NOT NULL DEFAULT 0,
    dataset_file_path VARCHAR(500),                                 -- RustFS path of the exported JSONL
    glm_job_id        VARCHAR(100),                                 -- GLM fine-tune job ID
    finetune_status   VARCHAR(20)  NOT NULL DEFAULT 'NOT_STARTED',  -- NOT_STARTED / RUNNING / COMPLETED / FAILED
    created_at        TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at        TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_export_batch_company_id
    ON export_batch (company_id);

-- Deferred FK: training_dataset.export_batch_id -> export_batch.id.
--
-- ALTER TABLE ... ADD CONSTRAINT has no IF NOT EXISTS form, so a plain ADD
-- fails with "constraint already exists" when this script is run a second
-- time. Dropping the constraint first (IF EXISTS) keeps the script re-runnable
-- like every other statement in it.
--
-- NOT VALID skips the validation scan of rows that already exist; new and
-- updated rows are still checked. (NULL export_batch_id values never violate
-- the FK regardless.) Because no scan happens, re-adding the constraint is
-- cheap. Note that a re-run resets a previously VALIDATEd constraint back to
-- NOT VALID; run ALTER TABLE ... VALIDATE CONSTRAINT afterwards if needed.
ALTER TABLE training_dataset
    DROP CONSTRAINT IF EXISTS fk_training_dataset_export_batch;

ALTER TABLE training_dataset
    ADD CONSTRAINT fk_training_dataset_export_batch
    FOREIGN KEY (export_batch_id) REFERENCES export_batch (id)
    NOT VALID;
-- ============================================================
-- 8. sys_config (system configuration)
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_config (
    id           BIGSERIAL    PRIMARY KEY,
    -- NULL means the row is a global default rather than a company override
    company_id   BIGINT       REFERENCES sys_company (id),
    config_key   VARCHAR(100) NOT NULL,
    config_value TEXT         NOT NULL,
    description  VARCHAR(255),
    created_at   TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at   TIMESTAMP    NOT NULL DEFAULT NOW()
);

-- Uniqueness is enforced with two partial indexes because a plain
-- UNIQUE (company_id, config_key) treats NULL company_id values as distinct.

-- Company-level rows: one value per (company, key).
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_company_key
    ON sys_config (company_id, config_key)
    WHERE company_id IS NOT NULL;

-- Global rows: one value per key.
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_global_key
    ON sys_config (config_key)
    WHERE company_id IS NULL;

-- Non-unique index spanning both company-level and global rows.
CREATE INDEX IF NOT EXISTS idx_sys_config_company_key
    ON sys_config (company_id, config_key);

-- ============================================================
-- 9. sys_operation_log (operation log, append-only)
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_operation_log (
    id             BIGSERIAL   PRIMARY KEY,
    company_id     BIGINT      NOT NULL REFERENCES sys_company (id),
    operator_id    BIGINT      REFERENCES sys_user (id),
    -- e.g. EXTRACTION_APPROVE / USER_LOGIN
    operation_type VARCHAR(50) NOT NULL,
    target_id      BIGINT,
    target_type    VARCHAR(50),
    detail         JSONB,
    -- SUCCESS / FAILURE
    result         VARCHAR(10),
    error_message  TEXT,
    operated_at    TIMESTAMP   NOT NULL DEFAULT NOW()
    -- No updated_at column: this table is append-only and rows are never updated.
);

CREATE INDEX IF NOT EXISTS idx_sys_operation_log_company_operated_at
    ON sys_operation_log (company_id, operated_at);

CREATE INDEX IF NOT EXISTS idx_sys_operation_log_operator_id
    ON sys_operation_log (operator_id);
-- ============================================================
-- 10. annotation_task_history (task status history, append-only)
-- ============================================================
CREATE TABLE IF NOT EXISTS annotation_task_history (
    id            BIGSERIAL   PRIMARY KEY,
    task_id       BIGINT      NOT NULL REFERENCES annotation_task (id),
    company_id    BIGINT      NOT NULL REFERENCES sys_company (id),
    -- Nullable, unlike to_status
    from_status   VARCHAR(20),
    to_status     VARCHAR(20) NOT NULL,
    operator_id   BIGINT      REFERENCES sys_user (id),
    operator_role VARCHAR(20),
    comment       TEXT,
    created_at    TIMESTAMP   NOT NULL DEFAULT NOW()
    -- No updated_at column: this table is append-only and rows are never updated.
);

CREATE INDEX IF NOT EXISTS idx_annotation_task_history_task_id
    ON annotation_task_history (task_id);

CREATE INDEX IF NOT EXISTS idx_annotation_task_history_company_id
    ON annotation_task_history (company_id);

-- ============================================================
-- 11. video_process_job (video processing job)
-- ============================================================
CREATE TABLE IF NOT EXISTS video_process_job (
    id            BIGSERIAL    PRIMARY KEY,
    company_id    BIGINT       NOT NULL REFERENCES sys_company (id),
    source_id     BIGINT       NOT NULL REFERENCES source_data (id),
    -- Allowed values: FRAME_EXTRACT / VIDEO_TO_TEXT
    job_type      VARCHAR(30)  NOT NULL,
    -- Allowed values: PENDING / RUNNING / SUCCESS / FAILED / RETRYING
    status        VARCHAR(20)  NOT NULL DEFAULT 'PENDING',
    -- e.g. {"frameInterval": 30, "mode": "FRAME"}
    params        JSONB,
    -- RustFS output path, set once the job has completed
    output_path   VARCHAR(500),
    retry_count   INTEGER      NOT NULL DEFAULT 0,
    max_retries   INTEGER      NOT NULL DEFAULT 3,
    error_message TEXT,
    started_at    TIMESTAMP,
    completed_at  TIMESTAMP,
    created_at    TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at    TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_video_process_job_company_id
    ON video_process_job (company_id);

CREATE INDEX IF NOT EXISTS idx_video_process_job_source_id
    ON video_process_job (source_id);

CREATE INDEX IF NOT EXISTS idx_video_process_job_status
    ON video_process_job (status);
-- ============================================================
-- Initial data
-- ============================================================

-- 1. Demo company.
--    ON CONFLICT DO NOTHING makes the insert a no-op when the name or code
--    already exists.
INSERT INTO sys_company (company_name, company_code, status)
VALUES ('演示公司', 'DEMO', 'ACTIVE')
ON CONFLICT DO NOTHING;

-- 2. Initial users (BCrypt, strength = 10), attached to the DEMO company.
--    Documented credentials (username / password):
--      admin       / admin123
--      reviewer01  / review123
--      annotator01 / annot123
--      uploader01  / upload123
--    NOTE(review): the hashes cannot be checked against these passwords from
--    this script alone; verify them, and rotate these well-known demo
--    credentials before any non-demo deployment.
WITH seed_user (username, password_hash, real_name, role) AS (
    VALUES
        ('admin',       '$2a$10$7EqJtq98hPqEX7fNZaFWoOe2Wo7qnrBCShP0e9r6xLuU3WnYjFWRa', '管理员',   'ADMIN'),
        ('reviewer01',  '$2a$10$LbyxRjSqPiN7z0bqQlLn5eL7ZjFQFJuDFMX3/ycVdDkZ5RNY2/7Oi', '审核员01', 'REVIEWER'),
        ('annotator01', '$2a$10$Nrk2z1OdKfHE1j4Sq3JpCOP0K0i5q2jV5tICqX7c1W2YqEwPq8HHi', '标注员01', 'ANNOTATOR'),
        ('uploader01',  '$2a$10$3z5q9mLw4r0F6f8v1Xe3AO2bFdG9kK7m5pN1sT8uY4wZ6dH0jI2eR', '上传员01', 'UPLOADER')
)
INSERT INTO sys_user (company_id, username, password_hash, real_name, role, status)
SELECT
    c.id,
    u.username,
    u.password_hash,
    u.real_name,
    u.role,
    'ACTIVE'
FROM sys_company AS c
CROSS JOIN seed_user AS u
WHERE c.company_code = 'DEMO'
ON CONFLICT (company_id, username) DO NOTHING;

-- 3. Global system configuration (company_id NULL = global default).
--    No conflict target is given, so any unique violation is skipped silently.
INSERT INTO sys_config (company_id, config_key, config_value, description)
VALUES
    (NULL, 'token_ttl_seconds', '7200', '会话凭证有效期(秒)'),
    (NULL, 'model_default', 'glm-4', 'AI 辅助默认模型'),
    (NULL, 'video_frame_interval', '30', '视频帧提取间隔(帧数)'),
    (NULL, 'prompt_extract_text', '请提取以下文本中的主语-谓语-宾语三元组,以JSON数组格式返回,每个元素包含subject、predicate、object、sourceText、startOffset、endOffset字段。', '文本三元组提取 Prompt 模板'),
    (NULL, 'prompt_extract_image', '请提取图片中的实体关系四元组,以JSON数组格式返回,每个元素包含subject、relation、object、modifier、confidence字段。', '图片四元组提取 Prompt 模板'),
    (NULL, 'prompt_qa_gen_text', '根据以下文本三元组生成高质量问答对,以JSON数组格式返回,每个元素包含question、answer、difficulty字段。', '文本问答生成 Prompt 模板'),
    (NULL, 'prompt_qa_gen_image', '根据以下图片四元组生成高质量问答对,以JSON数组格式返回,每个元素包含question、answer、imageRef、difficulty字段。', '图片问答生成 Prompt 模板')
ON CONFLICT DO NOTHING;