-- label_backend init.sql
-- Target: PostgreSQL 14+
--
-- Provenance: payload of patch 672fe888c93e389417313596a0d0b0cb8dbf5db8
-- (author: wh, 2026-04-09), "feat(db): create all 11 table DDL and initial
-- data (T003)", which adds sql/init.sql.
--
-- Creates all 11 tables in dependency order (a table is always created after
-- every table it references):
--   sys_company -> sys_user -> source_data -> annotation_task
--   -> annotation_result -> export_batch -> training_dataset -> sys_config
--   -> sys_operation_log -> annotation_task_history -> video_process_job
-- Also creates the indexes and inserts the initial seed/configuration rows.
--
-- The script is meant to be re-runnable: every CREATE uses IF NOT EXISTS and
-- every seed INSERT uses ON CONFLICT ... DO NOTHING. It deliberately contains
-- no ALTER TABLE ... ADD CONSTRAINT, because that statement has no
-- IF NOT EXISTS form and would fail on a second run.

-- ============================================================
-- Extensions
-- ============================================================
-- NOTE(review): gen_random_uuid() (used by export_batch.batch_uuid) is built
-- into PostgreSQL 13+, so pgcrypto may not be required on the stated 14+
-- target. Kept in case other code relies on it -- confirm before removing.
CREATE EXTENSION IF NOT EXISTS pgcrypto;

-- ============================================================
-- 1. sys_company (tenant)
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_company (
    id           BIGSERIAL    PRIMARY KEY,
    company_name VARCHAR(100) NOT NULL,
    company_code VARCHAR(50)  NOT NULL,
    status       VARCHAR(10)  NOT NULL DEFAULT 'ACTIVE',  -- ACTIVE / DISABLED
    created_at   TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at   TIMESTAMP    NOT NULL DEFAULT NOW(),
    CONSTRAINT uk_sys_company_name UNIQUE (company_name),
    CONSTRAINT uk_sys_company_code UNIQUE (company_code)
);

-- ============================================================
-- 2. sys_user (user)
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_user (
    id            BIGSERIAL    PRIMARY KEY,
    company_id    BIGINT       NOT NULL REFERENCES sys_company(id),
    username      VARCHAR(50)  NOT NULL,
    password_hash VARCHAR(255) NOT NULL,                   -- BCrypt, strength >= 10
    real_name     VARCHAR(50),
    role          VARCHAR(20)  NOT NULL,                   -- UPLOADER / ANNOTATOR / REVIEWER / ADMIN
    status        VARCHAR(10)  NOT NULL DEFAULT 'ACTIVE',  -- ACTIVE / DISABLED
    created_at    TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at    TIMESTAMP    NOT NULL DEFAULT NOW(),
    -- Usernames are unique per company, not globally.
    CONSTRAINT uk_sys_user_company_username UNIQUE (company_id, username)
);

CREATE INDEX IF NOT EXISTS idx_sys_user_company_id
    ON sys_user (company_id);

-- ============================================================
-- 3. source_data (uploaded source material)
-- ============================================================
CREATE TABLE IF NOT EXISTS source_data (
    id               BIGSERIAL    PRIMARY KEY,
    company_id       BIGINT       NOT NULL REFERENCES sys_company(id),
    uploader_id      BIGINT       REFERENCES sys_user(id),
    data_type        VARCHAR(20)  NOT NULL,                 -- TEXT / IMAGE / VIDEO
    file_path        VARCHAR(500) NOT NULL,                 -- RustFS object path
    file_name        VARCHAR(255) NOT NULL,
    file_size        BIGINT,
    bucket_name      VARCHAR(100) NOT NULL,
    parent_source_id BIGINT       REFERENCES source_data(id),  -- self-reference: video frame / text segment
    status           VARCHAR(20)  NOT NULL DEFAULT 'PENDING',
        -- PENDING / PREPROCESSING / EXTRACTING / QA_REVIEW / APPROVED
    reject_reason    TEXT,                                  -- reserved (there is currently no REJECTED status)
    created_at       TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at       TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_source_data_company_id
    ON source_data (company_id);
CREATE INDEX IF NOT EXISTS idx_source_data_company_status
    ON source_data (company_id, status);
CREATE INDEX IF NOT EXISTS idx_source_data_parent_source_id
    ON source_data (parent_source_id);

-- ============================================================
-- 4. annotation_task (annotation task)
-- ============================================================
CREATE TABLE IF NOT EXISTS annotation_task (
    id            BIGSERIAL   PRIMARY KEY,
    company_id    BIGINT      NOT NULL REFERENCES sys_company(id),
    source_id     BIGINT      NOT NULL REFERENCES source_data(id),
    task_type     VARCHAR(30) NOT NULL,                     -- EXTRACTION / QA_GENERATION
    status        VARCHAR(20) NOT NULL DEFAULT 'UNCLAIMED',
        -- UNCLAIMED / IN_PROGRESS / SUBMITTED / APPROVED / REJECTED
    claimed_by    BIGINT      REFERENCES sys_user(id),
    claimed_at    TIMESTAMP,
    submitted_at  TIMESTAMP,
    completed_at  TIMESTAMP,
    is_final      BOOLEAN     NOT NULL DEFAULT FALSE,       -- TRUE = APPROVED and no further review needed
    ai_model      VARCHAR(50),
    reject_reason TEXT,
    created_at    TIMESTAMP   NOT NULL DEFAULT NOW(),
    updated_at    TIMESTAMP   NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_annotation_task_company_status
    ON annotation_task (company_id, status);
CREATE INDEX IF NOT EXISTS idx_annotation_task_source_id
    ON annotation_task (source_id);
CREATE INDEX IF NOT EXISTS idx_annotation_task_claimed_by
    ON annotation_task (claimed_by);

-- ============================================================
-- 5. annotation_result (annotation result, JSONB)
-- ============================================================
CREATE TABLE IF NOT EXISTS annotation_result (
    id          BIGSERIAL NOT NULL,
    task_id     BIGINT    NOT NULL REFERENCES annotation_task(id),
    company_id  BIGINT    NOT NULL REFERENCES sys_company(id),
    result_json JSONB     NOT NULL DEFAULT '[]'::jsonb,     -- whole-document replacement semantics
    created_at  TIMESTAMP NOT NULL DEFAULT NOW(),
    updated_at  TIMESTAMP NOT NULL DEFAULT NOW(),
    CONSTRAINT pk_annotation_result PRIMARY KEY (id),
    -- At most one result row per task. This constraint is backed by a unique
    -- index on (task_id), so no separate plain index on task_id is created.
    CONSTRAINT uk_annotation_result_task_id UNIQUE (task_id)
);

CREATE INDEX IF NOT EXISTS idx_annotation_result_company_id
    ON annotation_result (company_id);

-- ============================================================
-- 6. export_batch (export batch)
--    Created before training_dataset because training_dataset references it.
-- ============================================================
CREATE TABLE IF NOT EXISTS export_batch (
    id                BIGSERIAL    PRIMARY KEY,
    company_id        BIGINT       NOT NULL REFERENCES sys_company(id),
    batch_uuid        UUID         NOT NULL DEFAULT gen_random_uuid(),
    sample_count      INT          NOT NULL DEFAULT 0,
    dataset_file_path VARCHAR(500),                         -- RustFS path of the exported JSONL
    glm_job_id        VARCHAR(100),                         -- GLM fine-tune job ID
    finetune_status   VARCHAR(20)  NOT NULL DEFAULT 'NOT_STARTED',
        -- NOT_STARTED / RUNNING / COMPLETED / FAILED
    created_at        TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at        TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_export_batch_company_id
    ON export_batch (company_id);

-- ============================================================
-- 7. training_dataset (training dataset)
-- ============================================================
CREATE TABLE IF NOT EXISTS training_dataset (
    id              BIGSERIAL   PRIMARY KEY,
    company_id      BIGINT      NOT NULL REFERENCES sys_company(id),
    task_id         BIGINT      NOT NULL REFERENCES annotation_task(id),
    source_id       BIGINT      NOT NULL REFERENCES source_data(id),
    sample_type     VARCHAR(20) NOT NULL,                   -- TEXT / IMAGE / VIDEO_FRAME
    glm_format_json JSONB       NOT NULL,                   -- GLM fine-tune format
    status          VARCHAR(20) NOT NULL DEFAULT 'PENDING_REVIEW',
        -- PENDING_REVIEW / APPROVED / REJECTED
    export_batch_id BIGINT,                                 -- filled in after export; NULL until then
    exported_at     TIMESTAMP,
    created_at      TIMESTAMP   NOT NULL DEFAULT NOW(),
    updated_at      TIMESTAMP   NOT NULL DEFAULT NOW(),
    -- Declared inline (rather than via a later ALTER TABLE) so re-running the
    -- script cannot fail with "constraint already exists". NOT VALID is not
    -- needed: NULL export_batch_id values always satisfy a foreign key.
    CONSTRAINT fk_training_dataset_export_batch
        FOREIGN KEY (export_batch_id) REFERENCES export_batch(id)
);

CREATE INDEX IF NOT EXISTS idx_training_dataset_company_status
    ON training_dataset (company_id, status);
CREATE INDEX IF NOT EXISTS idx_training_dataset_task_id
    ON training_dataset (task_id);

-- ============================================================
-- 8. sys_config (system configuration)
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_config (
    id           BIGSERIAL    PRIMARY KEY,
    company_id   BIGINT       REFERENCES sys_company(id),   -- NULL = global default
    config_key   VARCHAR(100) NOT NULL,
    config_value TEXT         NOT NULL,
    description  VARCHAR(255),
    created_at   TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at   TIMESTAMP    NOT NULL DEFAULT NOW()
);

-- Two partial unique indexes are used because a plain UNIQUE on
-- (company_id, config_key) would treat NULL company_id values as distinct and
-- therefore allow duplicate global keys.

-- Uniqueness of company-level settings.
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_company_key
    ON sys_config (company_id, config_key)
    WHERE company_id IS NOT NULL;

-- Uniqueness of global settings.
CREATE UNIQUE INDEX IF NOT EXISTS uk_sys_config_global_key
    ON sys_config (config_key)
    WHERE company_id IS NULL;

CREATE INDEX IF NOT EXISTS idx_sys_config_company_key
    ON sys_config (company_id, config_key);

-- ============================================================
-- 9. sys_operation_log (operation log, append-only)
-- ============================================================
CREATE TABLE IF NOT EXISTS sys_operation_log (
    id             BIGSERIAL   PRIMARY KEY,
    company_id     BIGINT      NOT NULL REFERENCES sys_company(id),
    operator_id    BIGINT      REFERENCES sys_user(id),
    operation_type VARCHAR(50) NOT NULL,                    -- e.g. EXTRACTION_APPROVE / USER_LOGIN
    target_id      BIGINT,
    target_type    VARCHAR(50),
    detail         JSONB,
    result         VARCHAR(10),                             -- SUCCESS / FAILURE
    error_message  TEXT,
    operated_at    TIMESTAMP   NOT NULL DEFAULT NOW()
    -- No updated_at: append-only table, rows are never updated.
);

CREATE INDEX IF NOT EXISTS idx_sys_operation_log_company_operated_at
    ON sys_operation_log (company_id, operated_at);
CREATE INDEX IF NOT EXISTS idx_sys_operation_log_operator_id
    ON sys_operation_log (operator_id);

-- ============================================================
-- 10. annotation_task_history (task status history, append-only)
-- ============================================================
CREATE TABLE IF NOT EXISTS annotation_task_history (
    id            BIGSERIAL   PRIMARY KEY,
    task_id       BIGINT      NOT NULL REFERENCES annotation_task(id),
    company_id    BIGINT      NOT NULL REFERENCES sys_company(id),
    from_status   VARCHAR(20),
    to_status     VARCHAR(20) NOT NULL,
    operator_id   BIGINT      REFERENCES sys_user(id),
    operator_role VARCHAR(20),
    comment       TEXT,
    created_at    TIMESTAMP   NOT NULL DEFAULT NOW()
    -- No updated_at: append-only table, rows are never updated.
);

CREATE INDEX IF NOT EXISTS idx_annotation_task_history_task_id
    ON annotation_task_history (task_id);
CREATE INDEX IF NOT EXISTS idx_annotation_task_history_company_id
    ON annotation_task_history (company_id);

-- ============================================================
-- 11. video_process_job (video processing job)
-- ============================================================
CREATE TABLE IF NOT EXISTS video_process_job (
    id            BIGSERIAL    PRIMARY KEY,
    company_id    BIGINT       NOT NULL REFERENCES sys_company(id),
    source_id     BIGINT       NOT NULL REFERENCES source_data(id),
    job_type      VARCHAR(30)  NOT NULL,                    -- FRAME_EXTRACT / VIDEO_TO_TEXT
    status        VARCHAR(20)  NOT NULL DEFAULT 'PENDING',
        -- PENDING / RUNNING / SUCCESS / FAILED / RETRYING
    params        JSONB,                                    -- e.g. {"frameInterval": 30, "mode": "FRAME"}
    output_path   VARCHAR(500),                             -- RustFS output path once completed
    retry_count   INT          NOT NULL DEFAULT 0,
    max_retries   INT          NOT NULL DEFAULT 3,
    error_message TEXT,
    started_at    TIMESTAMP,
    completed_at  TIMESTAMP,
    created_at    TIMESTAMP    NOT NULL DEFAULT NOW(),
    updated_at    TIMESTAMP    NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_video_process_job_company_id
    ON video_process_job (company_id);
CREATE INDEX IF NOT EXISTS idx_video_process_job_source_id
    ON video_process_job (source_id);
CREATE INDEX IF NOT EXISTS idx_video_process_job_status
    ON video_process_job (status);

-- ============================================================
-- Seed data
-- ============================================================

-- 1. Demo company.
-- No conflict target on purpose: the row is skipped if either unique
-- constraint (company_name or company_code) already holds it.
INSERT INTO sys_company (company_name, company_code, status)
VALUES ('演示公司', 'DEMO', 'ACTIVE')
ON CONFLICT DO NOTHING;

-- 2. Initial users of the demo company (BCrypt strength = 10).
--      admin       / admin123
--      reviewer01  / review123
--      annotator01 / annot123
--      uploader01  / upload123
-- NOTE(review): these are well-known demo credentials -- rotate or remove them
-- outside demo environments. The hashes below were not verified against the
-- plaintext passwords listed above; confirm they match before relying on them.
-- Nothing is inserted if the DEMO company row does not exist.
INSERT INTO sys_user (company_id, username, password_hash, real_name, role, status)
SELECT
    c.id,
    u.username,
    u.password_hash,
    u.real_name,
    u.role,
    'ACTIVE'
FROM sys_company AS c
CROSS JOIN (VALUES
    ('admin',
     '$2a$10$7EqJtq98hPqEX7fNZaFWoOe2Wo7qnrBCShP0e9r6xLuU3WnYjFWRa',
     '管理员',
     'ADMIN'),
    ('reviewer01',
     '$2a$10$LbyxRjSqPiN7z0bqQlLn5eL7ZjFQFJuDFMX3/ycVdDkZ5RNY2/7Oi',
     '审核员01',
     'REVIEWER'),
    ('annotator01',
     '$2a$10$Nrk2z1OdKfHE1j4Sq3JpCOP0K0i5q2jV5tICqX7c1W2YqEwPq8HHi',
     '标注员01',
     'ANNOTATOR'),
    ('uploader01',
     '$2a$10$3z5q9mLw4r0F6f8v1Xe3AO2bFdG9kK7m5pN1sT8uY4wZ6dH0jI2eR',
     '上传员01',
     'UPLOADER')
) AS u (username, password_hash, real_name, role)
WHERE c.company_code = 'DEMO'
ON CONFLICT (company_id, username) DO NOTHING;

-- 3. Global system configuration (company_id = NULL).
-- No conflict target on purpose: without one, any unique index can arbitrate,
-- including the partial index uk_sys_config_global_key that covers these
-- company_id IS NULL rows, so re-running does not duplicate global keys.
INSERT INTO sys_config (company_id, config_key, config_value, description)
VALUES
    (NULL, 'token_ttl_seconds', '7200',
     '会话凭证有效期(秒)'),
    (NULL, 'model_default', 'glm-4',
     'AI 辅助默认模型'),
    (NULL, 'video_frame_interval', '30',
     '视频帧提取间隔(帧数)'),
    (NULL, 'prompt_extract_text',
     '请提取以下文本中的主语-谓语-宾语三元组,以JSON数组格式返回,每个元素包含subject、predicate、object、sourceText、startOffset、endOffset字段。',
     '文本三元组提取 Prompt 模板'),
    (NULL, 'prompt_extract_image',
     '请提取图片中的实体关系四元组,以JSON数组格式返回,每个元素包含subject、relation、object、modifier、confidence字段。',
     '图片四元组提取 Prompt 模板'),
    (NULL, 'prompt_qa_gen_text',
     '根据以下文本三元组生成高质量问答对,以JSON数组格式返回,每个元素包含question、answer、difficulty字段。',
     '文本问答生成 Prompt 模板'),
    (NULL, 'prompt_qa_gen_image',
     '根据以下图片四元组生成高质量问答对,以JSON数组格式返回,每个元素包含question、answer、imageRef、difficulty字段。',
     '图片问答生成 Prompt 模板')
ON CONFLICT DO NOTHING;