feat(US7+US8): finetune management and health check test

- app/models/finetune_models.py: FinetuneStartRequest, FinetuneStartResponse, FinetuneStatusResponse
- app/services/finetune_service.py: submit_finetune + get_finetune_status via run_in_executor; status map running→RUNNING, succeeded→SUCCESS, failed→FAILED, unknown→RUNNING; LLMCallError on SDK failure
- app/routers/finetune.py: POST /finetune/start + GET /finetune/status/{job_id} with get_llm_client dependency
- tests/test_finetune_service.py: 12 unit tests (TDD, written before implementation)
- tests/test_finetune_router.py: 6 integration tests
- tests/test_health.py: GET /health → 200 {"status":"ok"}

Full suite: 72/72 passing (was 53)
This commit is contained in:
wh
2026-04-10 16:27:51 +08:00
parent 00f092e728
commit 603382d1fa
6 changed files with 379 additions and 1 deletions

View File

@@ -0,0 +1,61 @@
import asyncio
from app.core.exceptions import LLMCallError
from app.core.logging import get_logger
from app.models.finetune_models import (
FinetuneStartRequest,
FinetuneStartResponse,
FinetuneStatusResponse,
)
# Module-level logger, obtained from the project's get_logger helper and
# keyed by this module's name.
logger = get_logger(__name__)
_STATUS_MAP = {
"running": "RUNNING",
"succeeded": "SUCCESS",
"failed": "FAILED",
}
async def submit_finetune(req: FinetuneStartRequest, llm) -> FinetuneStartResponse:
    """Submit a fine-tune job to ZhipuAI and return the job ID.

    The SDK call is synchronous, so it is dispatched to the event loop's
    default executor to avoid blocking other coroutines.

    Args:
        req: Request carrying ``jsonl_url`` (passed as the training file),
            ``base_model`` and optional ``hyperparams``.
        llm: Client wrapper whose ``_client.fine_tuning.jobs.create`` is
            invoked to create the job.

    Returns:
        A ``FinetuneStartResponse`` holding the ``id`` of the created job.

    Raises:
        LLMCallError: If the SDK call, or the handling of its response,
            raises any exception; the original exception is chained.
    """
    # Inside a coroutine the running loop is always available;
    # get_running_loop() is the documented way to obtain it and never
    # creates a loop implicitly.
    loop = asyncio.get_running_loop()
    try:
        response = await loop.run_in_executor(
            None,  # default executor
            lambda: llm._client.fine_tuning.jobs.create(
                training_file=req.jsonl_url,
                model=req.base_model,
                # A missing/empty hyperparams value is sent as an empty dict.
                hyperparameters=req.hyperparams or {},
            ),
        )
        job_id = response.id
        logger.info("finetune_submit", extra={"job_id": job_id, "model": req.base_model})
        return FinetuneStartResponse(job_id=job_id)
    except Exception as exc:
        # Service boundary: every failure is reported uniformly as LLMCallError.
        logger.error("finetune_submit_error", extra={"error": str(exc)})
        raise LLMCallError(f"微调任务提交失败: {exc}") from exc
async def get_finetune_status(job_id: str, llm) -> FinetuneStatusResponse:
    """Retrieve fine-tune job status from ZhipuAI.

    The SDK call is synchronous, so it is dispatched to the event loop's
    default executor to avoid blocking other coroutines.

    Args:
        job_id: Identifier of the fine-tune job to look up.
        llm: Client wrapper whose ``_client.fine_tuning.jobs.retrieve`` is
            invoked to fetch the job.

    Returns:
        A ``FinetuneStatusResponse`` with the mapped status and, when the
        SDK response provides them, ``progress`` and ``error_message``
        (otherwise ``None``).

    Raises:
        LLMCallError: If the SDK call, or the handling of its response,
            raises any exception; the original exception is chained.
    """
    # Inside a coroutine the running loop is always available;
    # get_running_loop() is the documented way to obtain it and never
    # creates a loop implicitly.
    loop = asyncio.get_running_loop()
    try:
        response = await loop.run_in_executor(
            None,  # default executor
            lambda: llm._client.fine_tuning.jobs.retrieve(job_id),
        )
        status_raw = response.status
        # Conservative fallback: any status missing from _STATUS_MAP is
        # reported as RUNNING rather than as a terminal state.
        status = _STATUS_MAP.get(status_raw, "RUNNING")
        # These attributes are read defensively; absent ones become None.
        progress = getattr(response, "progress", None)
        error_message = getattr(response, "error_message", None)
        logger.info("finetune_status", extra={"job_id": job_id, "status": status})
        return FinetuneStatusResponse(
            job_id=job_id,
            status=status,
            progress=progress,
            error_message=error_message,
        )
    except Exception as exc:
        # Service boundary: every failure is reported uniformly as LLMCallError.
        logger.error("finetune_status_error", extra={"job_id": job_id, "error": str(exc)})
        raise LLMCallError(f"微调状态查询失败: {exc}") from exc