feat(US7+US8): finetune management and health check test

- app/models/finetune_models.py: FinetuneStartRequest, FinetuneStartResponse, FinetuneStatusResponse
- app/services/finetune_service.py: submit_finetune + get_finetune_status via run_in_executor; status map running→RUNNING, succeeded→SUCCESS, failed→FAILED, unknown→RUNNING; LLMCallError on SDK failure
- app/routers/finetune.py: POST /finetune/start + GET /finetune/status/{job_id} with get_llm_client dependency
- tests/test_finetune_service.py: 12 unit tests (TDD, written before implementation)
- tests/test_finetune_router.py: 6 integration tests
- tests/test_health.py: GET /health → 200 {"status":"ok"}

Full suite: 72/72 passing (was 53)
This commit is contained in:
wh
2026-04-10 16:27:51 +08:00
parent 00f092e728
commit 603382d1fa
6 changed files with 379 additions and 1 deletions

View File

@@ -0,0 +1,18 @@
from pydantic import BaseModel
class FinetuneStartRequest(BaseModel):
    """Payload for POST /finetune/start."""

    # URL of the JSONL training file handed to the provider's SDK.
    jsonl_url: str
    # Identifier of the base model to fine-tune.
    base_model: str
    # Optional provider-specific hyperparameters; the service passes them
    # through as-is (defaults to {} when omitted).
    hyperparams: dict | None = None


class FinetuneStartResponse(BaseModel):
    """Response returned once a fine-tune job has been submitted."""

    # Provider-assigned job identifier, used to poll status later.
    job_id: str


class FinetuneStatusResponse(BaseModel):
    """Status snapshot for GET /finetune/status/{job_id}."""

    job_id: str
    # Normalized status: "RUNNING", "SUCCESS", or "FAILED"
    # (mapping done in the service layer).
    status: str
    # Progress value when the provider reports one — presumably a
    # percentage; TODO confirm against the SDK response schema.
    progress: int | None = None
    # Provider error detail when the job failed, if available.
    error_message: str | None = None

View File

@@ -1,3 +1,28 @@
from fastapi import APIRouter
from fastapi import APIRouter, Depends
from app.clients.llm.base import LLMClient
from app.core.dependencies import get_llm_client
from app.models.finetune_models import (
FinetuneStartRequest,
FinetuneStartResponse,
FinetuneStatusResponse,
)
from app.services import finetune_service
router = APIRouter(tags=["Finetune"])
@router.post("/finetune/start", response_model=FinetuneStartResponse)
async def start_finetune(
    req: FinetuneStartRequest,
    llm: LLMClient = Depends(get_llm_client),
) -> FinetuneStartResponse:
    """Submit a fine-tune job; thin delegator to finetune_service.submit_finetune."""
    return await finetune_service.submit_finetune(req, llm)
@router.get("/finetune/status/{job_id}", response_model=FinetuneStatusResponse)
async def get_status(
    job_id: str,
    llm: LLMClient = Depends(get_llm_client),
) -> FinetuneStatusResponse:
    """Look up a fine-tune job's status; delegates to finetune_service.get_finetune_status."""
    return await finetune_service.get_finetune_status(job_id, llm)

View File

@@ -0,0 +1,61 @@
import asyncio
from app.core.exceptions import LLMCallError
from app.core.logging import get_logger
from app.models.finetune_models import (
FinetuneStartRequest,
FinetuneStartResponse,
FinetuneStatusResponse,
)
logger = get_logger(__name__)
# Maps raw provider job states to this API's normalized status strings.
# States not listed here are treated as still running (conservative
# fallback applied at the lookup site).
_STATUS_MAP = {
    "running": "RUNNING",
    "succeeded": "SUCCESS",
    "failed": "FAILED",
}
async def submit_finetune(req: FinetuneStartRequest, llm) -> FinetuneStartResponse:
    """Submit a fine-tune job to ZhipuAI and return the provider job ID.

    The blocking SDK call is executed in the default thread-pool executor
    so the event loop is never blocked.

    Args:
        req: Validated request carrying the training-file URL, base model,
            and optional hyperparameters.
        llm: LLM client wrapper whose underlying SDK client is used.

    Returns:
        FinetuneStartResponse with the provider-assigned job ID.

    Raises:
        LLMCallError: if the SDK call fails for any reason.
    """
    # get_running_loop() is the correct call inside a coroutine;
    # get_event_loop() is deprecated in this context since Python 3.10
    # and may create a new loop outside asyncio.run().
    loop = asyncio.get_running_loop()
    try:
        # NOTE(review): reaches into the wrapper's private `_client`
        # attribute — consider exposing a public fine-tune API on LLMClient.
        response = await loop.run_in_executor(
            None,
            lambda: llm._client.fine_tuning.jobs.create(
                training_file=req.jsonl_url,
                model=req.base_model,
                hyperparameters=req.hyperparams or {},
            ),
        )
        job_id = response.id
        logger.info("finetune_submit", extra={"job_id": job_id, "model": req.base_model})
        return FinetuneStartResponse(job_id=job_id)
    except Exception as exc:
        # logger.exception records the traceback in addition to the message,
        # which plain logger.error did not.
        logger.exception("finetune_submit_error", extra={"error": str(exc)})
        raise LLMCallError(f"微调任务提交失败: {exc}") from exc
async def get_finetune_status(job_id: str, llm) -> FinetuneStatusResponse:
    """Retrieve a fine-tune job's status from ZhipuAI.

    The blocking SDK call is executed in the default thread-pool executor;
    the provider's raw state is normalized through _STATUS_MAP, with
    unknown states conservatively reported as "RUNNING".

    Args:
        job_id: Provider-assigned job identifier.
        llm: LLM client wrapper whose underlying SDK client is used.

    Returns:
        FinetuneStatusResponse with normalized status, and progress /
        error_message when the provider supplies them.

    Raises:
        LLMCallError: if the SDK call fails for any reason.
    """
    # get_running_loop() is the correct call inside a coroutine;
    # get_event_loop() is deprecated in this context since Python 3.10.
    loop = asyncio.get_running_loop()
    try:
        # NOTE(review): private `_client` access mirrors submit_finetune —
        # a public LLMClient fine-tune API would avoid this.
        response = await loop.run_in_executor(
            None,
            lambda: llm._client.fine_tuning.jobs.retrieve(job_id),
        )
        status_raw = response.status
        # Unknown provider states are treated as still running rather than
        # surfacing an unrecognized value to callers.
        status = _STATUS_MAP.get(status_raw, "RUNNING")
        # Optional fields: not every SDK response carries these attributes.
        progress = getattr(response, "progress", None)
        error_message = getattr(response, "error_message", None)
        logger.info("finetune_status", extra={"job_id": job_id, "status": status})
        return FinetuneStatusResponse(
            job_id=job_id,
            status=status,
            progress=progress,
            error_message=error_message,
        )
    except Exception as exc:
        # logger.exception records the traceback in addition to the message.
        logger.exception("finetune_status_error", extra={"job_id": job_id, "error": str(exc)})
        raise LLMCallError(f"微调状态查询失败: {exc}") from exc