feat(US7+US8): finetune management and health check test
- app/models/finetune_models.py: FinetuneStartRequest, FinetuneStartResponse, FinetuneStatusResponse
- app/services/finetune_service.py: submit_finetune + get_finetune_status via run_in_executor; status map running→RUNNING, succeeded→SUCCESS, failed→FAILED, unknown→RUNNING; LLMCallError on SDK failure
- app/routers/finetune.py: POST /finetune/start + GET /finetune/status/{job_id} with get_llm_client dependency
- tests/test_finetune_service.py: 12 unit tests (TDD, written before implementation)
- tests/test_finetune_router.py: 6 integration tests
- tests/test_health.py: GET /health → 200 {"status":"ok"}
Full suite: 72/72 passing (was 53)
This commit is contained in:
18
app/models/finetune_models.py
Normal file
18
app/models/finetune_models.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class FinetuneStartRequest(BaseModel):
    """Request body for POST /finetune/start."""

    # URL of the JSONL training file handed to the provider as the
    # training_file argument.
    jsonl_url: str
    # Name of the base model to fine-tune (passed through to the ZhipuAI SDK).
    base_model: str
    # Optional provider-specific hyperparameters; None means "use provider
    # defaults" (the service normalizes None to {} before the SDK call).
    hyperparams: dict | None = None
|
||||
|
||||
|
||||
class FinetuneStartResponse(BaseModel):
    """Response body for POST /finetune/start."""

    # Provider-assigned fine-tune job identifier, used later to poll status.
    job_id: str
|
||||
|
||||
|
||||
class FinetuneStatusResponse(BaseModel):
    """Response body for GET /finetune/status/{job_id}."""

    # Job identifier echoed back from the request path.
    job_id: str
    # Normalized status string: "RUNNING", "SUCCESS", or "FAILED"
    # (unknown provider statuses fall back to "RUNNING" in the service).
    status: str
    # Progress value as reported by the provider, when present.
    # NOTE(review): presumably a percentage — confirm against the SDK docs.
    progress: int | None = None
    # Provider error detail for failed jobs; None otherwise.
    error_message: str | None = None
|
||||
@@ -1,3 +1,28 @@
|
||||
from fastapi import APIRouter
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from app.clients.llm.base import LLMClient
|
||||
from app.core.dependencies import get_llm_client
|
||||
from app.models.finetune_models import (
|
||||
FinetuneStartRequest,
|
||||
FinetuneStartResponse,
|
||||
FinetuneStatusResponse,
|
||||
)
|
||||
from app.services import finetune_service
|
||||
|
||||
router = APIRouter(tags=["Finetune"])
|
||||
|
||||
|
||||
@router.post("/finetune/start", response_model=FinetuneStartResponse)
async def start_finetune(
    req: FinetuneStartRequest,
    llm: LLMClient = Depends(get_llm_client),
) -> FinetuneStartResponse:
    """Submit a new fine-tune job and return the provider-assigned job ID.

    Thin HTTP wrapper: validation is done by the request model and all
    work is delegated to the service layer.
    """
    result = await finetune_service.submit_finetune(req, llm)
    return result
|
||||
|
||||
|
||||
@router.get("/finetune/status/{job_id}", response_model=FinetuneStatusResponse)
async def get_status(
    job_id: str,
    llm: LLMClient = Depends(get_llm_client),
) -> FinetuneStatusResponse:
    """Return the current status of a previously submitted fine-tune job.

    Thin HTTP wrapper: all provider interaction and status normalization
    happen in the service layer.
    """
    result = await finetune_service.get_finetune_status(job_id, llm)
    return result
|
||||
|
||||
61
app/services/finetune_service.py
Normal file
61
app/services/finetune_service.py
Normal file
@@ -0,0 +1,61 @@
|
||||
import asyncio
|
||||
|
||||
from app.core.exceptions import LLMCallError
|
||||
from app.core.logging import get_logger
|
||||
from app.models.finetune_models import (
|
||||
FinetuneStartRequest,
|
||||
FinetuneStartResponse,
|
||||
FinetuneStatusResponse,
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Translation table from raw ZhipuAI job statuses to this API's normalized
# vocabulary. Lookups fall back to "RUNNING" for any unlisted status.
_STATUS_MAP = {
    "running": "RUNNING",
    "succeeded": "SUCCESS",
    "failed": "FAILED",
}
|
||||
|
||||
|
||||
async def submit_finetune(req: FinetuneStartRequest, llm) -> FinetuneStartResponse:
    """Submit a fine-tune job to ZhipuAI and return the job ID.

    Args:
        req: Validated request carrying the training-file URL, base-model
            name, and optional hyperparameters.
        llm: LLM client wrapper; assumed to expose the synchronous ZhipuAI
            SDK client as ``_client``.

    Returns:
        FinetuneStartResponse with the provider-assigned job ID.

    Raises:
        LLMCallError: If the underlying SDK call fails for any reason.
    """
    # get_running_loop() is the supported accessor inside a coroutine;
    # get_event_loop() is deprecated for this use since Python 3.10.
    loop = asyncio.get_running_loop()
    try:
        # The ZhipuAI SDK is blocking; off-load to the default thread pool
        # so the event loop stays responsive.
        response = await loop.run_in_executor(
            None,
            lambda: llm._client.fine_tuning.jobs.create(
                training_file=req.jsonl_url,
                model=req.base_model,
                # SDK expects a mapping; normalize None to {}.
                hyperparameters=req.hyperparams or {},
            ),
        )
    except Exception as exc:
        # Service boundary: wrap any SDK failure in the domain exception.
        logger.error("finetune_submit_error", extra={"error": str(exc)})
        raise LLMCallError(f"微调任务提交失败: {exc}") from exc
    # Success path kept outside the try so local bugs (e.g. a missing
    # attribute) surface as-is instead of being masked as LLMCallError.
    job_id = response.id
    logger.info("finetune_submit", extra={"job_id": job_id, "model": req.base_model})
    return FinetuneStartResponse(job_id=job_id)
|
||||
|
||||
|
||||
async def get_finetune_status(job_id: str, llm) -> FinetuneStatusResponse:
    """Retrieve fine-tune job status from ZhipuAI.

    Args:
        job_id: Provider-assigned fine-tune job identifier.
        llm: LLM client wrapper; assumed to expose the synchronous ZhipuAI
            SDK client as ``_client``.

    Returns:
        FinetuneStatusResponse with a normalized status plus progress and
        error detail when the provider reports them.

    Raises:
        LLMCallError: If the underlying SDK call fails for any reason.
    """
    # get_running_loop() replaces the deprecated-in-coroutine
    # get_event_loop() (Python 3.10+).
    loop = asyncio.get_running_loop()
    try:
        # Blocking SDK call runs on the default executor.
        response = await loop.run_in_executor(
            None,
            lambda: llm._client.fine_tuning.jobs.retrieve(job_id),
        )
    except Exception as exc:
        # Service boundary: wrap any SDK failure in the domain exception.
        logger.error("finetune_status_error", extra={"job_id": job_id, "error": str(exc)})
        raise LLMCallError(f"微调状态查询失败: {exc}") from exc
    # Unknown provider statuses map to RUNNING — a conservative fallback
    # that keeps clients polling instead of falsely reporting a terminal
    # state. Success path lives outside the try so local bugs are not
    # masked as LLMCallError.
    status = _STATUS_MAP.get(response.status, "RUNNING")
    logger.info("finetune_status", extra={"job_id": job_id, "status": status})
    return FinetuneStatusResponse(
        job_id=job_id,
        status=status,
        # Optional fields: not every SDK response carries these attributes.
        progress=getattr(response, "progress", None),
        error_message=getattr(response, "error_message", None),
    )
|
||||
Reference in New Issue
Block a user