refactor: finetune through LLMClient interface + get_running_loop

- Add submit_finetune and get_finetune_status abstract methods to LLMClient base
- Implement both methods in ZhipuAIClient using asyncio.get_running_loop()
- Rewrite finetune_service to call llm.submit_finetune / llm.get_finetune_status
  instead of accessing llm._client directly, restoring interface encapsulation
- Replace asyncio.get_event_loop() with get_running_loop() in ZhipuAIClient._call
  and all four methods in RustFSClient (deprecated in Python 3.10+)
- Update test_finetune_service to mock the LLMClient interface methods as AsyncMocks
- Add two new tests in test_llm_client for submit_finetune and get_finetune_status
This commit is contained in:
wh
2026-04-10 16:43:28 +08:00
parent 603382d1fa
commit 0880e1018c
6 changed files with 130 additions and 79 deletions

View File

@@ -9,3 +9,11 @@ class LLMClient(ABC):
@abstractmethod
async def chat_vision(self, model: str, messages: list[dict]) -> str:
    """Send a multimodal (vision) chat request.

    Args:
        model: Provider model identifier to invoke.
        messages: Chat messages; entries may carry image content parts.

    Returns:
        The response content string.
    """
@abstractmethod
async def submit_finetune(self, jsonl_url: str, base_model: str, hyperparams: dict) -> str:
    """Submit a fine-tune job and return the job_id.

    Args:
        jsonl_url: Location of the JSONL training file.
        base_model: Name of the base model to fine-tune.
        hyperparams: Provider-specific hyperparameter mapping.

    Returns:
        The provider-assigned job id.
    """
@abstractmethod
async def get_finetune_status(self, job_id: str) -> dict:
    """Return the current state of a fine-tune job.

    Args:
        job_id: Provider-assigned fine-tune job identifier.

    Returns:
        A dict with keys: job_id, status (raw SDK string),
        progress (int | None), error_message (str | None).
    """

View File

@@ -19,8 +19,39 @@ class ZhipuAIClient(LLMClient):
async def chat_vision(self, model: str, messages: list[dict]) -> str:
    """Forward a multimodal (vision) chat request to the shared _call helper."""
    content = await self._call(model, messages)
    return content
async def submit_finetune(self, jsonl_url: str, base_model: str, hyperparams: dict) -> str:
    """Submit a fine-tune job through the blocking SDK and return its job id.

    The SDK call is dispatched to the default thread-pool executor so the
    running event loop is never blocked.

    Args:
        jsonl_url: Location of the JSONL training file.
        base_model: Name of the base model to fine-tune.
        hyperparams: Provider-specific hyperparameter mapping.

    Returns:
        The provider-assigned job id.

    Raises:
        LLMCallError: wraps any failure raised by the SDK call.
    """
    def _create():
        # Blocking SDK call; executed in a worker thread below.
        return self._client.fine_tuning.jobs.create(
            training_file=jsonl_url,
            model=base_model,
            hyperparameters=hyperparams,
        )

    loop = asyncio.get_running_loop()
    try:
        job = await loop.run_in_executor(None, _create)
        return job.id
    except Exception as exc:
        raise LLMCallError(f"微调任务提交失败: {exc}") from exc
async def get_finetune_status(self, job_id: str) -> dict:
    """Fetch the raw state of a fine-tune job through the blocking SDK.

    The SDK retrieve call is dispatched to the default thread-pool executor
    so the running event loop is never blocked.

    Args:
        job_id: Provider-assigned fine-tune job identifier.

    Returns:
        A dict with keys job_id, status (raw SDK string),
        progress (int | None) and error_message (str | None).

    Raises:
        LLMCallError: wraps any failure raised by the SDK call.
    """
    def _retrieve():
        # Blocking SDK call; executed in a worker thread below.
        return self._client.fine_tuning.jobs.retrieve(job_id)

    loop = asyncio.get_running_loop()
    try:
        job = await loop.run_in_executor(None, _retrieve)
        # SDK objects may omit progress/error_message; default to None.
        raw_progress = getattr(job, "progress", None)
        return {
            "job_id": job.id,
            "status": job.status,
            "progress": None if raw_progress is None else int(raw_progress),
            "error_message": getattr(job, "error_message", None),
        }
    except Exception as exc:
        raise LLMCallError(f"查询微调任务失败: {exc}") from exc
async def _call(self, model: str, messages: list[dict]) -> str:
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
try:
response = await loop.run_in_executor(
None,

View File

@@ -21,7 +21,7 @@ class RustFSClient(StorageClient):
)
async def download_bytes(self, bucket: str, path: str) -> bytes:
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
try:
resp = await loop.run_in_executor(
None, lambda: self._s3.get_object(Bucket=bucket, Key=path)
@@ -33,7 +33,7 @@ class RustFSClient(StorageClient):
async def upload_bytes(
self, bucket: str, path: str, data: bytes, content_type: str = "application/octet-stream"
) -> None:
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
try:
await loop.run_in_executor(
None,
@@ -45,7 +45,7 @@ class RustFSClient(StorageClient):
raise StorageError(f"存储上传失败 [{bucket}/{path}]: {exc}") from exc
async def get_presigned_url(self, bucket: str, path: str, expires: int = 3600) -> str:
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
try:
url = await loop.run_in_executor(
None,
@@ -60,7 +60,7 @@ class RustFSClient(StorageClient):
raise StorageError(f"生成预签名 URL 失败 [{bucket}/{path}]: {exc}") from exc
async def get_object_size(self, bucket: str, path: str) -> int:
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
try:
resp = await loop.run_in_executor(
None, lambda: self._s3.head_object(Bucket=bucket, Key=path)

View File

@@ -1,6 +1,4 @@
import asyncio
from app.core.exceptions import LLMCallError
from app.clients.llm.base import LLMClient
from app.core.logging import get_logger
from app.models.finetune_models import (
FinetuneStartRequest,
@@ -17,45 +15,21 @@ _STATUS_MAP = {
}
async def submit_finetune(req: FinetuneStartRequest, llm) -> FinetuneStartResponse:
    """Submit a fine-tune job to ZhipuAI and return the job ID.

    NOTE(review): pre-refactor body shown as removed in this diff — it
    reaches into the private ``llm._client`` attribute (breaking the
    LLMClient encapsulation) and uses ``asyncio.get_event_loop()``,
    which is deprecated outside a running loop since Python 3.10.
    """
    loop = asyncio.get_event_loop()
    try:
        # Run the blocking SDK call in the default thread-pool executor.
        response = await loop.run_in_executor(
            None,
            # Private attribute access — bypasses the LLMClient interface.
            lambda: llm._client.fine_tuning.jobs.create(
                training_file=req.jsonl_url,
                model=req.base_model,
                hyperparameters=req.hyperparams or {},
            ),
        )
        job_id = response.id
        logger.info("finetune_submit", extra={"job_id": job_id, "model": req.base_model})
        return FinetuneStartResponse(job_id=job_id)
    except Exception as exc:
        logger.error("finetune_submit_error", extra={"error": str(exc)})
        raise LLMCallError(f"微调任务提交失败: {exc}") from exc
async def submit_finetune(req: FinetuneStartRequest, llm: LLMClient) -> FinetuneStartResponse:
    """Kick off a fine-tune run through the LLMClient abstraction.

    Args:
        req: Start request carrying the JSONL url, base model and hyperparams.
        llm: Any LLMClient implementation.

    Returns:
        FinetuneStartResponse wrapping the provider-assigned job id.
    """
    hyperparams = req.hyperparams or {}
    job_id = await llm.submit_finetune(req.jsonl_url, req.base_model, hyperparams)
    logger.info("finetune_submit", extra={"job_id": job_id, "model": req.base_model})
    return FinetuneStartResponse(job_id=job_id)
async def get_finetune_status(job_id: str, llm) -> FinetuneStatusResponse:
    """Retrieve fine-tune job status from ZhipuAI.

    NOTE(review): pre-refactor body shown as removed in this diff — it
    pokes at the private ``llm._client`` attribute and uses
    ``asyncio.get_event_loop()``, deprecated outside a running loop
    since Python 3.10.
    """
    loop = asyncio.get_event_loop()
    try:
        # Blocking SDK retrieve executed in the default thread-pool executor.
        response = await loop.run_in_executor(
            None,
            # Private attribute access — bypasses the LLMClient interface.
            lambda: llm._client.fine_tuning.jobs.retrieve(job_id),
        )
        status_raw = response.status
        # Unknown provider statuses map to RUNNING.
        status = _STATUS_MAP.get(status_raw, "RUNNING")  # conservative fallback
        # SDK objects may omit these fields; default to None.
        progress = getattr(response, "progress", None)
        error_message = getattr(response, "error_message", None)
        logger.info("finetune_status", extra={"job_id": job_id, "status": status})
        return FinetuneStatusResponse(
            job_id=job_id,
            status=status,
            progress=progress,
            error_message=error_message,
        )
    except Exception as exc:
        logger.error("finetune_status_error", extra={"job_id": job_id, "error": str(exc)})
        raise LLMCallError(f"微调状态查询失败: {exc}") from exc
async def get_finetune_status(job_id: str, llm: LLMClient) -> FinetuneStatusResponse:
    """Look up a fine-tune job's state through the LLMClient abstraction.

    Args:
        job_id: Provider-assigned fine-tune job identifier.
        llm: Any LLMClient implementation.

    Returns:
        FinetuneStatusResponse with the mapped status plus the raw
        progress and error-message fields.
    """
    raw = await llm.get_finetune_status(job_id)
    # Unrecognized raw statuses are conservatively treated as still running.
    mapped = _STATUS_MAP.get(raw["status"], "RUNNING")
    logger.info("finetune_status", extra={"job_id": job_id, "status": mapped})
    response = FinetuneStatusResponse(
        job_id=raw["job_id"],
        status=mapped,
        progress=raw["progress"],
        error_message=raw["error_message"],
    )
    return response