feat(US1): text triple extraction — POST /api/v1/text/extract

- app/models/text_models.py: TripleItem, SourceOffset, TextExtract{Request,Response}
- app/services/text_service.py: TXT/PDF/DOCX parsing + LLM call + JSON parse
- app/routers/text.py: POST /text/extract handler with Depends injection
- tests/test_text_service.py: 6 unit tests (formats, errors)
- tests/test_text_router.py: 4 router tests (200, 400, 502×2)
- 10/10 tests passing
This commit is contained in:
wh
2026-04-10 15:27:27 +08:00
parent e1eb5e47b1
commit dd8da386f4
18 changed files with 321 additions and 1 deletions

View File

@@ -1,3 +1,18 @@
from fastapi import APIRouter
from fastapi import APIRouter, Depends
from app.clients.llm.base import LLMClient
from app.clients.storage.base import StorageClient
from app.core.dependencies import get_llm_client, get_storage_client
from app.models.text_models import TextExtractRequest, TextExtractResponse
from app.services import text_service
# Router for the text endpoints; it is created with the "Text" tag.
router = APIRouter(tags=["Text"])
@router.post(
    "/text/extract",
    response_model=TextExtractResponse,
)
async def extract_text(
    req: TextExtractRequest,
    llm: LLMClient = Depends(get_llm_client),
    storage: StorageClient = Depends(get_storage_client),
) -> TextExtractResponse:
    """Handle ``POST /text/extract`` by delegating to the text service.

    The handler contains no logic of its own: it forwards the request body
    and the two injected clients to ``text_service.extract_triples`` and
    returns whatever that coroutine produces.

    Args:
        req: Request body for the extraction call.
        llm: LLM client resolved through ``Depends(get_llm_client)``.
        storage: Storage client resolved through
            ``Depends(get_storage_client)``.

    Returns:
        The response produced by ``text_service.extract_triples``.

    Note:
        No exceptions are caught here; anything raised by the service
        propagates to the framework.
    """
    extraction = await text_service.extract_triples(req, llm, storage)
    return extraction