- app/models/text_models.py: TripleItem, SourceOffset, TextExtract{Request,Response}
- app/services/text_service.py: TXT/PDF/DOCX parsing + LLM call + JSON parse
- app/routers/text.py: POST /text/extract handler with Depends injection
- tests/test_text_service.py: 6 unit tests (formats, errors)
- tests/test_text_router.py: 4 router tests (200, 400, 502×2)
- 10/10 tests passing
26 lines
444 B
Python
26 lines
444 B
Python
from pydantic import BaseModel
|
|
|
|
|
|
class SourceOffset(BaseModel):
    """Pair of integer positions locating a span in the source text.

    NOTE(review): the unit (characters vs. bytes) and whether ``end`` is
    inclusive are not established by this model; confirm with the producer.
    """

    # Position where the span begins.
    start: int
    # Position where the span ends.
    end: int
|
|
|
|
|
|
class TripleItem(BaseModel):
    """One subject/predicate/object triple plus the text it came from.

    Every field is required; none has a default.
    """

    # The three components of the triple.
    subject: str
    predicate: str
    # Field name is part of the public schema, so it is kept even though it
    # matches a builtin name.
    object: str

    # Text fragment associated with this triple.
    source_snippet: str
    # Start/end positions for the snippet (see SourceOffset).
    source_offset: SourceOffset
|
|
|
|
|
|
class TextExtractRequest(BaseModel):
    """Request payload for text extraction.

    ``file_path`` and ``file_name`` are required. ``model`` and
    ``prompt_template`` are optional and default to ``None``.
    """

    # Required: where the input file lives and what it is called.
    file_path: str
    file_name: str

    # Optional overrides; None when the caller does not supply them.
    # NOTE(review): presumably the LLM model name and the prompt text used
    # for extraction; confirm against the service that consumes this model.
    model: str | None = None
    prompt_template: str | None = None
|
|
|
|
|
|
class TextExtractResponse(BaseModel):
    """Response payload for text extraction: a list of triples."""

    # Required list of extracted triples; the model declares no default.
    items: list[TripleItem]
|