- app/models/text_models.py: TripleItem, SourceOffset, TextExtract{Request,Response}
- app/services/text_service.py: TXT/PDF/DOCX parsing + LLM call + JSON parse
- app/routers/text.py: POST /text/extract handler with Depends injection
- tests/test_text_service.py: 6 unit tests (formats, errors)
- tests/test_text_router.py: 4 router tests (200, 400, 502×2)
- 10/10 tests passing
64 lines
2.0 KiB
Python
64 lines
2.0 KiB
Python
import pytest
|
|
from unittest.mock import AsyncMock
|
|
|
|
|
|
# Canned LLM reply: a JSON array holding a single extracted triple.
# The literal must stay parseable by json.loads, so it contains nothing but JSON.
# NOTE(review): the snippet below is 14 characters long while source_offset
# spans 0-12 — confirm whether the offsets are meant to cover the whole snippet.
SAMPLE_TRIPLES_JSON = '''[
    {
        "subject": "变压器",
        "predicate": "额定电压",
        "object": "110kV",
        "source_snippet": "该变压器额定电压为110kV",
        "source_offset": {"start": 0, "end": 12}
    }
]'''
|
|
|
|
|
|
def test_text_extract_returns_200(client, mock_llm, mock_storage):
    """Check that a request answered with the sample triple JSON yields 200 and items."""
    # Stub both collaborators: the LLM returns the canned reply, storage returns raw bytes.
    mock_llm.chat = AsyncMock(return_value=SAMPLE_TRIPLES_JSON)
    mock_storage.download_bytes = AsyncMock(return_value=b"some text content")

    payload = {"file_path": "text/test.txt", "file_name": "test.txt"}
    response = client.post("/api/v1/text/extract", json=payload)

    assert response.status_code == 200
    body = response.json()
    assert "items" in body
    # The first returned item must carry the subject and start offset of the canned triple.
    first_item = body["items"][0]
    assert first_item["subject"] == "变压器"
    assert first_item["source_offset"]["start"] == 0
|
|
|
|
|
|
def test_text_extract_unsupported_format_returns_400(client, mock_storage):
    """Check that a request for an .xlsx file yields 400 with code UNSUPPORTED_FILE_TYPE."""
    mock_storage.download_bytes = AsyncMock(return_value=b"data")

    payload = {"file_path": "text/test.xlsx", "file_name": "data.xlsx"}
    response = client.post("/api/v1/text/extract", json=payload)

    assert response.status_code == 400
    error_body = response.json()
    assert error_body["code"] == "UNSUPPORTED_FILE_TYPE"
|
|
|
|
|
|
def test_text_extract_storage_error_returns_502(client, mock_llm, mock_storage):
    """Check that a StorageError from the download yields 502 with code STORAGE_ERROR."""
    # mock_llm is requested but never configured here; presumably it only needs
    # to be active as a fixture — confirm against conftest.
    from app.core.exceptions import StorageError

    failing_download = AsyncMock(side_effect=StorageError("RustFS unreachable"))
    mock_storage.download_bytes = failing_download

    payload = {"file_path": "text/test.txt", "file_name": "test.txt"}
    response = client.post("/api/v1/text/extract", json=payload)

    assert response.status_code == 502
    error_body = response.json()
    assert error_body["code"] == "STORAGE_ERROR"
|
|
|
|
|
|
def test_text_extract_llm_parse_error_returns_502(client, mock_llm, mock_storage):
    """Check that a non-JSON LLM reply yields 502 with code LLM_PARSE_ERROR."""
    # The download succeeds; only the LLM reply is malformed.
    mock_storage.download_bytes = AsyncMock(return_value=b"content")
    malformed_reply = "not json {{{{"
    mock_llm.chat = AsyncMock(return_value=malformed_reply)

    payload = {"file_path": "text/test.txt", "file_name": "test.txt"}
    response = client.post("/api/v1/text/extract", json=payload)

    assert response.status_code == 502
    error_body = response.json()
    assert error_body["code"] == "LLM_PARSE_ERROR"
|