From e1eb5e47b1c90c5d7a784fa50cf0a07b6cd8e9b4 Mon Sep 17 00:00:00 2001 From: wh Date: Fri, 10 Apr 2026 15:22:45 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20Phase=201+2=20=E2=80=94=20project=20set?= =?UTF-8?q?up=20and=20core=20infrastructure?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - requirements.txt, config.yaml, .env, Dockerfile, docker-compose.yml - app/core: config (YAML+env override), logging (JSON structured), exceptions (typed hierarchy), json_utils (Markdown fence stripping) - app/clients: LLMClient ABC + ZhipuAIClient (run_in_executor), StorageClient ABC + RustFSClient (boto3 head_object for size check) - app/main.py: FastAPI app with health endpoint and router registration - app/core/dependencies.py: lru_cache singleton factories - tests/conftest.py: mock_llm, mock_storage, test_app, client fixtures - pytest.ini: asyncio_mode=auto - 11 unit tests passing --- .env | 10 +++ .gitignore | 32 ++++++++ Dockerfile | 18 +++++ app/__init__.py | 0 app/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 148 bytes app/clients/__init__.py | 0 .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 156 bytes app/clients/llm/__init__.py | 0 .../llm/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 160 bytes .../llm/__pycache__/base.cpython-312.pyc | Bin 0 -> 1003 bytes .../zhipuai_client.cpython-312.pyc | Bin 0 -> 2755 bytes app/clients/llm/base.py | 11 +++ app/clients/llm/zhipuai_client.py | 37 +++++++++ app/clients/storage/__init__.py | 0 .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 164 bytes .../storage/__pycache__/base.cpython-312.pyc | Bin 0 -> 1638 bytes .../__pycache__/rustfs_client.cpython-312.pyc | Bin 0 -> 4935 bytes app/clients/storage/base.py | 21 ++++++ app/clients/storage/rustfs_client.py | 70 ++++++++++++++++++ app/core/__init__.py | 0 app/core/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 153 bytes app/core/__pycache__/config.cpython-312.pyc | Bin 0 -> 2170 bytes .../__pycache__/dependencies.cpython-312.pyc | Bin 0 -> 1188 bytes .../__pycache__/exceptions.cpython-312.pyc | Bin 0 -> 2473 bytes .../__pycache__/json_utils.cpython-312.pyc | Bin 0 -> 1045 bytes app/core/__pycache__/logging.cpython-312.pyc | Bin 0 -> 3690 bytes app/core/config.py | 46 ++++++++++++ app/core/dependencies.py | 23 ++++++ app/core/exceptions.py | 50 +++++++++++++ app/core/json_utils.py | 19 +++++ app/core/logging.py | 62 ++++++++++++++++ app/main.py | 46 ++++++++++++ app/models/__init__.py | 0 app/routers/__init__.py | 0 app/routers/finetune.py | 3 + app/routers/image.py | 3 + app/routers/qa.py | 3 + app/routers/text.py | 3 + app/routers/video.py | 3 + app/services/__init__.py | 0 config.yaml | 19 +++++ docker-compose.yml | 37 +++++++++ pytest.ini | 3 + requirements.txt | 16 ++++ tests/__init__.py | 0 tests/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 150 bytes .../conftest.cpython-312-pytest-9.0.3.pyc | Bin 0 -> 2414 bytes .../test_config.cpython-312-pytest-9.0.3.pyc | Bin 0 -> 4441 bytes ...st_llm_client.cpython-312-pytest-9.0.3.pyc | Bin 0 -> 4168 bytes ...torage_client.cpython-312-pytest-9.0.3.pyc | Bin 0 -> 6695 bytes tests/conftest.py | 39 ++++++++++ tests/test_config.py | 40 ++++++++++ tests/test_llm_client.py | 40 ++++++++++ tests/test_storage_client.py | 62 ++++++++++++++++ 54 files changed, 716 insertions(+) create mode 100644 .env create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 app/__init__.py create mode 100644 app/__pycache__/__init__.cpython-312.pyc create mode 100644 app/clients/__init__.py create mode 100644 app/clients/__pycache__/__init__.cpython-312.pyc create mode 100644 app/clients/llm/__init__.py create mode 100644 app/clients/llm/__pycache__/__init__.cpython-312.pyc create mode 100644 app/clients/llm/__pycache__/base.cpython-312.pyc create mode 100644 app/clients/llm/__pycache__/zhipuai_client.cpython-312.pyc create mode 100644 app/clients/llm/base.py create mode 100644 app/clients/llm/zhipuai_client.py create mode 100644 app/clients/storage/__init__.py create mode 100644 app/clients/storage/__pycache__/__init__.cpython-312.pyc create mode 100644 app/clients/storage/__pycache__/base.cpython-312.pyc create mode 100644 app/clients/storage/__pycache__/rustfs_client.cpython-312.pyc create mode 100644 app/clients/storage/base.py create mode 100644 app/clients/storage/rustfs_client.py create mode 100644 app/core/__init__.py create mode 100644 app/core/__pycache__/__init__.cpython-312.pyc create mode 100644 app/core/__pycache__/config.cpython-312.pyc create mode 100644 app/core/__pycache__/dependencies.cpython-312.pyc create mode 100644 app/core/__pycache__/exceptions.cpython-312.pyc create mode 100644 app/core/__pycache__/json_utils.cpython-312.pyc create mode 100644 app/core/__pycache__/logging.cpython-312.pyc create mode 100644 app/core/config.py create mode 100644 app/core/dependencies.py create mode 100644 app/core/exceptions.py create mode 100644 app/core/json_utils.py create mode 100644 app/core/logging.py create mode 100644 app/main.py create mode 100644 app/models/__init__.py create mode 100644 app/routers/__init__.py create mode 100644 app/routers/finetune.py create mode 100644 app/routers/image.py create mode 100644 app/routers/qa.py create mode 100644 app/routers/text.py create mode 100644 app/routers/video.py create mode 100644 app/services/__init__.py create mode 100644 config.yaml create mode 100644 docker-compose.yml create mode 100644 pytest.ini create mode 100644 requirements.txt create mode 100644 tests/__init__.py create mode 100644 tests/__pycache__/__init__.cpython-312.pyc create mode 100644 tests/__pycache__/conftest.cpython-312-pytest-9.0.3.pyc create mode 100644 tests/__pycache__/test_config.cpython-312-pytest-9.0.3.pyc create mode 100644 tests/__pycache__/test_llm_client.cpython-312-pytest-9.0.3.pyc create mode 100644 tests/__pycache__/test_storage_client.cpython-312-pytest-9.0.3.pyc create mode 100644 tests/conftest.py create mode 100644 tests/test_config.py create mode 100644 tests/test_llm_client.py create mode 100644 tests/test_storage_client.py diff --git a/.env b/.env new file mode 100644 index 0000000..20292a0 --- /dev/null +++ b/.env @@ -0,0 +1,10 @@ +# Required — fill in before running +ZHIPUAI_API_KEY=your-zhipuai-api-key-here +STORAGE_ACCESS_KEY=your-storage-access-key +STORAGE_SECRET_KEY=your-storage-secret-key +STORAGE_ENDPOINT=http://rustfs:9000 + +# Optional overrides +BACKEND_CALLBACK_URL=http://label-backend:8080/api/ai/callback +LOG_LEVEL=INFO +# MAX_VIDEO_SIZE_MB=200 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0f47a07 --- /dev/null +++ b/.gitignore @@ -0,0 +1,32 @@ +# ========================================== +# 1. Maven/Java 构建产物 (一键忽略整个目录) +# ========================================== +target/ +*.class +*.jar +*.war +*.ear + +# ========================================== +# 2. IDE 配置文件 +# ========================================== +.idea/ +.vscode/ +*.iml +*.ipr +*.iws + +# ========================================== +# 3. 项目特定工具目录 (根据你的文件列表) +# ========================================== +# 忽略 Specifiy 工具生成的所有配置和脚本 +.specify/ + +# 忽略 Claude 本地设置和技能文件 +.claude/ + +# ========================================== +# 4. 操作系统文件 +# ========================================== +.DS_Store +Thumbs.db \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..c6fdd28 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.12-slim + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + libgl1 \ + libglib2.0-0 \ + curl \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 8000 + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/__pycache__/__init__.cpython-312.pyc b/app/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96a8c63ab69618006a6da18df3565473406bb929 GIT binary patch literal 148 zcmX@j%ge<81UDAl$OO@kK?FMZ%mNgd&QQsq$>_I|p@<2{`wUX^%h<&#raZqWySN}R zIW;CHF)1|%LdGX%#uuj+m1P2j6AKDr;^Q;(GE3s)^$IF)aoFVMr_I|p@<2{`wUX^%i6^%raZqWySN}R zIW;CHF)1|%LdGX%#uuj+m1P2j6AKDrl5;Xs^Gb?i;^Q;(GE3s)^$IF)aoFVMr_I|p@<2{`wUX^%ihH*raZqWySN}R zIW;CHF)1|%LdGX%#uuj+m1P2j6AKDrl5;Xs^Gb?ia&mHG;^Q;(GE3s)^$IF)aoFVM crP^NC=5AbYcR8fL3Clv_qLrr;B{C+om|VtLOfyxU53v34g37B|JQUn7MB9gy+&w20p`(Bc_)6)|Ou1hbDlIIwq z_cl1byo%&kfb1fM7#1kSO`JMS2O|gFMas=O?i6z#mAb$X47je|Y3^g5280Ic>xX1l-@B~h9SAFL0xcgN>v-X!9@3)-# zzz=JtvAB4DUL-uz#!I`53sXNq=R^!>Ok_%K36)RuwOO{^g0KsdfiO%Ans-wjt*c1Nwzk(G(sa{T*bwz zwB}_UK-uGiAoy{8;aY3ED<7&Jjd@GZ4i_axG>H_KTM2B_Uau7wepE|{wAG=C&-Dhz zwR(m#zueB**uKKt2l#-#Jx1Si7w8km#sBPhxpJKLg-&36R0Nl|5|wnb@c#xNCqaU= zvpu#gXCN?>R&G?b9-jHrMYY|ql_xEn0$2(grXEEZO?ecVi6{bfePQ`j6m9mYD4$4c z*$|k&aYdrEaaj^;dBV1twP5w+uu!2x+F=!7>l6dnK}X)~*uma1-XmjV?=SVi!f06X z!}xQtmlNclF4M+-Mca5!*{n7`V24*xZrOIRg{4JHYqXF>tX-8Tq(TH5Xj=P&CbDTVEHk%dm)VDRru5;~ zG=j2?8Xr*$@l_*68Zaq|nkN2``eTA0-9WO<2p`5&@o(coRP@7h?(71s{Cbi(_uO;t z;b0?Y-J55X`+Okz@t#Bh`>nv!A?MOwDxloC?}typSG zi)n&ZEnmtX^HU-b=4XMk)H^IzBaywt)Q%I=$CR^jg}Z8$M*P3wq%T5UU+i!qJM?rn zkb&ObXLef&mUdtg#7W$;x;W2pnAZ$4N4GMAgNz&AK_1pBlX4P>DZ)sMnj|Kfl*y)K z$^(k290#3E73LaKnbrjBOo_<@n&|_H@2(O*Nc?w|1V9oPl_NE7ZAEiWW!D$aCiEk0 z)YUk1hIqQqkSeO-fRD$oyb;h8aVjPvBuCHDs=;zZzr`OV5ee+W1F$|Kaop?rbi*@` z}=d%{Z{-CvplTF6S~d#(FCyL*=)aehTU&jss1rE zKFB>@dv?_I>v|%caCChvyzG7LRemj8XV(FmAXnCHm>#~o?vcFmMz9PV;;N~P$t>5O zVzwP0WVV5NS3v>d7<}bjfO-f8JLE|6){Cpl&?Y(QY{h!VAn;y?co< z*Of2jVLC(mW~i&`21Ln>%uz3X52AWNHeTG?+R9N=S1}I9ow54m4?^B*kd~VPT_b-^ zkj1r4Q?a*W)9UodrL`T!P{)G5VckY%@-6U$a0+OO?EoY9a z4!T4@GE}8*kET3H9gGD5IklQ#rt+b5POa)jNlq2v)cXK&FlYokNca=u+8yVkD^`H1 z!TIPN0Xgl~`M%}nJNPI8raz~BKt7VHD|6b*l1aw3m$YFzOh)7bWSC-5M*@9Dbq|i1 zGw99o0HF8eDKaTxp<4-eb6FDiu7IR(1p}s&bdo>~syKbWs5{c_Oo}KRZOt12xvPv! zDr+$Z!q0X!gR{89BC^OhZ-=N3n0pfw6JGB5JFR%?kQv`G*0wxeMdG%1S{X=Sdj}BS zvU_2({SY8X$dk$^!BfFvW2Dd+DK>5`G;S?6b`}~t7uUBeHn){j()84IA~)4@%wvu( zaQ#r$#K}-JF#_BmXLc6UWxB=E&`4A^;_$dzS6!zw(P1j9RWTbv-!7g0;+Hd@E`9vk zwb^68zI$fr^!aOFeZIXlQVXTt9!(pGj9Z6Ii5-QG1+6KQbwhk84dqj3Bg`0bGTikU zhci%jZP!<+h``CD9oXP%UNdu*M0z0O%AlMZ=o&Ft7LCiXO}Mh{aE_@K@vSItt|IR9 zSKr3Bi1lp5u+hT@4<3Zt=M_I52F6&!U2Km6`&&RaMfJN93Kv6dg;3k2(5Adnl4=9p zbg}vVVso_699>+!=7uESQ(soK4RwDIt+6h@uT)3YwG_h}3*n8^FBHO&VmMj|M~mTY zh48lg-YdcSH+v>~PS(tJ{qWfL!xuXXo1f43Tn-*sY*?S~D>c9oe_dZgRyX}lB)AEe z_fGaMG)3mN!g~ASlZEKB`QFRH=dRRmSh)YmxrRA)p=HOV`kfc{F9i4fT~cw^KlV1z z|CP17ee{F%yA<*@-6bjCJiQ9Yc}jshFHxkGz-~pJuYGjaPI-Q(3iNl13{vRg(C^am z6w`ILM%STx4OvJBb^V2*xaCQB6z str: + """Send a text chat request and return the response content string.""" + + @abstractmethod + async def chat_vision(self, model: str, messages: list[dict]) -> str: + """Send a multimodal (vision) chat request and return the response content string.""" diff --git a/app/clients/llm/zhipuai_client.py b/app/clients/llm/zhipuai_client.py new file mode 100644 index 0000000..a92322d --- /dev/null +++ b/app/clients/llm/zhipuai_client.py @@ -0,0 +1,37 @@ +import asyncio + +from zhipuai import ZhipuAI + +from app.clients.llm.base import LLMClient +from app.core.exceptions import LLMCallError +from app.core.logging import get_logger + +logger = get_logger(__name__) + + +class ZhipuAIClient(LLMClient): + def __init__(self, api_key: str) -> None: + self._client = ZhipuAI(api_key=api_key) + + async def chat(self, model: str, messages: list[dict]) -> str: + return await self._call(model, messages) + + async def chat_vision(self, model: str, messages: list[dict]) -> str: + return await self._call(model, messages) + + async def _call(self, model: str, messages: list[dict]) -> str: + loop = asyncio.get_event_loop() + try: + response = await loop.run_in_executor( + None, + lambda: self._client.chat.completions.create( + model=model, + messages=messages, + ), + ) + content = response.choices[0].message.content + logger.info("llm_call", extra={"model": model, "response_len": len(content)}) + return content + except Exception as exc: + logger.error("llm_call_error", extra={"model": model, "error": str(exc)}) + raise LLMCallError(f"大模型调用失败: {exc}") from exc diff --git a/app/clients/storage/__init__.py b/app/clients/storage/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/clients/storage/__pycache__/__init__.cpython-312.pyc b/app/clients/storage/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56c457d612ba01f6bc8efb76ba84ef9dd25ca24d GIT binary patch literal 164 zcmX@j%ge<81UDAl$OO@kK?FMZ%mNgd&QQsq$>_I|p@<2{`wUX^%h|;$raZqWySN}R zIW;CHF)1|%LdGX%#uuj+m1P2j6AKDrl5;Xs^Gb?iic9i~64O&-;^Q;(GE3s)^$IF) gaoFVMru-(3lKpn+}||Ly1FniEs{ya3#GDxwzWy#B2J$q1kjY z_xiir**foPqj+cr((I=qMBif73uFEb7)kNjcUq+UpPGcEYl@+Mw^(vxvDyF)ERROHEQ&_c0xw)P-J~XM~ zeYqV+GBH`HI|^@0lX*jK_Op_bW~7p^Xn;MX1M(}_bL96vp+IdyzjMd5=}fQeq~jz` zxnMkDY4=kZ8n6V*%dp63rudk3Cr0Y6y?f-In|WHsJynKUsschI6l3h~cXqCI#;Llk zhdh*>n0IBI%Ya9Lmg*pa*L*nag!bUNqjMgeF4uBvILX#UahAZsOM|2?1!GI$BaeClW_Sp>I%L&?Sw ze8(fh7UK~meHhz3JC8I10Im(&TCy z8fjjEAByCICXLR*MK{;7N)RM`AcG*Q1;HQ{qZsXa5PUY`aqdyr&&-9)%(pz0?ay=e zZIl>w9&ss>Y1?_&gChJ$pNh^atBVVMdrdEMc{fz}gR%X}xnn=_DEnDe j-pV`twz>ok3R~mA&Rk0A&*akYr48DmUvK|GP}{sG(oLPp literal 0 HcmV?d00001 diff --git a/app/clients/storage/__pycache__/rustfs_client.cpython-312.pyc b/app/clients/storage/__pycache__/rustfs_client.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f34f312a93831fd222eefd6534851f9c709ccc03 GIT binary patch literal 4935 zcmd6reQXrR6~Jfq%XjDV2mZq5<1pA5E*KM=P$(rN2ZRtvToRjvxW?=A?b@6<-(6?- zV(gm8N(i>wrXf$<&2FMu)AzX6j2K6Tx3o*FOs?$Tv+ooEzP~l8fq#tm!sk$5p zwY90tXBo6)X6k0@1So?r}B-QtVJC*@==n zQ}0kkB{S8>p*W9=4H?R!nbs`T20eMOmJKcTnR-FBfwdg4=I9dLd4}`3h^FtZv%E2E zYETJ>{6XDtczr%q)8r#+m*LP<9|NaQmq3ZIV69J><(mXLPTthxAO?w^nqETMIjER- zDYS?7a6SAb?qbSD50Ph{>mqKx*5I^_Zr*UFMYyUzg?X!r8N1m9Eag_qssz zlI!`!gWKNRmT=}KN>(OZYZI#~r|n{aqhEO4HU&=*Lu!fmj;OjJguVJhco!_T+xj9w<4;v=fp_X>lrw{QA9cN2W6oSyu za&K=hD}vInZ3}oiS`_d0?)7svfD-Bq20~s%Zt2oht!i5!5AZ>CNH0veC5K;=#Oq)yEggmd$->I zMd|kpy5SCgXltokGBUhcSJ3AV8QJ(zQeT4a7<`&SVIzk{g0M+t^{DELz&<5o&VS`tX`ZR>Txw#q8v7FAH>s|NK`?$Us`f9I%xN{4Ay`EDv zk+XU%XZ3i_ntmb4N%n&@QL=oZWc^sl`b2KQpNjTQ@!XQ^F9}zW-M=d-k%Hng(wo~R z^2^8a%g6I8;?9a>2DIItwvpVWUl0lguIIVpD>n_d4j+s@8`Yz&v7MuQED~Sw%y?dH zy#9r_^M$XHJU;lfwgpD=TA^nXJ-ft{Lw-d)S;Cbf&_N$2#4L?#`=vmK? ztQS$Q;i1(K6tC_zoW4*{$LZC(!m4jJR5O_V5#C~9+Q;_^4}_`+sEYSQRp{qUT*dq0 zN=X-@C<2POa^_{CDCsyt^HMkyMczbF=#l22D48fqFH7@K6z8Id-vSfG!=RiJ0Y%{f zP~`QQscWAZrZqT}16=ycU_zUUA9cjd)w%_?0-j){D9S-V;I@{e9##ZeEr!DpUA7oD zIN%~<2a@>Icip13)vK}h!zk{ejUxiIQ*g5~xb0pyXLCow_oI3(bkHh6Al1S-+grAY z;&o%i>k>;>p4Z=Bb@ie59*WkF*X)SzZ%p!}q;i^YC3z<3a}fXG&EK6*BhFTXIaUE; zNMvhJ$CofSiW!F88S@yn95Y?S;jk(Y8m<>$j38SaJ7;^-U2;!HS7L9>&b~Goo|Z`N z;yD!Ckm9_eiM)!jyb7TBOO9KVHzkVsz_FN>{RN8K)6V|{*iG=-zCzC$`pz;BCBLSR z3BM6Q`G7*z?5i1kW-JqNj08sv$0;*7mmYl-94d01Nd^%qS{DfMD z+ks(HekdiIaL`Z|T3_5)J z({rEnpZx4+$3MON!Q{!;OAkEvbm>9>83vDqsF|K1Wd;xa%|)#=joP9G|m2MZJ zA}H4=Q~`m6j1ar0j32QVdR+X7OGLelhejWMn$p({b9cky0}=OM%U(_D>{CTaz%ss($>ih#1_?^YKQvjeTn%R+0$Ts6^(m=br@ zeCXO|WuaNA`1-UOY|{@%5ZhZnxjOmA=}%7g-Tdv9$#;J_IsC>wfMM(KCHN^aBTp*y zxprRi*ktkWo#^i9mRNPHKDK+b zY}7Y;Aikn*Jg+|9a47CPWa7}2!U2-UHJCh7NFE;(uGqd&EOOKCVmMC*{64Sl4+S@b ze7dTy*L29lJKWTtn+>a+iHFIcw?ma>!y(HZAte$(-6_l8k9Y&A79Kh=%&Fnvi}?)0 z&jkB6R`76`XTT(3__-e<=F*vgp5cki6!0RV#XuY&O4zoQB;z&l$GTvrLsDucy?LJpY2&wB0l{2 zvMjNgUI2SoMqvb4#x$#pKmX#CS&?+v#3p)SC%(3c4ns4RQEl2RE$7fLcea%leG7D! z2g1n5zXW_CrdGk>MGfPOYi6!w;38C|4)s;3Lw!|?7moZGe~f&yd0t1%360#D7W})% zA8cc|q1ltzCRzUgS(E$>R^!PneLS?*aXYJnIto9n1_Yc&DE)|Je?;sb6Xy-GZj#j8 hAeA@Bn!nidPwpAFFYlG6r7g6Ho_zFg1WVKAKLMRuvN-?% literal 0 HcmV?d00001 diff --git a/app/clients/storage/base.py b/app/clients/storage/base.py new file mode 100644 index 0000000..89535a4 --- /dev/null +++ b/app/clients/storage/base.py @@ -0,0 +1,21 @@ +from abc import ABC, abstractmethod + + +class StorageClient(ABC): + @abstractmethod + async def download_bytes(self, bucket: str, path: str) -> bytes: + """Download an object and return its raw bytes.""" + + @abstractmethod + async def upload_bytes( + self, bucket: str, path: str, data: bytes, content_type: str = "application/octet-stream" + ) -> None: + """Upload raw bytes to the given bucket/path.""" + + @abstractmethod + async def get_presigned_url(self, bucket: str, path: str, expires: int = 3600) -> str: + """Return a presigned GET URL valid for `expires` seconds.""" + + @abstractmethod + async def get_object_size(self, bucket: str, path: str) -> int: + """Return the object size in bytes without downloading it.""" diff --git a/app/clients/storage/rustfs_client.py b/app/clients/storage/rustfs_client.py new file mode 100644 index 0000000..8ef105a --- /dev/null +++ b/app/clients/storage/rustfs_client.py @@ -0,0 +1,70 @@ +import asyncio +import io + +import boto3 +from botocore.exceptions import ClientError + +from app.clients.storage.base import StorageClient +from app.core.exceptions import StorageError +from app.core.logging import get_logger + +logger = get_logger(__name__) + + +class RustFSClient(StorageClient): + def __init__(self, endpoint: str, access_key: str, secret_key: str) -> None: + self._s3 = boto3.client( + "s3", + endpoint_url=endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + ) + + async def download_bytes(self, bucket: str, path: str) -> bytes: + loop = asyncio.get_event_loop() + try: + resp = await loop.run_in_executor( + None, lambda: self._s3.get_object(Bucket=bucket, Key=path) + ) + return resp["Body"].read() + except ClientError as exc: + raise StorageError(f"存储下载失败 [{bucket}/{path}]: {exc}") from exc + + async def upload_bytes( + self, bucket: str, path: str, data: bytes, content_type: str = "application/octet-stream" + ) -> None: + loop = asyncio.get_event_loop() + try: + await loop.run_in_executor( + None, + lambda: self._s3.put_object( + Bucket=bucket, Key=path, Body=io.BytesIO(data), ContentType=content_type + ), + ) + except ClientError as exc: + raise StorageError(f"存储上传失败 [{bucket}/{path}]: {exc}") from exc + + async def get_presigned_url(self, bucket: str, path: str, expires: int = 3600) -> str: + loop = asyncio.get_event_loop() + try: + url = await loop.run_in_executor( + None, + lambda: self._s3.generate_presigned_url( + "get_object", + Params={"Bucket": bucket, "Key": path}, + ExpiresIn=expires, + ), + ) + return url + except ClientError as exc: + raise StorageError(f"生成预签名 URL 失败 [{bucket}/{path}]: {exc}") from exc + + async def get_object_size(self, bucket: str, path: str) -> int: + loop = asyncio.get_event_loop() + try: + resp = await loop.run_in_executor( + None, lambda: self._s3.head_object(Bucket=bucket, Key=path) + ) + return resp["ContentLength"] + except ClientError as exc: + raise StorageError(f"获取文件大小失败 [{bucket}/{path}]: {exc}") from exc diff --git a/app/core/__init__.py b/app/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/core/__pycache__/__init__.cpython-312.pyc b/app/core/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aa06ba8dc2d12c48bc1376e230cd833cf192c689 GIT binary patch literal 153 zcmX@j%ge<81UDAl$OO@kK?FMZ%mNgd&QQsq$>_I|p@<2{`wUX^%fiJfraZqWySN}R zIW;CHF)1|%LdGX%#uuj+m1P2j6AKDrlJkpFW8&j8^D;}~7&-6@L4}|DUBKT9#oRdAR|IcBL$=YMWLdX1kS^wc5v zy*F=W-^_dOoA*^B5d|FL-?t420Q|rw-4Whbc1Hz(b+`&T=zT z{g6})lM>fhqNI8=8)8cN()iTX+&IZijg!lT>#VmlGdZ2RSRlE4zECRt+Eyy$rwcQ_ zE!EvuD2`1{ju&TG`a&*$8D}J)o0#A_xjH?;A`_Dr$wXncFu{7RB!%KEnVc<5PourG6w_?0V$8kPP^%`Bw8|WlF)J=cpqh2cf|!N6V`tt;Qchfvebn6eWE<~4KyVK?u&btWmxN%Ap+ik*QVGl^|II$o7|_s z3CPLUfS16NyS}_&SF4D_;79^ohqcTid?;OqMIkE|pZo_$R%Td2Pp1{NZhB1O)r-tD zYP8@uw!@;cJUCy^%FYq==p5xRguo;oKR@p{j%Vk_-np@8JGb4Ms?i&!dXt)aNK}LH z!W(E-YqcAi?N9_inY9Ls;i5+@>UvanPNFv+j(ZJZ1-6yGR=zd;abs25PWJrCx@&Ee zHcvcEzI<2eNRS%r1f@V?C)L*~d=gy^bb@dwvyxwxSEuiW|8V8sKLsGs|L3K4?8J{C z1QOqc;~fd*Ggm?9e~pd8Ka`v-eJKfuPk1Hp4TO-0oPruo4+hVqU&plHe!PRKdHZz& ztg9}5A>H4v(;Ki19;k*|Bmj%Mi!j~xPdoV z7MCS2-rYB!>}tQr`JP|&uJ#<0&V5&QhT6RZhQ!S;S7D*}7rPWur{CYt&+KlAkR8cKjll%FSG2vj2!8 zu3DjlHytN~C$oUz(W=Wt+hrkaL55>nOq!#f&x&k-2^F7#{vugW9Tr9ck{b3fpN}q2 zIH%%q=N0ZQil34_o;h!!VHp4J2M8(P7B_2j+e#^S@&$<@GiEVUIox)VRV zA#GgStbY?f-3h_5vz;i!m6i8??m2&bxZSX z+jJdH92VlSn8r#!tYjXy>8O;P62J4=I(b0_-q7W z2O^K*g~u@P82W#J*kd^MJ;d%LI$;<*vH5oU@agR%!<%M1bLK#z{k`_U>)S)Ge3on< zeRI1vvvIlIb80*DV*8~x+JonwD&bS&O0p%lN*lu)?x)$-@3vCIyAT${r)fx~TbDlm z{Uc>~OBsHoyt<{l`bZhwQbr%VzolGQiFblwA=8@a;O@+3dQ@U3RBs!)|voyH!gx z;o!{+BqngI@kHRj|KUXv4rF3a>VX?jPo8|UKT0)R+{2siym|X(-sin9`MiqYd-?gq zIusE4&Ns;?#-jTX#11l%iEZRy4P&kaTW~~847F%WPD)FKTC!zF(Uef9?6i~7GNG1j z)yZmEj08lmsT`#Dag^W85V0nh>U(KFzEI0u5u-p%wl5~%$`$jc{Es4L8Jjk%hHk78 zzFAmUoVP8~2w;=f)-132=;@_nIdG|7Cp|~*SAtGZS7 z34LXOTlc(*;ZjmDiANeHX&4sqOJ0i&@KSBtsTv_JIt)4aJoq=kY@#3P$cO38`ES|r zcA>rUY2q+*G+92*&U}?-x)6Z=!XSeq#PA%%|6%As5jm9JP6`pF_G5T}NCrfH)IOGm zN<7zU1WeTpgZMt@#Z;dd(4&x@NJk+Z9ph#kOhQrNjzKUHLplVK4RaK6?gR(F{~C|u zo$bRhb}}@wwX!|YR*#16o)kv6THAB&>7&BEe*H_4a`G9HQZg;T?@Pr2mZ>!xM&P=( zPkBvj1R7I{(EBB5LuplqmhpLF6aOB?laV})PXZh74~pL&<>1X=lUE&681|ZFDUQHrKMnl(NPloQxPVF3}Q^sM8WO`FWoNjTy&=)VASCSr^`=+;kkr-L4L(1Q*t wchJ~8amF^%X1mayNeeHmhk*s02N;R2Nr}R)&Kwi literal 0 HcmV?d00001 diff --git a/app/core/__pycache__/exceptions.cpython-312.pyc b/app/core/__pycache__/exceptions.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..48fd955b538057473178be6a94ab1b36f773ac63 GIT binary patch literal 2473 zcmb7F-)kI29G}_U`Fl}5<#IO()td(gZMgAHO3gPLTCmMF!sVk%2&Nu@*~XNaM$ z5<}zKP1XNTDInPZut6T!W`mLq0UO@Mh9nySHoA)qOST8t*e*6A**LJhyV$7F$EnfJ z1I7WaE(VQ(Rjt(1=Cn392wZIWU~1?-N=#8Nl;@Xf+;L$Td##Ym=DAa~Er*x=BlrlO z$ksd!-N)bpCnai-k}?NwlvJ+4GnGk*8EPrWLon0iEWkrX&BYVT=1MN{H9vxVl?&SlKqX*E^Ffrt{gU3`^(pxxA-IHnhYY zM?cTKpx|yzSlP1T%ae%ST%}hDlOY6md+jylD)}@*Abt}bjE$0Iuq2yO zMGwrpfK#h-;e{EiSQVGCl#>-90F zTb2!eqX@9E9Zklj3}&-3{#lc7Gy>!Ojj{*RB0JNb?|scP<-VHA5+B%W6vV zM&>MdJylz{+&En^d2zYgzDDbKjfUp3g}K?;T)vo|W~XN|X;z${O=mX)0{0X{2#+D4 z9rECt#8)FvmOvJv7k*9*V3pJdhE~qjd&b|N_7`G!-?9hJRSa$yZ9Aij^S)ql7&h|y z&dp4xbF7%lvCLHdOgbwb2U;W%q)SrQI8G6eYIf;LbaY{O-@0N2*A|ddekT5f7RC$3 zTz=|I+Rwd5>v;Q@p=^s-2mJx$Tp3+TI+ERIdw)CQkQ&X9CO?tF1&){Pb@MU!SGS=}0!(K@E<=0c8 z90+`&UBwJD>_hl97Xi*ff!ZK$Cp(~Uy{!Pfrvck3*SjLX;$C~2LBCYKQ*O3*DUk2g z%QU5CHx-!?vd+O+R*X>Dwp?zxDNW$^J9*Pth3PMz1#ob?WKxz0FR@ek*ml8V7j(-o zxezbGN_@2rzMMa>y4~#=t{)uU$bFWpA4+^Z^2NxF@(&j`FK+dpXa&?*utC&7@NWkd zd~p0Z+T~RUNa@ui4EB0uP(yK52*|cm@(mRn(DpbG$O_y{VA&P;84)i4q!j<;hmd#? zpzEMxHOoJh@rZN2467&bA9x*howI%@^>g{w(Vs4C^}q6;^Nl9pQxx|?i#mJ~s}+Ij zy#wvpglK=K9VyC-q&r^|zba#Kl7&KMr_2X=AEo62^@t>bObsk8UU<^~V4M`_A6_zN z4WOY?O7D@ed*oOv7^LIt^DP2hYe1n#ufNwK(6yopJ$(HLjt}E_Z2csT$0Wa=-R2WM npJ+ri8vHTTAkf{iWcTYEZ#Hq-=mofO*$>Uj?)GF0BU0(VU`!DN literal 0 HcmV?d00001 diff --git a/app/core/__pycache__/json_utils.cpython-312.pyc b/app/core/__pycache__/json_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7bd9150c683968de4f3fbef4ba114626cef99be9 GIT binary patch literal 1045 zcmaJN%+^BF5@I|+ zIDm-;5(3d^qK2avZi*)pFC{&I3kMTZBDY337`!;MZIzQRnfK=X%zW?fOGCpMK&kcB zZcO0-_|B9X3DwKdReD(l8qi<@1S=8-A=SAAm*j&y1R9S7O+aE)(8MP~&@l}a$02(@ zra-Gjw3=3t6s`JC0CWJ; zS@Xv0Yqx*06X2FSXG%4T#^<pgaC=OS zVm&F-#>yBGLr)QOStcfq8AdEMA&;tfQqwajIihPwj-pfq5jhr>4H}G6rZ4LqA0O}P z^T&yvQu;#VQrEo@2?g$V_bF7NzC%_@+DlpK^_zpYFH7qSKR!Gwt*#%gt(TVO4&Fbv zv+&DfiqjHpV2i`ZauB5A$U{q8QC9%8*5lhKl1kA>#@R-fca@$FU zld4HyLqwQQmgN)wpg)w+@gylT+!G+UiEJ1w6 zv{g^{466|{%*?o%{>a8pW+>)>zXtG_)kEjccl)q zjlpc$EDtTs$4c{|sR%O67@f4;H;boDw|F%*Z5Q?q9MN!qo7f{~Pg2*?3Dk=j4x1$D hq>O??xCh*Oz*PcQ%8q(C43|h5Fu6jGZHdp*z&}H^07?J= literal 0 HcmV?d00001 diff --git a/app/core/__pycache__/logging.cpython-312.pyc b/app/core/__pycache__/logging.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a702a7ac7b0116baa74e17e15fcd79c52ea6a1c9 GIT binary patch literal 3690 zcmai1U2Gf25#Hnd@c5@6TCym~vL!XL#U`fXwvikoh#lLJT_>hpOMqL#@!`T-%9M^j z**(gVN+CvLv{dWBN`Y1~VAO6?pe^hI%2V^`_r92+P_i2vFi;do-k3^3gFJQijwi~h z(k{T=*_qkd-MN`>mVXU}{0Lg`!&>?y6`_CdiQD7`vAzn#GBS`Mm?+_lLPD_To`eTR zk11xPgd}j8Xv!HSq1d!!dNXQ5wQ1S(W&8<0&M%JN;1?NQX=F*r;*{i zfeb(B?Pb~euiB?-P0GdIpY;xP?glOnc~~J#)sHvmnLRSdL_L z%L26)?JUh@n{&Tz7_O!-PjY#EQnzqaBz#|{@Qq-V*EP<4+uz^+_2{Xim#^i>G|lTt zd>K9=Hl1G6(;CHOCJk&opTC^U5s*8_jpgT?yR|bEVREa7~TX)RwB+T~@nS z4y>tzwch@cvZh90?e%l(U=}5L=Qj5fufGVKWrPvW7043-i&Kze+neFRGQ_U3uVj{RE+}#^>9Iespjsb8yI* z7&>!w=-knvagGoM1Z*l}MI|O#X}|^9#533=+$Qs8Fs1q=W-6XdYUyk$7v1fIwwR%l zOw!3DWj=d>jVWG-%rP~uTUTwCno6740h~DL@G(8-+}sMr9^ZY^DgW3e20==gb_OSDeOkOrFtAfM*{001N&7 zq=V}yj{W)0XTc`suy=zB{S9>2(bJB|^7#+W-y3`Mz0*tQD;;NQoxRo0p>pR?ZTFt# z=?|uBU47NA;d0k-rR&>Y_~c#wlD8g2!7vAg+Ti}`;7ECJWOe2*?|$@dW$<+A^wQaK zaNpCO{=0{6AFB2oD)$`vThHOev!#kobjlaJ0}bN2Iu%S%?SaZgPQhEK{p)<;ia1S0E#c# z*uX?;5%MZ9|AcHiwqI=cuse=?v=_P?sP5S%kFH#*_QlG5u{wgu>UTHz;H=|W_6&r% zRcQT(Ao%3(Si(>!VNJvsM~gW+!!KM z2K$ZbYB}RK)Qgs9wfL+kO||X7l?~~R^gwp8A7VusN2BPb^=I*#5LM#o&*0{t^V&+d zvjO;(47RT33?^~ZW(okZRiG?j6bL|VE~{lIfTavygXmEo;fcqB5JV|0nJZ-Zvq&aD z7~~Buk$9OPKj5TTASSuUseoe>78p3H*fC~rPU2iDgOpfIC5;y3euQv?=D?zA zq~X0}C9nREsB={ga|6GC?lU{?+B&Lj1Ld}XO53+e^544$?~PWvpRewEzN9|h-Fs)k zL61t`i>oh{`;L5ks=Vj;!-2}~Q>8#n4c-ba2AwBTE!g?AXW(w^cB~fJcQl?i zBg3_x!D>&e+!L$qvq83{E+b#h22y?gI^bfY)LQRC9WQ)M`N#T6|A}tlcfyG->BHAv z0x~MZqXBXZB;+{nUgMn&hYE)dObE|2!n2(4R3O|dI<>y|mBbYy&_>vEGH6%ywP1X?%9}nBZ z!`XlHnUkB`R|NE zeL1#ktHAlALY>Q=A5d5Vj9#c71fD^8(#m0;j;m(UB))kQ%N zK1G2~Q491>Q13s{ktb;X6Es{`yh3 None: + for key in keys[:-1]: + cfg = cfg.setdefault(key, {}) + # Coerce numeric env vars + try: + value = int(value) + except (TypeError, ValueError): + pass + cfg[keys[-1]] = value + + +@lru_cache(maxsize=1) +def get_config() -> dict: + with open(_CONFIG_PATH, "r", encoding="utf-8") as f: + cfg: dict = yaml.safe_load(f) + + for env_var, key_path in _ENV_OVERRIDES.items(): + value = os.environ.get(env_var) + if value is not None: + _set_nested(cfg, key_path, value) + + return cfg diff --git a/app/core/dependencies.py b/app/core/dependencies.py new file mode 100644 index 0000000..66a9c72 --- /dev/null +++ b/app/core/dependencies.py @@ -0,0 +1,23 @@ +from functools import lru_cache + +from app.clients.llm.base import LLMClient +from app.clients.llm.zhipuai_client import ZhipuAIClient +from app.clients.storage.base import StorageClient +from app.clients.storage.rustfs_client import RustFSClient +from app.core.config import get_config + + +@lru_cache(maxsize=1) +def get_llm_client() -> LLMClient: + cfg = get_config() + return ZhipuAIClient(api_key=cfg["zhipuai"]["api_key"]) + + +@lru_cache(maxsize=1) +def get_storage_client() -> StorageClient: + cfg = get_config() + return RustFSClient( + endpoint=cfg["storage"]["endpoint"], + access_key=cfg["storage"]["access_key"], + secret_key=cfg["storage"]["secret_key"], + ) diff --git a/app/core/exceptions.py b/app/core/exceptions.py new file mode 100644 index 0000000..aa7fd15 --- /dev/null +++ b/app/core/exceptions.py @@ -0,0 +1,50 @@ +from fastapi import Request +from fastapi.responses import JSONResponse + + +class AIServiceError(Exception): + status_code: int = 500 + code: str = "INTERNAL_ERROR" + + def __init__(self, message: str) -> None: + self.message = message + super().__init__(message) + + +class UnsupportedFileTypeError(AIServiceError): + status_code = 400 + code = "UNSUPPORTED_FILE_TYPE" + + +class VideoTooLargeError(AIServiceError): + status_code = 400 + code = "VIDEO_TOO_LARGE" + + +class StorageError(AIServiceError): + status_code = 502 + code = "STORAGE_ERROR" + + +class LLMParseError(AIServiceError): + status_code = 502 + code = "LLM_PARSE_ERROR" + + +class LLMCallError(AIServiceError): + status_code = 503 + code = "LLM_CALL_ERROR" + + +async def ai_service_exception_handler(request: Request, exc: AIServiceError) -> JSONResponse: + return JSONResponse( + status_code=exc.status_code, + content={"code": exc.code, "message": exc.message}, + ) + + +async def unhandled_exception_handler(request: Request, exc: Exception) -> JSONResponse: + return JSONResponse( + status_code=500, + content={"code": "INTERNAL_ERROR", "message": str(exc)}, + ) diff --git a/app/core/json_utils.py b/app/core/json_utils.py new file mode 100644 index 0000000..494b5fb --- /dev/null +++ b/app/core/json_utils.py @@ -0,0 +1,19 @@ +import json +import re + +from app.core.exceptions import LLMParseError + + +def extract_json(text: str) -> any: + """Parse JSON from LLM response, stripping Markdown code fences if present.""" + text = text.strip() + + # Strip ```json ... ``` or ``` ... ``` fences + fence_match = re.search(r"```(?:json)?\s*([\s\S]+?)\s*```", text) + if fence_match: + text = fence_match.group(1).strip() + + try: + return json.loads(text) + except json.JSONDecodeError as e: + raise LLMParseError(f"大模型返回非合法 JSON: {e}") from e diff --git a/app/core/logging.py b/app/core/logging.py new file mode 100644 index 0000000..1fd8b9d --- /dev/null +++ b/app/core/logging.py @@ -0,0 +1,62 @@ +import json +import logging +import time +from typing import Callable + +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import Request +from starlette.responses import Response + + +def get_logger(name: str) -> logging.Logger: + logger = logging.getLogger(name) + if not logger.handlers: + handler = logging.StreamHandler() + handler.setFormatter(_JsonFormatter()) + logger.addHandler(handler) + logger.propagate = False + return logger + + +class _JsonFormatter(logging.Formatter): + def format(self, record: logging.LogRecord) -> str: + payload = { + "time": self.formatTime(record, datefmt="%Y-%m-%dT%H:%M:%S"), + "level": record.levelname, + "logger": record.name, + "message": record.getMessage(), + } + if record.exc_info: + payload["exc_info"] = self.formatException(record.exc_info) + # Merge any extra fields passed via `extra=` + for key, value in record.__dict__.items(): + if key not in ( + "name", "msg", "args", "levelname", "levelno", "pathname", + "filename", "module", "exc_info", "exc_text", "stack_info", + "lineno", "funcName", "created", "msecs", "relativeCreated", + "thread", "threadName", "processName", "process", "message", + "taskName", + ): + payload[key] = value + return json.dumps(payload, ensure_ascii=False) + + +class RequestLoggingMiddleware(BaseHTTPMiddleware): + def __init__(self, app, logger: logging.Logger | None = None) -> None: + super().__init__(app) + self._logger = logger or get_logger("request") + + async def dispatch(self, request: Request, call_next: Callable) -> Response: + start = time.perf_counter() + response = await call_next(request) + duration_ms = round((time.perf_counter() - start) * 1000, 1) + self._logger.info( + "request", + extra={ + "method": request.method, + "path": request.url.path, + "status": response.status_code, + "duration_ms": duration_ms, + }, + ) + return response diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..1ee4df0 --- /dev/null +++ b/app/main.py @@ -0,0 +1,46 @@ +from contextlib import asynccontextmanager + +from fastapi import FastAPI + +from app.core.exceptions import ( + AIServiceError, + ai_service_exception_handler, + unhandled_exception_handler, +) +from app.core.logging import RequestLoggingMiddleware, get_logger + +logger = get_logger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + logger.info("startup", extra={"message": "AI service starting"}) + yield + logger.info("shutdown", extra={"message": "AI service stopping"}) + + +app = FastAPI( + title="Label AI Service", + description="知识图谱标注平台 AI 计算服务", + version="1.0.0", + lifespan=lifespan, +) + +app.add_middleware(RequestLoggingMiddleware) +app.add_exception_handler(AIServiceError, ai_service_exception_handler) +app.add_exception_handler(Exception, unhandled_exception_handler) + + +@app.get("/health", tags=["Health"]) +async def health(): + return {"status": "ok"} + + +# Routers registered after implementation (imported lazily to avoid circular deps) +from app.routers import text, image, video, qa, finetune # noqa: E402 + +app.include_router(text.router, prefix="/api/v1") +app.include_router(image.router, prefix="/api/v1") +app.include_router(video.router, prefix="/api/v1") +app.include_router(qa.router, prefix="/api/v1") +app.include_router(finetune.router, prefix="/api/v1") diff --git a/app/models/__init__.py b/app/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/routers/__init__.py b/app/routers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/routers/finetune.py b/app/routers/finetune.py new file mode 100644 index 0000000..f16ec0f --- /dev/null +++ b/app/routers/finetune.py @@ -0,0 +1,3 @@ +from fastapi import APIRouter + +router = APIRouter(tags=["Finetune"]) diff --git a/app/routers/image.py b/app/routers/image.py new file mode 100644 index 0000000..30aefbc --- /dev/null +++ b/app/routers/image.py @@ -0,0 +1,3 @@ +from fastapi import APIRouter + +router = APIRouter(tags=["Image"]) diff --git a/app/routers/qa.py b/app/routers/qa.py new file mode 100644 index 0000000..5b22c10 --- /dev/null +++ b/app/routers/qa.py @@ -0,0 +1,3 @@ +from fastapi import APIRouter + +router = APIRouter(tags=["QA"]) diff --git a/app/routers/text.py b/app/routers/text.py new file mode 100644 index 0000000..44c49f9 --- /dev/null +++ b/app/routers/text.py @@ -0,0 +1,3 @@ +from fastapi import APIRouter + +router = APIRouter(tags=["Text"]) diff --git a/app/routers/video.py b/app/routers/video.py new file mode 100644 index 0000000..136e997 --- /dev/null +++ b/app/routers/video.py @@ -0,0 +1,3 @@ +from fastapi import APIRouter + +router = APIRouter(tags=["Video"]) diff --git a/app/services/__init__.py b/app/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..2e68628 --- /dev/null +++ b/config.yaml @@ -0,0 +1,19 @@ +server: + port: 8000 + log_level: INFO + +storage: + buckets: + source_data: "source-data" + finetune_export: "finetune-export" + +backend: {} # callback_url injected via BACKEND_CALLBACK_URL env var + +video: + frame_sample_count: 8 # uniform frames sampled for video-to-text + max_file_size_mb: 200 # video size limit (override with MAX_VIDEO_SIZE_MB) + keyframe_diff_threshold: 30.0 # grayscale mean-diff threshold for keyframe detection + +models: + default_text: "glm-4-flash" + default_vision: "glm-4v-flash" diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..fa5fa65 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,37 @@ +version: "3.9" + +services: + ai-service: + build: . + ports: + - "8000:8000" + env_file: + - .env + depends_on: + rustfs: + condition: service_healthy + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 10s + + rustfs: + image: rustfs/rustfs:latest + ports: + - "9000:9000" + environment: + RUSTFS_ACCESS_KEY: ${STORAGE_ACCESS_KEY} + RUSTFS_SECRET_KEY: ${STORAGE_SECRET_KEY} + volumes: + - rustfs_data:/data + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/health"] + interval: 10s + timeout: 3s + retries: 5 + start_period: 5s + +volumes: + rustfs_data: diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..78c5011 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +asyncio_mode = auto +testpaths = tests diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9e74516 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,16 @@ +fastapi>=0.111.0 +uvicorn[standard]>=0.29.0 +pydantic>=2.7.0 +zhipuai>=2.1.0 +boto3>=1.34.0 +pdfplumber>=0.11.0 +python-docx>=1.1.0 +opencv-python-headless>=4.9.0 +numpy>=1.26.0 +httpx>=0.27.0 +python-dotenv>=1.0.0 +pyyaml>=6.0.0 + +# Testing +pytest>=8.0.0 +pytest-asyncio>=0.23.0 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/__pycache__/__init__.cpython-312.pyc b/tests/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3cb36901d8f4dab1a295ef7e7b269ff1f4a5e050 GIT binary patch literal 150 zcmX@j%ge<81UDAl$OO@kK?FMZ%mNgd&QQsq$>_I|p@<2{`wUX^%hbgxraZqWySN}R zIW;CHF)1|%LdGX%#uuj+m1P2jOHzwVieuvAGxIV_;^XxSDsOSvwtD S1ma>4<0CU8BV!RWkOcr$swI{H literal 0 HcmV?d00001 diff --git a/tests/__pycache__/conftest.cpython-312-pytest-9.0.3.pyc b/tests/__pycache__/conftest.cpython-312-pytest-9.0.3.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d04d8c6033003d64ab4a585c53efa23175db1870 GIT binary patch literal 2414 zcmb_dO>ERg6rS;V|FUsHwn-rArb$CVEUGqDDIlSUmO?=Yk_+^*r8SN{2@dw!8haBq z3evWX1l+i^xp1h|OA)k6kG=KY3sJWsGC>?#r1lorOX;cajepokq^FML=Qr$(d)!d$5BJNpNiwlM2!{R83AV^Y|G>VDa+Ha zLD_Sez9fC}Dfr#t;?xDrs-YmZUU4N@sX`Ws=w>dPvCbxzTH&3_Z!Hq&+bE*w!qN9l$t{9*84`BXh zQpq;j6H+^bk+&fd!8#D>ZHP>~Y#~f$nkx|CyV3N5St?!eefQYJ1iPl$uEi?4V^6r8 zdFEV=873j$ka9MZD~I0*xq1v|v1U1%q0Tgb=%BmqcJ%u2a3({jb#4W{7;NaG@obz;Hdctx04XLNdYVGh(-29L+rBjHC?3?-me;Wz7Rn>UU31U zq(Qz>Mwjx%*$p`xVjjxl>+<+-@>?%FJRU12iYErCjs(RGXUC_#2TW)ESSE4Fy+A_p zkN_YL7beIHm(2U2FU&qn;Dv@uQW&^><=4XK19>!LYfq4yshgH>*5HDE;5O8xH>A3F zZT*VoLE-eF+EzGQajXkI4msjMCUgU(UNe0$23}lI6-1E;inE&MYpz+r zWLUsC@(}c+bWugGKs~O^XfQec0)joAsi+2oX*|HefJi%>RT8<)Z8B$P&1>-MV;wRh zV6B7^R;{X}T|pgSWLSiBmcz;?E@T H#8UqSb>S3< literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_config.cpython-312-pytest-9.0.3.pyc b/tests/__pycache__/test_config.cpython-312-pytest-9.0.3.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1652cf6588174d5aef96b8f80970d3a0f46f9293 GIT binary patch literal 4441 zcmd^C&2QX96t~y2Uhg`eO`0^RXj5otHf=)Mq%DO6wPm+ar6xhNR48g?IcsmaiFdsk zJ83rTN)`0biVtz42M&?AY!nXt1NZ}wxY+s7MuQL)h(m8x!>JMyZ#;Iggg}a*2b7Wb zH}Ac9^Wo=tGr#?=wl>B=Ir7zd<|o8)KT{v8if{bFSL4iz>h`SUW`S7i?@$rdaN$; zL`<2vbY?Oxkj9YUKtx!t|YG~uaJhT*T&wxFq~A9qltIM zE{|Tj6z53+H%GPR6Z{p`i{c4#cg+ z5D~yeVc=PvYC&h)YdaISsPPO5&t^2;Bn?@0PDy7BU9mFrx{{rE{4K1Sh*?w94U5!h zdRm<^Y{k~+?0I?8$o35Mqz%=&Me2g4Sy-T%o3HU2XZ_ZQWt&)?)JbGwCN-_w7LhG; z2B&mIQ*CwLS9+)tj}b}D=X+Bo)-k<$NNiHK6&5ujr_|IfT}c_biirq|(sQ#Uc0OOQ zb<0-3)~KXZ!%(oE$0;+LSFui_ilwG?Y9VzOymyBMCNVZKX;RWA&Z@Se&*cp@r`pgG zj}kd+=BD*RUbR!VNGz3}R3HE|hE9Z8m_h&pkzr1SMY6A2>91B!F@?=>x>`AF(fhX} zD9>I`ym8~UiKnf+n$m9=>V$4ERMd<@zjX%8^c^h^{c!Z=3#8S1^MaZ+6#o-gieai6 z?t=+v{H)*Lvb@9nf_m0bT?MtdsI7uJUDUaXj%|os!zKQnur4)Kq;^+of6!kJJ5u|a z)cZuBO;1El-d~9wc4LQEVn^=0{Yb96H+O&NL5nMQIH+qO0pQ3T3kg^5sv!ET`39z~ zcp8(JsXr5|5nR?`)Z6z5odeyjeB42&$`*hlA1_<3e5!)zv*sI^w&H0_UZ(y`tVVF5 z!_vhbg2g%LTzLe*k-N$xu6(Y7=(FY&Ml zIaeO6Ao{HN2Bxie8k3i)KNG7FTiRE<<<4|c6Ew6%CU+r{At0Y&@2c?VWJ%B> zB~%oO$Rm>BVt7{~DFK1xflfl7=O&UuKoL#=u!QBAo8)!_8C$^3G*`g>Qx_mBw_bxSj&#thfnaF@|5F zGR=H^hyvUVaDrR94M!zDLM<;-c!k1I3Y`Fp5`LA+$0)=pbW=D^;RJ;q3cUc91Q48C z_%%>Shj#?gtHZlR*xdoMDJEP|7%mok7Tj8C=T8Cda6jyH7Cl1s71ZLQ7N@nloLq(U zjWusOdH(A&0w%#HVMYJD+(7>@HzHncNG~`yqMNx9)8rSD8#UFrLI3NLa3Xo}yhm^mTqe@QOvbP?Ibdm& znrv!q3L{6ojI02`>BahOoUwrzZJK~sGMjfmkdSE+k#+NEgqOdVMqrWi=3{v;4KiBv t7EVADoJyQU51S_f&+|WW++FUY==WUhQ?Z^;@=uRO`2LON2!GNW;CIcJe$M~^ literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_llm_client.cpython-312-pytest-9.0.3.pyc b/tests/__pycache__/test_llm_client.cpython-312-pytest-9.0.3.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9f328cacf8b1528e620fe3dadbbffb430677df0f GIT binary patch literal 4168 zcmc&1TTC3+_1>A;7yAGL*ddNB3byeQv#}fVvST-ZW2rT4rMh-vWNA8{8DMv3AH6e+ zf$U1yj_O2-lvavLB1KW6x@r)uluG^RXUJEj{_F;)i$POGHv$MM< zIJG~jUYK*wJ@=e5_ntYg{bM-XL|~lw#YW+OE;3XY zl^Al`K6N&mP<7MxrFGL(W^|xrGnR992?TQ{o~vv7*|R6FT+6bQnNw8#ilJQ94F}3f zLN;|a2U@OJx@nml%7&4aRc?e>ZozI%gOy}cOUbA*n*o6csP)5R;zwR0Uj*A9BN9x%KHZPsu0zRT5aEBKVZowY`(r7`Au2!u%Kq#?l zHqnG%i|$U56o@K-qr=~7)y5QT)hQhf2SlFP-MTbVJq zVQdkw{1zIIk7SFB z)n_-b=7`zRp#QNthl>>i*G*U>XS=&!pX;0%3h%FG*%+QTi=(ceB^a@>k?9h?i!m+5?b^3G{qR&K4+v_9|kP=}J z!86+t#RPp8=NSMVAPz*swyzovwxrG~mK{{H>71ck2?%>|bfznoZikp|tv@X1B^Jq)W&XIfjA?17_HkujUhml|U6n7C21%aoL>B<_(Rr$}>jxs$$3) zC9T^5*;J-=)L=(O`4QpWJOSHn@^qG^6-(CVbB2;p@Kh^49(Oz=vHf5OJBTlIF8A%m z#c^lhs5sHM7&I~7x}4#}bSJ+NngC9Smo?>TYGnh!y9XZgeE|2!S4-qCC(kdPyA{74 zuSk2#(%y>HQyCV&4v~@0@`q`unb+;fqV>HadDs zQtvB+rH=c^7(H#0;N|4X`oFxioAFb(sO#KN>=Xda$!ygSm3&uVoU zle;95`_fHa|89w)G2PTT(M5KQ`_hfi`expGy7AZ2O+agENH^F<@X?^@371x%A2Qtp z5>l)EPu6OGy}hhAN4qzd4_o<@ zSTlAE8M_-k8XRhufU;`UnYU!sz6F~#I8#lBs|y-lN%Lr4&9`eTe&sfA`(j?tOUk?{9u^_x5k^{qC30 zwRxa)sIv@NxO3aWY=S#@G+>8*kk43&w9X67ZoX^>&4i}Q`t-D}TDI3Ic~DrDgsGbh zd#fnrm8h{ZJjnf(chguFb^IA2XEVH)#v|vjg0WE$#=tRC0JzV7G4g!r+@aG3{2S3uvedkNjiS@{2#Xni{Pkt$qrn9sJ zO#aEQYKwoH7-Tm_+s8t5ZO;>9L|i*YN6&cIj!TFS1dx7)0{$`aj0Jojdwf8N?y>!D zHRmyGcgl`2VouG48^xG@jU}K_2-HZnuS#<=z<1!yb~v9&SZJXb{=ncrX?T}yANb}@ zop<4O8)wQgJyx@%^Brv8bYdP#w(~26e-m3uD=Y=wyt$B36Iouu8Kw$u`xU~l^K=To z|Jig_%NzO`HVHJ0OJ3(UMM~-CguF~{hCe5LcggUd$OQngA_i62O%YN7;{NTtpOKM3EqoO{oG-+R7u?q31{AA$7zZ%VOkUPAtXf|6!2(y%S!R?@0KP{s}+h`9xhZFcDz9GZ9SIPt;Q)yW$;Y zhn277dV}03ixZ8Y(^)Aqx%hUIt9&K5t=@)*sOPcOYk7!zUR%A54^hu&tJnGv_58Ma zZD(ny?q4|h@erqbPl(g8=!tZ6R(EDZEjkmTy8lQbCa1Kcijr19ta~A=YNIEOOq5S* zX+@ltS$VXwiiEg8aKLZnJdi6yg9BhBiQXvlgJgkdsKb)PNe+p>>AX?SS)iKJEHgJU zM+HkiN244Z7x=qTK4z4hBwI-zQRy5Utd|Idzq90w<0UdjL$2}pE-{k{N7;PTuxboE ztiXh(RKds&Po}kW|9sPore%hDdW^E61HHYyx?@&8tGiS=s>oW%p?l<%lu5@@n(h;$ zQCU?5EcY4Z$cK1^hv6%@4hjxhcL-`ftg>VAehew7^&83xDH6upl zQwecWP8d`WV}dFxr(=+<$*QI@DwKzBjynTmGp3+4L4$A)i8=)29J%Wa=Ii$_c@Ny0 z_*-3DsU=iueXP{Fzw}?A7oDa0hHvVLZ>Z=!@R|3(s_KF27q<*L zK5`y%N3Gq1OgH>qhu=yV#(o^u7%n!o?JxHRx#=+XI7V|MUfv(5W2HDQPF1q)Es5h4 zs*=P@PIIqchkgGEEbC?NisPqFCt0A9Yk|wr95>rd$buv1P=BrY%?9FiCRh1N?wn(a zN*=5;#oZ{6NAha*rcS)U-ZJ^s1Dz&beaO+)$+kVns7Wq5sOLYw2?4 z%1XX>xOq5T3%ul)>T-OHGRdj6nmyQBYBTdH-<;EI$^4!N!+eG=IA9%r@SepVt+Z%z zxKB3&=1$=q+(#~c_PzsdVQ^gMpGr$-qo%I;Z3boq_uvWvo5YQ;;N72*6N&V7qTq~H z;7623AiC#ZLQGCd;-UF|yb*$wo=YXtq9jb71zCU#npIM&A%zbn(or#?9sd~Kn}Q1ic8phKQCjt@>8ui!cT1us&IdI4jMjr4gtar8vJn)o(^YPG_&S|$+SZ*p z+Z$3lhKDBPz09 z2gSl}N*1du%PRZDU9*nRt$I>Zg)b(az zDy<|%O_0xI5@Jf!phRyqCdN9JB_W-P%EDYsn+ds?{ZY2yZ2UL~LvfE?=raVP*#Wz- zS4DH#!IH7pk3VpSmHkkq1%B#>K<<-o&ym0H=>0HMh&;cv=Y@ReBEPA? zZ(8EFmzp;fo4X6m-PhW0)h{-W6$4}EMoTU2dA|L^-b*hQ{Ov^^|L+*GAy~A4xrVe@ zVVC&!OD|eF76Zy9-dE|p$oCcazFX~|K9}eF?(nCUoh0})T~*O~oIfq2W=oo7MkUuxw|T(_ z0Ro^e%WPIENFK>+YFP%`VHj-Y!>s{tW+npv4ZxZC@Qv$E<5YUb(1;}ZtHmElOS0}h0f-E4iS9inYtOO!l*L89_x{y;=dazp^|Sk*d~)x-KivQDx7Vre zgu@-3QP7ah`;#$MjisjX%7;7(8XqlJ`WuaS&gG z=AqaV`PLG?Tj_<~&PQC4A$r@hF%ov%-b^FAT(`II$Zz#x{w|7q*csX9zTNMJoHggu zathjPPJsmH5l#lQU7c#V1cK!zKpkwaDxrJ0?le{4pIZn2-+PuNCxd@i75uxkfZ1&v zO>$LQ1OJuwYd8uOPzU(0XnwDw0PydAfNll;Jr?|XC7%uc(G;>|YYD-tDm?)HtF%$9 zwP*27>lK+B?>Bd94gA-Q>wzQblm-#SvvO)$n~4!XV&m!sPs8O(8-Sb43@DF5%gSaT zI+C|Z8%Y>R4-!22N*|Iv zNcw@8Aa)!x29OLQ*^6W!kaZxnlU?tsE-gPKYD`#2uo)}k(86|X<@Z1gxY{{%Yb3uT zVqWP-3j8BWd{?Qlwb`7kDVJSHMi#Z)(Zod2HrYY}5hNo>jv`?|bOO0iAa)oE)jp@SaOioc zikC!1>uumrXkaZ4?E)OyVd0Q*Xt%Gyp`)LBM;Q)v7J{AczMLO8wip~MddBjeF~pu@ z07RoD&lm$DCVZ!iL3g!*rPpCPJ~4DRK(9L0GEff$m9pz(#($QZQU(_0+EjN0a;?5< z#jGY^JXAZr^&KwI9IY9}I?)0za1z9AIWFg5(J!Cl)x`J5v63stL-gy$I+f_xR;LzF zS>W>kJuCW`OptO)2vW{+jOx-VAZ59Y@me6|GGkNoTaHmVh*No(aqTtnm)C^*UtAM( zqc72muP}$XZoCYxQwoH6YENHJZ#bTrR?#7eXXI%k)b+)Z;4JrGhJw(4FAAb>(TN$^7o&Te(k9E&3*k{6wCgus|18i~Ozv zziWvPmzvtH?D)lwVpCV4sjJx3S7_=xcf91^SoF6;bb58B;P1-w`>zcH$@{ym4Hx|T zi#-0{DQ6hcB9jdMGt@U&I|Y?WyK=$biA{FzXAO4(v$kc-u{GUkHEreF+g{@LFMEme z$MoN2uVHi}cnzb?{KrQ(0~^|;d&~z4=9B501S6sf8>a%E8N!M@r^GY}Ga1R_!P_@ZuMrSSJeWZ z6x}@)I|J`bvT+3Yq^Qj5Zc#m(ipJ9HNrHuP%-d5COqExW;Nuf}b24ADl4&WMkPj&d zD8PVG{VR}V2c`5&Lf#;+2fidbz92jAk)bchu4NbDJa2A(ZSz;0hc>P_2-mQT-10`u zfAqCSi*!SQZupAh>B|(0Tb3ES9IPsL(x=RFWS1MN%6Tf7<;d2k|Fl`|q))G}@1h4- L_g`_qvHAW7Th51! literal 0 HcmV?d00001 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..ae81f4d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,39 @@ +import pytest +from unittest.mock import AsyncMock, MagicMock +from fastapi.testclient import TestClient + +from app.clients.llm.base import LLMClient +from app.clients.storage.base import StorageClient +from app.core.dependencies import get_llm_client, get_storage_client + + +@pytest.fixture +def mock_llm() -> LLMClient: + client = MagicMock(spec=LLMClient) + client.chat = AsyncMock(return_value='[]') + client.chat_vision = AsyncMock(return_value='[]') + return client + + +@pytest.fixture +def mock_storage() -> StorageClient: + client = MagicMock(spec=StorageClient) + client.download_bytes = AsyncMock(return_value=b"") + client.upload_bytes = AsyncMock(return_value=None) + client.get_presigned_url = AsyncMock(return_value="http://example.com/presigned") + client.get_object_size = AsyncMock(return_value=10 * 1024 * 1024) # 10 MB default + return client + + +@pytest.fixture +def test_app(mock_llm, mock_storage): + from app.main import app + app.dependency_overrides[get_llm_client] = lambda: mock_llm + app.dependency_overrides[get_storage_client] = lambda: mock_storage + yield app + app.dependency_overrides.clear() + + +@pytest.fixture +def client(test_app): + return TestClient(test_app) diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..aa8f464 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,40 @@ +import os +import pytest + + +def test_yaml_defaults_load(monkeypatch): + # Clear lru_cache so each test gets a fresh load + from app.core import config as cfg_module + cfg_module.get_config.cache_clear() + + # Remove env overrides that might bleed from shell environment + for var in ["MAX_VIDEO_SIZE_MB", "LOG_LEVEL", "STORAGE_ENDPOINT"]: + monkeypatch.delenv(var, raising=False) + + cfg = cfg_module.get_config() + + assert cfg["server"]["port"] == 8000 + assert cfg["video"]["max_file_size_mb"] == 200 + assert cfg["models"]["default_text"] == "glm-4-flash" + assert cfg["models"]["default_vision"] == "glm-4v-flash" + assert cfg["storage"]["buckets"]["source_data"] == "source-data" + + +def test_max_video_size_env_override(monkeypatch): + from app.core import config as cfg_module + cfg_module.get_config.cache_clear() + + monkeypatch.setenv("MAX_VIDEO_SIZE_MB", "500") + cfg = cfg_module.get_config() + + assert cfg["video"]["max_file_size_mb"] == 500 + + +def test_log_level_env_override(monkeypatch): + from app.core import config as cfg_module + cfg_module.get_config.cache_clear() + + monkeypatch.setenv("LOG_LEVEL", "DEBUG") + cfg = cfg_module.get_config() + + assert cfg["server"]["log_level"] == "DEBUG" diff --git a/tests/test_llm_client.py b/tests/test_llm_client.py new file mode 100644 index 0000000..39a586b --- /dev/null +++ b/tests/test_llm_client.py @@ -0,0 +1,40 @@ +import pytest +from unittest.mock import MagicMock, patch + +from app.clients.llm.zhipuai_client import ZhipuAIClient +from app.core.exceptions import LLMCallError + + +@pytest.fixture +def mock_sdk_response(): + resp = MagicMock() + resp.choices[0].message.content = '{"result": "ok"}' + return resp + + +@pytest.fixture +def client(): + with patch("app.clients.llm.zhipuai_client.ZhipuAI"): + c = ZhipuAIClient(api_key="test-key") + return c + + +@pytest.mark.asyncio +async def test_chat_returns_content(client, mock_sdk_response): + client._client.chat.completions.create.return_value = mock_sdk_response + result = await client.chat("glm-4-flash", [{"role": "user", "content": "hello"}]) + assert result == '{"result": "ok"}' + + +@pytest.mark.asyncio +async def test_chat_vision_returns_content(client, mock_sdk_response): + client._client.chat.completions.create.return_value = mock_sdk_response + result = await client.chat_vision("glm-4v-flash", [{"role": "user", "content": []}]) + assert result == '{"result": "ok"}' + + +@pytest.mark.asyncio +async def test_llm_call_error_on_sdk_exception(client): + client._client.chat.completions.create.side_effect = RuntimeError("quota exceeded") + with pytest.raises(LLMCallError, match="大模型调用失败"): + await client.chat("glm-4-flash", [{"role": "user", "content": "hi"}]) diff --git a/tests/test_storage_client.py b/tests/test_storage_client.py new file mode 100644 index 0000000..d124563 --- /dev/null +++ b/tests/test_storage_client.py @@ -0,0 +1,62 @@ +import pytest +from unittest.mock import MagicMock, patch +from botocore.exceptions import ClientError + +from app.clients.storage.rustfs_client import RustFSClient +from app.core.exceptions import StorageError + + +@pytest.fixture +def client(): + with patch("app.clients.storage.rustfs_client.boto3") as mock_boto3: + c = RustFSClient( + endpoint="http://rustfs:9000", + access_key="key", + secret_key="secret", + ) + c._s3 = MagicMock() + return c + + +@pytest.mark.asyncio +async def test_download_bytes_returns_bytes(client): + client._s3.get_object.return_value = {"Body": MagicMock(read=lambda: b"hello")} + result = await client.download_bytes("source-data", "text/test.txt") + assert result == b"hello" + client._s3.get_object.assert_called_once_with(Bucket="source-data", Key="text/test.txt") + + +@pytest.mark.asyncio +async def test_download_bytes_raises_storage_error(client): + client._s3.get_object.side_effect = ClientError( + {"Error": {"Code": "NoSuchKey", "Message": "Not Found"}}, "GetObject" + ) + with pytest.raises(StorageError, match="存储下载失败"): + await client.download_bytes("source-data", "missing.txt") + + +@pytest.mark.asyncio +async def test_get_object_size_returns_content_length(client): + client._s3.head_object.return_value = {"ContentLength": 1024} + size = await client.get_object_size("source-data", "video/test.mp4") + assert size == 1024 + client._s3.head_object.assert_called_once_with(Bucket="source-data", Key="video/test.mp4") + + +@pytest.mark.asyncio +async def test_get_object_size_raises_storage_error(client): + client._s3.head_object.side_effect = ClientError( + {"Error": {"Code": "NoSuchKey", "Message": "Not Found"}}, "HeadObject" + ) + with pytest.raises(StorageError, match="获取文件大小失败"): + await client.get_object_size("source-data", "video/missing.mp4") + + +@pytest.mark.asyncio +async def test_upload_bytes_calls_put_object(client): + client._s3.put_object.return_value = {} + await client.upload_bytes("source-data", "frames/1/0.jpg", b"jpeg-data", "image/jpeg") + client._s3.put_object.assert_called_once() + call_kwargs = client._s3.put_object.call_args + assert call_kwargs.kwargs["Bucket"] == "source-data" + assert call_kwargs.kwargs["Key"] == "frames/1/0.jpg"