From 0274bb470ace469ee6102afc51a4537ab3019400 Mon Sep 17 00:00:00 2001 From: wh Date: Fri, 10 Apr 2026 16:00:08 +0800 Subject: [PATCH] =?UTF-8?q?feat(US3+4):=20video=20frame=20extraction=20+?= =?UTF-8?q?=20video-to-text=20=E2=80=94=20POST=20/api/v1/video/*?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - app/models/video_models.py: ExtractFramesRequest, VideoToTextRequest, FrameInfo, VideoJobCallback, VideoAcceptedResponse - app/services/video_service.py: interval+keyframe frame extraction, uniform-sample video-to-text, HTTP callback, temp file cleanup - app/routers/video.py: size check helper (_check_video_size via head_object), BackgroundTasks enqueue for both endpoints - tests: 6 service + 4 router tests, 10/10 passing --- .../__pycache__/video_models.cpython-312.pyc | Bin 0 -> 1869 bytes app/models/video_models.py | 38 ++++ app/routers/__pycache__/video.cpython-312.pyc | Bin 252 -> 3432 bytes app/routers/video.py | 68 +++++- .../__pycache__/video_service.cpython-312.pyc | Bin 0 -> 9774 bytes app/services/video_service.py | 189 +++++++++++++++++ ..._video_router.cpython-312-pytest-9.0.3.pyc | Bin 0 -> 6454 bytes ...video_service.cpython-312-pytest-9.0.3.pyc | Bin 0 -> 17283 bytes tests/test_video_router.py | 71 +++++++ tests/test_video_service.py | 195 ++++++++++++++++++ 10 files changed, 560 insertions(+), 1 deletion(-) create mode 100644 app/models/__pycache__/video_models.cpython-312.pyc create mode 100644 app/models/video_models.py create mode 100644 app/services/__pycache__/video_service.cpython-312.pyc create mode 100644 app/services/video_service.py create mode 100644 tests/__pycache__/test_video_router.cpython-312-pytest-9.0.3.pyc create mode 100644 tests/__pycache__/test_video_service.cpython-312-pytest-9.0.3.pyc create mode 100644 tests/test_video_router.py create mode 100644 tests/test_video_service.py diff --git a/app/models/__pycache__/video_models.cpython-312.pyc b/app/models/__pycache__/video_models.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5792e1c74c6585700f08e1edc9599eb7c9e62aea GIT binary patch literal 1869 zcma)6&u<$=6rT0|v}4C^;sP-yLT-|35pXF72vsF$S}388R0;Lc`*o7I^>gL_TbKJz$WcFh}VRs|S!e+W{ijy;tvrnP_lrYlJC~0KA zBur-p)vg1}Ot3O9EFCRsG%V#|s-D%bm6wgZuH$B9`PPBpepB4$ev2jd*rN_hgv{;- zA@gEiJdoKW>hLD>0xGRfZJ~}MJF>yQ(@Qj=RlD}?jZSH;>@rOz5 zH`#9J@3AmN&kwu=P80y`$MG(nDol1C2b4v)F?8OIyBzbazyKpz2l#=Eic5#}6!OKx z?NO=H+d957G%BM?wO8-oJTbKAZNno0wiflo3$w#b!jRgWHIJZD;Az$ ztvK`rbF;jF6BiMR2qlC|2ulFc+z%sP@Cp*IBP=5*O)jJL>J%_l$@>8RR<&3j8s$*| zrGBm2yLEi6Uw*PVG^(T8MsKJ8#o)b@#;LK9E{f}&FZA0ma-q;Dsw(}i;hHk5>iSN5 zKava60;TLgIwF9=M1gWCjkR8P?RhOwfmDksT1uL^__6vq!YlA6I053nqyk=CKHM3V zmXCJQ$k6Hh$S90l6dBdwD%;Fi0bi<5N<|eCjAWVlf=f$EZ{%h;OO=oA+Onj|=d)<< zrXPlTe)C~kG8UAK=p=P%DLoRo5OqY{5$PjPV4O$XgBK$4zh>^jf6Js4!mA~XFo3(Q z)Y*VOzlt!Y-4)~zP~GH10MJg|Lj~(@@AmOlf91)}fc}!N4Gnj+TI+51m;2(RGBj$V zH`jW1`_~5Llg(3OEfpD8J744)j9esgS9cA0SxNL!v&mw?=slLi@G3EBPr_G%)m>{| zgn!T{ER7MyOVr z+d?|m(?|9Oe+OJN_>(PwKXgsgej}@6%hcW&d@v@^j7zq5b+9rf(2UEvR_2Ekt+03q7AX&XtKel9SF#zkY7yCxJavtOKz-tzJF^?d zaw~xlSDG{DoO|b-yL-Ox-22Bsz(-)*y|$9Q=q2Q@Xw*lOqpW2FLaq>vXo601LQ=?y zNiioSB>{b+F6We_!gWb^<=ja(*Ja(4^CrDqS9D*_pY-RNl1497@8RWWKZ_b61 zp@Xf;>a3o`zpSras+NuTM3DP#LSqo)+TFY5E zxrN5Yzih8tFSRgj(IRg|=Ox<7)rp;E0)L>KJV}d}Gv1rSp@S>{vpyf@tY_E|umV zSYfmc`xD0pLBhwf8qE*o^MfiIqmMC`XRw{a*Y<#6V$Z4nXFDz z1=T!mxpL~sRBqIAF=`gspns7=XfO3=*1B9?Grh^#piY4h)qDdtg<{^qL!6W}vsaN@o zFhzREOPy~@&j|_TP?o?xi=B}1_Uq^F{QUB*m(Sn4alQK5>(zxzx3B-8dj5BJUYV)B zaQ&mVUbyv>Y4CRxk#Yz7K6>k=>hFJCz3`)sBJ6}aEn2fl9RVPwvk5f*mWtuP&TeIKHUA^Jw|0k;;LQvPvrlXnB98oS7&$ z>1&eM?5h#c>#Iq`lK>I!6FxSYA<>!EUP+i0dIkBF1ZbyXd-#_20MrnlT^SD0JAZ(s z2zlmUGL_3~RJZ(wshULtIH6HXV)R)nG}<6n%0zah8zqMjE_gV&_5z@6aG4Eg)f>cV z38AwUmTDrG5=wxSq{-Y?xUH0=@%6D`4GztfC3RSCI`7sz_4(5-6N|}5Nd(A}aopjC zQBvj|f^3AjKuW?mwE2z0y@6%J+oh%8(oSdmLpx2NY~ZLMKF4&2XFe~vyaamyL#l; zBiBY2_bvJQm*xJl-2b5*U6$J`a{GIp*s>>4@g!C}{uNL2iYI#4t0*qW-}FE^F<3tO zUPm zCjG5X;}JCBO?ae$cRi0>C3#bx{3kqe-^U~66L{3X{Qrzc5-`a5_=G; z;Gmf82K^q2Sr?idz+t;poB0%$WFvk(8}Q4%h#&4j!B3KVAV#h@Aa+pf9OO=9_7zxw zKsJ5_!Ul2h{|VyGil>u;`jcaN%Yk-_F`ZCO9%++yr$+Xlnd&3KCnVuU7K=iIgo8xt=3 zT(=~{WR{ejwELeW7xLi18uDUZBWs`%yJdO44$+G`-OKvHhi9L$3uH|a1mOeHQz1Q7 zvg>{FrT580ACLi1{wd0WxJp3$!$*YA{ly!eZJYJJ>wR$ARdXqVZ*HVU;I-J#U+>tj z@>9dygnPgm&elRi^qh}>KVA-ew*2t^@{`}4jxPzrs~)mrXL)DOQu`yT?y#T=E8g~+ zgvy7lJ8KFmH5UoSYkaciA+Gq}z35$&h&UqDM80BIBX)bayTAOMA&h-=UF<){w$DDj s6x+S(jq)7vnuH3^fr^uZs~+Nu)cCrZ57YV4i|LLDt0L;G5B{@%1AD||$N&HU delta 204 zcmaDM^@p+kG%qg~0}$jazLDt!q#uJgFu)9Dd{zN6rZc24q%h_%3 diff --git a/app/routers/video.py b/app/routers/video.py index 136e997..efdba25 100644 --- a/app/routers/video.py +++ b/app/routers/video.py @@ -1,3 +1,69 @@ -from fastapi import APIRouter +from fastapi import APIRouter, BackgroundTasks, Depends + +from app.clients.llm.base import LLMClient +from app.clients.storage.base import StorageClient +from app.core.config import get_config +from app.core.dependencies import get_llm_client, get_storage_client +from app.core.exceptions import VideoTooLargeError +from app.models.video_models import ( + ExtractFramesRequest, + VideoAcceptedResponse, + VideoToTextRequest, +) +from app.services import video_service router = APIRouter(tags=["Video"]) + + +async def _check_video_size(storage: StorageClient, bucket: str, file_path: str, max_mb: int) -> None: + size_bytes = await storage.get_object_size(bucket, file_path) + if size_bytes > max_mb * 1024 * 1024: + raise VideoTooLargeError( + f"视频文件大小超出限制(最大 {max_mb}MB,当前 {size_bytes // 1024 // 1024}MB)" + ) + + +@router.post("/video/extract-frames", response_model=VideoAcceptedResponse, status_code=202) +async def extract_frames( + req: ExtractFramesRequest, + background_tasks: BackgroundTasks, + storage: StorageClient = Depends(get_storage_client), +) -> VideoAcceptedResponse: + cfg = get_config() + bucket = cfg["storage"]["buckets"]["source_data"] + max_mb = cfg["video"]["max_file_size_mb"] + callback_url = cfg.get("backend", {}).get("callback_url", "") + + await _check_video_size(storage, bucket, req.file_path, max_mb) + + background_tasks.add_task( + video_service.extract_frames_task, + req, + storage, + callback_url, + ) + return VideoAcceptedResponse(message="任务已接受,后台处理中", job_id=req.job_id) + + +@router.post("/video/to-text", response_model=VideoAcceptedResponse, status_code=202) +async def video_to_text( + req: VideoToTextRequest, + background_tasks: BackgroundTasks, + storage: StorageClient = Depends(get_storage_client), + llm: LLMClient = Depends(get_llm_client), +) -> VideoAcceptedResponse: + cfg = get_config() + bucket = cfg["storage"]["buckets"]["source_data"] + max_mb = cfg["video"]["max_file_size_mb"] + callback_url = cfg.get("backend", {}).get("callback_url", "") + + await _check_video_size(storage, bucket, req.file_path, max_mb) + + background_tasks.add_task( + video_service.video_to_text_task, + req, + llm, + storage, + callback_url, + ) + return VideoAcceptedResponse(message="任务已接受,后台处理中", job_id=req.job_id) diff --git a/app/services/__pycache__/video_service.cpython-312.pyc b/app/services/__pycache__/video_service.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6ef12d67c93ba6e3f49edddacc64a4c3cc3d8e5 GIT binary patch literal 9774 zcmc&)dvFxTneW+|+4uV?Y4rjLfp`d85Cjrn3n39%NMJ<#kg+$*&PeRC+TF~KK*+N` z&h|O03Rg&VoDpXngwQpcRr|G#WumwdP!+B$aT2NbN@Z30`3$rB%eTWO>Q8-Yz{U4B=MjYa( zDC$#ls*h62nm&z^(|xp(Gkpx?nkYM{?bA}KtgcU|@brCpB{%dLA*Z9JL35v3kzt~i zL2I8?q1mWy(B5ZPXl>Ln=lLyDb{#V~0?x zgyOLMN5UK*-yh%44~v9N4+&9Omk1t<#)F)!6?kz- zh=nxj>nf=M(&ylJ`yntw7ouR2#DX~xiY$~YG~%cW8e*`G=sB3er)h*<)SRYYU|E#X z7?7lium#}-nS>_z3!+% z_oL_Vs2F%#8jrdOu_#F$)!>#C{5w{Vew{u_)$4jb!)qU=Q7486`S=i?a3+$B$7EXw z*2BSI=y0Gv7>@E>y+**JmRVj9;=*|f2t3vGI+;Briiu&_)S5gN3#qF_W)g8c@9o1O zJ|TudomO2Uf(z@QlcG@1DEb65u3)eVndXN>Nfl3fThVt z)ie+c2a>#SBn;KTMB+e!P98Xdmlk0)B#z0}03Jqwyopc+x;^kqJ`2gG=*#11&gP!A zRcCC~Q_fl2+F#n%PBVE8ZRn+PPVedZ@%o9j$$_!@tg~U(+%RKq_%(~{>p(DLTl?ir zbHi`uY$abH3dHBk&Xc=Gch8irzF6~f`mMIB`b^`)>D{yDeHrt<-{xs7^Hp94t-plk zs)pYskH9dV_3x;mUZz@Y+V9gqy+ZA<(m(XJx-^%lR)_W<8KC~f1dPiDYKNJ*Y}8`f zK|%Fpmtlv2yz8gq;zY-%Ew&kdZyHc$cPhB0evaCqj;PQUN)qQ6z%N_OMQ8+Ogfn58lp{90;JwT5m6hxZ-mCInKADTivU_>ra#9zm zB~&pd7D+iJ%W;}B!&vSUYY0D5N2p@HfD2<^{79f@1lA1EOY)RUa!Ksro%n4fXUZZu zgzt)LNNcc>aRhb>XC-fvaxbvRS|V4>Ih$lBvdWxzQgBU!r5lPegn9{a4#^Cu?PWIQ zgFSLT;S_U;k$jRjvY}W*C^GY$^EG_5!n1h4!0I+dzM=9KreYH*!TY%--v!reg?&t{ z0xiv)dj$4Q$}Cw6ly!_c(FUt<3u#@<$yi{YG?UuMHbND1&I6X%UMv$zY$cRrB4gmZ za6kHpEkSeJyo_dPEnJCY+KEo;Lz)2;(mVD{K;X6CIHrLKXAK!$q1<#gjqsiru6(vX4;3yeNt9a{R9ejcHCu_~u`D&qLj= zdB5Ox2+j*U{-fAG*$=)+2hj+=37;xiFdv9)sn%Bfs)`-~A7!%c;85r=FD7ME zGCm}P_y89agEFf)Dzfh|f2<$-$pJ3h-yaYU34HQUJjxBIM&174S4_iTVq?d7jjT-$ z_4kK|Wi7`?d6B=RN^E&hz>eyESs#vx;1C6)pWzJ(%_50`5uKttxn8E$j~K&);K8qt zB=`Yv(oJfw;TXpc%X;kifzuR{jcRowC?5I@JJ+&H@$f=oK-CoB;xWD+1`~-N41~c^ zSE~hlQOUaAZ?(0x_x7G=g-y7uO^QKrC@J7GL^gqAnHUlSShZ|X9LNB`gCvYe_AKf| z^)}CI6g8^5sO@O&+}*yjezkzT0GS5-AzQil(HO=HfrH0HJ}Dde!AVs+(2M+F0vBax z4_KGm4@E#Q!m$oq#~u~JA}=#BV99joNTY04AW2&=0kDIY&26oF1AF)F*&FEC+bc5y z50HczjB~tfC1z7?E$c%^#I|@eF38rlJ-hep3+&jnuW{GD*2iUSFex5O@UkYBkahhq zN^wIYDB*)KnGPOI%B&C{ig7S)gM2K6RT~P+1FUKuN-TISofuc&PYa$dAL9h<*~mHp zmO&8wUVJahOc;QbYIQow#}qGDHsT>i1KiMHLclJVVoAY|kwES>539C~( z6pMyqhwDp)YAlMEu7IyyuuuPhtW_7WY?xo6GA#}!WRvP|tK*~kK{yCLe18H~dtt%g zfz7{=y4(y2fj<%$5Q4{G%9O<)=7zC1D$1-%Av1~;gExoJh(|*OWcuJxzkqK! zp#qX5elrXfmV5;4Y234j#bENVFa#V$K?`gc1qj?RB9rHPYmc|(EcUU|(WfRlGnTr^ zRL1g+;~hD>Yi!rJdDdQ?u~*M5YreQMYi~W?dDCb;-j>&NP^>>!UNc+Xm?>|Z-gI%p zo7*mJ%a(V}dOOpmk1U>9%c_iJ)wK5F&@V0RZ#i;i%gJ4%yT-b%o2zq9*Xg?Px^x@h z3Tp>-M~`gIS!-p+S~-!-T5HmD&gwWhFgkE5H0!O)cCPi+Lo%KEe^=POB@ zbMBI~A?Nm_4L7aMTxt2ak~1YQm5n}>ZX5Ib%H{o)-E-6HPw&dvoF|7zhbNlfv#rSa zYbTSlE4O6ezkkcchco`|~b)=a@0)t(f($&UjZ(ZJKVrn0)i;OHaR%dMldsJ~3;5V#fYN&hDSJ zFVEPQ|7c6Dx;|I2B3D&^N5|AU?;ytE${UfRVv@?(myMZknmrSa(VleY=Pk(Q%5O&o z^GVaF>4fDYy%iHP?)sd!^mJrAGUqLwSd;bEea_O2#5R7}=da3DEc@J~U2D9Jv^LYPk=9_!qcWRLDA?i1`EbSRf`&an~Ddadnef32h z@@>uQK+SLN*pb=u1=6#oIT*Nk)I8%^3x*lFG;)3aqce{^k==hFv+>Ea`MUn8IcxcX zAy?+SWiY(Cit0(%OOGuY;(y~I{x;8o+TVWe{zFFkyX4ETk)B=GUQ^ixDAdqi35B1nq`Lgf&sOa~PM~%uLW|Q{L${N@x~|pJZK1DesBTt!jb<^eXS+S0}2SWwV7Y2)v34YJP@3t1=F^gkK#Nz|($}E9sS*ZQP5UmzGriH!Yp5X!H zrU$Q?;dJ=u;`BI8LC`RzS0Yk?u@^rQ3z}f0i=VIaKvM=0zoGI(aByYO!x=c^1ruov z=+`J2Beq3CP?QYBql57pBlEblC|U4^=W#gr-Z;9WJWHWhNl!GUOn^ABtFq`3=P`4{ zyNCrv(z<|t%_*~p&8IwIqa`q^WFCPSA8D7e+$FyhPQ6>+D&l~Vs^6;OLe4@gm$HfY zo>aN>xE^qYy?`s=4FG2^>=RHo9*?BOSdols1T+A?C}99urtC#@D%KX1qsU4^6?4f! z+LJuxfLMT2a_mG;z63qHz{2%Kc|t9)@M=<5%q16jI|UQ2D{=@$_J@L5-6@acPEsef zESUEt@GEDPJVdY3&YSXy^B6XQul7ZclstE2%B!g%#;am3~dHZmE>>EbNQ(N>0vq zRx?1Q$^g+nP}D^zNhg&Nt1U*EDSvS#7cp_A8_=~`^XF2eGbgMjBYZCnfZn!(snkf7BNjvjsblYh8%YU-nluYdC5cR&5f-`;xT zg-=eN{PfM~Pv1QA$&dg3(^p^k><8nw{^t3QU-*lUe|+gb|LMf7Q|CTOkKKCXuRi(y z^Bw|GLd1Mum}x2E#GgsUkr!9>A4_IDrb_@oB0=2!zd;+JZZD-z}~p zegVh(1bh(IuUBxs@F13D6-x`Pn2Ap{$Uwd-wDPI`*}8 zxAzJUVfA>;10GhUN;Vwav{A+H0zNr#a0Y{U;c+b4hihyJ(3=p!DzJcp5Opwx4h6|# zftdjOFW}=xz^4eHaU~d_fO-L+JE{e-53&ft3s2#?08YM*lOQDZRVsKtf-B|`yl@Z~ zLy*Ykh09oY9@q0Y>Bq?cP7dKD3<-p*6yvFY+&6g##J*@yZPd>tZVD>-8r)j1GD$c zng0Q+12KVUzGr8Vc)omn?klPO!5^){qUb8g=Rea>{oG#Sd2 z)MwqR(}p>SS9DIAGVV1~4`=P`(<}rxy#9%;*FCFlv$V_jfoIhi1z^)sa<*n7IJx3X zcxr8?a#OafDQj-Zc~<3FWU%KENCM<_`A&(aQ{$=0N3*_l@F6s6h@8D-&Qo&w!1#g5x~Uae&wZmE>DF{|&RH_+T%K_*pWN_j z%PTEE+?I9TKkM9_ac<5!o3U=!$)3@k$*zk-Gv5r%d^49~0@PHS^|+ez!Fby7R6&G?$G>o?!5sF~Jf+?!`yThjVm zdDXc+XZB3BPjAkaZ=LmS1(2^ro|3c7Il~#lOQx*57Q!keHIr?xcE8d+`M~tD>CWkv zi`&578T4OW*8fgs1JWjxb2AdjO zSOu}6=1a{pj|4OK9ZZ|9>qB>=^C|_M*ZuE;2Y*H_2p;_I3op!Kel7BC`!a8Y(Lnk& z0faRdLvKbdMXvWgGV^F(w)ct5h66Z&@Z{Y?{67yM{2gr5XItsL6m?l!)$Kx;S5Uh* zGnec9KwqIMcel`2s&<&6@Y6N^ZawpF+MNz4yzQg9EzH{`tz}Snr;+NmG4E_>)j{EE zGu7>2uC_3keqame{rP4J=%2T+-K_2!vI2e0hHI`ll+38p^dNyj` zZDcXs#O`6xyIWac|AL}{{sqL6i?Rc>DmBL9FxvUAt)x%i1mQLX+EnMH>*nj{> zPvK?)BnTH-3_`KT1nkGjZV2W!gp@z9Nj5-yq5cO#60iUSn(QQbPdhF`Ag07(`zg_q-)KTzDUV#Mu(t)CGAj=J8Q2yI)Ak+KE{~A57fB#KOS)Rp2UW;7byiOtYNNc=pP{@8ww<7M()0ntvaOPP|fVkD4*C`|% UZr;2>A&t=TjtP^(>+l-?1M2;+n*aa+ literal 0 HcmV?d00001 diff --git a/app/services/video_service.py b/app/services/video_service.py new file mode 100644 index 0000000..1851c0f --- /dev/null +++ b/app/services/video_service.py @@ -0,0 +1,189 @@ +import base64 +import io +import os +import tempfile +import time +from typing import Callable + +import cv2 +import httpx +import numpy as np + +from app.clients.llm.base import LLMClient +from app.clients.storage.base import StorageClient +from app.core.config import get_config +from app.core.logging import get_logger +from app.models.video_models import ExtractFramesRequest, FrameInfo, VideoToTextRequest + +logger = get_logger(__name__) + + +async def _post_callback(url: str, payload: dict) -> None: + async with httpx.AsyncClient(timeout=10) as http: + try: + await http.post(url, json=payload) + except Exception as exc: + logger.error("callback_failed", extra={"url": url, "error": str(exc)}) + + +async def extract_frames_task( + req: ExtractFramesRequest, + storage: StorageClient, + callback_url: str, +) -> None: + cfg = get_config() + bucket = cfg["storage"]["buckets"]["source_data"] + threshold = cfg["video"].get("keyframe_diff_threshold", 30.0) + + tmp = None + try: + video_bytes = await storage.download_bytes(bucket, req.file_path) + + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f: + f.write(video_bytes) + tmp = f.name + + cap = cv2.VideoCapture(tmp) + fps = cap.get(cv2.CAP_PROP_FPS) or 25.0 + frames_info: list[FrameInfo] = [] + upload_index = 0 + prev_gray = None + frame_idx = 0 + + while True: + ret, frame = cap.read() + if not ret: + break + + extract = False + if req.mode == "interval": + extract = (frame_idx % req.frame_interval == 0) + else: # keyframe + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).astype(np.float32) + if prev_gray is None: + extract = True + else: + diff = np.mean(np.abs(gray - prev_gray)) + extract = diff > threshold + prev_gray = gray + + if extract: + time_sec = round(frame_idx / fps, 3) + _, buf = cv2.imencode(".jpg", frame) + frame_path = f"frames/{req.source_id}/{upload_index}.jpg" + await storage.upload_bytes(bucket, frame_path, buf.tobytes(), "image/jpeg") + frames_info.append(FrameInfo( + frame_index=frame_idx, + time_sec=time_sec, + frame_path=frame_path, + )) + upload_index += 1 + + frame_idx += 1 + + cap.release() + + logger.info("extract_frames_done", extra={ + "job_id": req.job_id, + "frames": len(frames_info), + }) + await _post_callback(callback_url, { + "job_id": req.job_id, + "status": "SUCCESS", + "frames": [f.model_dump() for f in frames_info], + "output_path": None, + "error_message": None, + }) + + except Exception as exc: + logger.error("extract_frames_failed", extra={"job_id": req.job_id, "error": str(exc)}) + await _post_callback(callback_url, { + "job_id": req.job_id, + "status": "FAILED", + "frames": None, + "output_path": None, + "error_message": str(exc), + }) + finally: + if tmp and os.path.exists(tmp): + os.unlink(tmp) + + +async def video_to_text_task( + req: VideoToTextRequest, + llm: LLMClient, + storage: StorageClient, + callback_url: str, +) -> None: + cfg = get_config() + bucket = cfg["storage"]["buckets"]["source_data"] + sample_count = cfg["video"].get("frame_sample_count", 8) + model = req.model or cfg["models"]["default_vision"] + + tmp = None + try: + video_bytes = await storage.download_bytes(bucket, req.file_path) + + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f: + f.write(video_bytes) + tmp = f.name + + cap = cv2.VideoCapture(tmp) + fps = cap.get(cv2.CAP_PROP_FPS) or 25.0 + start_frame = int(req.start_sec * fps) + end_frame = int(req.end_sec * fps) + total = max(end_frame - start_frame, 1) + + # Uniform sampling + indices = [ + start_frame + int(i * total / sample_count) + for i in range(sample_count) + ] + indices = list(dict.fromkeys(indices)) # deduplicate + + content: list[dict] = [] + for idx in indices: + cap.set(cv2.CAP_PROP_POS_FRAMES, idx) + ret, frame = cap.read() + if not ret: + continue + _, buf = cv2.imencode(".jpg", frame) + b64 = base64.b64encode(buf.tobytes()).decode() + content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}}) + + cap.release() + + prompt = req.prompt_template or "请用中文详细描述这段视频的内容,生成结构化文字描述。" + content.append({"type": "text", "text": prompt}) + + messages = [{"role": "user", "content": content}] + description = await llm.chat_vision(model, messages) + + # Upload description text + timestamp = int(time.time()) + output_path = f"video-text/{req.source_id}/{timestamp}.txt" + await storage.upload_bytes( + bucket, output_path, description.encode("utf-8"), "text/plain" + ) + + logger.info("video_to_text_done", extra={"job_id": req.job_id, "output_path": output_path}) + await _post_callback(callback_url, { + "job_id": req.job_id, + "status": "SUCCESS", + "frames": None, + "output_path": output_path, + "error_message": None, + }) + + except Exception as exc: + logger.error("video_to_text_failed", extra={"job_id": req.job_id, "error": str(exc)}) + await _post_callback(callback_url, { + "job_id": req.job_id, + "status": "FAILED", + "frames": None, + "output_path": None, + "error_message": str(exc), + }) + finally: + if tmp and os.path.exists(tmp): + os.unlink(tmp) diff --git a/tests/__pycache__/test_video_router.cpython-312-pytest-9.0.3.pyc b/tests/__pycache__/test_video_router.cpython-312-pytest-9.0.3.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d9e034a7c710c75df2d2533e939d13ac086a5be GIT binary patch literal 6454 zcmc&&%Wo9P8SkFydGz!={J<}~;ECB~8Fm>zf#qQm2#@S80STd%*4n7mo@$#hGt;B0 zwy|NXpkxoAM1tFLAQD##@;~I1Lvpdfh*2xjA|;XH8w&>{pYnax(^GA`*Y;r(x5vN! zs`{(y>aV(gzpv`=sZ>IO<3Ee_;-$DG{Syn`5o#Fk#=-bbA`+2H(zwjep>ntolBKdT z9+BNE(eWr;36)~y_;|dmj;p*rTuPL+aSd$cTWLH=B7i9p1x%9|U}j#<#aHo0S8^do zJ&<#Q;$8=juy_5aBuh1EI_6ywXQGe@iINzJKMp@rpEmBi5Q1lgKd&rH zzYR^}NH!b`VUk!-YT=r4^N=JhL~0TCXIt}}rjwo}&P1z4CS{VuHj|;JihG5m?2P9# z-Qro|{Ip@tO02`zf1B{|tXeZB)bF{+yDrYcvm(R&#i)aPn~~hcPw^s7(vQQpqY#rQ z$&i*>v?%jZ%^+ zK_xMm7qu4QGnA+$Sjl(i;2BE547Fh!F+;vKLGe+&QEuZG@%%{p;}DOCM)r`7n%0O< za>wyWdOdmPMtoF=Phuf5FCCDEBqra2ulQ?t`(^2&{Kvvv_-pBwoa?*-pX3v1QFfBl zv}b5VpEXJ|=GtMSTFp~y#x^O-&*FEPKW7wfPC-?LTsPQFmNy8|ZLrop7%-~Ef!Uz} z?l)l0+0-c5M<%IJHrZO*t+h>N=gZa6Rb-*n!vKq6N1ZH|OdY=e8;;7X8Co#)B5|VA z)&!hkOv*H~Dyx4(XL&6&Hnu3=9{K1{b$&R<5_cL#b}Wn5V2+FU#QzG0|pDI}sMoF(gtQ}Qn#-xcoIO((e z(TY}u^QhCVPg=BW*t$7aEg2QVhAR|b-(1Rx7D`34Vmrw)(6P>JiyBiV#ZSi3a0G6Y zhWv$5e__NexHmZFFMQ@G#IOy&^Ps;l>~}uuFC1qnxPR7vterW3@|#;0y~(OZ!ThFV zOqeAXb)%@mM9vnW8pFvr>iosi-Iq9Dop<_ps2i`R?!KN7M11R+K0G)~e-5`qVPM@L zA$O&J9QbtUh1C%S!*f zQMdF$m@C}bt$vQ(HT|0gc-OVegA?~p{NdEy^I$%V-#@dY^gX%;aPQ2cYs*^SD+PZq z-LhMFfd$umj?S*>#}%dTeHT2@vU+-rp*;Ml`(kJKubq*L+1NUX#^`a0M%;-+gA_MK z8rn7v686Lxq&V(L??MQu9)x7$0n)2(l3vyGn&xSbS8~rJ!KGK*AiYT&UpjtFr#(xY zJxSrxEBX%*X(6Z}y~1-#kp}5aZsRMYck4cd^ro89n@dwY2%4HYY_+(eK35sLf6F*cX$U$@l)Ud5b`wqx-ja1X_~ zpqLAB8m4>E+KAp{hN#Uvd@djtU|l#U`CML33V9N{yBlL)5}P9ux~ zFiflbEKbirg)Hu8b1qu1TK+M zoTt+pcsjGoOQg&;slDK7@pM5;1;NuTO`dLr<;MSciA35PJdMkv^*k-cAGAD5)sUz6 zU>hG#i#9>eEO>frKf%)-jpb2-bds)`;OU(%AGhV{faQ_VSRQp>p_tOR42~bzTF3gu zj%^*WF{8xrxODyrTDW`{haAIbKpuvfsqV8->{Jeu8 znx_Y`?XLiSgm3qAzV!%J$00ZA6*J#!&=fU-?GyC6Mdo{;nfccD?XUOl`$3B~^Iuiu zzn!~DxpT`}c1bz-TORoJK{aw?KE6Tz5J?qlny}{`( zrYM|(m7gg6C2^tx)g}bh{%8uUZK76@Fa;%9NlvY)4b+<6=pn0=G4&wS5z{itz`}P7t+i9CmiMmak)J&ylP{MoSJAxz%i;sEZUw zadeXErODMYXpLEDYLMx*(MpZI*G_62y$*6^ds%XI_BzSc)$1Zxcdv(By}e$zT9Up& zf3JTq&>I*m>n$UFS(CxR^4{{nirxyy2RJ5P$=TvnoIPIMZ|58@*?Mc@u7OS3FpVq6 zzC>M7v%SlKro*Vyuz{@_W?VU^sa^ejv~!u-t+4jrY;cZ)(>xOoz*E;==ZXt+I2sFr11VO2HPm5X^15J+0uGG+Aq@NJW*8YqTvl&m2!o6yR z->o_t_o@g`NdvHVixWqGi^|$eG)yrCMM;!olPr=|V)`vt)cUMl)ZPh7qOYj;=%k+A zLs40W=+t@{An%lxQ?3#tSvgC;RdR6F5g0kgjMynkWHqV*kA@l7N!t~i6QnFF*$4az zZiW)8je@4eIvEP6%FOi&uDug9HEa(%Mz$T{6Jk6R4GDwMWHKbevWHIMs)xjM=y_c0 z=R-q6B6TbjJGrTaJP*=>Z;_e7%$AckaZooa5KyoNmucC?iNl$APuMA2p^5qol{S(7 z^bjA5$!=3WvW<_Xj>Tn5DkC%fL&>CU8%m_at+MSDj!bs&@nk$I#KTS=*N4Z2<#A1c zAi=>_upf^njvW_eyV|)mJtWF>LbfT}1L;Fry7O@7_GeC|`4d7W8jC-ZjP}Ko3XDV( z5h2c>Oh7R%f(6xEaCu*CJ=c0^)wq4aIuXBmcp@_8+IeR04R7%L_7A-)M(G=Y;6=v;$EB+A$_YAG zvv%U~tHH~MuL_rY-fsNY<~N(C1Kp#xIoAEE^PKbi>KV3X-bVRW&f6&)`%3qV-QPHL z3vNus5NJt%18{KWH-Mu#3uomR&NgYkqP{avP+>>U9Uqx$D^9Eh9_&qA;+ty$$Sgb# zSea;&$7x=+$nG9DrCp;N#o8jmD6**=i&!z(kG=HjxCtbRmm8m4*J;nZb{Z-NWo zfc?6Z2NdDsU*$JK!7}&>D*(VEx>wEx8|Q)*bN;e9Z}nVt&26{cW1pw&HaqDdhTBd{ zINP)EyLA$m;MSTQnu=CkfQquBazQJC$W5 zWmu)trFCJ~B zcF%K_+E}wJ=gm4LR&t(bhI7XyxrF}`t2MfT8Vxh9oKJGWX#Lp6I4ik;SgX+t)M=P; z)zYg`w+N|~rpw(eQZ&dBWlJ7W<-JE_tQK#kEHM&;5=KyF z>a_%|ah5Ma=hW7L3%*XcNY>4ja}|<1K_gGedc-DeF-$$J(#njhF z#1TD(~~Q`E1a+^R$?1tSN#5+8Kzm3+Nb&oEJCO6(?#fl z+KO@^_02Bl8n{NOT;0zVOWw~FT2I~Q;ETKon{2Kgjp+NAbw8F~ISq4<*NoZ7H2;CscQaFgqbPsfHk7hFQ6nEJw zTZj0h?8roilj$fYm&HK9?Tf}vfaF3-1Oe|B3Y}5J5pO>Vb{f&hZX)7o@=FATnh2;N zsH6yySeobKF;Ue(T6QGU&`x-$rJ##8==~PHA;10=_3Ja#9J_3m4dvO;c=Ze${>?4a zBhHpr(Racy;1*T(6j}-;&PXN=f~Gbon4L8hKgHvU0bZ_-M;%5)j0z|2R38_`%=Xq+ zoGC0rtJd}=F5zYRj%DZ0Fg>zyb#rETb69YFO+`XGLx^n*3(a_K3kzXaHndYI?Fb99 zH5pIw4?`beMz#VWf;I%35p<0Bq5{l8B)x1!`pocF*)E7taY&FIPdv4E@4hFVxJ-`( z)$X;vw}b`QAwAGBHraB=+z;HB|1#9@>k)hjY3z#7coSDon+J5E%ma|3d2A|}#{+OL zhChV;z`Zu{hgIx>g1c^0M!zjwE?WhWCj!3Aq+wIZ3?GkjJZ>A=$E8oDaAQUKhGDD7 zo}rA9<59$umoNNa`A2|&KZsPWT^Z~^j6`|4j678y zk2A?=Dk?%vxJvd6f>s_8#55m07AKOp%!-2%472SM zr+0MV(yD3a%JH_js_KjV7y4g2KI;C=O1VO}?NrnHH)^LF_nkR7&8)l;3SSkcLpv~2 z|8dRo98-Clp>1n{%;nGdxBuAtp|j_w?4F%J$`OSj#+hS8-URrm$v2It7aMepHcEkW(qS3 zdL4-s)C_?_@2&pQrW{iby=}_7>t`AKpHcEkW(qS3dYu7c1vNvU_}E`|zWJP#V;aUb zk4jVSI>5$h06BNvIGuMl%rf{tqZTMR9prHj!L#E-g zFl>svbLE}+<&^UX{Tt<}=AAwNEp)+#I~(rVMo*UY)LH-KA;8{zu%c(Z^_rjV3EQs) z7{n_MvQY6sovmk$sj)vGq#TUHUU!q(55> z{doY-;iAM@z@lc-u?KW*)*wV{8rmUrYZ_zHg>9Og0@gIJ+GcH_d4v95oJ2H7kTl@a zFyqSEMcs-v5HQL$Y|^1=&)VIt6~R?T|0llG3nAq z0hS7VT@4DbRG2Ariv(CIsz$3oj(3Blf)yLIn{yt{%lSC}WI%t`PD66#$|QC&sEvf! zF+h7on}60t>=-V|33iOirQ0#iRdMA+J8?^Hu8LqDt{SvfkK`p9iBIx@o*LH1H-I*- zqQuomKA6ceY-4C7rZy;#6eUA{UWCr8tuMD+)l>akEmtS`6N<(j5L>mbO+BGJT$H4M z){}Ns<$4du^gfueo742P&015Fp5`Tbniur6XJBuaOXZ-a-FsD5O69OBAqm=F0ebV_5W|Pfqc5$1h2R19x%D=F8}_*!!43cg zY8eqSM|Kg(>_mK6u__lTV?r#R0>$k(x)Fp0bg?I~^W6yc02uKo#%^^XP+h&b4uH&R zw+J&lo(nvdD%!^e#P%ZSM1U$7kNlW_6v0;zJb~Z<0&=^j5Ich4%LtAl*bg8KO4viX z61KSV@{pp0f$~BW2EG?NdKSTR*jqgxK^$2RUycAxq&#vC9#20WrC!-K66ezbkJc9+ z*#^HH!Se{B2=MvweF$O*kYf`ahUX9?%1Ruuegp#uP9R7kIEdgFg5wAh0K(P$AmS+m zX$0U;rg${y%5;R!VCJg`c43JXJpkk$r?Biaf?)(af)NBm2)>3uLLeZ>BKRW&sFn&i ztpcfDkc*&cH|6}ND7j{|>&)F>9v-!&Asj~2y0}Hu%){gg2a91DIT|8~yIyy?L z&YM4Jyx4r9`O<@vecwHP`S^73ff;t|yp{4*eFDCqBEm0S_|m0yPM?ktkLB3M6rpsU zrChbY6}kTRLlSS(B=I$r^UDR2IKJg)_pa#PK)=;c)*Z0EwQ08(GH*Xz(H*kBQ%-l+ z+ux~R5U<&{6)N5j*t(ZH-mhd4ucx8n{gADDt>gXGSW@84(xvg!@KU}KxJw%Ug1lKK z?=>^vgy|HeD{8*LFNMs3q`M?Yhf!0|0#6c>Kt2qN20(s5PnON>$>N;oyW%{PUdpe{RisB)8-taw+&# zgl~&@PLecG!u!Qlf;>udRoKQPfmIvQ_CTZ7-neQ{5+txnQ~zkaC`yv<`2b&*F_JY! zWRNu|LE@@jgjV+~>7E%M@yz&C3CV9rSf-wk{}v@lx1ASBNZzbx?0XEDflEj~Jtpah zEm6-Z^%%hvvt-#*#FQMa?cq@zB`$9wB=K7?`+p++Y$c3K5q(rEBiiG@3Y|I`PoWiW z0jY=ZSzYSkSK|!uVcsN6z8SM9knmvydl8h7ZAMf-eQVps))A&bBt=3?xJ(gHHsU=DDTOT9E-d>d0+dN)25%|* zl!cFT#4IZCE3qDrYDGvv(Zp%!FG6dalF&yNiX=q_ z$K6-A%rIMTm<*0L*0jFy_>T_%;P5{monF&5>X~Mnh~e>*5|&1bVreAd7ZZJN1b@6X z_v~}Gtrj8{l(0Z9Bo=JkG28lRzV*@B)+5(jk4#k_&9@#I7teDS6BiQW^lQmn>ya}( zxyqwA%IhvY_hI?^bBtbsH?{Mre`Oh z*PIny&DLx6bk}P8wG|BFjUgz1%Tm?V&b(E#mx9c@HMXus$Gi0`;;U&OdbiouwaM}B z1}~HpXidm8&%*E4hrk$KaTc?B!_3C%0cMjH?dSvZ475@bFjwa&I#I2_TrCD`=NQQX zfnzEiF|R<-WRe-QWv#@#VkKcB?9!R%IVWdFcE>ppW*N?f<~hkmSQr?dz_fC2vz%M9 z!#F+I#$aKlHUV`?ri;XAb6$i_H`sAr)wJS3)05;-84W9ax-MIh=`0QVc%2J8tTluC=hU#m1% zsZv5*t6(k&yA=T9d9rIa_)_Ab%QSn z|7X+!B{xNK3Vt2w71Ru;nH}?vV!sX=e9yM-3)BBlv9Hehrk&om%>Jf>LEPQxgYtKl z(U5tk&bDujD=56Y>#!ac(sNFo!-a818A31<|gGf(1z`GXHScR%8H^o#4yo zMR-$UvIE{0L?EgYZ_VTQ&4Ha_T7-9t1a|%vR9%3d9@trL1a<~1=K|%$ft?HxYY96i z{E>ydT|yPM7CELEgJVj}Jpwx+{MF!zpp4>uNRn(zRE23Hi1Qw*u%ompY}~IP?30e@ ziR4sX=eR8vnSRA<9IQ^e8L@J6mO z%W5m3%>Sek@VeOStH|uD|FCDT&0G!aL_*8v%UI;FBNxE|+i|#a$GY8@R zZ=SdAS~NY)QvK=oUxZE%9tCHc5+3U}Obn*BrFP6h#s;!uKo0c6+)JC1n(kgZ%?N|B zYIjLYma$rskz(qudU|ZiBvIA&jJT8&mbh`yM_bB5F@zq0U;WL?H~;d&f4utYuYPp( z7cYO~7k~eQ-=(zGMI;0zWJpZIM-#vlgy>)<83$R5e7qBh0w+Ov3-tNq-~s~IU*il7 zGsa%b>V^ssCWD1{z%aO_dSzrcd@Yh05{XtHre#Yar5QFL1TK_FX+qbC@_obxoCC3h z95ltdfonzyKm$Y#Kr0bj2OYK$9x9wlh{s)75@-h|5eT#ccT*3}3N0r5s|dg+ONE2? zHj~J1aC+fqC<4lWF>GeYfR`~FN6?R80Ko|a=(CYs#z5qtG86I`K>AImV)1a9%#x@u zcqodrhM!hQA8^zYag)XNFWs!mvE~ z%Joxa8w$g)mSh#FU6tF^HC_A2nMbFYno`kNC4|%6KkbOC#nY$r?v7ao|7X+!B{xNK3VxjddIdE@AenR5zQA z8%KBM;OgEu%i#ZvTA<{nFsGo`v5`uol;|Y7#OuwwTXIZy&fPNWhWIRq)auS{KL8N% z?FXjZ-30HRW$+)#2yz{9h}6PD#J3+XaJNdVlmPsY>7HXevrJ8%shMTg<(YLejL9%l zMLC}@@VPt!&-rZlfCVfv`&WZS<{?0@xhnU!Td%F8_iwUa3o(eVdW43G_g35Xw>sWi z%ObvshKl#vZTlZ~ytf@oVkM=_QMAH%i>)vas8sm*(C?X)X;?BihbG%h>YoCFmB=hr zdPmWL;4G3VT_9Du3#3X2+9@Gb`tC!j^m4>4!ueIHGH{<#Wf_sHg1=?Cs$7>UE5Ny8 zd^Ga~NtJi=`mvvCoPO-}Vm?3i9TS(Y>KS3t9zl5g*YOzLjG)j9G2%aX@K9(d6+H=* z(Y|Dy|27c)E?Inc2^J4bgP2ml)N@~kl^@~+7vl5$1lB%^;4295)L+o55)Ry(bMk+I zttSyIkz#`l8~y~ki2r?luR!gp?=QjXhb*GA8CXOI zpw}#wogLO|)pX|;`?VSd@wz=8sCc{C*4gHGdxHbOhHsWYH;)?PQ!;dYS2G380AmMjwpVppGcDkcM=FgTu;P) z$CHhZG8fr+#daTd2i$ik8Gnd>2S{K;QobWQSOf@fc}pt??+Vd zBg*x2sy$D&|B^cRsog;bFKwQu;4-n7T;6EEg}M0}%EG>~;>8u8S{$_fmX)#u<`IME z59PHl*3QxudAj0LOAXzKwUzUT&36>lHtDs1%^w8+M?vEXdcE2hu=$pvS`WQbtp#kp f5o?z)pmMrJZ3Wo;`l3GlbO)*Z)B+ffO5^_kZ& 200 MB + + resp = client.post( + "/api/v1/video/extract-frames", + json={ + "file_path": "video/big.mp4", + "source_id": 10, + "job_id": 99, + }, + ) + assert resp.status_code == 400 + assert resp.json()["code"] == "VIDEO_TOO_LARGE" + + +def test_video_to_text_returns_202(client, mock_storage): + mock_storage.get_object_size = AsyncMock(return_value=10 * 1024 * 1024) + + with patch("app.routers.video.BackgroundTasks.add_task"): + resp = client.post( + "/api/v1/video/to-text", + json={ + "file_path": "video/test.mp4", + "source_id": 10, + "job_id": 43, + "start_sec": 0, + "end_sec": 60, + }, + ) + assert resp.status_code == 202 + assert resp.json()["job_id"] == 43 + + +def test_video_to_text_too_large_returns_400(client, mock_storage): + mock_storage.get_object_size = AsyncMock(return_value=300 * 1024 * 1024) + + resp = client.post( + "/api/v1/video/to-text", + json={ + "file_path": "video/big.mp4", + "source_id": 10, + "job_id": 99, + "start_sec": 0, + "end_sec": 60, + }, + ) + assert resp.status_code == 400 + assert resp.json()["code"] == "VIDEO_TOO_LARGE" diff --git a/tests/test_video_service.py b/tests/test_video_service.py new file mode 100644 index 0000000..3e33483 --- /dev/null +++ b/tests/test_video_service.py @@ -0,0 +1,195 @@ +import io +import json +import os +import tempfile +import pytest +import numpy as np +import cv2 +from unittest.mock import AsyncMock, MagicMock, patch + +from app.models.video_models import ExtractFramesRequest, VideoToTextRequest + + +def _make_test_video(path: str, num_frames: int = 10, fps: float = 10.0, width=64, height=64): + """Write a small test video to `path` using cv2.VideoWriter.""" + fourcc = cv2.VideoWriter_fourcc(*"mp4v") + out = cv2.VideoWriter(path, fourcc, fps, (width, height)) + for i in range(num_frames): + frame = np.full((height, width, 3), (i * 20) % 256, dtype=np.uint8) + out.write(frame) + out.release() + + +# ── US3: Frame Extraction ────────────────────────────────────────────────────── + +@pytest.fixture +def frames_req(): + return ExtractFramesRequest( + file_path="video/test.mp4", + source_id=10, + job_id=42, + mode="interval", + frame_interval=3, + ) + + +@pytest.mark.asyncio +async def test_interval_mode_extracts_correct_frames(mock_storage, frames_req, tmp_path): + video_path = str(tmp_path / "test.mp4") + _make_test_video(video_path, num_frames=10, fps=10.0) + + with open(video_path, "rb") as f: + video_bytes = f.read() + + mock_storage.download_bytes = AsyncMock(return_value=video_bytes) + mock_storage.upload_bytes = AsyncMock(return_value=None) + + callback_payloads = [] + + async def fake_callback(url, payload): + callback_payloads.append(payload) + + with patch("app.services.video_service._post_callback", new=fake_callback): + from app.services.video_service import extract_frames_task + await extract_frames_task(frames_req, mock_storage, "http://backend/callback") + + assert len(callback_payloads) == 1 + cb = callback_payloads[0] + assert cb["status"] == "SUCCESS" + assert cb["job_id"] == 42 + # With 10 frames and interval=3, we expect frames at indices 0, 3, 6, 9 → 4 frames + assert len(cb["frames"]) == 4 + + +@pytest.mark.asyncio +async def test_keyframe_mode_extracts_scene_changes(mock_storage, tmp_path): + video_path = str(tmp_path / "kf.mp4") + # Create video with 2 distinct scenes separated by sudden color change + fourcc = cv2.VideoWriter_fourcc(*"mp4v") + out = cv2.VideoWriter(video_path, fourcc, 10.0, (64, 64)) + for _ in range(5): + out.write(np.zeros((64, 64, 3), dtype=np.uint8)) # black frames + for _ in range(5): + out.write(np.full((64, 64, 3), 200, dtype=np.uint8)) # bright frames + out.release() + + with open(video_path, "rb") as f: + video_bytes = f.read() + + mock_storage.download_bytes = AsyncMock(return_value=video_bytes) + mock_storage.upload_bytes = AsyncMock(return_value=None) + + callback_payloads = [] + + async def fake_callback(url, payload): + callback_payloads.append(payload) + + req = ExtractFramesRequest( + file_path="video/kf.mp4", + source_id=10, + job_id=43, + mode="keyframe", + ) + with patch("app.services.video_service._post_callback", new=fake_callback): + from app.services.video_service import extract_frames_task + await extract_frames_task(req, mock_storage, "http://backend/callback") + + cb = callback_payloads[0] + assert cb["status"] == "SUCCESS" + # Should capture at least the scene-change frame + assert len(cb["frames"]) >= 1 + + +@pytest.mark.asyncio +async def test_frame_upload_path_convention(mock_storage, frames_req, tmp_path): + video_path = str(tmp_path / "test.mp4") + _make_test_video(video_path, num_frames=3, fps=10.0) + with open(video_path, "rb") as f: + mock_storage.download_bytes = AsyncMock(return_value=f.read()) + mock_storage.upload_bytes = AsyncMock(return_value=None) + + callback_payloads = [] + async def fake_callback(url, payload): + callback_payloads.append(payload) + + req = ExtractFramesRequest( + file_path="video/test.mp4", source_id=10, job_id=99, mode="interval", frame_interval=1 + ) + with patch("app.services.video_service._post_callback", new=fake_callback): + from app.services.video_service import extract_frames_task + await extract_frames_task(req, mock_storage, "http://backend/callback") + + uploaded_paths = [call.args[1] for call in mock_storage.upload_bytes.call_args_list] + for i, path in enumerate(uploaded_paths): + assert path == f"frames/10/{i}.jpg" + + +@pytest.mark.asyncio +async def test_failed_extraction_sends_failed_callback(mock_storage, frames_req): + mock_storage.download_bytes = AsyncMock(side_effect=Exception("storage failure")) + + callback_payloads = [] + async def fake_callback(url, payload): + callback_payloads.append(payload) + + with patch("app.services.video_service._post_callback", new=fake_callback): + from app.services.video_service import extract_frames_task + await extract_frames_task(frames_req, mock_storage, "http://backend/callback") + + assert callback_payloads[0]["status"] == "FAILED" + assert callback_payloads[0]["error_message"] is not None + + +# ── US4: Video To Text ───────────────────────────────────────────────────────── + +@pytest.fixture +def totext_req(): + return VideoToTextRequest( + file_path="video/test.mp4", + source_id=10, + job_id=44, + start_sec=0.0, + end_sec=1.0, + ) + + +@pytest.mark.asyncio +async def test_video_to_text_samples_frames_and_calls_llm(mock_llm, mock_storage, totext_req, tmp_path): + video_path = str(tmp_path / "totext.mp4") + _make_test_video(video_path, num_frames=20, fps=10.0) + with open(video_path, "rb") as f: + mock_storage.download_bytes = AsyncMock(return_value=f.read()) + mock_llm.chat_vision = AsyncMock(return_value="视频描述内容") + mock_storage.upload_bytes = AsyncMock(return_value=None) + + callback_payloads = [] + async def fake_callback(url, payload): + callback_payloads.append(payload) + + with patch("app.services.video_service._post_callback", new=fake_callback): + from app.services.video_service import video_to_text_task + await video_to_text_task(totext_req, mock_llm, mock_storage, "http://backend/callback") + + assert callback_payloads[0]["status"] == "SUCCESS" + assert "output_path" in callback_payloads[0] + assert callback_payloads[0]["output_path"].startswith("video-text/10/") + mock_llm.chat_vision.assert_called_once() + + +@pytest.mark.asyncio +async def test_video_to_text_llm_failure_sends_failed_callback(mock_llm, mock_storage, totext_req, tmp_path): + video_path = str(tmp_path / "fail.mp4") + _make_test_video(video_path, num_frames=5, fps=10.0) + with open(video_path, "rb") as f: + mock_storage.download_bytes = AsyncMock(return_value=f.read()) + mock_llm.chat_vision = AsyncMock(side_effect=Exception("LLM unavailable")) + + callback_payloads = [] + async def fake_callback(url, payload): + callback_payloads.append(payload) + + with patch("app.services.video_service._post_callback", new=fake_callback): + from app.services.video_service import video_to_text_task + await video_to_text_task(totext_req, mock_llm, mock_storage, "http://backend/callback") + + assert callback_payloads[0]["status"] == "FAILED"