From 1f2de5d8ace079c5d06894a67afbd5b82d6a6b9e Mon Sep 17 00:00:00 2001 From: Begild Date: Sun, 3 Nov 2024 18:52:54 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E6=AC=A1=E6=8F=90=E4=BA=A4=EF=BC=8C?= =?UTF-8?q?=E5=81=9A=E5=A5=BD=E5=9F=BA=E6=9C=AC=E7=9A=84translate=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=EF=BC=8C=E4=BD=86=E6=98=AF=E9=80=9F=E5=BA=A6=E8=BE=83?= =?UTF-8?q?=E6=85=A2=E3=80=82=20=E4=B8=8B=E4=B8=80=E6=AD=A5=E8=AE=A1?= =?UTF-8?q?=E5=88=92=E6=94=B9=E6=88=90async=E7=9A=84=E5=8F=91=E8=B5=B7?= =?UTF-8?q?=E5=A4=9A=E4=B8=AA=E8=AF=B7=E6=B1=82=E7=9C=8B=E4=B8=8B=E8=83=BD?= =?UTF-8?q?=E4=B8=8D=E8=83=BD=E6=8F=90=E5=8D=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 0 LICENSE | 0 README.md | 0 excel_translate.py | 161 +++++++++++++++++++++++++++++++++++++++++++++ input.xlsx | Bin 0 -> 8642 bytes 5 files changed, 161 insertions(+) mode change 100644 => 100755 .gitignore mode change 100644 => 100755 LICENSE mode change 100644 => 100755 README.md create mode 100755 excel_translate.py create mode 100755 input.xlsx diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/excel_translate.py b/excel_translate.py new file mode 100755 index 0000000..365ea71 --- /dev/null +++ b/excel_translate.py @@ -0,0 +1,161 @@ +import shutil +import openpyxl +import os +import sys +from openai import OpenAI +import json + +# Set up OpenAI API key +API_KEY = "sk-ckFgxmnjJAJoVfcVF918CbFbEc5a459eA72cA51e4dB24dAf" +API_URL = "https://api.gpt.ge/v1" +client = OpenAI(api_key=API_KEY, base_url=API_URL) +# completion = client.chat.completions.create( +# model="gpt-4o-mini", +# messages=[ +# {"role": "system", "content": "You are a helpful assistant."}, +# { +# "role": "user", +# "content": "Write a haiku about recursion in programming." 
#         }
#     ]
# )

# print(completion.choices[0].message)
# exit(0)
# (The lines above are the tail of the commented-out sample request that
# begins on the preceding line.)


class Model:
    """Model identifier strings usable as the ``model`` request argument."""

    gpt_4o_mini = "gpt-4o-mini"
    text_davinci_002 = "text-davinci-002"
    gpt_4o = "gpt-4o"
    doubao_lite_128k = 'doubao-lite-128k'


# Earlier batch-mode prompt (disabled). It asked the model to:
#   - translate the supplied content into English, paying attention to
#     accuracy in the bicycle / e-bike / instrument display / embedded domain;
#   - treat each {...}-wrapped span as the content of one Excel cell;
#   - reply with only a JSON list inside a code block, one element per cell,
#     escaping newlines as \n so line breaks survive;
#   - interpret all cells together for context rather than one at a time;
#   - output translations only, keeping none of the original text.
# `get_json_content` below belongs to that mode.
PROMT = """
# 将我所提供的如下内容翻译为英文,
# 注意仅输出翻译后的内容即可,不要保留原文任何内容!!!
# 请翻译:\n"""


def chinese2english(text, model=Model.gpt_4o_mini):
    """Translate ``text`` to English with one chat-completion request.

    Args:
        text: Source text; it is appended to ``PROMT`` to form the user
            message.
        model: Model identifier to request. Defaults to
            ``Model.gpt_4o_mini``.

    Returns:
        The message content of the first choice in the API response.
    """
    response = client.chat.completions.create(
        # Honour the caller's choice; this used to be hard-coded.
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": PROMT + text},
                ],
            }
        ],
    )
    return response.choices[0].message.content


def chinese2english_stream(text, model=Model.gpt_4o_mini):
    """Translate ``text`` using the client's streaming-response wrapper.

    Args:
        text: Source text; it is appended to ``PROMT`` to form the user
            message.
        model: Model identifier to request. Defaults to
            ``Model.gpt_4o_mini``.

    Returns:
        The message content of the first choice, read from the decoded JSON
        body of the response.
    """
    with client.chat.completions.with_streaming_response.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": PROMT + text},
                ],
            }
        ],
    ) as response:
        # The body must be read while the response is still open.
        result = response.json()
    # NOTE(review): assumes the standard chat-completion JSON layout
    # (choices -> message -> content); confirm against the configured endpoint.
    return result["choices"][0]["message"]["content"]


def update_translate(output_sheet, map_list, translated_text):
    """Write translations into the cells listed in ``map_list``.

    Args:
        output_sheet: Object exposing ``cell(row=..., column=...)`` whose
            result has a writable ``value`` attribute.
        map_list: ``(row_idx, col_idx)`` pairs, in the same order as the
            translations.
        translated_text: Either one translated string, or a list/tuple with
            one string per entry of ``map_list`` (e.g. a parsed batch reply).

    Raises:
        ValueError: If there are fewer translations than target cells. The
            check runs before any cell is written, so the sheet is never left
            partially updated.
    """
    if isinstance(translated_text, (list, tuple)):
        translations = list(translated_text)
    else:
        translations = [translated_text]

    if len(map_list) > len(translations):
        raise ValueError(
            f"got {len(translations)} translation(s) for {len(map_list)} cell(s)"
        )

    for (row_idx, col_idx), translation in zip(map_list, translations):
        output_sheet.cell(row=row_idx, column=col_idx).value = translation


def get_json_content(content):
    """Strip a surrounding code fence from an API reply.

    When the first line starts with three backticks and the last line ends
    with three backticks, both lines are dropped. Otherwise the
    whitespace-stripped text is returned as is.
    """
    lines = content.strip().split('\n')
    # Remove the code-fence marker lines the API wraps around its answer.
    if lines[0].strip().startswith('```') and lines[-1].strip().endswith('```'):
        lines = lines[1:-1]
    return '\n'.join(lines)


def is_all_ascii(s):
    """Return True when every character of ``s`` has a code point below 128."""
    return all(ord(char) < 128 for char in s)


def needs_translation(content):
    """Return True when a cell value is text that should be translated.

    Skipped values: anything that is not a non-empty ``str`` (None, int,
    float, ...), strings made only of ASCII characters, and strings that
    ``int()`` can parse (for example full-width digits).
    """
    # Only text can be translated; passing a float on to is_all_ascii would
    # raise TypeError because it is not iterable.
    if not isinstance(content, str) or not content:
        return False
    # All-ASCII text is treated as already being English.
    if is_all_ascii(content):
        return False
    # Plain numbers need no translation, even when written with non-ASCII digits.
    try:
        int(content)
    except ValueError:
        return True
    return False


def main():
    """Command-line entry point.

    Usage: ``python excel_translate.py input_file_path output_file_path``.
    With no arguments the default paths of ``translate_excel_process`` are
    used; any other argument count prints the usage line and exits with
    status 1.
    """
    args = sys.argv[1:]
    if not args:
        translate_excel_process()
    elif len(args) == 2:
        input_file_path, output_file_path = args
        translate_excel_process(input_file_path, output_file_path)
    else:
        print("Usage: python excel_translate.py input_file_path output_file_path")
        sys.exit(1)


def translate_excel_process(input_file_path="test.xlsx", output_file_path="output.xlsx"):
    """Translate the text cells of a workbook's active sheet into English.

    The input file is copied to ``output_file_path``; every cell of the
    active sheet for which ``needs_translation`` is true is then overwritten
    in the copy with the result of ``chinese2english``. Cells that are never
    written keep the value they had in the copy.

    Args:
        input_file_path: Path of the workbook to read.
        output_file_path: Path the translated workbook is saved to.

    Raises:
        SystemExit: With status 1 when ``input_file_path`` does not exist.
    """
    if not os.path.exists(input_file_path):
        print("Input file not found.")
        sys.exit(1)
    shutil.copy(input_file_path, output_file_path)

    input_workbook = openpyxl.load_workbook(input_file_path)
    input_sheet = input_workbook.active

    output_workbook = openpyxl.load_workbook(output_file_path)
    output_sheet = output_workbook.active

    # Identical cell contents are translated once and the result reused, which
    # avoids repeated API round-trips for repeated labels.
    translations = {}
    columns = input_sheet.iter_cols(min_col=1, values_only=True)
    for col_idx, col in enumerate(columns, start=1):
        for row_idx, content in enumerate(col, start=1):
            if not needs_translation(content):
                continue
            if content not in translations:
                translations[content] = chinese2english(f'{content}\n')
            update_translate(
                output_sheet, [(row_idx, col_idx)], translations[content]
            )

    output_workbook.save(output_file_path)
    print("Translation complete.")


if __name__ == '__main__':
    main()
b/input.xlsx new file mode 100755 index 0000000000000000000000000000000000000000..5427c793c87ada7c5ef9f455b7fd5ffc5156453e GIT binary patch literal 8642 zcmeHsg%fCLDEgb*x)LvXj?5Znpw?))a% z-EViZ`~3y)_B?ZM&(mG?bl-bURm~|?MFd1V01$uz001ZfW_y{|#&7^Y0wMr_3qXN4 zkaBW#w{&zj(e!q2EfDa|L^!8UV)v}Z4)7JlB>Snel{MdI47}*N!+tHfHartHgz89B?*Nh5^H<{(V&&g=`L;1 zLT4>TfquR~_e{&8vi3aaQSYZwoRwkRFX6m)B zSE-7PZVWZt<4?`u=Q+X?8nEUD7Jzk){A5ZzBxRqXPYU!q6YQ9q{0#kkniyWjoTy%X z;lsB!VKvAS52g$mnFOfX#Z(X!b2$$Bw~OKDKIuDXrI@>>UBZ3PzCdd6B`ZF`LYaul zfo1VSSC+Jj06$Ymz{l>Mnn%qlyXFu49P@^eR}mjy+~b{rHoqJ@Ie1 z3i<8~eK!c*JAAbn$aMz<0PgP*0IGkZWu+De^$84XPhhM=htbl+)zaRLo$bf?A07XP zIrx`XFG^BUY2&~M-fYu%tYt1k%Q+~VFYP`Q3xBh zGG6xVo=&cIW=>9aKk`EB`c$#G?nl{CC+|?Zh9q{;YJ5uQ6TR~HUGhw{ zVo71_Pg(V^SOR8ZHU z_;$n9R!3XaReY9OLxm5U!V%Dk_q)Zs8JvcQZ8&^F!Q0k_k{rdc`jfapSXJ-im+2o- ztSX^>>k_yiQpdBtqcOxhFLJ+;*FGd;77YU3Sn|M+6jUaOABjeNn|DJWUUa8)4X92j zk?lJoWHtQYy83-=Ql5!L3-7D?52s zaz+~oTcItJ6sAlM3R&n0#*4r*X1blb4?Ra=(xg?8(Tt*cdv&@9wJt%|Jq4+EWL=1^jKJqNE|C#XKajHXHB!j1ODkTn zeHC+Y=w2?$me=Z){6t*D;(PFBa3}Zt!^ZOc@XqsPcn_r=<%1M~$Y!O6(}sjk`Ypf) zoRDdrQEhfxY9}qB{l~W4s%prdXLe#Wy3d-^D(Z}M5le-h`JaRjd-ZL=V=Z7h5g~nM z7~ciwkAH(ywmTW3(+;I(7DF(SlxNAVxM-h)-b@hnPiguNjA3`0##hYZ`Z;`#EoPNd z0*HW@j4BES+nx8^Tl-WlL)OHWb)RX@7${9q z-}skYn#p0J*plKLAc^o5g{~F1n84uvPeKHXCZ`_5u6%)!f&_p92P4F9iQ})d_#=tH z!D>1ft^T)I`%RG@ z!71E(zrV}loEK{u0r#|pIX@bV=!0v&FNEaOf839NVzygM79xv;3*On__`kQA;-UD_mxmOt99~3H*74VAq zg}YNkbjy+lGA6lw&q=S-+Fx>)yzXUenmJ+S4_@0Nd$*LPi9bk6#Xh3lIqa zAcEcTdkl56wzPD2WB)mE{s^WSkQv7n3Y_qLoh>1XJ#%|{rE2Yr0$Y5CeQn=*I98T4SnuKBHPLbrq70kjQQ^fx1I`7ZMJ0ui z=oD{J%h!rh*e`mG+Mo{1NWuo3)*MR!mu#=*+iT^sJGs$il8iM+krQgu=kbvrG`V(g zlfkS7sVSu|+3~+MFw+?(WVY3{Tlpo+AEMK;hLz7(QA%+M?Oc}K_<9W1JW6ESZbH*| zk`@{li|i$n@^(hu#n&vwYVraC-9#l2ODzOWjh(RG2gX6!%^+{ztjr2!4w6LCKPaTa zntm-xFrJfbFRFNVNo~3<7AwEl_cmJ`Q9Ldr$$IcZ<#x>hl9T}0UmqvR5V1CxUbP@E zM#%lbZlswK4GL-PI#hE=#JHVH1nP_CAGA3(1sgtxv!vJaj!BUz)a<__t{0Eb@z9nj zfud1Pf>{qu-sE&#u$>xAy(r0BslyoUSPUHx#D1sAshEEMbFY66WsC!IN~|vOEbqnF|i;)!MyO*$64OcW0Mc}@ 
zv)G>xz|-G?*2;l{IAnr)4W|v-{IJ;pL4MbSGvfdr%Y^FT-On?v@;PB~OUdI@39D>r zM;Z~RAiJJ1?#Mfb*X!DatMX&fN;d-I^a^|rY_rblvNG1v^!btleW^S{iL;cV+FGwh z6RYTT+p1<7-krZrbGU)Q`JX^n5({E^0V@=ONPhg${Rwn;YfA@9_MhWVjPK|TI?eFm zHDXWRQ*7+%SJ6&kZQ)hf)7wF+tY@)AM)({`-$_fohmbrDHu5Cq$R_n5hR$HM1;NkQ zq~7wgR_(=4Qqvk(JmJ4hgr6HCLZR96jVE95y}FQ^a1vI|cMIoM)}3~-n-HF8*?gfS zFSuWGF59Ni_q;}Y6Ho2c^2@r`*@n{lORI_DtVoxThftMEvaOm@oR|jN6qRh^Lp%Fm z4KF@(y%5CNXUAJSQtONS@5aBZhm9j9jy+dT4w?}vCr*EAMPu~k+HM^It@_hqnW`y<=oi53}#R4orb~}1|KQL z><5%D6(C)S5qmI5$GuOXwh?-)E^_T7uWUK+%KK{L`eC9jZ@z86|)wfl5MMwurzh zoKCd^lw*+rT95o{&QOn}KpgSx^B!gc@FrEv#OPCGwn!|GT@QNNK`~uV*AgU_MY^j2 z)z}F0$a(Vt2M8iRT)zXnTf2ZjsH-x7c!&*m6oU@G3;trxBkXc#y_9ZJy2xsJ1ZCe<9oF z?zk^(uvO!Wk~wwtQq`8m3u+hy1;6QI=mhrH;eCMbcZm+8ep{*$%bM#e7YUj$=jtJ{ z;$9M_QN{+@$;nvQ{G|N;GO1p5Tb-4tfLt8b$Z})FaNy8Yj zdWFAt0k(H~R}F3QJdq$<_<-$*=Sa!vj*2Bh5%jXDsnetu(-iMg-pETbw-G71sAXQ6 z_GqnrG0Gdurk5%WAXOmg{}$0*iI<2zEv{k2XuXx;ISU0fdru;=3hrT?T{)+-WP8*Y zZczzFAe2XS9}(UeC0i!C49f}+Ae2bc>x8~Q$q-1Ud8mUGsDa6&MUqo9zD0pT&RWaY z-PZHPsm22dZmBFtB|J%pjo4J;eBv#5f`ooi1;!#TvY>6nO{?{0b z$B&5re^f;B0){^oT%84Pu=ZpA<|0?SpNVV3nLQd+A;HiVz4BvEQxvPG`3}^#r|pVEB`XKO zsb9Z1aZp;1^$uXzCLLB`DE71IE7|JQPz+Il_9ADVy@bjv8(9TF>vSt!Lv%kgYb_PV zKfF3e;R`@q9*%%4(Bd0@05|ekpb+?9PPwp=9z+WhbdZ2bGAv~T_i))auGB;#nT(l< zG;hN9we%&b)i!|=xqK+L#!WhbXt*(v5dDNeYvNWAo)P|A1I_Di^r;s9y&ev&NlY4V zH4CC^u2$6x_wQDhad0x}P5mbwD|}5q%xz#X2Q*C-ROCW+HY3!^#dc&R3Pt?31g+3^ zJV_JDvGh}*1ds8>PH1km>qbttPfHeds(=CKHl?w_iR(vd?eMLIUc2|`|CY3X4}-E! zVd;z>834fiEor&Ad)r&O{V2)aYxOyfisQ{uA7bZD)dkRC$y?OfzSb{51gnR(dzi&T z)YeVo=szKtb%nE=b~Ku`O=KSt&z5GkYk|T%a0h481$fLZ*mNT%Rw6ph4N^hk;;9}U5DFfFHj&m-9C|c$@FPC+!a8iK5E{C`ds%InM8P) zu5d)XQdl1n_nj(kEoO{*#ES8vc*@qHNhal>PP0=ykydS>TS30H*y5US4!*UAVO^)H zXP)pJx((kGCl^-x)f{8jwqu^1GQn|bxso(yLDsI+7Ye6(7pSXAhF9)2ZI|saYP%?Q=LNBc! 
z)uD_+yF~n!ca9AgoId8$k}G54CUe&gj*Ck9Gt;0}1){djS+$DXwb3^Hs{#aA%5~d; z@uClQ74LU`Sev?9T4=bt+BjPMMw3JdMMO@FuzmDva>+eMSrBN`JrSuT#%n0FYkJc@ zhFji&Yx%~Pd5hecGF8@#eSzBJUO?plRA`Y$Ht6|K@_n;Zo_YMBil@~CN=_C~jI)Dm zf}zH9CJtZC+Bk^}Vq428BN<(u`Y=__f!j5+fln`9MjR?Ps8BgKpLfe%ez$S$K-bh_sfAx9SLYl3S zB9_y~_cSKGJ_AuKZ z9=0;3um}Qz$pIEl=Blnv&Tj1HPOg@}cXs|$0bon#mt>^UMu8W;uXKZrdCm%T?6aR{ zJ2cl}<1Cd~dDuGjtq2L;x;l{Ep<17Ng|?m@8=L-ppOg`j$y57J7U9;?s(>VBwO)sC z<7llK@!;oA(6kv*i3LL*_uX7EeHUEnZHB0k#4@)4vuGSzpND%4Y$BTXd8%@gUWT76 z$)vv)XyP#_B3>wGT|}jHGYmBk5ePSUnvYB+NQp$Aa5bv=P}~^Kfe;elU@uLm>X+@bH zVWBuTQkcH576(rJu7I4}xgBWEMG9Ks^ZMrk1}jBtc!;un2G`fvCXG&%vj zhT#tf0rveX{!E>n|AQY4cfTJQNiB{uKa_l=E3ELnxC}>zaB*$5_sl+%P=LITj-?<0 zlVV6aNl%t#>OcpjD{0U;PQm5`m*25WKmdJtS8bNW!G21qm4X{ z8qm|NtySDUFq00`_Cq{-MZ;(|3YX&> z8<5W&*Y)F1k;r30HeO}#=eA^iMI!%b_9-PyfVfX#3Nb{Cu$?#G(FWbpEGef$51qvS zYtp5%osi{JvBJ<2UXj)?L&S^mN=$k3nFNmBz`y{Jo7=DY-xA z9$Rr+uL;wOj}Bx69c9`e1;OTknd^|6yvH&#>0j->^QnrS))Rc0S->DSR349<+nL|uDI7d2EVKN3xBQR&`(yr>-npvc-yQtDulI-Hk2w`alD~BMel`5H z*YT%mJxtX2rStKt@!xgfKTQFEe$=1F{~wk3S3kdsg@5sc@&6v;AJXBkUVc?#{`9hm z^{=|juMU2#ivM(wh5ysRpEdHYroR>pf0{PJ)KAzly8T){{OaNFN%c>A0AQ9F0Qg6W m{nh;MG4ZeFh7bN?{(FQ}RYZdAE&zZA`vk$VI1BlYU;hW4mdP~$ literal 0 HcmV?d00001