From d3ab696d1050f085c4e830bca03e9b360a4cedc5 Mon Sep 17 00:00:00 2001 From: Begild Date: Mon, 4 Nov 2024 08:42:00 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BD=BF=E7=94=A8asyncio=E6=9D=A5=E6=9E=84?= =?UTF-8?q?=E5=BB=BA=E5=BC=82=E6=AD=A5=E7=BF=BB=E8=AF=91=E4=BB=BB=E5=8A=A1?= =?UTF-8?q?=E6=8F=90=E9=AB=98=E7=BF=BB=E8=AF=91=E6=95=88=E7=8E=87=20?= =?UTF-8?q?=E7=9B=AE=E5=89=8D=E5=AD=98=E5=9C=A8=E9=80=80=E5=87=BA=E7=9A=84?= =?UTF-8?q?=E6=97=B6=E5=80=99=E6=8A=A5=E9=94=99async=20runtime=20error?= =?UTF-8?q?=E8=BF=98=E6=9C=AA=E8=A7=A3=E5=86=B3=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A3=80=E6=B5=8B=E5=B9=B6=E5=AE=89=E8=A3=85openpyxl=E5=92=8Co?= =?UTF-8?q?penai=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- excel_translate.py | 235 ++++++++++++++++++++++++++++----------------- input.xlsx | Bin 8642 -> 8993 bytes 2 files changed, 145 insertions(+), 90 deletions(-) diff --git a/excel_translate.py b/excel_translate.py index 365ea71..2c58df9 100755 --- a/excel_translate.py +++ b/excel_translate.py @@ -1,14 +1,25 @@ +from dataclasses import dataclass import shutil -import openpyxl +from typing import List, Tuple import os import sys -from openai import OpenAI -import json +try: + import openpyxl +except ImportError: + print("openpyxl is not installed, installing...") + os.system("pip install openpyxl") + import openpyxl +try: + from openai import AsyncOpenAI +except ImportError: + print("openai is not installed, installing...") + os.system("pip install openai") + from openai import AsyncOpenAI +import asyncio # Set up OpenAI API key -API_KEY = "sk-ckFgxmnjJAJoVfcVF918CbFbEc5a459eA72cA51e4dB24dAf" +API_KEY = "sk-ckFgxmnjJAJoVfcVF918CbFbEc5a459eA72cA51e4dB24dAf" #来自V3API API_URL = "https://api.gpt.ge/v1" -client = OpenAI(api_key=API_KEY, base_url=API_URL) # completion = client.chat.completions.create( # model="gpt-4o-mini", # messages=[ @@ -37,15 +48,13 @@ class Model: # 翻译时请结合所有内容整体去进行理解含义而不仅仅是单个单元格的内容。 # 注意仅输出翻译后的内容即可,不要保留原文任何内容!!! # 请翻译:\n""" -PROMT = """ -# 将我所提供的如下内容翻译为英文, -# 注意仅输出翻译后的内容即可,不要保留原文任何内容!!! -# 请翻译:\n""" -def chinese2english(text, model=Model.gpt_4o_mini): +PROMT = "将如下内容翻译为英文,仅输出翻译后的内容,不输出任何原文:" +async def chinese2english(text, model=Model.gpt_4o_mini): # print("start translate") #Translate the text using OpenAI - response = client.chat.completions.create( - model = Model.gpt_4o_mini, + client = AsyncOpenAI(api_key=API_KEY, base_url=API_URL) + response = await client.chat.completions.create( + model = model, messages = [ { "role": "user", @@ -57,49 +66,94 @@ def chinese2english(text, model=Model.gpt_4o_mini): ) # print("translate done") translated_text = response.choices[0].message.content + client.close() return translated_text -def chinese2english_stream(text, model=Model.gpt_4o_mini): - with client.chat.completions.with_streaming_response.create( - model = Model.gpt_4o_mini, - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": PROMT + text}, - ], - } - ] - ) as response: - result = response.json() - print(result) - return '' +# def chinese2english_stream(text, model=Model.gpt_4o_mini): +# client = OpenAI(api_key=API_KEY, base_url=API_URL) +# with client.chat.completions.with_streaming_response.create( +# model = Model.gpt_4o_mini, +# messages = [ +# { +# "role": "user", +# "content": [ +# {"type": "text", "text": PROMT + text}, +# ], +# } +# ] +# ) as response: +# result = response.json() +# print(result) +# return '' + +def idx2excel_pos(row_idx, col_idx): + if col_idx < 1: + return '' + ret = '' + while col_idx: + ret += chr(ord('A') + (col_idx - 1) % 26) + col_idx = (col_idx - 1) // 26 + return f"{ret[::-1]}{row_idx}" + def update_translate(output_sheet, map_list, translated_text): - # json_obj = json.loads(translated_text) - # print(json_obj) - json_obj = [translated_text] - for idx, (row_idx, col_idx) in enumerate(map_list): - output_sheet.cell(row=row_idx, column=col_idx).value = json_obj[idx] + pos = [f"{idx2excel_pos(row_idx, col_idx)}" for row_idx, col_idx in map_list] + pos = ','.join(pos) + print(f"========================= [{pos}] Update Translated Text :\n{translated_text}") + for row_idx, col_idx in map_list: + output_sheet.cell(row=row_idx, column=col_idx).value = translated_text -def get_json_content(content): - content = content.strip().split('\n') - #去除掉api返回的code代码提示信息 - if content[0].strip().startswith('```') and content[-1].strip().endswith('```'): - content = content[1:-1] - return '\n'.join(content) +# def get_json_content(content): +# content = content.strip().split('\n') +# #去除掉api返回的code代码提示信息 +# if content[0].strip().startswith('```') and content[-1].strip().endswith('```'): +# content = content[1:-1] +# return '\n'.join(content) def is_all_ascii(s): return all(ord(char) < 128 for char in s) -def main(): - if len(sys.argv) >= 3: - print("Usage: python excel_translate.py input_file_path output_file_path") - input_file_path = sys.argv[1] - output_file_path = sys.argv[2] - translate_excel_process() -def translate_excel_process(input_file_path="test.xlsx", output_file_path="output.xlsx"): +@dataclass +class TaskInfo: + original_text: str + translated_text: str + pos:List[Tuple[int, int]] + +class TaskManager: + def __init__(self, output_sheet): + self.task_list = [] + self.output_sheet = output_sheet + + async def add_task(self, pos_list, original_text): + task = TaskInfo(original_text, '', pos_list) + self.task_list.append(task) + if len(self.task_list) >= 20: + await self.do_task() + + async def do_task(self): + tasks = [] + for task in self.task_list: + tasks.append(asyncio.create_task(chinese2english(task.original_text))) + results = await asyncio.gather(*tasks) + for idx, task in enumerate(self.task_list): + task.translated_text = results[idx] + update_translate(self.output_sheet, task.pos, task.translated_text) + self.task_list = [] + async def finish(self): + if len(self.task_list) > 0: + await self.do_task() + +async def main() -> None: + if len(sys.argv) < 2: + print("Usage: python excel_translate.py input_file_path") + sys.exit(0) + input_file = sys.argv[1] + output = os.path.splitext(input_file)[0] + '_translated' + os.path.splitext(input_file)[1] + await translate_excel_process(input_file, output) + print(f"{input_file} Translation complete => {output}.") + +async def translate_excel_process(input_file_path="input.xlsx", output_file_path="output.xlsx"): # Check if the input file exists if not os.path.exists(input_file_path): print("Input file not found.") @@ -108,54 +162,55 @@ def translate_excel_process(input_file_path="test.xlsx", output_file_path="outpu # Open the input file input_workbook = openpyxl.load_workbook(input_file_path) - input_sheet = input_workbook.active - - # Create a new output workbook output_workbook = openpyxl.load_workbook(output_file_path) - output_sheet = output_workbook.active - - # Loop through each row in the input sheet + sheets = input_workbook.sheetnames + map_list = [] #合并的单元格的翻译位置记录 original_text = "" - col_idx= 1 - map_list = [] - last_cell_value = "" - last_cell_value_trans = "" - # debug_test_count = 0 - for col_idx, col in enumerate(input_sheet.iter_cols(min_col=1, values_only=True), start=1): - # Get the original text and language code - for row_idx, content in enumerate(col, start=1): - if not content: - continue - elif isinstance(content, int):#如果是纯数字也不用翻译 - continue - #如果全是英文,则不翻译 - elif is_all_ascii(content): - continue - try: - int(content) - except ValueError: - pass - else: - continue - #和最近一次的cell内容相同则不翻译直接使用结果即可 - if content != last_cell_value: - original_text += f'{content}\n' - map_list.append((row_idx, col_idx)) - #if len(original_text) > 100: - # print(f"Original text: {original_text}") - translated_text = chinese2english(original_text) - # translated_text = get_json_content(translated_text) - # print(f"Translated text: {translated_text}") - update_translate(output_sheet, map_list, translated_text) - map_list = [] - original_text = "" - - # Add the original text and translated text to the output sheet - # output_sheet.append([original_text, translated_text]) + try: + for sheet_name in sheets: + print(f"Processing sheet: {sheet_name}") + input_sheet = input_workbook[sheet_name] + # Create a new output sheet + output_sheet = output_workbook[sheet_name] + task_manager = TaskManager(output_sheet) + # Loop through each row in the input sheet + for col_idx, col in enumerate(input_sheet.iter_cols(min_col=1, values_only=True), start=1): + # Get the original text and language code + for row_idx, content in enumerate(col, start=1): + if not content or str(content).strip() == '': #没内容不用翻译 + # print(f"Skip empty cell: {idx2excel_pos(row_idx, col_idx)}") + continue + elif isinstance(content, int):#如果是纯数字也不用翻译 + continue + #如果全是英文,则不翻译 + elif is_all_ascii(content): + continue + #和最近一次的cell内容相同则不翻译直接使用结果即可 + if content != last_cell_value: + if last_cell_value == '': #第一个单元格先记录一下,等待后面有不一样的才翻译 + last_cell_value = content + original_text = content + map_list.append((row_idx, col_idx)) + continue + last_cell_value = content + # print(f"Original text: {original_text}") + await task_manager.add_task(map_list, original_text) + original_text = content + map_list = [] + map_list.append((row_idx, col_idx)) + await task_manager.finish() + except Exception as e: + print(f"Error: {e}") + pass + except KeyboardInterrupt: + print("KeyboardInterrupt") + output_workbook.save(output_file_path) + sys.exit() # Save the output workbook output_workbook.save(output_file_path) - print("Translation complete.") -if __name__ == '__main__': - main() \ No newline at end of file + pass +if __name__ == "__main__": + asyncio.run(main()) + exit(0) \ No newline at end of file diff --git a/input.xlsx b/input.xlsx index 5427c793c87ada7c5ef9f455b7fd5ffc5156453e..2113cf82ed646c2d5cddf29ede88cf5d0c9d93ea 100755 GIT binary patch delta 3391 zcmZ9Pc{J4h7srROjCJhGWZ!p_2w5WgAk#$lEhD>BgDD>eT+rN}x+C5<(^ zDMlKySC%3Bl72nQIi2(S{Bh4c=iYzrec#u)_w&}I%)Is;6QiYX6QgzA9cMCIe3dpJ=Z?{tbS9}Eukqay)#$7zEJbAZOyMp*G64GLP6gAVnR+Ig@NI86&E>=@lsXC-y0{(14 zOG~e6mPenhA}G+iDlaZTvoZlt)NKyu&xG6H3RzR`#M=N2CaMCs%_mnw1}oN#HJ?9b zJS6a?3y%cua<>$IYa?zJRGo^=zl)&QGN-;Q^p&@MqFL<7wF~Q+b|I<9o#*tHAX$C> zc&j~(=d^bv2cdXk>q#f+=0x~1z~02HXUg;Zxn9% zl#F2NJCboZ9!M{JMNN3T@1X9UaduGMr48ZG`}A>L!v!ag1%LL-O>^pOTsfgdq$~EH zWfkRk`Bn}34{S&bfdJCEMz-nL(uxfg_3*^OIh>(3<~H{lOkKV$goO!sLbOA%K+fRpX{(nBiTe z&B)VZzXc^_8sb=R^l5@sg-z-evE-*@aI&@o=)fJc8Ju5`q4m#iBl>0`g zCxG>?L`08@#dRUuDD$4MgHS9<0c-XoD9@<6p=4DiX4XX^@%3tO9y$}VG&Ca5xSpz` z-|gQu)VOTZeA-cxSO2Eo{>Kv7lyhbw=#CC+nHato>Q_$57-QKa`=4+T@ zIYZAHwBO*B1_psvSnxvZ5`gXXDL8u~4ZJJPK=qltSlDF#*H!;Zjd~nG6D9lw_Y%I4 zau%Y!D1dl{idCE&4n{P+Jhzo4V9-VlG#mf?vXMYA%e_o*PBAa^wFTPs(`pnq~v)OV4fIEjw^!U)N2ejg7^Y3pz>l*!R$MH9LYe9Il!ylqD9eyf{SFM$Aj|2Y9!mf}1J(0TTwV#`^2Gv3Fc)7X_v zGutUwP$(KkfnOmg`_Qv2(VOh7rXW5=g_IwywGsT0lSWK1V%DPmPFTGrb@2!Iq(JZOu>H9j0efNE_MoJL>GLTd$_|_~;RHM5oU;%$BCT z`(zOn4s0}Ox4wIAGZ}HzzgHLO4v0dPJo&2XMeFS#Gbp3T(u)qEJNC|0YRYmpJo|AR zWRfvm%djmuSyuMq(s<63<(6w!XFTR8sofs)lyjY$X>tS11IK_NWIlSErqhD#@td#Q z3yYiVe2cnD=Y2JfO$!2?fS{EEh!nxg+_jJ zr?**mS>sa?=NGM9${ybT%eqqsUHx`YLeE8GHM~CV^GuJG8Y#Ur)2eP}Bgdu+yV>zv z+uFi}bMma9`nj9Yf&7+S$FNIrO)U|=q=t&HrD=9`GYAmmu?jJA4c~$o0tIe4GkamUpm(^gFqa=YQP4o z-@@Du_PwQs4GDhaU>!OI=b{RJ5rwSt{}J3ulvGTfhd@T?8RzJ!V?4gRvlgXaOfQbQ zYa3WOQkkjSnr3&>IAtfVWIhbYO1B-#ygof}>fXb7moSW3cvp5>9kAeS>PxsO=Xm<0 z*0T7tIglf%A@61dn^&#gxbgka!M;GXQZNX?K@V~6!iL!BhEtVXFB?}$JD^xfE(92} z2k*SAM%Y?;Oj_gG;+E6hjPpBH-`b;pf~AsQf6Bf{FtuWL)fgHecD%Jjlecp03srQH z)juTrkZT#3*uYn8V@~3o+$!WMRDv$OnnAn7f$pVYkxD65r!ZzZPSLA!mk2Fmy|EZ% zYqwNN=~_w~&65^$+*rvfB{V}Ee*=TnwfW$TzgCYnX}V}6!FgA1v)>v)I`g_4yEd%? zOr)MU%~_@79vzzO3@i)sq7$ildE;T|D@tx=MSNoDSKP`dS=p8t!?ml{V*5tMMX8ts z{+L6((xN5HU6~NqsO4I{)_zI$SS{!D2-+(9Og$gRjP;O>&F4aSmhGOZ}ya%6#>e;|A5c4i>~MpW~I? z-Mwb@4Yjs+Ws8Y7NDH1A`hk~*3be`YGZD2~26Y+q9GPTXaEtS5@oB?nGoyb2Aq;7_ z_4iQ`6*ge*dD3BZ2oga17$sg;#=hFfI9u4YPo7*IM91VZ4N;2Hi)UjKhh+X5EDpn- zD;g%J2x2QAM9uFol9~=+v9%{*E;ocaURV>Qfg815EzJR6we5KQ_gd|6_ak`@OYoff zGs5?JDZ&{{W%$GxxHyNOpSL1xw5IOMxfu}Z_b)W~K?KQ&tA~c8)vi)Q zs}-cI?kFfs>wf{qqo)|MsQJ{Y9s6UZD1ZJYTrvWRQmUJ?sLpvkAU8PZ7}h1G-$wGd zmiY7fwR}<4_QfbKaMD19X%e|Wg(2SwcpqV9l@#>!?&U2mcqq@*wnE3bXI@NO-3#&E zLd+=Zd`{7!KN;im7rkcC!Z0tXGiIro1MgF})t7#0QrRbI7~ zp$ka4^*h3X-EktCnoBU4c2?fJ<~cH#cH^7-24NE}EXMbFUI$ytGQ8yJ?S`l#0Sz$* zDHD_P-%gi}+N`(rt<~^6x+Oooq7;w87HCS3K27yFihR6q3~&ARXw>x>%SCIymna_a z9;tZ0$QgD2I)e8VGk+(s@wu@rBliQbAAcw*Y~JRLd~VSc;nt3Hp>7#%A0cM^maW)b z5b>|BpbQSg>H31SUjOR&&!1or5}yy|VEk;`d==3^6V{>wZe;s8T#Qri zd5vncy|gqPj6b{KkKyR%_X69Ya#PNk=`9B)*755-Zb^?eIS4Da?JD*xRIO>HY^i=` zk-X&=H=hOQ-E;MFf|_D3AWloHKrcst6ye#(xSImxRDLJa3gd+(R3P*gkfvZLK@rfv}7m;Un06V#Tmxpa*h_l0mrh!k69V2tm)c29#}v!Le3O~ z86o>FWr-WcSdvqkIm|UIi+jyYs9WuqD1P3?OB~Yg?DdLQRpZH{MPEotIdb;k7M6>s zsCCbmS1ynUwqtDfn5v6X7Q~bVmJI1fI#hmgo3eZU61HdDdlh_yvE1gvOR3*uJ#ts1 z9w@6yLw{3G9GSUwrhhIN5p=hPSgzCkjSFdC9JKUlXbdAMef_YM#;j@~9+IGE^@$8u z!L`W1BE-2jKB*30h?>Y>dF+Eh@=JJWdZX5yh!)9eUF->Cb<23kXDWA40th-Da zfjY!+!)S}BlF)3}u+#A9h3$K2155OT;-IChw~)UDYV(@6g0Ts0=&p}Ot@X|3%*r|k zYw#P?&Bz~d{kZPW)J(IC{@jq);=^CyrNbjYRja@K8Iit>ToS;LW;DfjATM&8^~osr z$8qzBo+0LT=M;;|DYiS<*8~z$*AS!zpa0JE^_qHK<;2~gFPf1BmY-7!0+qc&MHf5C zes<6oM8{KZp`PALen)GjB}BgWv! zZKo#%Z9ipp?8fhxmn_yX47Sb14C9%fnak;Coq#}~lmCRb5=S0B>Z<@J1t9~gHC21IsA217VNGL>{NO#;lm-&=r&YwLwv>=bao zKSX2EsV)LdA9|X#PlL7wkL1dIX^#ZS6@s+KeaRJQ5dD~D0FXQ{`6H?aG7%M^@#hAL zAK%kcF0g?6oM4AkmaMDAt8D=7vnG~FEPN=JY{PysBe`_ZpGiRr!&0q!ea$k#f%7_T z(5}P=CLpff+9%HV64M2dtBufPo3e;4@4Vf*yqx7s+l%S=2*J=8o;<_E7Rte3YPFbk zOZ8;Kv%Tz0>=E^EkO!EnpfKq9dwDyjAd3NyI63j%euHTCX`yZA!tuJh!b}*qI`3@T zGBDgUrZvPR1u$80P8NGX)h7p(lYw=8_(36+*S+L*buhfsVtSfsayPsMn3QR zAFn--;Bh@^ZAs`d_+(J-a^#nK*_9xvH2&EdzTA6yiT|!sw#tUmzX$(-6?aW|YBjN)tUv1uh&vLJ>3B`2)!C$39iKN5WGdrdcx!Vm z+9Qv8?IP*@uY?LB`_Kr9|9XesDBg=JJ{&G>nh&FnNA9po~w!@bN1Z`>MIP zsL)?8E)D0&4Q3-4MCO>EbHsN(ue*WzW~F0Chk0BP`k6R@!fvb|9PnhzY!x_y288OVRfxYDPdSO9!8n=ps4+7 z5ox0O+X!et#`-j1b`TWEqoKuv<_m9aoPN*lQ;q#}$N9?M@^lVzcQXqP^78YzC4I&DpS0dfl-ZH;S zrRV->)P2g&5f?QFV<`D7rTNRlVlZ#M@pr?6G@ZP1#Z(xcRieUo`%cJgyIRS}z0qDg zPZ=M11)=t=!OLQ6Y!wzo>SdXk#frMM93W0Nd|$Z5d0sUJrAe7|VdEShSk$7UZX3{B zO2Bw7M_=ncI*|NydA_R|FySRgy@%!rV=1@eLV8xguHB)=3TOA@e#~oUWvetP+P&hL ztw7Gf5V!1OT%++SadtOKX$3DMApDOG2F=f-0~k1l1ajLH3w}OdZywt1rE!q=9w|n7 zpOgS!Gm$ucrY|wbc$Yy%6Sh#^nf!&CoyA*wL*(=?uauvva;VNjGtER#Zb;(BHZ30L zJoy5Wx;Z`38*Ua|tM)KFBWvaMfg1bOx!5$vlbrhY(7iTA`cG_EJP=v&ZkB!5uK1_n z4rUyW*Y8M;`45Rj;S&KbT`%2*XlYd$gxghBns4k*cOD#_{q41#=M8B7J00^POsPHs zSOoIipCJMS;yxwqABzFtW8ek)5R%lZOu!s7hqSRi6*cmdM<|HyGz|gzpKsHGK#cz= z1cCB605w7WbN`R=pPWW+04FGV?)S5vGVXlp{;Yr0)6*0W@Ry((wf#9@Ur-P#&HuOf EZ}D<}0RR91