初次提交,做好基本的translate功能,但是速度较慢。
下一步计划改成async的发起多个请求看下能不能提升
This commit is contained in:
parent
0636efca75
commit
1f2de5d8ac
0
.gitignore
vendored
Normal file → Executable file
0
.gitignore
vendored
Normal file → Executable file
161
excel_translate.py
Executable file
161
excel_translate.py
Executable file
|
@ -0,0 +1,161 @@
|
|||
import shutil
|
||||
import openpyxl
|
||||
import os
|
||||
import sys
|
||||
from openai import OpenAI
|
||||
import json
|
||||
|
||||
# OpenAI-compatible endpoint configuration.
# Environment variables take precedence so credentials can be supplied
# without editing the script; the literals are only fallbacks that keep the
# previous behaviour when the variables are unset.
# NOTE(review): the fallback key is committed to version control and should
# be treated as leaked -- rotate it and drop the literal once deployments set
# EXCEL_TRANSLATE_API_KEY.
API_KEY = os.environ.get(
    "EXCEL_TRANSLATE_API_KEY",
    "sk-ckFgxmnjJAJoVfcVF918CbFbEc5a459eA72cA51e4dB24dAf",
)
API_URL = os.environ.get("EXCEL_TRANSLATE_API_URL", "https://api.gpt.ge/v1")
client = OpenAI(api_key=API_KEY, base_url=API_URL)
|
||||
# completion = client.chat.completions.create(
|
||||
# model="gpt-4o-mini",
|
||||
# messages=[
|
||||
# {"role": "system", "content": "You are a helpful assistant."},
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": "Write a haiku about recursion in programming."
|
||||
# }
|
||||
# ]
|
||||
# )
|
||||
|
||||
# print(completion.choices[0].message)
|
||||
# exit(0)
|
||||
|
||||
class Model:
    """Model name strings usable as the ``model`` argument of API requests."""

    gpt_4o = "gpt-4o"
    gpt_4o_mini = "gpt-4o-mini"
    text_davinci_002 = "text-davinci-002"
    doubao_lite_128k = "doubao-lite-128k"
|
||||
|
||||
# PROMT = """
|
||||
# 将我所提供的如下内容翻译为英文,
|
||||
# 内容主要是自行车,电助力自行车,仪表,嵌入式等相关的内容请翻译注意该领域内容的准确性。
|
||||
# 原始内容是我从excel中按照单元格提取出来的,每个单元格的内容前后会用{}包裹,
|
||||
# 输出结果只能是一个json的list的代码块,json的每个元素是一个原来{}包裹的内容翻译后的内容,请转义\n以保留换行的格式。
|
||||
# 翻译时请结合所有内容整体去进行理解含义而不仅仅是单个单元格的内容。
|
||||
# 注意仅输出翻译后的内容即可,不要保留原文任何内容!!!
|
||||
# 请翻译:\n"""
|
||||
# Instruction prefix that is prepended to the text sent for translation.
# The "# " markers are inside the string, so they are sent to the model too.
PROMT = (
    "\n"
    "# 将我所提供的如下内容翻译为英文,\n"
    "# 注意仅输出翻译后的内容即可,不要保留原文任何内容!!!\n"
    "# 请翻译:\n"
)
|
||||
def chinese2english(text, model=Model.gpt_4o_mini):
    """Translate ``text`` to English with a single chat-completion request.

    Args:
        text: Source text; it is appended to the ``PROMT`` instruction prefix.
        model: Model name to request. Defaults to ``Model.gpt_4o_mini``.

    Returns:
        The message content of the first choice in the response.
    """
    # Honour the caller's ``model`` choice; the request used to be pinned to
    # gpt-4o-mini regardless of this argument.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": PROMT + text},
                ],
            }
        ],
    )
    return response.choices[0].message.content
|
||||
|
||||
def chinese2english_stream(text, model=Model.gpt_4o_mini):
    """Send the translation request via the client's streaming-response API.

    The decoded JSON body of the response is printed; nothing from it is
    returned.

    Args:
        text: Source text; it is appended to the ``PROMT`` instruction prefix.
        model: Model name to request. Defaults to ``Model.gpt_4o_mini``.

    Returns:
        Always an empty string.
    """
    # Honour the caller's ``model`` choice; the request used to be pinned to
    # gpt-4o-mini regardless of this argument.
    with client.chat.completions.with_streaming_response.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": PROMT + text},
                ],
            }
        ],
    ) as response:
        result = response.json()
        print(result)
    return ''
|
||||
|
||||
def update_translate(output_sheet, map_list, translated_text):
    """Write the translation into the sheet cells listed in ``map_list``.

    Args:
        output_sheet: Object exposing ``cell(row=..., column=...)`` whose
            result has a writable ``value`` attribute.
        map_list: Sequence of ``(row_idx, col_idx)`` pairs.
        translated_text: The translation; it is treated as a one-element
            list, so only a single target cell is supported per call.
    """
    translations = [translated_text]
    for position, (row, column) in enumerate(map_list):
        # Look the text up first so an out-of-range position fails before
        # the sheet is touched.
        text = translations[position]
        output_sheet.cell(row=row, column=column).value = text
|
||||
|
||||
def get_json_content(content):
    """Strip a surrounding Markdown code fence from an API reply.

    The text is stripped and split on newlines. If the first line starts
    with three backticks and the last line ends with three backticks, both
    of those lines are dropped. The remaining lines are re-joined with
    newlines.
    """
    fence = '```'
    lines = content.strip().split('\n')
    opening = lines[0].strip()
    closing = lines[-1].strip()
    # Drop the code-fence lines the API wraps around its answer.
    if opening.startswith(fence) and closing.endswith(fence):
        lines = lines[1:-1]
    return '\n'.join(lines)
|
||||
|
||||
def is_all_ascii(s):
    """Return True when every character of ``s`` has a code point below 128."""
    for char in s:
        if ord(char) >= 128:
            return False
    return True
|
||||
|
||||
def main():
    """Command-line entry point.

    Usage: python excel_translate.py input_file_path output_file_path

    When both paths are supplied they are forwarded to
    ``translate_excel_process``. Otherwise the usage line is printed and the
    function runs with its default paths.
    """
    if len(sys.argv) >= 3:
        # The paths used to be read here but never passed on, so the
        # defaults were always used.
        translate_excel_process(sys.argv[1], sys.argv[2])
        return
    print("Usage: python excel_translate.py input_file_path output_file_path")
    translate_excel_process()
|
||||
|
||||
def _needs_translation(content):
    """Return True when a cell value is text that should be translated."""
    # Only non-empty strings are candidates; numbers, dates, booleans and
    # empty cells are left untouched (non-int, non-str values used to crash
    # the ASCII check).
    if not isinstance(content, str) or not content:
        return False
    # Pure-ASCII text is treated as already being English.
    if is_all_ascii(content):
        return False
    # Integers written with non-ASCII digits do not need translating either.
    try:
        int(content)
    except ValueError:
        return True
    return False


def translate_excel_process(input_file_path="test.xlsx", output_file_path="output.xlsx"):
    """Translate the text cells of a workbook's active sheet into English.

    The output file starts as a copy of the input file; cells of the active
    sheet that need translation are then overwritten with the result of
    ``chinese2english`` and the workbook is saved to ``output_file_path``.
    Cells are visited column by column.

    Args:
        input_file_path: Path of the workbook to read.
        output_file_path: Path the translated workbook is written to.

    Raises:
        SystemExit: With status 1 when ``input_file_path`` does not exist.
    """
    if not os.path.exists(input_file_path):
        print("Input file not found.")
        # Exit with a failure status so callers can detect the error.
        sys.exit(1)

    shutil.copy(input_file_path, output_file_path)

    input_sheet = openpyxl.load_workbook(input_file_path).active
    output_workbook = openpyxl.load_workbook(output_file_path)
    output_sheet = output_workbook.active

    # Most recently translated source text and its translation. A cell that
    # repeats that text reuses the result instead of issuing another request.
    last_cell_value = None
    last_cell_value_trans = None

    columns = input_sheet.iter_cols(min_col=1, values_only=True)
    for col_idx, col in enumerate(columns, start=1):
        for row_idx, content in enumerate(col, start=1):
            if not _needs_translation(content):
                continue
            if content != last_cell_value:
                last_cell_value_trans = chinese2english(f'{content}\n')
                last_cell_value = content
            update_translate(
                output_sheet, [(row_idx, col_idx)], last_cell_value_trans
            )

    output_workbook.save(output_file_path)
    print("Translation complete.")
|
||||
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
|
BIN
input.xlsx
Executable file
BIN
input.xlsx
Executable file
Binary file not shown.
Loading…
Reference in New Issue
Block a user