# excel_translate/excel_translate.py

import shutil
import openpyxl
import os
import sys
from openai import OpenAI
import json

# Configure the OpenAI-compatible client.
# Both values can be overridden through script-specific environment variables
# so that credentials do not have to live in the source. Deliberately NOT using
# the generic OPENAI_API_KEY name: that key would otherwise be sent to the
# third-party endpoint configured below.
# NOTE(review): the literal fallback key is committed to the repository; it
# should be rotated and removed once every deployment sets the variable.
API_KEY = os.environ.get(
    "EXCEL_TRANSLATE_API_KEY",
    "sk-ckFgxmnjJAJoVfcVF918CbFbEc5a459eA72cA51e4dB24dAf",
)
API_URL = os.environ.get("EXCEL_TRANSLATE_API_URL", "https://api.gpt.ge/v1")
client = OpenAI(api_key=API_KEY, base_url=API_URL)
# completion = client.chat.completions.create(
#     model="gpt-4o-mini",
#     messages=[
#         {"role": "system", "content": "You are a helpful assistant."},
#         {
#             "role": "user",
#             "content": "Write a haiku about recursion in programming."
#         }
#     ]
# )

# print(completion.choices[0].message)
# exit(0)

class Model:
    """Model name constants passed as the ``model`` argument of API requests."""

    gpt_4o = "gpt-4o"
    gpt_4o_mini = "gpt-4o-mini"
    text_davinci_002 = "text-davinci-002"
    doubao_lite_128k = "doubao-lite-128k"

# PROMT = """
# 将我所提供的如下内容翻译为英文，
# 内容主要是自行车，电助力自行车，仪表，嵌入式等相关的内容请翻译注意该领域内容的准确性。
# 原始内容是我从excel中按照单元格提取出来的，每个单元格的内容前后会用{}包裹,
# 输出结果只能是一个json的list的代码块，json的每个元素是一个原来{}包裹的内容翻译后的内容，请转义\n以保留换行的格式。
# 翻译时请结合所有内容整体去进行理解含义而不仅仅是单个单元格的内容。
# 注意仅输出翻译后的内容即可，不要保留原文任何内容！！！
# 请翻译：\n"""
# Prompt prefix placed in front of the text to translate. In English it reads:
# "Translate the following content I provide into English. Output only the
# translated content; do not keep any of the original text. Please translate:"
# The leading "# " on each line is part of the string that is sent.
PROMT = (
    "\n"
    "# 将我所提供的如下内容翻译为英文，\n"
    "# 注意仅输出翻译后的内容即可，不要保留原文任何内容！！！\n"
    "# 请翻译：\n"
)
def chinese2english(text, model=Model.gpt_4o_mini):
    """Translate *text* to English with a single chat-completion request.

    Args:
        text: Source text; it is appended to the module-level ``PROMT`` prefix.
        model: Name of the model to request. Defaults to ``Model.gpt_4o_mini``.

    Returns:
        The message content of the first choice in the API response.
    """
    response = client.chat.completions.create(
        # Use the model the caller asked for instead of a fixed one.
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": PROMT + text},
                ],
            }
        ],
    )
    return response.choices[0].message.content

def chinese2english_stream(text, model=Model.gpt_4o_mini):
    """Send the translation request via the streaming-response API and print it.

    Args:
        text: Source text; it is appended to the module-level ``PROMT`` prefix.
        model: Name of the model to request. Defaults to ``Model.gpt_4o_mini``.

    Returns:
        Always an empty string; the decoded JSON body is only printed.
    """
    with client.chat.completions.with_streaming_response.create(
        # Use the model the caller asked for instead of a fixed one.
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": PROMT + text},
                ],
            }
        ],
    ) as response:
        result = response.json()
        print(result)
    return ''

def update_translate(output_sheet, map_list, translated_text):
    """Write the translated text into the mapped cells of the output sheet.

    Args:
        output_sheet: Object exposing ``cell(row=..., column=...)`` whose
            result has a writable ``value`` attribute.
        map_list: Sequence of ``(row, column)`` pairs to fill.
        translated_text: The translation; it is wrapped in a one-element list,
            so only the first pair of ``map_list`` has a matching entry.
    """
    translations = [translated_text]
    for position, coordinates in enumerate(map_list):
        row_number, column_number = coordinates
        target = output_sheet.cell(row=row_number, column=column_number)
        target.value = translations[position]

def get_json_content(content):
    """Strip a surrounding Markdown code fence from an API reply.

    The text is trimmed and split into lines. When the first line starts with
    three backticks and the last line ends with three backticks, both of those
    lines are dropped. The remaining lines are joined back with newlines.
    """
    lines = content.strip().split('\n')
    opens_with_fence = lines[0].strip().startswith('```')
    closes_with_fence = lines[-1].strip().endswith('```')
    # Remove the code-fence marker lines the API wrapped around the payload.
    if opens_with_fence and closes_with_fence:
        lines = lines[1:-1]
    return '\n'.join(lines)

def is_all_ascii(s):
    """Return True when every character of *s* has a code point below 128."""
    for character in s:
        if ord(character) >= 128:
            return False
    return True

def main():
    """Command-line entry point.

    With at least two arguments, the first is used as the input workbook path
    and the second as the output workbook path. With fewer, the usage line is
    printed and ``translate_excel_process`` runs with its default paths.
    """
    if len(sys.argv) >= 3:
        # Forward the paths given on the command line.
        translate_excel_process(sys.argv[1], sys.argv[2])
        return
    print("Usage: python excel_translate.py input_file_path output_file_path")
    translate_excel_process()

def _needs_translation(content):
    """Return True when a cell value is text that should be translated."""
    # Only non-empty strings can hold translatable text; empty cells, numbers,
    # booleans, dates and other non-string values are left untouched.
    if not isinstance(content, str) or not content:
        return False
    # Pure-ASCII text is treated as already being English.
    if is_all_ascii(content):
        return False
    # Numeric strings written with non-ASCII digits are skipped as well.
    try:
        int(content)
    except ValueError:
        return True
    return False


def translate_excel_process(input_file_path="test.xlsx", output_file_path="output.xlsx"):
    """Translate the text cells of a workbook's active sheet into English.

    The input file is copied to ``output_file_path`` first, then every cell of
    the active sheet that holds non-ASCII text is replaced in the copy by the
    result of ``chinese2english``. Only the active worksheet is processed.

    Args:
        input_file_path: Path of the workbook to read.
        output_file_path: Path the translated copy is written to.

    Raises:
        SystemExit: With status 1 when ``input_file_path`` does not exist.
    """
    if not os.path.exists(input_file_path):
        print("Input file not found.")
        sys.exit(1)
    # Start from a full copy so untouched cells and formatting carry over.
    shutil.copy(input_file_path, output_file_path)

    input_workbook = openpyxl.load_workbook(input_file_path)
    input_sheet = input_workbook.active

    output_workbook = openpyxl.load_workbook(output_file_path)
    output_sheet = output_workbook.active

    # Remember the most recent source text and its translation so a run of
    # identical cells costs a single API request.
    last_cell_value = None
    last_cell_value_trans = None
    columns = input_sheet.iter_cols(min_col=1, values_only=True)
    for col_idx, col in enumerate(columns, start=1):
        for row_idx, content in enumerate(col, start=1):
            if not _needs_translation(content):
                continue
            if content != last_cell_value:
                last_cell_value_trans = chinese2english(f'{content}\n')
                last_cell_value = content
            update_translate(output_sheet, [(row_idx, col_idx)], last_cell_value_trans)

    output_workbook.save(output_file_path)
    print("Translation complete.")
# Run the translation only when this file is executed as a script.
if __name__ == '__main__':
    main()