将具有多个sheet的excel表格中所有sheet某一列翻译成中文,且保留原始样式
将具有多个sheet的excel表格中所有sheet某一列翻译成中文,且保留原始样式
import pandas as pd
import numpy as np
import traceback
import openpyxl
def llm_summary(question):
from http import HTTPStatus
import dashscope
dashscope.api_key="sk-"
from dashscope import Generation
system_prompt="你是一位CUDA领域的专家,请将用户输入的专业术语翻译成中文,并简单解释"
messages = [{'role': 'system', 'content': system_prompt},
{'role': 'user', 'content': question}]
response = Generation.call(model="qwen-max", messages=messages,result_format='message')
if response.status_code == HTTPStatus.OK:
messages.append({'role': response.output.choices[0]['message']['role'],
'content': response.output.choices[0]['message']['content']})
output=response.output.choices[0]['message']['content']
return output
return ""
def translate(txt):
ret=llm_summary(txt)
print(txt,ret)
return ret
file_path = 'input.xlsx'
excel_file = pd.ExcelFile(file_path)
sheets_dict = pd.read_excel(file_path, sheet_name=None)
sheet_names = excel_file.sheet_names[3:]
error=False
for sheet_name in sheet_names:
if error:
break
df = sheets_dict[sheet_name]
print(sheet_name,len(df))
for i in range(len(df)):
Metric_description = str(df.iloc[i, 1])
if len(Metric_description)==0 or Metric_description=="nan":
break
Translation = str(df.iloc[i, 2])
try:
if len(Translation)==0 or Translation=="nan":
df.iloc[i, 2] = translate(Metric_description)
except:
error=True
print(i,Metric_description)
traceback.print_exc()
break
sheets_dict[sheet_name] = df
workbook = openpyxl.load_workbook(file_path)
# 使用 openpyxl 将新的数据写回到原始工作表里
for sheet_name in sheets_dict:
ws = workbook[sheet_name]
df = sheets_dict[sheet_name]
for r_idx, row in enumerate(ws.iter_rows(min_row=2), 1): # min_row=2跳过标题行
for c_idx, cell in enumerate(row, 1):
try:
new_value = df.iat[r_idx-1, c_idx-1] # r_idx-1 和 c_idx-1 是因为 pandas 索引从 0 开始
cell.value = new_value
except IndexError:
break
# 保存工作簿
workbook.save(file_path)