用python对excel进行批量处理（2）：将表格中的英文翻译成中文

1mmorta1

已于 2022-08-01 14:16:48 修改

阅读量1.8k

点赞数 1

分类专栏： python 文章标签： python 开发语言 爬虫

于 2022-07-31 23:31:58 首次发布

本文链接：https://blog.csdn.net/qq_41866334/article/details/126091856

版权

python 专栏收录该内容

2 篇文章 1 订阅

订阅专栏

要翻译的excel大致长这样：
在这里插入图片描述
其实有第一篇的基础，我以为这次只要随便改改就行，没想到最大的问题出在找不到合适的、用于翻译（Translate）的 python 包。最著名的 googletrans 包因为国内众所周知的网络原因无法使用，因此找了很久替代品，最后找到了 pygtrans 这个包。它支持在接口中传入 proxies 参数，也就是科学上网所使用的代理地址和端口。其余部分就很简单了，在第一篇代码的基础上改一改就可以成功运行。

from openpyxl import *
from openpyxl.drawing.image import Image
from openpyxl.cell import MergedCell
from openpyxl.drawing.spreadsheet_drawing import OneCellAnchor, AnchorMarker
from openpyxl.utils.units import pixels_to_EMU, cm_to_EMU
from openpyxl.drawing.xdr import XDRPoint2D, XDRPositiveSize2D

from io import BytesIO
import os
from tqdm import tqdm
import traceback
from pygtrans import Translate
class ExcelOp(object):
    """Convenience wrapper around one worksheet of an openpyxl workbook.

    Row and column indexes are 1-based. Every accessor goes through
    ``_parser_merged_cell``, so addressing a cell inside a merged range
    reads or writes the top-left cell of that range.
    """

    def __init__(self, file, sheet_name="Sheet1"):
        """Load the workbook ``file`` and select the sheet ``sheet_name``.

        :param file: path handed to ``load_workbook``
        :param sheet_name: name of the worksheet to operate on
        """
        self.file = file
        self.wb = load_workbook(self.file)
        self.ws = self.wb[sheet_name]

    def get_cell_value(self, row, column):
        """Return the value stored at (``row``, ``column``)."""
        return self._parser_merged_cell(row, column).value

    def get_col_value(self, column) -> list:
        """Return the values of ``column`` for rows 1..``ws.max_row``."""
        return [
            self.get_cell_value(row=row, column=column)
            for row in range(1, self.ws.max_row + 1)
        ]

    def get_row_value(self, row):
        """Return the values of ``row`` for columns 1..``ws.max_column``."""
        return [
            self.get_cell_value(row=row, column=column)
            for column in range(1, self.ws.max_column + 1)
        ]

    def set_cell_value(self, row, colunm, cellvalue):
        """Write ``cellvalue`` into the cell at (``row``, ``colunm``).

        If the assignment is rejected, the cell receives the marker text
        ``"ERROR:write fail"`` instead of the exception propagating.
        """
        cell = self._parser_merged_cell(row, colunm)
        try:
            cell.value = cellvalue
        except Exception:
            # Best effort: leave a visible marker in the sheet rather than
            # aborting. Only Exception is caught so Ctrl-C still works.
            cell.value = "ERROR:write fail"

    def append_cell_value(self, row, colunm, cellvalue):
        """Append the text ``cellvalue`` to the cell at (``row``, ``colunm``).

        An empty cell (value ``None``) is treated as an empty string. If the
        concatenation or the assignment fails, ``"ERROR:append fail"`` is
        appended to the existing text instead.

        :raises TypeError: if the cell currently holds a non-text value
        """
        cell = self._parser_merged_cell(row, colunm)
        current = cell.value
        if current is None:
            current = ""
        elif not isinstance(current, str):
            # Explicit raise instead of ``assert`` so the check is not
            # stripped when Python runs with -O.
            raise TypeError(
                "cannot append text to a cell holding {}".format(
                    type(current).__name__))
        try:
            cell.value = current + cellvalue
        except Exception:
            cell.value = current + "ERROR:append fail"

    def set_image(self, fp: BytesIO, row: int, column: int, img_pixel_height=None, img_pixel_width=None):
        """Anchor an image at the cell (``row``, ``column``).

        :param fp: image source passed to ``Image``
        :param row: 1-based row of the anchor cell
        :param column: 1-based column of the anchor cell
        :param img_pixel_height: display height in pixels; defaults to the
            height reported by the loaded image
        :param img_pixel_width: display width in pixels; defaults to the
            width reported by the loaded image
        """
        cell = self._parser_merged_cell(row, column)
        image_data = Image(fp)
        w = image_data.width if img_pixel_width is None else img_pixel_width
        h = image_data.height if img_pixel_height is None else img_pixel_height
        size = XDRPositiveSize2D(pixels_to_EMU(w), pixels_to_EMU(h))
        # https://stackoverflow.com/questions/55309671/more-precise-image-placement-possible-with-openpyxl-pixel-coordinates-instead
        # AnchorMarker counts rows and columns from 0, unlike cell
        # coordinates, hence the -1 on both.
        marker = AnchorMarker(col=cell.column-1, row=cell.row-1)
        image_data.anchor = OneCellAnchor(_from=marker, ext=size)
        self.ws.add_image(image_data)

    def _parser_merged_cell(self, row, col):
        """Return the cell object that holds the data for (``row``, ``col``).

        If the addressed cell is a ``MergedCell``, the top-left cell of the
        merged range containing it is returned; otherwise the addressed cell
        itself is returned.

        :param row: 1-based row of the requested cell
        :param col: 1-based column of the requested cell
        :return: the cell object (not its value)
        """
        cell = self.ws.cell(row=row, column=col)
        if isinstance(cell, MergedCell):
            # Find the merged range this cell belongs to.
            for merged_range in self.ws.merged_cells.ranges:
                if cell.coordinate in merged_range:
                    # Redirect to the range's top-left cell.
                    cell = self.ws.cell(
                        row=merged_range.min_row, column=merged_range.min_col)
                    break
        return cell


if __name__ == "__main__":
    # Batch job: for every .xlsx workbook in the current directory, append
    # the Chinese translation to the text cells of selected columns and save
    # the result next to the source with a "_中文" suffix.

    # Skip previously generated outputs ("中文" in the name) and the "~$"
    # lock files Excel creates while a workbook is open.
    files = [
        file for file in os.listdir()
        if file.endswith(".xlsx")
        and "中文" not in file
        and not file.startswith("~$")
    ]
    translator = Translate(source='auto',target='zh-CN',fmt='text',proxies={'http': 'http://localhost:10080', 'https': 'http://localhost:10080'})
    for file in files:
        excel = ExcelOp(file=file,sheet_name='题库')
        # 1-based column indexes to translate. Workbooks whose file name
        # contains '3' or '4' use a different set of columns.
        columns = [2,8,9,10,11]
        if '3' in file or '4' in file:
            columns = [2,9,10,11,12]
        for i in columns:
            # Normalise the column to stripped text: empty cells become ""
            # and non-text values (numbers, dates) are converted with str()
            # so that .strip() cannot fail on them.
            english = [
                "" if value is None else str(value).strip()
                for value in excel.get_col_value(i)
            ]
            chinese = translator.translate(english)
            chinese = [txt.translatedText for txt in chinese]
            # Excel rows are 1-based, hence start=1.
            for row, (source, txt) in enumerate(zip(english, chinese), start=1):
                if source == txt:
                    # Nothing was translated for this cell.
                    continue
                if isinstance(excel.ws.cell(row=row, column=i), MergedCell):
                    # Covered cell of a merged range: ExcelOp resolves it to
                    # the range's top-left cell, so appending here would add
                    # the same translation to that cell again.
                    continue
                try:
                    excel.append_cell_value(row,i,txt)
                except Exception:
                    # Report the failing cell's column and keep going so one
                    # bad cell does not drop the rest of the column.
                    print("Error in {}, colum {}".format(file,i))
                    traceback.print_exc()
        # Output file name: source stem plus the "_中文" suffix.
        excel.wb.save(os.path.splitext(file)[0]+'_中文.xlsx')