python 解析excel表并排重输出到txt

需求

解析excel表中每个单元格的元素,并且排重后输出到txt文件中,保存格式为UTF-8(注意:下方示例代码实际以 encoding='utf-16' 写出文件,如需UTF-8请改为 encoding='utf-8')

程序

  • pip install xlrd==1.2.0(xlrd 2.0 及以上版本只支持 .xls;要读取 .xlsx 需使用 1.2.0 版本,或改用 openpyxl)

import xlrd

# Module-level accumulator holding the unique characters collected so far.
# sort_one_row() rebinds it to a sorted, duplicate-free list on every call.
convert_list = []


def sort_one_row(row_list):
    """Merge the distinct characters of one spreadsheet row into ``convert_list``.

    Every cell is treated as text and split into its individual characters.
    Those characters are unioned with the ones already stored in the
    module-level ``convert_list``, which is then rebound to a sorted list
    without duplicates.

    Args:
        row_list: Iterable with the cell values of one row. Strings are used
            as-is. Any other value is converted with ``str()``; integral
            floats are reduced to ``int`` first so that ``3.0`` contributes
            ``"3"`` rather than ``"3.0"``. ``None`` cells are skipped.
    """
    global convert_list

    # Work on a set for the whole row and sort once at the end instead of
    # de-duplicating and re-sorting the accumulator after every cell.
    unique_chars = set(convert_list)
    for element in row_list:
        if element is None:
            continue
        if not isinstance(element, str):
            # Non-string cells (e.g. numbers, which xlrd hands back as
            # floats) are not iterable, so set(element) would raise
            # TypeError; convert them to text first.
            if isinstance(element, float) and element.is_integer():
                element = int(element)
            element = str(element)
        unique_chars.update(element)

    convert_list = sorted(unique_chars)


def parse_excel_to_txt(file_path, output_path="T2_Character.txt",
                       encoding="utf-16"):
    """Write the distinct characters of a workbook's first sheet to a text file.

    Each row of the first sheet is passed to ``sort_one_row``, which
    accumulates the unique characters in the module-level ``convert_list``.
    The contents of ``convert_list`` are then written to ``output_path``
    back-to-back, with no separator between characters.

    ``convert_list`` is not cleared here, so characters collected by earlier
    calls in the same process are also written.

    Args:
        file_path: Path of the Excel workbook to read with ``xlrd``.
        output_path: Destination text file; defaults to the original
            hard-coded name ``"T2_Character.txt"`` in the working directory.
        encoding: Codec used for the output file; defaults to the original
            hard-coded ``"utf-16"``. Pass ``"utf-8"`` for UTF-8 output.
    """
    workbook = xlrd.open_workbook(file_path)
    # Only the first sheet of the workbook is processed.
    table = workbook.sheet_by_index(0)
    for row_index in range(table.nrows):
        # row_values() returns the cell values of one row as a list.
        sort_one_row(table.row_values(row_index))

    # Editor encoding label -> Python codec name (carried over from the
    # original notes):
    #   ANSI    -> GBK
    #   UTF-8   -> UTF-8
    #   Unicode -> UTF-16
    # The with-statement closes the file; no explicit close() is needed.
    with open(output_path, "w", encoding=encoding) as f:
        f.writelines(convert_list)

# self test
# file_path = r"D:\Tool\Python\PythonProjects\T2 ExtractCharacter\T2Texts.xlsx"
# parse_excel_to_txt(file_path)


发布了35 篇原创文章 · 获赞 7 · 访问量 10万+
展开阅读全文

没有更多推荐了,返回首页

©️2019 CSDN 皮肤主题: 大白 设计师: CSDN官方博客

分享到微信朋友圈

×

扫一扫,手机浏览