需求
解析 Excel 表中每个单元格的字符,去重并排序后输出到 txt 文件中,保存格式为 UTF-8
程序
- pip install xlrd(注意:xlrd 2.0 及以上版本只支持 .xls;若要读取 .xlsx,请安装 xlrd==1.2.0)
import xlrd
# Module-level accumulator: the sorted list of unique characters seen so far.
convert_list = []


def _cell_to_text(cell):
    """Return the text of one cell value so it can be split into characters."""
    if isinstance(cell, str):
        return cell
    if cell is None:
        return ""
    # Whole-number floats are rendered without the trailing ".0" so that a
    # numeric cell holding 42 contributes "4" and "2" only, not "." and "0".
    if isinstance(cell, float) and cell.is_integer():
        return str(int(cell))
    return str(cell)


def sort_one_row(row_list):
    """
    #!brief: collect the distinct characters of one row into convert_list
    #!param: row_list: all cell values of one excel row; strings are used
    #!       as-is, any other value (numbers, None) is converted to text first
    #!       instead of raising TypeError
    #!note:  convert_list is updated in place and is left sorted and free of
    #!       duplicates after every call
    """
    # Start from what earlier rows already contributed, then merge this row.
    unique_chars = set(convert_list)
    for element in row_list:
        # Iterating a string yields its characters; the set drops duplicates.
        unique_chars.update(_cell_to_text(element))
    # Deduplicate and sort once per row rather than once per cell.
    convert_list[:] = sorted(unique_chars)
def parse_excel_to_txt(file_path, output_path="T2_Character.txt", encoding="utf-16"):
    """
    #!brief: collect the distinct characters of the first sheet of an excel
    #!       workbook and write them, sorted, to a text file
    #!param: file_path: path of the excel workbook to read
    #!param: output_path: text file to (over)write; defaults to
    #!       "T2_Character.txt" in the current working directory
    #!param: encoding: encoding of the text file; defaults to "utf-16",
    #!       pass "utf-8" to get a UTF-8 file
    #!note:  the module-level convert_list is not reset here, so characters
    #!       gathered by earlier calls are written out again as well
    """
    # Open the workbook and take its first sheet.
    excel = xlrd.open_workbook(file_path)
    table = excel.sheet_by_index(0)

    # Feed every row's cell values into the shared character accumulator.
    for i in range(table.nrows):
        sort_one_row(table.row_values(i))

    # Encoding names as noted by the original author (editor label -> codec):
    #   ANSI    -> GBK
    #   UTF-8   -> UTF-8
    #   Unicode -> UTF-16
    # The with-statement closes the file, so no explicit close() is needed.
    with open(output_path, 'w', encoding=encoding) as f:
        f.writelines(convert_list)
# self test
# file_path = r"D:\Tool\Python\PythonProjects\T2 ExtractCharacter\T2Texts.xlsx"
# parse_excel_to_txt(file_path)