from openpyxl import load_workbook
def get_newwords(read_file):
    """Return the cell values of every row after the first in "Sheet1".

    The first row of the worksheet is skipped (presumably a header row --
    confirm against the actual workbook layout). Each remaining row is
    turned into a list holding the ``.value`` of every cell in that row.

    Args:
        read_file: Workbook location, passed straight to ``load_workbook``.

    Returns:
        A list with one entry per remaining row; each entry is the list of
        that row's cell values. Empty when nothing follows the first row.
    """
    wb = load_workbook(read_file)
    sh = wb["Sheet1"]  # "Sheet1" is the worksheet name
    rows = iter(sh.rows)
    # Drop the first row lazily instead of copying every row into a list.
    next(rows, None)
    # Each inner list holds the contents of one xlsx row.
    return [[cell.value for cell in row] for row in rows]
# 读取子表
import pandas as pd
def read_excel(path):  # read every worksheet of a workbook
    """Collect per-sheet statistics and all distinct values of columns 0 and 1.

    Every worksheet is loaded without a header row, so columns are labelled
    by position (0, 1, ...). The sheet names are printed once before
    the sheets are read.

    Args:
        path: Workbook location, passed to ``pd.ExcelFile``.

    Returns:
        A ``(data, queries)`` tuple where ``data`` is a list of
        ``[sheet_name, distinct_count_of_column_0]`` entries in sheet order,
        and ``queries`` is a list (arbitrary order) of the distinct values
        found in columns 0 and 1 across all sheets.
    """
    data = []
    query_set = set()
    # The context manager guarantees the workbook handle is closed, even if
    # reading one of the sheets raises.
    with pd.ExcelFile(path) as data_xls:
        print(data_xls.sheet_names)  # show the worksheet names
        for name in data_xls.sheet_names:
            df = pd.read_excel(data_xls, sheet_name=name, header=None)
            # Empty or single-column sheets lack column 0 and/or 1; skip the
            # missing ones instead of failing with KeyError.
            for col in (0, 1):
                if col in df.columns:
                    query_set.update(df[col].unique())
            # Number of distinct entries in the first column of this sheet.
            distinct_first = len(df[0].unique()) if 0 in df.columns else 0
            data.append([name, distinct_first])
    return data, list(query_set)
# 按行遍历 DataFrame
# Walk a DataFrame row by row. `df` must already exist at this point;
# NOTE(review): row[0] / row[1] assume integer column labels, as produced by
# reading with header=None -- confirm for the DataFrame actually used.
for index, row in df.iterrows():
    print(index)  # index label of the current row
    print([row[0], row[1]])  # values of the first and second columns