示例excel文件内容:
读取上述excel文件:
# coding:utf-8
import xlrd
from xlrd import xldate_as_tuple
import xlwt
import os
from datetime import datetime
current_path = os.path.dirname(os.path.abspath(__file__))
excel_path = os.path.join(current_path, "data", "test_excel.xlsx")
class UserInfo(object):
"""
对应于excel中的用户信息sheet页
"""
def __init__(self, *args):
if len(args) >= 4:
self._username = args[0]
self._gender = args[1]
self._age = args[2]
self._occupation = args[3]
def print_userinfo(self):
print(self._username, self._gender, self._age, self._occupation)
class MovieInfo(object):
"""
对应于excel中的电影信息sheet页
"""
def __init__(self, *args):
if len(args) >= 2:
self._movie_id = args[0]
self._title = args[1]
def print_movieinfo(self):
print(self._movie_id, self._title)
class Rating(object):
"""
对应于excel中的电影评分sheet页
"""
def __init__(self, *args):
if len(args) >= 4:
self._user_id = args[0]
self._movie_id = args[1]
self._rating = args[2]
self._timestamp = args[3]
def print_ratinginfo(self):
print(self._user_id, self._movie_id, self._rating, self._timestamp)
def load_excel(excel_path):
"""
加载excel表格
:param excel_path: excel 文件路径
:return:
"""
excel_data = xlrd.open_workbook(filename=excel_path)
print(excel_data.sheet_names())
return excel_data
def get_sheet_by_index(excel_data, index=0):
"""
根据索引号获取sheet页
:param excel_data:
:param index:
:return:
"""
sheet = excel_data.sheet_by_index(index)
return sheet
def get_sheet_cell_by_index(sheet, row_index, col_index):
"""
通过行、列索引获取sheet页指定cell中的内容
:param sheet:
:param row_index:
:param col_index:
:return:
"""
return sheet.cell_value(row_index, col_index)
def convert_cell_type(cell, cell_ctype):
"""
转换cell中读取到的数据的格式,如日期转换为XXXX/XX/XX格式
:param cell:
:param cell_ctype:
:return:
"""
# print(cell, cell_ctype)
if cell_ctype == 2 and cell_ctype % 1 == 0: # 如果是整形
cell = int(cell)
elif cell_ctype == 3: # 转成datetime对象
date = datetime(*xldate_as_tuple(cell, 0))
cell = date.strftime('%Y/%d/%m')
elif cell_ctype == 4: # 转换为bool类型
cell = True if cell == 1 else False
return cell
def save_excel(excel_path):
pass
if __name__ == '__main__':
excel_data = load_excel(excel_path)
userinfo_sheet = get_sheet_by_index(excel_data, 0)
movieinfo_sheet = get_sheet_by_index(excel_data, 1)
rating_sheet = get_sheet_by_index(excel_data, 2)
# 处理用户信息sheet页中的数据
userinfo_sheet_name = userinfo_sheet.name
userinfo_row_count = userinfo_sheet.nrows
userinfo_col_count = userinfo_sheet.ncols
userinfo_sheet_tags = userinfo_sheet.row_values(0)
print("sheet name -> %s, 行数 -> %s, 列数 -> %s, 属性名称 -> %s" % (
userinfo_sheet_name, userinfo_row_count, userinfo_col_count, userinfo_sheet_tags))
for row_index in range(1, userinfo_row_count):
row_content = []
for col_index in range(0, userinfo_col_count):
cell_ctype = userinfo_sheet.cell(row_index, col_index).ctype
cell = get_sheet_cell_by_index(userinfo_sheet, row_index, col_index)
cell = convert_cell_type(cell, cell_ctype)
row_content.append(cell)
# row_content = userinfo_sheet.row_values(row_index)
userinfo = UserInfo(*row_content)
userinfo.print_userinfo()
# 处理电影信息sheet页中的数据
movieinfo_sheet_name = movieinfo_sheet.name
movieinfo_row_count = movieinfo_sheet.nrows
movieinfo_col_count = movieinfo_sheet.ncols
movieinfo_sheet_tags = movieinfo_sheet.row_values(0)
print("sheet name -> %s, 行数 -> %s, 列数 -> %s, 属性名称 -> %s" % (
movieinfo_sheet_name, movieinfo_row_count, movieinfo_col_count, movieinfo_sheet_tags))
for row_index in range(1, movieinfo_row_count):
row_content = []
for col_index in range(movieinfo_col_count):
cell_ctype = movieinfo_sheet.cell(row_index, col_index).ctype
cell = get_sheet_cell_by_index(movieinfo_sheet, row_index, col_index)
cell = convert_cell_type(cell, cell_ctype)
row_content.append(cell)
movieinfo = MovieInfo(*row_content)
movieinfo.print_movieinfo()
# 处理电影评分sheet页中的数据
rating_sheet_name = rating_sheet.name
rating_row_count = rating_sheet.nrows
rating_col_count = rating_sheet.ncols
rating_sheet_tags = rating_sheet.row_values(0)
print("sheet name -> %s, 行数 -> %s, 列数 -> %s, 属性名称 -> %s" % (
rating_sheet_name, rating_row_count, rating_col_count, rating_sheet_tags))
for row_index in range(1, rating_row_count):
row_content = []
for col_index in range(rating_col_count):
cell_ctype = rating_sheet.cell(row_index, col_index).ctype
cell = get_sheet_cell_by_index(rating_sheet, row_index, col_index)
cell = convert_cell_type(cell, cell_ctype)
row_content.append(cell)
rating = Rating(*row_content)
rating.print_ratinginfo()