#coding=utf-8 """
Created on Wed Nov 28 18:39:17 2018
@author: **
"""import xlrd
import xlwt
import pandas
import numpy as np
defpandas_parse_xls(filename, col_index =[1], sub_index =None):"""
reference:
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html
func:
parse the xlsx file into the ndarray or list
args:
imgname_col_index:, list of int, refer the col index of image name field
mos_col_index: list of int, refer the col index of MOS field
std_index: list of ints, refer the col index of subjetive score area
return:
list of imgname, MOS and std_value
"""#header=1,index_col=1,usecols=[1,3]#DataFrame:Attributes and underlying data#DataFrame.get_values()#DataFrame.values:Return a Numpy representation of the DataFrame.#区域对应的列索引列表,从第4列到第15列if sub_index isNone:
Sub_score_area =range(3,15)#解析字符型数据字段时,注意将‘unicode’类型转为‘str’型,
imgname_list = pandas.read_excel(filename, usecols = imgname_col_index).astype('str').values
#将返回的数组降维,并转为列表
imgname_list = np.squeeze(imgname_list).tolist()##读取数值区域,计算每行的标准差,返回一维标准差## #<class 'pandas.core.frame.DataFrame'>
sub_DataFrame = pandas.read_excel(filename, parse_cols = Sub_score_area)# 将DataFrame类型转为ndarray,二维数组
sub_array = sub_DataFrame.values
#计算标准差
std_array = np.std(sub_array,axis=1,ddof=1)return imgname_list, std_array
defpandas_write_excel(filename, imgname_col_index =[0]):
imgname_DataFrame = pandas.read_excel(filename, usecols = imgname_col_index).astype('str').values
imgname_Arr = np.squeeze(imgname_DataFrame)
result =[]for item in imgname_Arr:ifnot item.endswith('.jpg'):
item = item +'.jpg'
result.append(item)# pandas库写操作必须将array或者list转化为DataFrame类型之后,才能进行
img_DataFrame = pandas.DataFrame(data = result, columns =['imagename'])# 保存文件名为filename
writer = pandas.ExcelWriter(filename)
img_DataFrame.to_excel(writer,index =False)
writer.close()defxlrd_parse_xls(path_xls):"""
reference: https://xlrd.readthedocs.io/en/latest/api.html
func:
#Cell object in the given row and column.
xlrd.sheet.cell(rowx,colx)
#Value of the cell in the given row,column.
xlrd.sheet.cell_value(rowx,colx)
#Returns a slice of the values of the cells in the given column.
xlrd.sheet.col_values(index_col) # the first index value is 1
#Returns a slice of the values of the cells in the given row.
xlrd.sheet.row_values(index_col)
"""
data = xlrd.open_workbook(path_xls)# get sheet
table = data.sheets()[0]# equal to data.sheet_by_index(0)#将‘unicode’转为‘str’
name_list =[item.encode('utf-8')for item in table.col_values(1,1)]
score_list = table.col_values(15,1)#print(name_list,score_list)return name_list,score_list
#==============================================================================# result = []# #循环遍历读取# row ,col = table.nrows ,table.ncols# for i in range(1,2):# for j in range(1,row):# #get cell value# temp_str = table.cell_value(j,i)# print(temp_str)# result.append(temp_str)#==============================================================================defwrite_xls(dest_xls):#creat workbook
work_book = xlwt.Workbook(encoding ='ascii')
work_sheet = work_book.add_sheet('sheet1')
work_sheet.write(0,0, label ='Row 0, Column 0 Value')
work_book.save('Excel_Workbook.xls')if __name__=='__main__':
pandas_parse_xls('MOS.xlsx')
xlrd_parse_xls('MOS.xlsx')