python学习笔记之读写excel或csv文件

读写Excel与CSV数据技巧

原创已于 2024-04-29 11:45:52 修改 · 785 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#python #excel #csv

于 2018-11-28 19:06:15 首次发布

python 专栏收录该内容

22 篇文章

订阅专栏

读写excel数据方法

首选 pandas 库：读取用 pandas.read_excel 函数，写入用 DataFrame.to_excel 方法，相对比较简单。

#coding=utf-8 
"""
Created on Wed Nov 28 18:39:17 2018
@author: **
"""
import xlrd
import xlwt
import pandas
import numpy as np

def pandas_parse_xls(filename, col_index=None, sub_index=None):
    """Read image names and per-row score standard deviations from a workbook.

    Reference:
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html

    Args:
        filename: path of the Excel workbook; the first sheet is read and its
            first row is used as the header (pandas defaults).
        col_index: 0-based column selection (anything ``read_excel`` accepts
            as ``usecols``) holding the image names. Defaults to ``[1]``.
        sub_index: 0-based column selection holding the subjective scores.
            Defaults to columns 3..14, i.e. the 4th through the 15th column.

    Returns:
        A tuple ``(imgname_list, std_array)``. ``imgname_list`` is a list of
        ``str`` (a list of lists when several name columns are selected) and
        ``std_array`` is a 1-D ndarray with the sample standard deviation
        (``ddof=1``) of every row of the score area.
    """
    if col_index is None:
        col_index = [1]
    # Default score area: 4th to 15th column (0-based 3..14).
    if sub_index is None:
        sub_index = list(range(3, 15))

    # Cast to str so numeric-looking names are returned as text as well.
    name_values = pandas.read_excel(filename, usecols=col_index).astype('str').values
    # Flatten a single name column explicitly; np.squeeze would also collapse
    # a one-row sheet into a bare string instead of a one-element list.
    if name_values.shape[1] == 1:
        name_values = name_values[:, 0]
    imgname_list = name_values.tolist()

    # `usecols` replaces the `parse_cols` keyword, which pandas removed.
    sub_array = pandas.read_excel(filename, usecols=sub_index).values
    # Row-wise sample standard deviation of the 2-D score array.
    std_array = np.std(sub_array, axis=1, ddof=1)

    return imgname_list, std_array
    
def pandas_write_excel(filename, imgname_col_index=None):
    """Rewrite a workbook so it holds one 'imagename' column of '.jpg' names.

    The selected name column is read from ``filename``, '.jpg' is appended to
    every entry that does not already end with it, and the result is written
    back to the SAME file, replacing its previous content.

    Args:
        filename: path of the Excel workbook to read and then overwrite.
        imgname_col_index: 0-based column selection (``usecols`` of
            ``read_excel``) holding the image names. Defaults to ``[0]``.
    """
    if imgname_col_index is None:
        imgname_col_index = [0]

    name_frame = pandas.read_excel(filename, usecols=imgname_col_index)
    # ravel() always yields a 1-D array; np.squeeze turned a one-row sheet
    # into a 0-d array that cannot be iterated.
    names = name_frame.astype('str').values.ravel()

    result = [name if name.endswith('.jpg') else name + '.jpg' for name in names]

    # pandas can only write DataFrames, so wrap the plain list first.
    img_frame = pandas.DataFrame(data=result, columns=['imagename'])
    # The context manager saves and closes the file even if writing fails.
    with pandas.ExcelWriter(filename) as writer:
        img_frame.to_excel(writer, index=False)

def xlrd_parse_xls(path_xls):
    """Read a name column and a score column from the first sheet via xlrd.

    Reference: https://xlrd.readthedocs.io/en/latest/api.html

    xlrd quick reference (row and column indices are 0-based):
        sheet.cell(rowx, colx)        -> Cell object at the given position
        sheet.cell_value(rowx, colx)  -> value of the cell at that position
        sheet.col_values(colx, start_rowx=0, end_rowx=None)
                                      -> slice of the values in a column
        sheet.row_values(rowx, start_colx=0, end_colx=None)
                                      -> slice of the values in a row

    NOTE(review): xlrd 2.x only opens legacy ``.xls`` workbooks; confirm the
    installed xlrd version before passing an ``.xlsx`` path.

    Args:
        path_xls: path of the workbook to open.

    Returns:
        A tuple ``(name_list, score_list)``: the values of column 1 as ``str``
        and the raw values of column 15, both starting at row 1 so that the
        header row is skipped.
    """
    workbook = xlrd.open_workbook(path_xls)
    # First sheet; equivalent to workbook.sheet_by_index(0).
    table = workbook.sheets()[0]

    # str() keeps the names as text on Python 3 (``.encode('utf-8')`` would
    # produce bytes) and also tolerates numeric cells.
    name_list = [str(item) for item in table.col_values(1, 1)]

    score_list = table.col_values(15, 1)

    return name_list, score_list
       
#==============================================================================
#     result = []
#     #循环遍历读取
#     row ,col = table.nrows ,table.ncols
#     for i in range(1,2):
#         for j in range(1,row):
#             #get cell value
#             temp_str = table.cell_value(j,i)
#             print(temp_str)
#             result.append(temp_str)
#==============================================================================
def write_xls(dest_xls):
    """Create a one-sheet ``.xls`` workbook with a sample cell and save it.

    Args:
        dest_xls: path the workbook is saved to.
    """
    # Create the workbook and a single sheet named 'sheet1'.
    work_book = xlwt.Workbook(encoding = 'ascii')
    work_sheet = work_book.add_sheet('sheet1')
    # write(row, col, label): put a sample value into the top-left cell.
    work_sheet.write(0, 0, label = 'Row 0, Column 0 Value')
    # Save to the requested destination; the argument used to be ignored in
    # favour of the hard-coded name 'Excel_Workbook.xls'.
    work_book.save(dest_xls)
    
if __name__ == '__main__':
    # Demo entry point: run both Excel readers on the same sample workbook.
    sample_workbook = 'MOS.xlsx'
    pandas_parse_xls(sample_workbook)
    xlrd_parse_xls(sample_workbook)

读写csv数据方法

python自带csv库进行读写

# coding=utf-8

def read_csv(input_csv_path):
    """Print the middle fields of every data row of a CSV file.

    The first row is treated as the header and skipped. For each remaining
    row, the fields from index 1 up to (but not including) the last field are
    printed, followed by the running number of data rows read so far.

    Args:
        input_csv_path: path of a UTF-8 encoded CSV file.
    """
    import csv  # local import so this snippet works on its own

    count = 0  # number of data rows seen so far
    # newline='' lets the csv module handle line endings itself; the context
    # manager closes the file even if parsing fails.
    with open(input_csv_path, 'r', newline='', encoding='utf-8') as f:
        for index, row in enumerate(csv.reader(f)):
            # Skip the header row holding the field names.
            if index == 0:
                continue
            count += 1
            # Everything between the first and the last field.
            pts = row[1:-1]
            print(pts)
            print(count)


def write_csv():
    """Write a header plus one row per image file to 'csv_path.csv'.

    Every entry of ``input_test_dir`` whose name ends with '.JPG', '.jpg' or
    '.png' produces the row ``[image path, appid, 2, 3]``.

    NOTE(review): ``input_test_dir`` and ``appid`` are read as module-level
    names and are not defined in this snippet; the caller's module must
    provide them.
    NOTE(review): the header declares eight fields but each row carries only
    four values; the coordinate fields are presumably meant to be filled in
    by a detection call at the marked spot - confirm.
    """
    import csv  # local imports so this snippet works on its own
    import os

    # The third coordinate field used to be a second 'x0' (typo for 'x1').
    field_names = ['imageName', 'x0', 'y0', 'x1', 'y1', 'appid', 'rows', 'cols']
    wm_rows = 2
    wm_cols = 3
    image_suffixes = (".JPG", ".jpg", ".png")

    with open('csv_path.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f, dialect='excel')
        # First line: the field names.
        writer.writerow(field_names)
        for item in os.listdir(input_test_dir):
            if not item.endswith(image_suffixes):
                continue
            img_name = os.path.join(input_test_dir, item)
            # Placeholder: the call producing the coordinates belongs here.
            # One list per line, one list element per column.
            writer.writerow([img_name, appid, wm_rows, wm_cols])

pandas库解析csv

# coding=utf-8
import os
import json
import csv
import pandas
import shutil
import numpy as np

#读取CSV
def get_csv_data(input_csv_path, input_csv_path1):
    """Demonstrate several ways of loading and walking CSV data with pandas.

    ``input_csv_path1`` is loaded with the pandas defaults and printed column
    by column and row by row. ``input_csv_path`` is then scanned per image:
    for every value of its 'image_name' column the matching 'points' and
    'uuid' entries are printed, skipping images where either is missing.

    Useful DataFrame attributes
    (https://pandas.pydata.org/pandas-docs/stable/reference/frame.html):
        df.shape       (rows, columns)
        df.info()      index, dtypes and memory usage
        df.describe()  summary statistics of the numeric columns
        df.dtypes      dtype of every column
        df.axes        row and column labels
        df.columns     column labels

    Args:
        input_csv_path: CSV file with a header row containing the columns
            'image_name', 'points' and 'uuid'.
        input_csv_path1: CSV file that is only dumped to stdout.

    Raises:
        FileNotFoundError: if ``input_csv_path`` does not exist.
    """
    if not os.path.exists(input_csv_path):
        raise FileNotFoundError("%s not found" % input_csv_path)
    # header=None: do not treat the first line as column names (for files
    # that have row names only). Kept purely as an illustration.
    data = pandas.read_csv(input_csv_path, header=None)
    # index_col=None: do not use any column as row names (for files that
    # have column names only).
    data1 = pandas.read_csv(input_csv_path, index_col=None)
    # NOTE(review): the original comment says this file has both row and
    # column names; the pandas defaults are kept here - confirm whether
    # index_col=0 is wanted.
    df = pandas.read_csv(input_csv_path1)

    # Walk the data column by column.
    for col in df.columns:
        print('列名：', col)
        print('列数据：', df[col].values)

    # Walk the data row by row.
    for index, row in df.iterrows():
        print('行名：', index)
        print('行数据：', row.values)

    # The header-parsed frame is the one carrying the named columns; a
    # DataFrame has no .parse(), that method belongs to pandas.ExcelFile.
    files = data1.get('image_name')  # pandas Series of image names
    reader = data1.set_index('image_name')
    for name in files:
        col_items = reader[reader.index == name]  # DataFrame of matching rows
        if col_items.empty:
            # e.g. a missing name, which never compares equal to the index
            continue
        pts = col_items['points'].values
        uuid = col_items['uuid'].values
        # .any() keeps the test well-defined when several rows share a name.
        if pandas.isnull(pts).any() or pandas.isnull(uuid).any():
            continue
        print(name, '--->', pts, '---->', uuid[0])


def get_pts_xls(input_path, data_dir, save_dir):
    """Read point coordinates from 'Sheet1' and hand them to the ROI transfer.

    For a row of the sheet, the value in column 1 is taken as the image file
    name and columns 4..11 as eight coordinate values, which are joined into
    one comma-separated string and passed to ``homography_transfer_roi``
    together with the source and destination image paths.

    NOTE(review): ``homography_transfer_roi`` is not defined in this snippet;
    it must be provided by the surrounding module.
    NOTE(review): the loop stops after the first row because of the trailing
    ``break``; this looks like a debugging leftover - confirm before removing.

    Useful DataFrame attributes
    (https://pandas.pydata.org/pandas-docs/stable/reference/frame.html):
        df.shape, df.info(), df.describe(), df.dtypes, df.axes, df.columns

    Args:
        input_path: path of the Excel workbook holding the annotations.
        data_dir: directory containing the source images.
        save_dir: directory the destination image paths are built from.

    Raises:
        FileNotFoundError: if ``input_path`` or ``data_dir`` does not exist.
    """
    if not os.path.exists(input_path):
        raise FileNotFoundError("%s not found" % input_path)

    if not os.path.exists(data_dir):
        raise FileNotFoundError("%s not found" % data_dir)

    data = pandas.ExcelFile(input_path)
    reader = data.parse(sheet_name="Sheet1")
    # iterrows() yields (index label, row) pairs where row is a Series.
    for index, row in reader.iterrows():
        # Use .iloc for positional access: plain row[1] on a label-indexed
        # Series relies on a deprecated integer-as-position fallback.
        image_name = row.iloc[1]
        pts = row.iloc[4:12]

        ptsList = ','.join(str(value) for value in pts.values)
        src = os.path.join(data_dir, image_name)
        dst = os.path.join(save_dir, image_name)
        homography_transfer_roi(src, ptsList, dst)
        break

if __name__ == "__main__":
    # Demo entry point: annotation workbook, source image folder, ROI output folder.
    workbook_path, image_dir, roi_dir = r"./rec.xlsx", r"./data", r"./roi"
    get_pts_xls(workbook_path, image_dir, roi_dir)