python读取csv文件xls文件

最新推荐文章于 2024-05-14 23:21:33 发布

月夜星星雨

最新推荐文章于 2024-05-14 23:21:33 发布

阅读量784

点赞数

分类专栏： python数据分析

本文链接：https://blog.csdn.net/m493096871/article/details/89488325

版权

python数据分析专栏收录该内容

13 篇文章 0 订阅

订阅专栏

import os
# Directory holding the data file; the empty string makes os.path.join()
# return the bare file name, i.e. a path relative to the working directory.
DATADIR = ""
# Name of the CSV file that test() parses.
DATAFILE = "beatles-diskography.csv"
def parse_file(datafile, limit=10):
    """Read the leading rows of a comma-separated file into a list of dicts.

    The first line of the file is the header. Every following non-blank row
    becomes one dict that maps the stripped header names to the stripped
    field values of that row.

    Parsing is delegated to the ``csv`` module so that quoted fields which
    contain commas stay in one piece. Header and row are paired with
    ``zip``, so a row with fewer fields than the header yields a shorter
    dict and surplus fields are ignored instead of raising ``IndexError``.

    Args:
        datafile: Path of the CSV file to read.
        limit: Maximum number of data rows to return. Defaults to 10;
            pass ``None`` to read the whole file.

    Returns:
        A list with at most ``limit`` dicts, in file order. An empty file
        produces an empty list.
    """
    import csv
    from itertools import islice

    with open(datafile, "r") as ff:
        reader = csv.reader(ff)
        header = [name.strip() for name in next(reader, [])]
        # csv.reader reports a blank line as an empty list; skip those so
        # they neither produce empty entries nor count towards the limit.
        rows = (row for row in reader if row)
        return [
            {key: value.strip() for key, value in zip(header, row)}
            for row in islice(rows, limit)
        ]


def test():
    """Parse the configured CSV file and compare rows 1 and 10 with the expected dicts."""
    # Expected contents of the first and the tenth parsed data rows.
    expected_first = {
        'Title': 'Please Please Me',
        'UK Chart Position': '1',
        'Label': 'Parlophone(UK)',
        'Released': '22 March 1963',
        'US Chart Position': '-',
        'RIAA Certification': 'Platinum',
        'BPI Certification': 'Gold',
    }
    expected_tenth = {
        'Title': '',
        'UK Chart Position': '1',
        'Label': 'Parlophone(UK)',
        'Released': '10 July 1964',
        'US Chart Position': '-',
        'RIAA Certification': '',
        'BPI Certification': 'Gold',
    }

    records = parse_file(os.path.join(DATADIR, DATAFILE))

    assert records[0] == expected_first
    assert records[9] == expected_tenth


test()

pip install xlrd

import xlrd

# Path of the Excel workbook that is passed to parse_file() below.
datafile = "2013_ERCOT_Hourly_Load_Data.xls"


def parse_file(datafile):
    """Load the first sheet of an Excel workbook and print a tour of the xlrd API.

    The function reads every cell of the first sheet into a list of rows and
    then prints a handful of example lookups (single cells, a column slice,
    cell types and an Excel date conversion).

    Every ``print`` call takes exactly one argument, so the statements parse
    both as Python 2 print statements and as Python 3 function calls.

    Args:
        datafile: Path of the workbook to open with ``xlrd.open_workbook``.

    Returns:
        A list of rows, each row being the list of cell values of that row
        in the first sheet.

    Raises:
        IndexError: If the sheet is too small for the fixed example cells
            (row 3 / column 3 and row 1 / column 0) that are looked up.
    """
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)

    data = [[sheet.cell_value(r, col)
             for col in range(sheet.ncols)]
            for r in range(sheet.nrows)]

    print("\nList Comprehension")
    print("data[3][2]: {0}".format(data[3][2]))

    print("\nCells in a nested loop:")
    # Only row 50 is displayed, so read that row directly instead of walking
    # every row of the sheet; sheets with fewer rows print nothing here.
    example_row = 50
    if sheet.nrows > example_row:
        print(" ".join("{0}".format(sheet.cell_value(example_row, col))
                       for col in range(sheet.ncols)))

    ### other useful methods:
    print("\nROWS, COLUMNS, and CELLS:")
    print("Number of rows in the sheet: {0}".format(sheet.nrows))
    print("Type of data in cell (row 3, col 2): {0}".format(sheet.cell_type(3, 2)))
    print("Value in cell (row 3, col 2): {0}".format(sheet.cell_value(3, 2)))
    print("Get a slice of values in column 3, from rows 1-3:")
    print(sheet.col_values(3, start_rowx=1, end_rowx=4))

    print("\nDATES:")
    print("Type of data in cell (row 1, col 0): {0}".format(sheet.cell_type(1, 0)))
    exceltime = sheet.cell_value(1, 0)
    print("Time in Excel format: {0}".format(exceltime))
    # The second argument of xldate_as_tuple is the workbook datemode.
    print("Convert time to a Python datetime tuple, from the Excel float: {0}".format(
        xlrd.xldate_as_tuple(exceltime, 0)))

    return data

data = parse_file(datafile)

#!/usr/bin/env python
"""
Your task is as follows:
- read the provided Excel file
- find and return the min, max and average values for the COAST region
- find and return the time value for the min and max entries
- the time values should be returned as Python tuples

Please see the test function for the expected return format

"""

import xlrd
from zipfile import ZipFile
# Name of the workbook to analyse; open_zip() looks for an archive called "<datafile>.zip".
datafile = "2013_ERCOT_Hourly_Load_Data.xls"

def open_zip(datafile):
    """Extract every member of ``<datafile>.zip``.

    ``extractall()`` is called without a target path, so the members are
    written to the current working directory.

    Args:
        datafile: Name of the file expected inside the archive; ``.zip`` is
            appended to it to build the archive path.
    """
    with ZipFile('{0}.zip'.format(datafile), 'r') as myzip:
        myzip.extractall()

def parse_file(datafile):
    """Report the minimum, maximum and average of column 1 of the first sheet.

    Column 1 is the one the task description refers to as the COAST region;
    column 0 of the same row supplies the matching Excel timestamp.

    Args:
        datafile: Path of the workbook to open with ``xlrd.open_workbook``.

    Returns:
        A dict with these keys:
            ``maxtime``  - time tuple of the row holding the largest value
            ``maxvalue`` - the largest value
            ``mintime``  - time tuple of the row holding the smallest value
            ``minvalue`` - the smallest value
            ``avgcoast`` - arithmetic mean of all values

    Raises:
        ValueError: If the column has no rows after row 0 (``max`` of an
            empty list).
    """
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)

    # Row 0 is skipped - presumably the header row; confirm against the workbook.
    coast = sheet.col_values(1, start_rowx=1, end_rowx=None)
    maxval = max(coast)
    minval = min(coast)

    # ``coast`` starts at sheet row 1, so add 1 to turn a list index back
    # into a sheet row number.
    max_row = coast.index(maxval) + 1
    min_row = coast.index(minval) + 1

    # The second argument of xldate_as_tuple is the workbook datemode.
    max_time = xlrd.xldate_as_tuple(sheet.cell_value(max_row, 0), 0)
    min_time = xlrd.xldate_as_tuple(sheet.cell_value(min_row, 0), 0)

    return {
        'maxtime': max_time,
        'maxvalue': maxval,
        'mintime': min_time,
        'minvalue': minval,
        # float() keeps the division a true division under Python 2 as well.
        'avgcoast': sum(coast) / float(len(coast)),
    }

def test():
    """Extract the workbook, parse it and check the reported maximum."""
    open_zip(datafile)
    data = parse_file(datafile)

    assert data['maxtime'] == (2013, 8, 13, 17, 0, 0)
    # Both sides are rounded to 10 decimals before the comparison.
    assert round(data['maxvalue'], 10) == round(18779.02551, 10)

test()

月夜星星雨

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python读取csv文件xls文件

import osDATADIR = ""DATAFILE = "beatles-diskography.csv"def parse_file(datafile): data = [] with open(datafile, "r") as ff: header= ff.readline().split(",") counter = 0 ...
复制链接

扫一扫

专栏目录