在日常数据分析中,最常打交道的是csv文件以及list、dict类型。涉及到的主要需求有:
- 将一个二重列表[[],[]]写入到csv文件中
- 从文本文件中读取数据并以列表形式返回
- 将一个字典写入到csv文件中
- 从csv文件中读取一个字典
- 从csv文件中读取一个计数字典
实现如下:
def createListCSV(fileName="", dataList=None):
    """Write a two-level list to a CSV file, one inner list per row.

    Args:
        fileName: Path of the CSV file to create. An existing file is
            overwritten.
        dataList: Iterable of rows, each row being an iterable of cell
            values. ``None`` (the default) produces an empty file.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    import csv  # local import so the snippet works without a file-level import

    rows = [] if dataList is None else dataList
    # csv.writer needs a text-mode file on Python 3 ("wb" raises TypeError);
    # newline="" lets the writer emit its own line terminators untouched.
    with open(fileName, "w", newline="") as csvFile:
        csv.writer(csvFile).writerows(rows)
》
def readListCSV(fileName="", splitsymbol=","):
    """Read a delimited text file into a list of rows.

    Every non-empty line is split on ``splitsymbol`` with ``str.split``;
    no CSV quoting rules are applied. Blank lines are skipped, so data
    that follows a blank line is still returned instead of being dropped.

    Args:
        fileName: Path of the text file to read.
        splitsymbol: Separator between fields (default ``","``).

    Returns:
        A list with one list of string fields per non-empty line.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    dataList = []
    with open(fileName, "r") as csvFile:
        for rawLine in csvFile:
            dataLine = rawLine.strip("\n")
            # A blank line is not end-of-file; skip it and keep reading.
            if dataLine == "":
                continue
            dataList.append(dataLine.split(splitsymbol))
    return dataList
》
def createDictCSV(fileName="", dataDict=None):
    """Write a dictionary to a CSV file as two-column ``key,value`` rows.

    Args:
        fileName: Path of the CSV file to create. An existing file is
            overwritten.
        dataDict: Mapping whose items are written one per row, in the
            mapping's iteration order. ``None`` (the default) produces an
            empty file.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    import csv  # local import so the snippet works without a file-level import

    entries = {} if dataDict is None else dataDict
    # Text mode with newline="" is what csv.writer expects on Python 3;
    # dict.items() replaces the Python-2-only iteritems().
    with open(fileName, "w", newline="") as csvFile:
        csvWriter = csv.writer(csvFile)
        csvWriter.writerows([key, value] for key, value in entries.items())
》
def readDictCSV(fileName="", keyIndex=0, valueIndex=1, splitsymbol=","):
    """Read a delimited text file into a ``{key: value}`` dictionary.

    Every non-empty line is split on ``splitsymbol`` with ``str.split``;
    the field at ``keyIndex`` becomes the key and the field at
    ``valueIndex`` the value. When a key occurs more than once, the last
    occurrence wins. Blank lines are skipped.

    NOTE(review): a later definition in this file reuses the name
    ``readDictCSV`` for the counting reader and therefore shadows this
    one; consider renaming one of the two.

    Args:
        fileName: Path of the text file to read.
        keyIndex: Column index used as the dictionary key.
        valueIndex: Column index used as the dictionary value.
        splitsymbol: Separator between fields (default ``","``). The
            name was previously referenced without being defined.

    Returns:
        A dict mapping key strings to value strings.

    Raises:
        OSError: If the file cannot be opened for reading.
        IndexError: If a line has too few fields for the given indexes.
    """
    dataDict = {}
    with open(fileName, "r") as csvFile:
        for rawLine in csvFile:
            dataLine = rawLine.strip("\n")
            # A blank line is not end-of-file; skip it and keep reading.
            if dataLine == "":
                continue
            fields = dataLine.split(splitsymbol)
            dataDict[fields[keyIndex]] = fields[valueIndex]
    return dataDict
》
def readDictCSV(fileName="", keyIndex=0, splitsymbol=","):
    """Read a delimited text file and count occurrences of one column.

    Every non-empty line is split on ``splitsymbol`` with ``str.split``
    and the field at ``keyIndex`` is tallied. Blank lines are skipped.

    NOTE(review): this definition reuses the name of the key/value reader
    defined earlier in the file, so whichever is defined last is the one
    callers get; consider renaming one of the two.

    Args:
        fileName: Path of the text file to read.
        keyIndex: Column index whose values are counted.
        splitsymbol: Separator between fields (default ``","``). The
            name was previously referenced without being defined.

    Returns:
        A dict mapping each distinct field value to the number of lines
        on which it appeared.

    Raises:
        OSError: If the file cannot be opened for reading.
        IndexError: If a line has fewer than ``keyIndex + 1`` fields.
    """
    dataDict = {}
    with open(fileName, "r") as csvFile:
        for rawLine in csvFile:
            dataLine = rawLine.strip("\n")
            # A blank line is not end-of-file; skip it and keep reading.
            if dataLine == "":
                continue
            key = dataLine.split(splitsymbol)[keyIndex]
            dataDict[key] = dataDict.get(key, 0) + 1
    return dataDict