import pandas as pd
import matplotlib.pyplot as plt
from pylab import *
from matplotlib.ticker import MultipleLocator
from matplotlib.ticker import FormatStrFormatter

# Directory holding the raw per-year input files.
# NOTE(review): this name shadows the built-in dir(); it is left unchanged in
# case code outside this section refers to it.
dir = './数据/'


def dataRead(fileName):
    """Read a whitespace-separated text table into a list of rows.

    The first line of the file is treated as a header and discarded. Every
    following non-blank line is split on whitespace and each token is
    evaluated into a Python value.

    Args:
        fileName: Path of the UTF-8 encoded text file to read.

    Returns:
        A list with one list of parsed values per non-blank data line.
    """
    print('read {name} context!'.format(name=fileName))
    dataList = []
    with open(fileName, encoding='UTF-8') as f:
        next(f, None)  # Skip the header line (no-op on an empty file).
        for line in f:
            tokens = line.split()
            if not tokens:
                # A blank line used to yield an empty row, which later made
                # generateDict fail with IndexError; skip it instead.
                continue
            # NOTE(security): eval() executes arbitrary expressions found in
            # the file. Only use this on trusted data; if the columns are
            # known to be plain numbers, int()/float() or ast.literal_eval()
            # would be the safer choice.
            dataList.append([eval(token) for token in tokens])
    return dataList


def writeTxt(dataList, fileName, strHead):
    """Append a header line and tab-separated rows to a text file.

    The file is opened in append mode, so calling this twice on the same
    path adds a second header and a second copy of the rows.

    Args:
        dataList: Sequence of rows. Each row is written using as many
            columns as the first row has.
        fileName: Path of the output file.
        strHead: Sequence of column-name strings written as the first line.
    """
    with open(fileName, 'a') as f:
        f.write('\t'.join(strHead) + '\n')
        if not dataList:
            # Nothing to write besides the header (previously an IndexError).
            return
        width = len(dataList[0])
        f.writelines(
            '\t'.join(str(value) for value in row[:width]) + '\n'
            for row in dataList
        )


def generateData(year, n_days, start_week, *, n_brands=5, base_year=2013):
    """Build placeholder rows (cnt = 0) for every day and brand of a year.

    Each row has the layout
    ``[day_of_year, day_of_week, brand, 0, year, month, day_of_month]``.

    Args:
        year: Calendar year to generate.
        n_days: Day budget. For a non-leap year this is a running total
            counted from 1 January of ``base_year``; the days belonging to
            earlier years are subtracted. For a leap year the value is used
            as-is (this mirrors the historical behaviour of this function).
            In both cases at most one full year of days is produced.
        start_week: Day of week (1..7) of 1 January of ``year``.
        n_brands: Number of brands; one row is produced per brand per day.
        base_year: Year from which the running ``n_days`` total is counted.

    Returns:
        A list of rows ordered by day, then by brand.
    """
    # Imported locally so that the wildcard pylab import at the top of the
    # file cannot shadow these names.
    import calendar
    from datetime import date

    if calendar.isleap(year):
        days_wanted = n_days
    else:
        days_before = (date(year, 1, 1) - date(base_year, 1, 1)).days
        days_wanted = n_days - days_before

    dataList = []
    day_of_year = 0
    for month in range(1, 13):
        days_in_month = calendar.monthrange(year, month)[1]
        for day in range(1, days_in_month + 1):
            if day_of_year >= days_wanted:
                return dataList
            day_of_year += 1
            dataList.extend(
                [day_of_year, start_week, brand, 0, year, month, day]
                for brand in range(1, n_brands + 1)
            )
            # Advance the weekday, wrapping 7 -> 1.
            start_week = start_week % 7 + 1
    return dataList


def generateDict(data):
    """Index rows by a ``'year-month-day-brand'`` string key.

    Args:
        data: Rows whose columns 4, 5, 6 and 2 hold year, month, day of
            month and brand respectively.

    Returns:
        A dict mapping each key to its row. If several rows share a key the
        last one wins.
    """
    return {
        str(row[4]) + '-' + str(row[5]) + '-' + str(row[6]) + '-' + str(row[2]): row
        for row in data
    }


def combineData(dictOne, dictTwo):
    """Overlay the rows of a partial dict onto a complete dict.

    For every key present in both dicts, all columns except the first are
    copied from ``dictOne`` into the corresponding row of ``dictTwo``.
    ``dictTwo`` and its rows are modified in place.

    Args:
        dictOne: The shorter (incomplete) dict.
        dictTwo: The complete dict to update.

    Returns:
        ``dictTwo`` after the update.
    """
    for key, row in dictOne.items():
        if key in dictTwo:
            dictTwo[key][1:] = row[1:]
    return dictTwo


def selectData(dict):
    """Return the values of a dict as a list, in the dict's iteration order."""
    return [value for value in dict.values()]


if __name__ == '__main__':
    # These DataFrames are not used by the steps below; they are kept as
    # module-level names in case other code relies on them.
    train_2013 = pd.read_table(dir + '2013.txt', engine='python')
    train_2014 = pd.read_table(dir + '2014.txt', engine='python')
    train_2015 = pd.read_table(dir + '2015.txt', engine='python')

    # 1. Read the raw data.
    train2013 = dataRead(dir + '2013.txt')
    train2014 = dataRead(dir + '2014.txt')
    train2015 = dataRead(dir + '2015.txt')
    train2016 = dataRead(dir + '2016.txt')

    # 2. Index the raw rows by key so they can be merged with the complete
    #    data later on.
    train2013 = generateDict(train2013)
    train2014 = generateDict(train2014)
    train2015 = generateDict(train2015)
    train2016 = generateDict(train2016)

    # 3. Generate the complete set of rows for every year.
    data2013 = generateData(2013, 365, 2)
    data2014 = generateData(2014, 730, 3)
    data2015 = generateData(2015, 1095, 4)
    data2016 = generateData(2016, 1461, 5)
    data2017 = generateData(2017, 1826, 7)

    # 4. Index the complete rows by key as well.
    data2013 = generateDict(data2013)
    data2014 = generateDict(data2014)
    data2015 = generateDict(data2015)
    data2016 = generateDict(data2016)

    # 5. Merge the raw data into the complete data (operates on the dicts).
    data2013 = combineData(train2013, data2013)
    data2014 = combineData(train2014, data2014)
    data2015 = combineData(train2015, data2015)
    data2016 = combineData(train2016, data2016)

    # 6. Extract each year's rows to use as model training data.
    data2013 = selectData(data2013)
    data2014 = selectData(data2014)
    data2015 = selectData(data2015)
    data2016 = selectData(data2016)

    # 7. Write the rows from step 6 to text files.
    writeTxt(data2013, './data/data2013.txt', ['date', 'day_of_week', 'brand', 'cnt', 'year', 'month', 'day'])
    writeTxt(data2014, './data/data2014.txt', ['date', 'day_of_week', 'brand', 'cnt', 'year', 'month', 'day'])
    writeTxt(data2015, './data/data2015.txt', ['date', 'day_of_week', 'brand', 'cnt', 'year', 'month', 'day'])
    writeTxt(data2016, './data/data2016.txt', ['date', 'day_of_week', 'brand', 'cnt', 'year', 'month', 'day'])
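# Python data analysis case study 1-2
# (Page footer, apparently carried over from the source web page: latest recommended article published 2024-05-15 12:59:31.)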