python数据分析案例1-2

import pandas as pd
import matplotlib.pyplot as plt
from pylab import *
from matplotlib.ticker import  MultipleLocator
from matplotlib.ticker import  FormatStrFormatter



# Directory that holds the raw per-year input files.
# NOTE: this name shadows the builtin ``dir``; it is kept because the rest of
# the script refers to it.
dir = './数据/'

# Load the 2013-2015 raw files as DataFrames (python parsing engine).
train_2013 = pd.read_table('{}2013.txt'.format(dir), engine='python')
train_2014 = pd.read_table('{}2014.txt'.format(dir), engine='python')
train_2015 = pd.read_table('{}2015.txt'.format(dir), engine='python')


def dataRead(fileName):
    """Read a whitespace-separated text file into a list of parsed rows.

    The first line is treated as a header and skipped. Every remaining
    non-blank line is split on whitespace and each field is evaluated as a
    Python literal/expression (so ``"12"`` becomes ``12`` and ``"1.5"``
    becomes ``1.5``).

    Args:
        fileName: Path of the UTF-8 encoded file to read.

    Returns:
        A list with one list of parsed values per data line. Blank lines are
        skipped instead of producing empty rows.
    """
    print('read {name} context!'.format(name=fileName))
    dataList = []
    # The context manager guarantees the handle is closed, even on errors.
    with open(fileName, encoding='UTF-8') as f:
        next(f, None)  # discard the header line (no-op for an empty file)
        for line in f:
            fields = line.split()
            if not fields:
                # A blank line would otherwise yield an empty row that
                # breaks index-based consumers of the result.
                continue
            # NOTE(security): eval() executes arbitrary expressions found in
            # the file. Only use this on trusted data; consider int()/float()
            # or ast.literal_eval() if the input can come from elsewhere.
            dataList.append([eval(field) for field in fields])
    return dataList

# Write a header line and data rows to a tab-separated text file.
def writeTxt(dataList, fileName, strHead):
    """Append a header and tab-separated rows to a text file.

    Args:
        dataList: Iterable of rows; every item of a row is converted with
            ``str`` and the items are joined with tabs. May be empty, in
            which case only the header line is written.
        fileName: Path of the output file. It is opened in append mode, so
            calling this twice with the same path adds a second header and
            a second copy of the rows.
        strHead: Sequence of column-name strings written as the first line.

    The file is written as UTF-8 (the encoding ``dataRead`` expects) and is
    always closed before returning. Each row is written in full rather than
    being cut to the width of the first row.
    """
    lines = ['\t'.join(strHead)]
    lines.extend('\t'.join(str(item) for item in row) for row in dataList)
    with open(fileName, 'a', encoding='UTF-8') as f:
        # One batched write; every line, including the last, ends with '\n'.
        f.write('\n'.join(lines) + '\n')


def generateData(year, n_days, start_week):
    """Build placeholder rows (5 per calendar day) for one year.

    Each generated row has the layout
    ``[day_of_year, day_of_week, brand, 0, year, month, day_of_month]``
    where ``brand`` runs from 1 to 5 and the fourth field is a zero
    placeholder.

    Args:
        year: Calendar year to generate.
        n_days: Cumulative day count measured from 2013-01-01 up to and
            including the last day to generate (e.g. 365 for all of 2013,
            730 for all of 2014, 1461 for all of 2016). Days beyond the end
            of ``year`` are ignored; a count that ends before ``year``
            starts yields an empty list.
        start_week: Day-of-week number (1..7) of January 1st of ``year``.
            It advances by one per day and wraps from 7 back to 1.

    Returns:
        A list of rows ordered by day and then by brand.

    Notes:
        The cumulative offset is computed for any year instead of a fixed
        2013-2017 table, the same offset rule is applied to leap and
        non-leap years, and leap years are detected with the full Gregorian
        rule.
    """
    # Function-scope imports keep this block self-contained and avoid
    # clashing with names pulled in by the file's star import.
    import calendar
    from datetime import date

    # Days elapsed between 2013-01-01 and January 1st of the requested year.
    offset = (date(year, 1, 1) - date(2013, 1, 1)).days
    days_in_year = 366 if calendar.isleap(year) else 365
    # Number of days of this year to emit, clamped to [0, days_in_year].
    remaining = min(max(n_days - offset, 0), days_in_year)

    dataList = []
    day_of_year = 0
    for month in range(1, 13):
        month_length = calendar.monthrange(year, month)[1]
        for day in range(1, month_length + 1):
            if day_of_year >= remaining:
                return dataList
            day_of_year += 1
            for brand in range(1, 6):
                dataList.append(
                    [day_of_year, start_week, brand, 0, year, month, day]
                )
            # Advance the weekday, wrapping 7 -> 1.
            start_week = start_week % 7 + 1
    return dataList

def generateDict(data):
    """Index rows by a ``"<row[4]>-<row[5]>-<row[6]>-<row[2]>"`` string key.

    Args:
        data: Sequence of rows, each with at least seven items.

    Returns:
        A dict mapping the composed key to the row object itself. When two
        rows produce the same key, the later row wins.
    """
    key_positions = (4, 5, 6, 2)
    return {
        '-'.join(str(row[pos]) for pos in key_positions): row
        for row in data
    }

# The first argument is the shorter (incomplete) dictionary.
def combineData(dictOne, dictTwo):
    """Overlay the rows of ``dictOne`` onto the matching rows of ``dictTwo``.

    For every key present in both dicts, all items of the ``dictTwo`` row
    except the first are replaced in place by the corresponding items of the
    ``dictOne`` row. Keys found only in ``dictOne`` are ignored.

    Returns:
        ``dictTwo`` itself, after the in-place update.
    """
    for key, source_row in dictOne.items():
        if key not in dictTwo:
            continue
        target_row = dictTwo[key]
        target_row[1:] = source_row[1:]
    return dictTwo

def selectData(dict):
    """Return the values of the given mapping as a list, in iteration order.

    NOTE: the parameter name shadows the builtin ``dict``; it is kept so the
    function's interface stays unchanged.
    """
    return list(dict.values())


# Steps 1-2: read each year's raw rows and index them by their
# "year-month-day-brand" key so they can be merged with the complete data.
train2013 = generateDict(dataRead(dir + '2013.txt'))
train2014 = generateDict(dataRead(dir + '2014.txt'))
train2015 = generateDict(dataRead(dir + '2015.txt'))
train2016 = generateDict(dataRead(dir + '2016.txt'))

# Steps 3-6: for every year, generate the complete placeholder rows, index
# them the same way, overlay the raw rows onto them, and flatten the result
# back into a list of rows to use as model training data.
data2013 = selectData(
    combineData(train2013, generateDict(generateData(2013, 365, 2)))
)
data2014 = selectData(
    combineData(train2014, generateDict(generateData(2014, 730, 3)))
)
data2015 = selectData(
    combineData(train2015, generateDict(generateData(2015, 1095, 4)))
)
data2016 = selectData(
    combineData(train2016, generateDict(generateData(2016, 1461, 5)))
)
# The 2017 rows are only generated; they are not merged or written below.
data2017 = generateData(2017, 1826, 7)

# Step 7: write each year's merged rows to its own text file.
writeTxt(
    data2013,
    './data/data2013.txt',
    ['date', 'day_of_week', 'brand', 'cnt', 'year', 'month', 'day'],
)
writeTxt(
    data2014,
    './data/data2014.txt',
    ['date', 'day_of_week', 'brand', 'cnt', 'year', 'month', 'day'],
)
writeTxt(
    data2015,
    './data/data2015.txt',
    ['date', 'day_of_week', 'brand', 'cnt', 'year', 'month', 'day'],
)
writeTxt(
    data2016,
    './data/data2016.txt',
    ['date', 'day_of_week', 'brand', 'cnt', 'year', 'month', 'day'],
)
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值