数据清洗,数据格式
import pandas as pd
# Parse the input file into `user`: on every line the first integer is the
# dictionary key and the remaining integers become that key's list of values.
user = {}
with open("/home/kesci/input/com6636/user.txt", 'r') as f:
    for line in f:
        # Whitespace split tolerates trailing spaces, repeated separators and
        # the newline, none of which may reach int().
        fields = line.split()
        if not fields:
            # Blank line: nothing to record.
            continue
        user[int(fields[0])] = [int(value) for value in fields[1:]]
# Preview the series stored under key 105 as a two-column frame: "date" runs
# from 1 to 30 and "user" holds the values converted to float.
date_index = list(range(1, 31))
# Use a separate name so the parsed `user` dictionary is not overwritten by
# this single series.
user_105 = [float(value) for value in user[105]]
dict_1 = {"date": date_index, "user": user_105}
data = pd.DataFrame(dict_1)
data
date user
0 1 0.0
1 2 0.0
2 3 0.0
3 4 0.0
4 5 0.0
5 6 0.0
6 7 0.0
7 8 0.0
8 9 0.0
9 10 0.0
10 11 0.0
11 12 2.0
12 13 0.0
13 14 2.0
14 15 2.0
15 16 1.0
16 17 2.0
17 18 2.0
18 19 2.0
19 20 2.0
20 21 2.0
21 22 2.0
22 23 2.0
23 24 0.0
24 25 2.0
25 26 0.0
26 27 1.0
27 28 2.0
28 29 0.0
29 30 2.0
# 显示cell运行时长
import pandas as pd
# Parse the input file into `user`: on every line the first integer is the
# dictionary key and the remaining integers become that key's list of values.
user = {}
with open("/home/kesci/input/com6636/user.txt", 'r') as f:
    for line in f:
        # Whitespace split tolerates trailing spaces, repeated separators and
        # the newline, none of which may reach int().
        fields = line.split()
        if not fields:
            # Blank line: nothing to record.
            continue
        user[int(fields[0])] = [int(value) for value in fields[1:]]
#import pandas as pd
# print(i)
import warnings
warnings.filterwarnings("ignore")
#for key in dict(list(data_reg)):
from statsmodels.tsa.arima_model import ARIMA
import pandas as pd
def model_build(data, i, j):  # build the model
    """Return an unfitted ARIMA model of order ``(i, 1, j)`` for one series.

    Args:
        data: Iterable of values convertible to ``float``. It is paired with
            a fixed "date" column of 1..30, so it needs exactly 30 items for
            the frame construction to succeed.
        i: First component of the ARIMA ``order`` tuple.
        j: Third component of the ARIMA ``order`` tuple.

    Returns:
        The ``ARIMA`` instance built on the per-date sums of the series.
    """
    frame = pd.DataFrame({
        "date": list(range(1, 31)),
        "user": [float(value) for value in data],
    })
    # Rows whose date is 0 are excluded before the values are summed per date.
    nonzero_dates = frame[frame["date"] != 0]
    per_date_total = nonzero_dates.groupby(["date"])["user"].sum()
    return ARIMA(per_date_total.values, order=(i, 1, j))
def pro(l):  # post-process a forecast
    """Negate every value of ``l`` and replace negative results with ``0``.

    Works for a sequence of any length (including empty); every element is
    clamped, not just the first seven.

    Args:
        l: Iterable of numbers.

    Returns:
        A new list with ``-value`` for each input value, or ``0`` wherever
        that negation is below zero.
    """
    # The `< 0` test leaves NaN and -0.0 untouched.
    return [0 if -value < 0 else -value for value in l]
def acc(x, key_3):  # measure accuracy
    """Return the ratio of flagged predictions to non-zero recorded values.

    Only the first 29 positions are inspected. A prediction counts as flagged
    when it is greater than 0.09; a recorded value counts when it is not 0.

    Args:
        x: Indexable sequence of predictions with at least 29 items.
        key_3: Key into the module-level ``user`` dictionary.

    Returns:
        ``flagged / recorded`` as a float, or ``0.0`` when the series has no
        non-zero recorded value (the ratio would otherwise divide by zero).
    """
    history = user[key_3]
    flagged = sum(1 for idx in range(29) if x[idx] > 0.09)
    recorded = sum(1 for idx in range(29) if history[idx] != 0)
    if recorded == 0:
        return 0.0
    return flagged / recorded
from statsmodels.tsa.arima_model import ARIMA
import matplotlib.pyplot as plt
#model=model = ARIMA(dg_1.values, order=(1, 1, 2))
# Result accumulators filled by the prediction loop and written to disk at the
# end: `f_name` collects keys (as strings) whose forecast crosses the threshold,
# `loss_u` collects the non-zero flags returned by find_c.
f_name, loss_u = [], []
def find_c(u, key_2):  # search for ARIMA p and q values that can be fitted
    """Try ARIMA orders ``(x, 1, y)`` for x, y in 1..9 and forecast with one.

    For each ``x`` the ``y`` values are tried in ascending order until a model
    fits and ``predict(30, 36)`` succeeds. If the ``(9, 1, 9)`` candidate
    itself fails, a model of order ``(0, 1, 0)`` is fitted instead and the
    series is flagged.

    Args:
        u: Series passed through to ``model_build``.
        key_2: Identifier of the series; returned as the flag when the
            ``(0, 1, 0)`` fallback was used.

    Returns:
        A tuple ``(f_pred_1, flag_1)``: the most recently computed forecast
        and either ``0`` or ``key_2``.
    """
    flag_1 = 0
    for x in range(1, 10):
        for y in range(1, 10):
            try:
                model = model_build(u, x, y)
                results_AR = model.fit(disp=-1)
                f_pred_1 = results_AR.predict(30, 36)
                # NOTE(review): this leaves only the inner loop, so larger x
                # values are still tried and the last forecast wins. Confirm
                # whether stopping at the first fit was intended.
                break
            except Exception:
                # Exception (not a bare except) so Ctrl-C can stop the search.
                if x == 9 and y == 9:
                    # Last candidate failed: flag this series and fall back
                    # to the order-(0, 1, 0) model.
                    flag_1 = key_2
                    model = model_build(u, 0, 0)
                    results_AR = model.fit(disp=-1)
                    f_pred_1 = results_AR.predict(30, 36)
                else:
                    print("wait for .....")
    return (f_pred_1, flag_1)
# Forecast every series whose key is below 5000 and record the keys whose
# post-processed forecast contains a value of at least 0.09.
try:
    for key in range(0, 5000):
        if key not in user:
            continue
        print("speed=====" + str(key))
        try:
            # First choice: a fixed ARIMA(1, 1, 2) model.
            model = model_build(user[key], 1, 2)
            results_AR = model.fit(disp=-1)
            f_pred = results_AR.predict(30, 36)
            is_flagged = any(name >= 0.09 for name in pro(f_pred))
        except Exception:
            # The fixed order failed: let find_c search other orders.
            # Exception (not a bare except) keeps Ctrl-C able to stop the run.
            fallback_pred, failed_key = find_c(user[key], key)
            if failed_key != 0:
                loss_u.append(failed_key)
                print(len(loss_u))
            is_flagged = any(name >= 0.09 for name in pro(fallback_pred))
        if is_flagged:
            f_name.append(str(key))
finally:
    # Persist whatever was collected, even when the loop stopped early.
    with open("./f_name.txt", "a+") as f:
        f.writelines(str(n) + "\n" for n in f_name)
    with open("./loss_u.txt", "a+") as f_u:
        # Write through f_u; `f` is already closed at this point.
        f_u.writelines(str(u) + "\n" for u in loss_u)
#print(data_g(data_ac,14808,ac))
speed=====16
speed=====30
speed=====98
speed=====105
speed=====176
wait for .....
wait for .....
wait for .....
wait for .....
wait for .....
speed=====211
speed=====218
speed=====225