司机出车分层

司机时间切片(144*10分钟 = 24小时)

select
city_id,driver_id
,sum(case when gap= 0   then rate_online_time else 0    end) as m1
,sum(case when gap= 1   then rate_online_time else 0    end) as m2
,sum(case when gap= 2   then rate_online_time else 0    end) as m3
,sum(case when gap= 3   then rate_online_time else 0    end) as m4
,sum(case when gap= 4   then rate_online_time else 0    end) as m5
,sum(case when gap= 5   then rate_online_time else 0    end) as m6
,sum(case when gap= 6   then rate_online_time else 0    end) as m7
,sum(case when gap= 7   then rate_online_time else 0    end) as m8
,sum(case when gap= 8   then rate_online_time else 0    end) as m9
,sum(case when gap= 9   then rate_online_time else 0    end) as m10
,sum(case when gap= 10  then rate_online_time else 0    end) as m11
,sum(case when gap= 11  then rate_online_time else 0    end) as m12
,sum(case when gap= 12  then rate_online_time else 0    end) as m13
,sum(case when gap= 13  then rate_online_time else 0    end) as m14
,sum(case when gap= 14  then rate_online_time else 0    end) as m15
,sum(case when gap= 15  then rate_online_time else 0    end) as m16
,sum(case when gap= 16  then rate_online_time else 0    end) as m17
,sum(case when gap= 17  then rate_online_time else 0    end) as m18
,sum(case when gap= 18  then rate_online_time else 0    end) as m19
,sum(case when gap= 19  then rate_online_time else 0    end) as m20
,sum(case when gap= 20  then rate_online_time else 0    end) as m21
,sum(case when gap= 21  then rate_online_time else 0    end) as m22
,sum(case when gap= 22  then rate_online_time else 0    end) as m23
,sum(case when gap= 23  then rate_online_time else 0    end) as m24
,sum(case when gap= 24  then rate_online_time else 0    end) as m25
,sum(case when gap= 25  then rate_online_time else 0    end) as m26
,sum(case when gap= 26  then rate_online_time else 0    end) as m27
,sum(case when gap= 27  then rate_online_time else 0    end) as m28
,sum(case when gap= 28  then rate_online_time else 0    end) as m29
,sum(case when gap= 29  then rate_online_time else 0    end) as m30
,sum(case when gap= 30  then rate_online_time else 0    end) as m31
,sum(case when gap= 31  then rate_online_time else 0    end) as m32
,sum(case when gap= 32  then rate_online_time else 0    end) as m33
,sum(case when gap= 33  then rate_online_time else 0    end) as m34
,sum(case when gap= 34  then rate_online_time else 0    end) as m35
,sum(case when gap= 35  then rate_online_time else 0    end) as m36
,sum(case when gap= 36  then rate_online_time else 0    end) as m37
,sum(case when gap= 37  then rate_online_time else 0    end) as m38
,sum(case when gap= 38  then rate_online_time else 0    end) as m39
,sum(case when gap= 39  then rate_online_time else 0    end) as m40
,sum(case when gap= 40  then rate_online_time else 0    end) as m41
,sum(case when gap= 41  then rate_online_time else 0    end) as m42
,sum(case when gap= 42  then rate_online_time else 0    end) as m43
,sum(case when gap= 43  then rate_online_time else 0    end) as m44
,sum(case when gap= 44  then rate_online_time else 0    end) as m45
,sum(case when gap= 45  then rate_online_time else 0    end) as m46
,sum(case when gap= 46  then rate_online_time else 0    end) as m47
,sum(case when gap= 47  then rate_online_time else 0    end) as m48
,sum(case when gap= 48  then rate_online_time else 0    end) as m49
,sum(case when gap= 49  then rate_online_time else 0    end) as m50
,sum(case when gap= 50  then rate_online_time else 0    end) as m51
,sum(case when gap= 51  then rate_online_time else 0    end) as m52
,sum(case when gap= 52  then rate_online_time else 0    end) as m53
,sum(case when gap= 53  then rate_online_time else 0    end) as m54
,sum(case when gap= 54  then rate_online_time else 0    end) as m55
,sum(case when gap= 55  then rate_online_time else 0    end) as m56
,sum(case when gap= 56  then rate_online_time else 0    end) as m57
,sum(case when gap= 57  then rate_online_time else 0    end) as m58
,sum(case when gap= 58  then rate_online_time else 0    end) as m59
,sum(case when gap= 59  then rate_online_time else 0    end) as m60
,sum(case when gap= 60  then rate_online_time else 0    end) as m61
,sum(case when gap= 61  then rate_online_time else 0    end) as m62
,sum(case when gap= 62  then rate_online_time else 0    end) as m63
,sum(case when gap= 63  then rate_online_time else 0    end) as m64
,sum(case when gap= 64  then rate_online_time else 0    end) as m65
,sum(case when gap= 65  then rate_online_time else 0    end) as m66
,sum(case when gap= 66  then rate_online_time else 0    end) as m67
,sum(case when gap= 67  then rate_online_time else 0    end) as m68
,sum(case when gap= 68  then rate_online_time else 0    end) as m69
,sum(case when gap= 69  then rate_online_time else 0    end) as m70
,sum(case when gap= 70  then rate_online_time else 0    end) as m71
,sum(case when gap= 71  then rate_online_time else 0    end) as m72
,sum(case when gap= 72  then rate_online_time else 0    end) as m73
,sum(case when gap= 73  then rate_online_time else 0    end) as m74
,sum(case when gap= 74  then rate_online_time else 0    end) as m75
,sum(case when gap= 75  then rate_online_time else 0    end) as m76
,sum(case when gap= 76  then rate_online_time else 0    end) as m77
,sum(case when gap= 77  then rate_online_time else 0    end) as m78
,sum(case when gap= 78  then rate_online_time else 0    end) as m79
,sum(case when gap= 79  then rate_online_time else 0    end) as m80
,sum(case when gap= 80  then rate_online_time else 0    end) as m81
,sum(case when gap= 81  then rate_online_time else 0    end) as m82
,sum(case when gap= 82  then rate_online_time else 0    end) as m83
,sum(case when gap= 83  then rate_online_time else 0    end) as m84
,sum(case when gap= 84  then rate_online_time else 0    end) as m85
,sum(case when gap= 85  then rate_online_time else 0    end) as m86
,sum(case when gap= 86  then rate_online_time else 0    end) as m87
,sum(case when gap= 87  then rate_online_time else 0    end) as m88
,sum(case when gap= 88  then rate_online_time else 0    end) as m89
,sum(case when gap= 89  then rate_online_time else 0    end) as m90
,sum(case when gap= 90  then rate_online_time else 0    end) as m91
,sum(case when gap= 91  then rate_online_time else 0    end) as m92
,sum(case when gap= 92  then rate_online_time else 0    end) as m93
,sum(case when gap= 93  then rate_online_time else 0    end) as m94
,sum(case when gap= 94  then rate_online_time else 0    end) as m95
,sum(case when gap= 95  then rate_online_time else 0    end) as m96
,sum(case when gap= 96  then rate_online_time else 0    end) as m97
,sum(case when gap= 97  then rate_online_time else 0    end) as m98
,sum(case when gap= 98  then rate_online_time else 0    end) as m99
,sum(case when gap= 99  then rate_online_time else 0    end) as m100
,sum(case when gap= 100 then rate_online_time else 0    end) as m101
,sum(case when gap= 101 then rate_online_time else 0    end) as m102
,sum(case when gap= 102 then rate_online_time else 0    end) as m103
,sum(case when gap= 103 then rate_online_time else 0    end) as m104
,sum(case when gap= 104 then rate_online_time else 0    end) as m105
,sum(case when gap= 105 then rate_online_time else 0    end) as m106
,sum(case when gap= 106 then rate_online_time else 0    end) as m107
,sum(case when gap= 107 then rate_online_time else 0    end) as m108
,sum(case when gap= 108 then rate_online_time else 0    end) as m109
,sum(case when gap= 109 then rate_online_time else 0    end) as m110
,sum(case when gap= 110 then rate_online_time else 0    end) as m111
,sum(case when gap= 111 then rate_online_time else 0    end) as m112
,sum(case when gap= 112 then rate_online_time else 0    end) as m113
,sum(case when gap= 113 then rate_online_time else 0    end) as m114
,sum(case when gap= 114 then rate_online_time else 0    end) as m115
,sum(case when gap= 115 then rate_online_time else 0    end) as m116
,sum(case when gap= 116 then rate_online_time else 0    end) as m117
,sum(case when gap= 117 then rate_online_time else 0    end) as m118
,sum(case when gap= 118 then rate_online_time else 0    end) as m119
,sum(case when gap= 119 then rate_online_time else 0    end) as m120
,sum(case when gap= 120 then rate_online_time else 0    end) as m121
,sum(case when gap= 121 then rate_online_time else 0    end) as m122
,sum(case when gap= 122 then rate_online_time else 0    end) as m123
,sum(case when gap= 123 then rate_online_time else 0    end) as m124
,sum(case when gap= 124 then rate_online_time else 0    end) as m125
,sum(case when gap= 125 then rate_online_time else 0    end) as m126
,sum(case when gap= 126 then rate_online_time else 0    end) as m127
,sum(case when gap= 127 then rate_online_time else 0    end) as m128
,sum(case when gap= 128 then rate_online_time else 0    end) as m129
,sum(case when gap= 129 then rate_online_time else 0    end) as m130
,sum(case when gap= 130 then rate_online_time else 0    end) as m131
,sum(case when gap= 131 then rate_online_time else 0    end) as m132
,sum(case when gap= 132 then rate_online_time else 0    end) as m133
,sum(case when gap= 133 then rate_online_time else 0    end) as m134
,sum(case when gap= 134 then rate_online_time else 0    end) as m135
,sum(case when gap= 135 then rate_online_time else 0    end) as m136
,sum(case when gap= 136 then rate_online_time else 0    end) as m137
,sum(case when gap= 137 then rate_online_time else 0    end) as m138
,sum(case when gap= 138 then rate_online_time else 0    end) as m139
,sum(case when gap= 139 then rate_online_time else 0    end) as m140
,sum(case when gap= 140 then rate_online_time else 0    end) as m141
,sum(case when gap= 141 then rate_online_time else 0    end) as m142
,sum(case when gap= 142 then rate_online_time else 0    end) as m143
,sum(case when gap= 143 then rate_online_time else 0    end) as m144
from
(select  city_id
        ,driver_id
        ,hour
        ,gap
        ,round(avg(online_min),4) as rate_online_time
from 
     (
     select   city_id
              ,current_stat_date
              ,driver_id
              ,hour
              ,floor((hour(minute)*60+minute(minute))/10) as gap
              ,count(distinct time)
              ,round(count(distinct time)/10,4) as online_min
     from 
     (
     select  
        country_code
        ,city_id
        ,weekofyear(concat_ws('-', substr(pt, 1, 4), substr(pt, 5, 2), substr(pt, 7, 2))) as week_num
        ,date_format(to_date(stat_start_hour), 'u') week_day
        ,pt
        ,case   when car_level in (1300, 20001, 2400) then 10001
                    when car_level in (1800, 21018) then 10002
                    when car_level in (21096, 21114) then 10003
             end as product_ds_id
        ,driver_id
        ,to_date(from_unixtime(unix_timestamp(time) + (2 * 3600))) as current_stat_date
        ,hour(from_unixtime(unix_timestamp(time) + (2 * 3600)))    as hour
        ,from_unixtime(unix_timestamp(time) + (2 * 3600))  as minute
        ,from_unixtime(unix_timestamp(time) + (2 * 3600))  as time
        from    drv_table
    where   pt between regexp_replace('${start_date}', '-', '') and regexp_replace('${end_date}', '-', '')
      and   country_code in ('BR', 'MX')
     )a group by city_id,driver_id,current_stat_date,hour,floor((hour(minute)*60+minute(minute))/10)
)b group by city_id ,driver_id ,hour ,gap
) c
group by city_id,driver_id;
import numpy as np
import pandas as pd
import time
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import matplotlib.cm as cm
import matplotlib.pyplot as plt

n_clu=4

data_raw_1 = pd.read_csv("/Users/didi/Desktop/澳洲挑单与忠诚司机收入指标分析/driver_data.csv", sep=',')
data_raw_1 = data_raw_1.fillna(0)

data_raw = data_raw_1[data_raw_1.city_id==61020600]
data_raw.tail()

city_id_list = data_raw['city_id'].unique()
for city_id in city_id_list:
    data_all = data_raw[data_raw.city_id==city_id]
    #data_all = data_raw
    x = data_all.ix[:, 2:].values
    u,s,v = np.linalg.svd(x, full_matrices=False)
    v0 = v[0, :]
    v1 = v[1, :]
    v2 = v[2, :]
    v3 = v[3, :]

    corr0 = []
    corr1 = []
    corr2 = []
    corr3 = []

    for i in range(len(x)):
        corr0.append((np.corrcoef(x[i],v0))[0][1])
        corr1.append((np.corrcoef(x[i],v1))[0][1])
        corr2.append((np.corrcoef(x[i],v2))[0][1])
        corr3.append((np.corrcoef(x[i],v3))[0][1])

    t = pd.concat([data_all['driver_id'],pd.Series(corr0,index=data_all.index),pd.Series(corr1,index=data_all.index),pd.Series(corr2,index=data_all.index),pd.Series(corr3,index=data_all.index)],axis=1)
    t.columns=['driver_id','v0','v1','v2','v3']

    x_train = t.ix[:, 1:].values
    where_are_inf = np.isinf(x_train)
    x_train[where_are_inf] = 0
    kmeans = KMeans(n_clusters=n_clu,random_state=100).fit(x_train)
    kmeans_group_out =kmeans.labels_

    data_out = pd.concat([data_all[['city_id', 'driver_id']], pd.Series(kmeans_group_out,index=data_all.index)], axis=1)
    data_out.columns = ['city_id', 'driver_id','group_id']
    data_out = data_out.sort_values(by='group_id', ascending=True)
    data_out.to_csv("/Users/didi/Desktop/澳洲挑单与忠诚司机收入指标分析/{0}_result.csv".format(str(city_id)), sep='\t', header=False, index=0)

    print(len(data_all))
    print(len(data_out))
    data_all.drop(['city_id'], axis=1, inplace=True)
    data_out.drop(['city_id'], axis=1, inplace=True)
    merge_df = pd.merge(data_out, data_all, on=['driver_id'], how='inner')
    merge_df.to_csv("/Users/didi/Desktop/澳洲挑单与忠诚司机收入指标分析/{0}_merge.csv".format(str(city_id)), index=0)

    pic_data = merge_df.ix[1:, 2:].values
    plt.figure(figsize=(12, 10))
    plt.imshow(pic_data, aspect='auto', cmap=cm.viridis)
    plt.xlim(0,143)
    plt.colorbar()
    plt.title('{0}_{1}'.format(str(city_id), n_clu))
    plt.savefig('/Users/didi/Desktop/澳洲挑单与忠诚司机收入指标分析/{0}_{1}_pic.png'.format(str(city_id), n_clu))
    
    print("{0} complete".format(str(city_id)))

data = pd.read_csv("/Users/didi/Desktop/澳洲挑单与忠诚司机收入指标分析/data.csv", sep=',')
data = data.fillna(0)

data

driver_1_high=data.loc[(data.lable==0)&(data.grab_rate>0.7)]
driver_1_low=data.loc[(data.lable==0)&(data.grab_rate<=0.7)]

from scipy import stats

data.tail()

stats.levene(driver_1_high.gmv,driver_1_low.gmv)

import matplotlib.pyplot as plt
plt.hist(driver_1_low.tph)

stats.normaltest(driver_1_high.gmv)

stats.mannwhitneyu(driver_1_high.rides_rate,driver_1_low.rides_rate)

driver_0=data.loc[(data.lable==0)]

stats.normaltest(driver_0)

stats.normaltest(data)

stats.mannwhitneyu(driver_1_high.iph,driver_1_low.iph)

stats.levene(driver_1_high.iph,driver_1_low.iph)

stats.ttest_ind(driver_1_high.gmv,driver_1_low.gmv,equal_var=False)

stats.ttest_ind(driver_1_high.iph,driver_1_low.iph)

stats.levene(driver_2_high.iph,driver_2_low.iph)

stats.ttest_ind(driver_2_high.iph,driver_2_low.iph)

stats.levene(driver_1_high.gmv,driver_1_low.gmv)

stats.ttest_ind(driver_1_high.gmv,driver_1_low.gmv,equal_var=False)

stats.ttest_ind(driver_1_high.iph,driver_1_low.iph,equal_var=False)

import numpy as np
import pandas as pd
import time
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import matplotlib.cm as cm
import matplotlib.pyplot as plt
data = pd.read_csv("/Users/didi/Desktop/data.csv", sep=',')

data

plt.boxplot(x=data.values,labels=data.columns,whis=1.5)
plt.show()

data.boxplot()
plt.show()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值