商品销售额预测

基于历史销售、页面浏览等各项数据,预测各商品未来三个月的销售额。

在这里插入图片描述

import os.path
import pandas as pd
import numpy as np
import xgboost as xgb 

# Directory that holds the input CSV files; the submission file is written here too.
base_path = "./data/"

数据读取

# Load the five input tables from `base_path`, in the same order as before.

navigation, sales, train, test, vimages = (
    pd.read_csv(os.path.join(base_path, f'{table}.csv'))
    for table in ('navigation', 'sales', 'train', 'test', 'vimages')
)
test.head()
IDmonthsku_hashproduct_typeproduct_gendermacro_functionfunctionsub_functionmodelaesthetic_sub_linemacro_materialcolorfr_FR_priceen_US_description
0ed4c7471eac7e8c6e6718364c2b6e75462eeb47c_11ed4c7471eac7e8c6e6718364c2b6e75462eeb47cAccessoriesWomenTEXTILESSCARVESWINTER SCARVESECHARPE REYKJAVIKSANS LIGNE ESTHETIQUETEXTILENaN650.0In pure, soft cashmere, the Reykjavik scarf is...
126b2c4f6281482cccf1e748ef388f1649ecf1c8b_1126b2c4f6281482cccf1e748ef388f1649ecf1c8bLeather GoodsWomenCITY BAGSDAILY BAGSHANDBAGSVERY ZIPPED TOTEVERYLEATHERKAKI FANGO COQUILLE2550.0For Fall-Winter 2018, the Very Zipped Tote han...
200aad36ebc04aaa761df219c1b48d1e6162d28bb_1100aad36ebc04aaa761df219c1b48d1e6162d28bbLeather GoodsWomenCITY BAGSDAILY BAGSCROSS SHOULDER BAGSHINAMAHINALEATHERNOIR2460.0The Hina is two bags in one: gather in the sid...
38281636e7d162c92c1a3c1f9e40afb05d39a02f9_118281636e7d162c92c1a3c1f9e40afb05d39a02f9AccessoriesMenTEXTILESSTOLESWINTER STOLESETOLE MONOGRAM NIGHTFALLSANS LIGNE ESTHETIQUETEXTILENaN455.0With its subtle tone-on-tone Monogram pattern ...
4b76f8505700409a198fe69fb33e5d6cc01166c9b_11b76f8505700409a198fe69fb33e5d6cc01166c9bAccessoriesMenTIESTIESOTHER TIESCRAVATE MONOGRAM ILLUSIONSANS LIGNE ESTHETIQUETEXTILENaN150.0Suitable both for business and special occasio...
# Preview vimages: sku_hash plus dim_1 .. dim_2048 columns (presumably image feature vectors).
vimages.head()
sku_hashdim_1dim_2dim_3dim_4dim_5dim_6dim_7dim_8dim_9...dim_2039dim_2040dim_2041dim_2042dim_2043dim_2044dim_2045dim_2046dim_2047dim_2048
03f6eba29c2c71cc804edb9a4a88760a273dd43480.0000000.1652510.0170610.00.0973320.0000000.0087930.0645290.067048...0.0750030.0000000.3192210.7568350.2862870.0000000.0000000.0000001.0516080.677220
1d7681062d1826c06aeca716e0aff6d5c096ac6d80.1268990.0000000.0416900.00.6005560.5566080.6084400.8960610.011975...0.0000000.0101330.3252230.0066430.1692980.3172760.3082930.0868371.1221890.074612
2c594b658ce0f17ea594a50426dcf69468b83bdef0.2188910.4448780.0236550.00.9333650.0000000.4131720.0205600.010038...0.0752540.0000000.0370030.4040530.3018760.0499350.0316180.1386460.6973060.075182
35b7dbbfcc0eba1037e6ab3c9e5cf3ea819deba650.0029350.0000000.5047650.00.3913930.1591872.8946310.7086970.485973...0.7030620.0000000.0025090.0000000.4419450.8509160.0439800.0000000.1129080.618085
43d3487afe62434c742af37cf935a726f5bb162510.0000000.0302890.0843320.00.4161950.0000000.4879410.1544080.052589...0.0542070.0000000.0000000.0000000.1184800.0000000.0000000.0736850.0000000.114883

5 rows × 2049 columns

# Preview the training table: product attributes per ID plus the `target` column.
train.head()
IDmonthsku_hashproduct_typeproduct_gendermacro_functionfunctionsub_functionmodelaesthetic_sub_linemacro_materialcolorfr_FR_priceen_US_descriptiontarget
03d8a4ae769b526187c36901f204691a663333fa4_113d8a4ae769b526187c36901f204691a663333fa4Leather GoodsWomenLG ACCESSORIESSMALL LEATHER GOODSCOMPACT WALLETSPF.VICTORINEDAMIER AZURCANVASAZUR370.0Fashioned from summery Damier Azur canvas, thi...1366.0
1c05a54f7067be054ec4b27d0d6081353ef7d9df6_11c05a54f7067be054ec4b27d0d6081353ef7d9df6AccessoriesWomenFANCY ACCESSORIESBAG CHARMSBAG CHARMSBAG CHARM PETITE MALLESANS LIGNE ESTHETIQUECANVASNaN295.0This collectible piece features our iconic Pet...526.0
27cb4d3626bd48a9b523d8693266219c34aeccde8_117cb4d3626bd48a9b523d8693266219c34aeccde8Leather GoodsWomenCITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINALEATHERGALET2660.0The perfect summer companion, this effortlessl...503.0
38e5967f90ff9fb93aa2840fceecc537b2e8e6ff3_118e5967f90ff9fb93aa2840fceecc537b2e8e6ff3Leather GoodsWomenCITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINALEATHERMAGNOLIA2660.0The perfect summer companion, this effortlessl...276.0
46913a128945e0efeafc52101dcdeaa610eaa4430_116913a128945e0efeafc52101dcdeaa610eaa4430AccessoriesMenFASHION JEWELRYNECKLACESNECKLACESCOLL.CHARMS FOR GENTLEMENSANS LIGNE ESTHETIQUENON PRECIOUS METALNaN325.0This distinctive charm necklace reinterprets d...206.0
# Preview the navigation log: page views / add-to-cart per SKU, day and traffic source.
navigation.head()
Datesku_hashday_visitmonth_visitwebsite_version_zone_numberwebsite_version_country_numbertraffic_sourcepage_viewsaddtocart
0Day 6b6cb56a971c9ffeeaf1833202ab453b51ab680a9Wednesday95.032.0Source 11050
1Day 49c5060f9fb648cdfafd66f170af517f4de0a9162Monday95.032.0Source 13140
2Day 4da1549af83c813e5b53a4e4c0d588fa0099e3d3dMonday105.032.0Source 1570
3Day 6477ac04baf2a484f441ffe688abc89bb46a1c21dWednesday105.032.0Source 15470
4Day 4799bd8691fe0be0137173eadd5e79d033a389bd8Monday105.032.0Source 2430
# Preview the sales log: transaction columns plus buzz / sentiment metrics.
sales.head()
Dateday_transaction_dateMonth_transactiontypezone_numbercountry_numbernamesku_hashsales_quantitycurrency_rate_USD...NetSentiment_6_day_beforePositiveSentiment_6_day_beforeNegativeSentiment_6_day_beforeImpressions_6_day_beforeTotalBuzzPost_7_day_beforeTotalBuzz_7_day_beforeNetSentiment_7_day_beforePositiveSentiment_7_day_beforeNegativeSentiment_7_day_beforeImpressions_7_day_before
0Day_7Thursday4Type_2443Name_184d4e90b67c2b9902cff522be4eb121f637c17b551.225636...77.52559933634.04258.0643226023.068960.0107970.070.47365632032.05548.0631477319.0
1Day_6Wednesday4Type_1420Name_184d4e90b67c2b9902cff522be4eb121f637c17b551.228951...70.47365632032.05548.0631477319.065312.0102920.071.78870334277.05629.0730132649.0
2Day_1Monday1Type_2265Name_170f3dd559074ecfc9d69ff64480bc8acae977e3a51.201496...46.15853227594.010165.0359205211.045746.064881.073.08362721349.03320.0268925766.0
3Day_7Sunday1Type_1349Name_170f3dd559074ecfc9d69ff64480bc8acae977e3a51.204091...72.43861116292.02604.0307167067.042796.061653.068.74444220872.03866.0323820589.0
4Day_5Tuesday2Type_1420Name_11f8b7a2b42d4cdf5eef70f75fb86a34d514d274751.238391...72.77659328344.04466.0440849942.060653.090848.075.33875326527.03731.0393463263.0

5 rows × 97 columns

计算同款产品(除颜色外属性相同)在同一月份的平均目标值;训练集采用留一法,排除当前行自身的 target

# Columns that identify "the same product in the same month"; color is
# deliberately absent, so color variants of one product share a group.
product_descriptor = [
    'product_type', 'product_gender', 'macro_function', 'function',
    'sub_function', 'model', 'aesthetic_sub_line', 'macro_material',
    'month',
]

# Sum and row count of the training target within each group.
target_by_product = train.groupby(product_descriptor)['target']
product_target_sum = target_by_product.sum().reset_index(name='sum_target')
product_target_count = target_by_product.count().reset_index(name='count_target')
product_target_stats = product_target_sum.merge(product_target_count, on=product_descriptor)

train = train.merge(product_target_stats, how='left', on=product_descriptor)
test = test.merge(product_target_stats, how='left', on=product_descriptor)

# Train rows get a leave-one-out mean (the row's own target is removed from
# the group sum and count); a group of one row therefore yields 0 / 0 = NaN.
# Test rows use the plain group mean.
train['mean_target'] = (
    train['sum_target'].sub(train['target']).div(train['count_target'].sub(1))
)
test['mean_target'] = test['sum_target'].div(test['count_target'])

# NOTE: drop() without inplace=True returns a new frame, so this statement
# leaves `train` unchanged (it only matters as a displayed value).
train.drop(columns=['count_target', 'sum_target'])
test.drop(columns=['count_target', 'sum_target']).head()
IDmonthsku_hashproduct_typeproduct_gendermacro_functionfunctionsub_functionmodelaesthetic_sub_linemacro_materialcolorfr_FR_priceen_US_descriptionmean_target
0ed4c7471eac7e8c6e6718364c2b6e75462eeb47c_11ed4c7471eac7e8c6e6718364c2b6e75462eeb47cAccessoriesWomenTEXTILESSCARVESWINTER SCARVESECHARPE REYKJAVIKSANS LIGNE ESTHETIQUETEXTILENaN650.0In pure, soft cashmere, the Reykjavik scarf is...782.0
126b2c4f6281482cccf1e748ef388f1649ecf1c8b_1126b2c4f6281482cccf1e748ef388f1649ecf1c8bLeather GoodsWomenCITY BAGSDAILY BAGSHANDBAGSVERY ZIPPED TOTEVERYLEATHERKAKI FANGO COQUILLE2550.0For Fall-Winter 2018, the Very Zipped Tote han...179.0
200aad36ebc04aaa761df219c1b48d1e6162d28bb_1100aad36ebc04aaa761df219c1b48d1e6162d28bbLeather GoodsWomenCITY BAGSDAILY BAGSCROSS SHOULDER BAGSHINAMAHINALEATHERNOIR2460.0The Hina is two bags in one: gather in the sid...NaN
38281636e7d162c92c1a3c1f9e40afb05d39a02f9_118281636e7d162c92c1a3c1f9e40afb05d39a02f9AccessoriesMenTEXTILESSTOLESWINTER STOLESETOLE MONOGRAM NIGHTFALLSANS LIGNE ESTHETIQUETEXTILENaN455.0With its subtle tone-on-tone Monogram pattern ...NaN
4b76f8505700409a198fe69fb33e5d6cc01166c9b_11b76f8505700409a198fe69fb33e5d6cc01166c9bAccessoriesMenTIESTIESOTHER TIESCRAVATE MONOGRAM ILLUSIONSANS LIGNE ESTHETIQUETEXTILENaN150.0Suitable both for business and special occasio...NaN
# sum_target / count_target are still present here: the earlier drop() call was not in-place.
train.head()
IDmonthsku_hashproduct_typeproduct_gendermacro_functionfunctionsub_functionmodelaesthetic_sub_linemacro_materialcolorfr_FR_priceen_US_descriptiontargetsum_targetcount_targetmean_target
03d8a4ae769b526187c36901f204691a663333fa4_113d8a4ae769b526187c36901f204691a663333fa4Leather GoodsWomenLG ACCESSORIESSMALL LEATHER GOODSCOMPACT WALLETSPF.VICTORINEDAMIER AZURCANVASAZUR370.0Fashioned from summery Damier Azur canvas, thi...1366.01837.02471.000000
1c05a54f7067be054ec4b27d0d6081353ef7d9df6_11c05a54f7067be054ec4b27d0d6081353ef7d9df6AccessoriesWomenFANCY ACCESSORIESBAG CHARMSBAG CHARMSBAG CHARM PETITE MALLESANS LIGNE ESTHETIQUECANVASNaN295.0This collectible piece features our iconic Pet...526.0526.01NaN
27cb4d3626bd48a9b523d8693266219c34aeccde8_117cb4d3626bd48a9b523d8693266219c34aeccde8Leather GoodsWomenCITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINALEATHERGALET2660.0The perfect summer companion, this effortlessl...503.01395.04297.333333
38e5967f90ff9fb93aa2840fceecc537b2e8e6ff3_118e5967f90ff9fb93aa2840fceecc537b2e8e6ff3Leather GoodsWomenCITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINALEATHERMAGNOLIA2660.0The perfect summer companion, this effortlessl...276.01395.04373.000000
46913a128945e0efeafc52101dcdeaa610eaa4430_116913a128945e0efeafc52101dcdeaa610eaa4430AccessoriesMenFASHION JEWELRYNECKLACESNECKLACESCOLL.CHARMS FOR GENTLEMENSANS LIGNE ESTHETIQUENON PRECIOUS METALNaN325.0This distinctive charm necklace reinterprets d...206.0206.01NaN
# Remove the helper aggregates for real now that mean_target has been derived.
for frame in (train, test):
    frame.drop(columns=['count_target', 'sum_target'], inplace=True)
train.head()
IDmonthsku_hashproduct_typeproduct_gendermacro_functionfunctionsub_functionmodelaesthetic_sub_linemacro_materialcolorfr_FR_priceen_US_descriptiontargetmean_target
03d8a4ae769b526187c36901f204691a663333fa4_113d8a4ae769b526187c36901f204691a663333fa4Leather GoodsWomenLG ACCESSORIESSMALL LEATHER GOODSCOMPACT WALLETSPF.VICTORINEDAMIER AZURCANVASAZUR370.0Fashioned from summery Damier Azur canvas, thi...1366.0471.000000
1c05a54f7067be054ec4b27d0d6081353ef7d9df6_11c05a54f7067be054ec4b27d0d6081353ef7d9df6AccessoriesWomenFANCY ACCESSORIESBAG CHARMSBAG CHARMSBAG CHARM PETITE MALLESANS LIGNE ESTHETIQUECANVASNaN295.0This collectible piece features our iconic Pet...526.0NaN
27cb4d3626bd48a9b523d8693266219c34aeccde8_117cb4d3626bd48a9b523d8693266219c34aeccde8Leather GoodsWomenCITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINALEATHERGALET2660.0The perfect summer companion, this effortlessl...503.0297.333333
38e5967f90ff9fb93aa2840fceecc537b2e8e6ff3_118e5967f90ff9fb93aa2840fceecc537b2e8e6ff3Leather GoodsWomenCITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINALEATHERMAGNOLIA2660.0The perfect summer companion, this effortlessl...276.0373.000000
46913a128945e0efeafc52101dcdeaa610eaa4430_116913a128945e0efeafc52101dcdeaa610eaa4430AccessoriesMenFASHION JEWELRYNECKLACESNECKLACESCOLL.CHARMS FOR GENTLEMENSANS LIGNE ESTHETIQUENON PRECIOUS METALNaN325.0This distinctive charm necklace reinterprets d...206.0NaN

统计同款的数量

# Attributes whose frequency (number of train + test rows sharing the value)
# becomes a `<column>_count` feature.
count_vec_cols = [
    'macro_function', 'function', 'sub_function', 'model',
    'aesthetic_sub_line', 'macro_material', 'color',
]

for col in count_vec_cols:
    # Stack train and test so the counts cover both sets.
    stacked = pd.concat([train[['sku_hash', col]], test[['sku_hash', col]]])
    # groupby drops NaN keys, so rows with a missing attribute get a NaN count after the left merge.
    tmp = stacked.groupby(col)['sku_hash'].count().reset_index(name=col + '_count')

    train = train.merge(tmp, how='left', on=col)
    test = test.merge(tmp, how='left', on=col)
train.head()
IDmonthsku_hashproduct_typeproduct_gendermacro_functionfunctionsub_functionmodelaesthetic_sub_line...en_US_descriptiontargetmean_targetmacro_function_countfunction_countsub_function_countmodel_countaesthetic_sub_line_countmacro_material_countcolor_count
03d8a4ae769b526187c36901f204691a663333fa4_113d8a4ae769b526187c36901f204691a663333fa4Leather GoodsWomenLG ACCESSORIESSMALL LEATHER GOODSCOMPACT WALLETSPF.VICTORINEDAMIER AZUR...Fashioned from summery Damier Azur canvas, thi...1366.0471.00000022922094447901263213120.0
1c05a54f7067be054ec4b27d0d6081353ef7d9df6_11c05a54f7067be054ec4b27d0d6081353ef7d9df6AccessoriesWomenFANCY ACCESSORIESBAG CHARMSBAG CHARMSBAG CHARM PETITE MALLESANS LIGNE ESTHETIQUE...This collectible piece features our iconic Pet...526.0NaN546126102334773213NaN
27cb4d3626bd48a9b523d8693266219c34aeccde8_117cb4d3626bd48a9b523d8693266219c34aeccde8Leather GoodsWomenCITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINA...The perfect summer companion, this effortlessl...503.0297.333333303927846991590408939.0
38e5967f90ff9fb93aa2840fceecc537b2e8e6ff3_118e5967f90ff9fb93aa2840fceecc537b2e8e6ff3Leather GoodsWomenCITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINA...The perfect summer companion, this effortlessl...276.0373.000000303927846991590408960.0
46913a128945e0efeafc52101dcdeaa610eaa4430_116913a128945e0efeafc52101dcdeaa610eaa4430AccessoriesMenFASHION JEWELRYNECKLACESNECKLACESCOLL.CHARMS FOR GENTLEMENSANS LIGNE ESTHETIQUE...This distinctive charm necklace reinterprets d...206.0NaN843171162334771023NaN

5 rows × 23 columns

按流量来源(traffic_source)统计各 SKU 的页面浏览量

# Total page views for every (SKU, traffic source) pair, in long format.
traffic_source_views = (
    navigation
    .groupby(['sku_hash', 'traffic_source'], as_index=False)['page_views']
    .sum()
)
traffic_source_views.head(5)
sku_hashtraffic_sourcepage_views
0000cb631113e2f54ca5512139a6592e9584957aaSource 11748
1000cb631113e2f54ca5512139a6592e9584957aaSource 240
2000cb631113e2f54ca5512139a6592e9584957aaSource 530
3000cb631113e2f54ca5512139a6592e9584957aaSource 648
40020d561eab8b88ab55dfde84a2f12b865e5e0b4Source 116300
# Reshape to one row per SKU with one page-view column per traffic source.
traffic_source_views = traffic_source_views.pivot(
    index='sku_hash',
    columns='traffic_source',
    values='page_views',
).reset_index()
# NOTE(review): the rename is positional, so it assumes the pivot produces
# exactly six source columns in sorted order - confirm against the data.
traffic_source_views.columns = ['sku_hash'] + [
    f'page_views_nav{source_no}' for source_no in range(1, 7)
]
traffic_source_views.head()
sku_hashpage_views_nav1page_views_nav2page_views_nav3page_views_nav4page_views_nav5page_views_nav6
0000cb631113e2f54ca5512139a6592e9584957aa1748.040.0NaNNaN30.048.0
10020d561eab8b88ab55dfde84a2f12b865e5e0b416300.0286.0NaN25.093.025.0
20026e7a0fcfe5999a44b70b1acaff00fc1ad3ac2147.05.0NaNNaN5.0NaN
300287bbb94c12066df6491dccd744ee87ff01a903473.015.0NaN25.0NaNNaN
4003f8a76cb823eb7c58b6d052c57a8933f9275fd47468.0251.0NaN55.0NaN10.0

统计不同类型的销售数量

# Units sold per SKU, split by transaction `type` (one column per type).
qty_by_type = sales.groupby(['sku_hash', 'type'], as_index=False)['sales_quantity'].sum()
type_sales = qty_by_type.pivot(
    index='sku_hash',
    columns='type',
    values='sales_quantity',
).reset_index()
# NOTE(review): positional rename - assumes exactly two type values; confirm against the data.
type_sales.columns = ['sku_hash'] + [f'sales_quantity_type{type_no}' for type_no in (1, 2)]
type_sales.head()
sku_hashsales_quantity_type1sales_quantity_type2
0000cb631113e2f54ca5512139a6592e9584957aa104.010.0
10020d561eab8b88ab55dfde84a2f12b865e5e0b4325.040.0
20026e7a0fcfe5999a44b70b1acaff00fc1ad3ac285.025.0
300287bbb94c12066df6491dccd744ee87ff01a9035.010.0
4003f8a76cb823eb7c58b6d052c57a8933f9275fd253.0145.0

统计不同地区情况

# Units sold per SKU, split by `zone_number` (one column per zone).
qty_by_zone = sales.groupby(['sku_hash', 'zone_number'], as_index=False)['sales_quantity'].sum()

zone_sales = qty_by_zone.pivot(
    index='sku_hash',
    columns='zone_number',
    values='sales_quantity',
).reset_index()
# NOTE(review): positional rename - assumes exactly five zone values; confirm against the data.
zone_sales.columns = ['sku_hash'] + [f'sales_quantity_zone{zone_no}' for zone_no in range(1, 6)]
zone_sales.head()
sku_hashsales_quantity_zone1sales_quantity_zone2sales_quantity_zone3sales_quantity_zone4sales_quantity_zone5
0000cb631113e2f54ca5512139a6592e9584957aa20.05.015.050.024.0
10020d561eab8b88ab55dfde84a2f12b865e5e0b4NaNNaN82.020.0263.0
20026e7a0fcfe5999a44b70b1acaff00fc1ad3ac255.035.010.0NaN10.0
300287bbb94c12066df6491dccd744ee87ff01a9010.0NaN5.030.0NaN
4003f8a76cb823eb7c58b6d052c57a8933f9275fd45.0148.050.0140.015.0

汇总各 SKU 的页面浏览量、销量以及社交媒体热度与情感指标

# Per-SKU totals: page views from the navigation log, and units sold plus
# buzz / sentiment metrics from the sales log.
navigation_stats = navigation.groupby(['sku_hash'])['page_views'].sum().reset_index(name='page_views')

# Several columns must be selected with a list: indexing a GroupBy with a bare
# tuple of names was deprecated in pandas 1.0 and raises in pandas 2.x.
sales_stat_cols = ['sales_quantity', 'TotalBuzzPost', 'TotalBuzz', 'NetSentiment',
                   'PositiveSentiment', 'NegativeSentiment', 'Impressions']
sales_stats = sales.groupby(['sku_hash'])[sales_stat_cols].sum().reset_index()
navigation_stats.head()
sku_hashpage_views
0000cb631113e2f54ca5512139a6592e9584957aa1866
10020d561eab8b88ab55dfde84a2f12b865e5e0b416729
20026e7a0fcfe5999a44b70b1acaff00fc1ad3ac2157
300287bbb94c12066df6491dccd744ee87ff01a903513
4003f8a76cb823eb7c58b6d052c57a8933f9275fd47784
# Preview the per-SKU sums of sales quantity and buzz / sentiment columns.
sales_stats.head()
sku_hashsales_quantityTotalBuzzPostTotalBuzzNetSentimentPositiveSentimentNegativeSentimentImpressions
0000cb631113e2f54ca5512139a6592e9584957aa1141308458.01791611.01471.884478469288.067668.08.729030e+09
10020d561eab8b88ab55dfde84a2f12b865e5e0b4365497215.0803633.0851.430118228523.038871.04.159187e+09
20026e7a0fcfe5999a44b70b1acaff00fc1ad3ac21101206926.01718843.01380.767295524646.0100366.01.121427e+10
300287bbb94c12066df6491dccd744ee87ff01a9045475098.0680543.0581.175056211602.033527.03.253251e+09
4003f8a76cb823eb7c58b6d052c57a8933f9275fd3983282466.04795323.03742.2533841401916.0307473.02.389471e+10

划分交叉验证折(按 sku_hash 分为 5 折)

# Fold id in 0..4 derived from the SKU's categorical code, so every row of a
# given sku_hash lands in the same fold.
sku_codes = pd.Categorical(train['sku_hash']).codes
train['idx'] = sku_codes % 5
train.head()
IDmonthsku_hashproduct_typeproduct_gendermacro_functionfunctionsub_functionmodelaesthetic_sub_line...targetmean_targetmacro_function_countfunction_countsub_function_countmodel_countaesthetic_sub_line_countmacro_material_countcolor_countidx
03d8a4ae769b526187c36901f204691a663333fa4_113d8a4ae769b526187c36901f204691a663333fa4Leather GoodsWomenLG ACCESSORIESSMALL LEATHER GOODSCOMPACT WALLETSPF.VICTORINEDAMIER AZUR...1366.0471.00000022922094447901263213120.01
1c05a54f7067be054ec4b27d0d6081353ef7d9df6_11c05a54f7067be054ec4b27d0d6081353ef7d9df6AccessoriesWomenFANCY ACCESSORIESBAG CHARMSBAG CHARMSBAG CHARM PETITE MALLESANS LIGNE ESTHETIQUE...526.0NaN546126102334773213NaN1
27cb4d3626bd48a9b523d8693266219c34aeccde8_117cb4d3626bd48a9b523d8693266219c34aeccde8Leather GoodsWomenCITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINA...503.0297.333333303927846991590408939.04
38e5967f90ff9fb93aa2840fceecc537b2e8e6ff3_118e5967f90ff9fb93aa2840fceecc537b2e8e6ff3Leather GoodsWomenCITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINA...276.0373.000000303927846991590408960.02
46913a128945e0efeafc52101dcdeaa610eaa4430_116913a128945e0efeafc52101dcdeaa610eaa4430AccessoriesMenFASHION JEWELRYNECKLACESNECKLACESCOLL.CHARMS FOR GENTLEMENSANS LIGNE ESTHETIQUE...206.0NaN843171162334771023NaN0

5 rows × 24 columns

整合数据集

# Attach every per-SKU aggregate table to the training rows (left joins keep all rows).
X = train.copy()
for per_sku_table in (navigation_stats, sales_stats, traffic_source_views, type_sales, zone_sales):
    X = X.merge(per_sku_table, how='left', on='sku_hash')
X.head()
IDmonthsku_hashproduct_typeproduct_gendermacro_functionfunctionsub_functionmodelaesthetic_sub_line...page_views_nav4page_views_nav5page_views_nav6sales_quantity_type1sales_quantity_type2sales_quantity_zone1sales_quantity_zone2sales_quantity_zone3sales_quantity_zone4sales_quantity_zone5
03d8a4ae769b526187c36901f204691a663333fa4_113d8a4ae769b526187c36901f204691a663333fa4Leather GoodsWomenLG ACCESSORIESSMALL LEATHER GOODSCOMPACT WALLETSPF.VICTORINEDAMIER AZUR...20.025.029.0946.0554.0506.0238.075.0544.0137.0
1c05a54f7067be054ec4b27d0d6081353ef7d9df6_11c05a54f7067be054ec4b27d0d6081353ef7d9df6AccessoriesWomenFANCY ACCESSORIESBAG CHARMSBAG CHARMSBAG CHARM PETITE MALLESANS LIGNE ESTHETIQUE...NaN39.0NaN338.060.084.055.025.0155.079.0
27cb4d3626bd48a9b523d8693266219c34aeccde8_117cb4d3626bd48a9b523d8693266219c34aeccde8Leather GoodsWomenCITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINA...10.010.05.0429.0145.0190.055.035.0195.099.0
38e5967f90ff9fb93aa2840fceecc537b2e8e6ff3_118e5967f90ff9fb93aa2840fceecc537b2e8e6ff3Leather GoodsWomenCITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINA...5.010.05.0228.055.0108.055.020.020.080.0
46913a128945e0efeafc52101dcdeaa610eaa4430_116913a128945e0efeafc52101dcdeaa610eaa4430AccessoriesMenFASHION JEWELRYNECKLACESNECKLACESCOLL.CHARMS FOR GENTLEMENSANS LIGNE ESTHETIQUE...NaN5.0NaN110.025.035.05.020.030.045.0

5 rows × 45 columns

# Rebuild the training matrix: train rows plus every per-SKU aggregate table.
X = train.copy()
for per_sku_table in (navigation_stats, sales_stats, traffic_source_views, type_sales, zone_sales):
    X = X.merge(per_sku_table, how='left', on='sku_hash')

# Integer-encode the two categorical descriptors. Labels are first swapped for
# digit strings, then cast, so an unexpected label still fails loudly in astype(int).
X['product_type'] = X['product_type'].replace(
    {'Accessories': '0', 'Leather Goods': '1'}
).astype(int)

X['product_gender'] = X['product_gender'].replace(
    {'Women': '-1', 'Unisex': '0', 'Men': '1'}
).astype(int)

# Transform the label: the models are fitted on log(1 + target).
X['y'] = np.log(1 + X['target'])
X.head()
IDmonthsku_hashproduct_typeproduct_gendermacro_functionfunctionsub_functionmodelaesthetic_sub_line...page_views_nav5page_views_nav6sales_quantity_type1sales_quantity_type2sales_quantity_zone1sales_quantity_zone2sales_quantity_zone3sales_quantity_zone4sales_quantity_zone5y
03d8a4ae769b526187c36901f204691a663333fa4_113d8a4ae769b526187c36901f204691a663333fa41-1LG ACCESSORIESSMALL LEATHER GOODSCOMPACT WALLETSPF.VICTORINEDAMIER AZUR...25.029.0946.0554.0506.0238.075.0544.0137.07.220374
1c05a54f7067be054ec4b27d0d6081353ef7d9df6_11c05a54f7067be054ec4b27d0d6081353ef7d9df60-1FANCY ACCESSORIESBAG CHARMSBAG CHARMSBAG CHARM PETITE MALLESANS LIGNE ESTHETIQUE...39.0NaN338.060.084.055.025.0155.079.06.267201
27cb4d3626bd48a9b523d8693266219c34aeccde8_117cb4d3626bd48a9b523d8693266219c34aeccde81-1CITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINA...10.05.0429.0145.0190.055.035.0195.099.06.222576
38e5967f90ff9fb93aa2840fceecc537b2e8e6ff3_118e5967f90ff9fb93aa2840fceecc537b2e8e6ff31-1CITY BAGSDAILY BAGSSHOULDER BAGSGIROLATAMAHINA...10.05.0228.055.0108.055.020.020.080.05.624018
46913a128945e0efeafc52101dcdeaa610eaa4430_116913a128945e0efeafc52101dcdeaa610eaa443001FASHION JEWELRYNECKLACESNECKLACESCOLL.CHARMS FOR GENTLEMENSANS LIGNE ESTHETIQUE...5.0NaN110.025.035.05.020.030.045.05.332719

5 rows × 46 columns

整合测试集

# Build the test matrix the same way as the training matrix: test rows plus
# every per-SKU aggregate table.
Z = test.copy()
for per_sku_table in (navigation_stats, sales_stats, traffic_source_views, type_sales, zone_sales):
    Z = Z.merge(per_sku_table, how='left', on='sku_hash')

# Integer-encode the two categorical descriptors. Labels are first swapped for
# digit strings, then cast, so an unexpected label still fails loudly in astype(int).
Z['product_type'] = Z['product_type'].replace(
    {'Accessories': '0', 'Leather Goods': '1'}
).astype(int)

Z['product_gender'] = Z['product_gender'].replace(
    {'Women': '-1', 'Unisex': '0', 'Men': '1'}
).astype(int)
# Model input columns, assembled group by group (order matters: the feature
# matrices are built positionally from this list).
features = (
    # encoded product descriptors
    ['product_type', 'product_gender']
    # per-SKU totals
    + ['page_views', 'sales_quantity']
    + ['TotalBuzzPost', 'TotalBuzz', 'NetSentiment',
       'PositiveSentiment', 'NegativeSentiment', 'Impressions']
    + ['fr_FR_price']
    # attribute frequency counts
    + [f'{attr}_count' for attr in ('macro_function', 'function', 'sub_function', 'model',
                                    'aesthetic_sub_line', 'macro_material', 'color')]
    # page views per traffic source
    + [f'page_views_nav{k}' for k in range(1, 7)]
    # sales split by transaction type and by zone
    + [f'sales_quantity_type{k}' for k in (1, 2)]
    + [f'sales_quantity_zone{k}' for k in range(1, 6)]
    # mean target of sibling products
    + ['mean_target']
)

交叉验证

# Build the cross-validation matrices for one month.

def train_test_split(tr, te, mo, feats, num_folds):
    """Create per-fold XGBoost matrices for the rows of a single month.

    Args:
        tr: training frame; must contain `month`, `idx` (fold id), `y`
            (label) and every column named in `feats`.
        te: test frame; must contain `month` and every column in `feats`.
        mo: month value to select in both frames.
        feats: list of feature column names, in model input order.
        num_folds: number of folds; fold ids 0 .. num_folds - 1 are used.

    Returns:
        A tuple `(dtrain, dval, dtest)`. `dtrain[i]` holds the month's rows
        whose `idx` differs from `i`, `dval[i]` the rows whose `idx` equals
        `i` (both with labels), and `dtest` is a single unlabelled matrix of
        the month's test rows.
    """
    in_month = tr.month == mo

    def _labelled_matrix(row_mask):
        # Features and label for the selected training rows.
        return xgb.DMatrix(tr.loc[row_mask, feats].values,
                           tr.loc[row_mask, 'y'].values)

    dtrain, dval = [], []
    for fold in range(num_folds):
        held_out = tr.idx == fold
        dtrain.append(_labelled_matrix(in_month & ~held_out))
        dval.append(_labelled_matrix(in_month & held_out))

    dtest = xgb.DMatrix(te.loc[te.month == mo, feats].values)

    return dtrain, dval, dtest

Xgboost参数

# Booster settings shared by the models of all three months.
param = {
    # Squared-error regression; 'reg:linear' is the deprecated alias of this objective.
    'objective': 'reg:squarederror',
    'eval_metric': 'rmse',
    'booster': 'gbtree',
    'eta': 0.025,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'num_parallel_tree': 3,
    'min_child_weight': 25,
    'gamma': 5,
    'max_depth': 3,
    # Silent logging; 'verbosity': 0 replaces the deprecated 'silent': 1.
    'verbosity': 0,
}

第一个月的情况

# Month 1: fit one model per CV fold on the base feature set.

dtrain, dval, dtest = train_test_split(tr=X, te=Z, mo=1, feats=features, num_folds=5)

model_m1 = [
    xgb.train(
        param,
        dtrain[fold],
        50000,  # upper bound on boosting rounds
        [(dtrain[fold], 'train'), (dval[fold], 'eval')],
        early_stopping_rounds=200,
        verbose_eval=False,
    )
    for fold in range(5)
]

# Out-of-fold predictions for the training rows; test predictions are
# averaged over the five fold models.
oof_m1 = [booster.predict(held_out) for booster, held_out in zip(model_m1, dval)]
oof_test_m1 = [booster.predict(dtest) for booster in model_m1]

test_m1 = np.mean(oof_test_m1, axis=0)

# sku_hash -> month-1 prediction. Test SKUs are added last, so they win if a
# key appears in both sets.
m1 = {}
for fold, fold_pred in enumerate(oof_m1):
    fold_skus = X.loc[(X.month == 1) & (X.idx == fold), 'sku_hash']
    m1.update(zip(fold_skus, fold_pred))

m1.update(zip(Z.loc[Z.month == 1, 'sku_hash'], test_m1))

oof_m1 = pd.DataFrame.from_dict(m1, orient='index').reset_index()
oof_m1.columns = ['sku_hash', 'oof_m1']

# Expose the month-1 prediction as a feature for the later months (inner
# join: rows whose SKU has no month-1 prediction are dropped).
X2 = X.merge(oof_m1, on='sku_hash')
Z2 = Z.merge(oof_m1, on='sku_hash')
features2 = features + ['oof_m1']

第二个月情况

# Month 2: same procedure as month 1, with the month-1 prediction (oof_m1)
# added to the feature set.

dtrain2, dval2, dtest2 = train_test_split(tr = X2, te = Z2, mo = 2, feats = features2, num_folds = 5)

model_m2 = []

for i in range(5):
    model_m2.append(
        xgb.train(
                  param,
                  dtrain2[i],
                  50000,  # upper bound on boosting rounds
                  [(dtrain2[i],'train'), (dval2[i],'eval')],
                  early_stopping_rounds = 200,
                  verbose_eval = False)
    )

# run predictions for the 2 month

oof_m2 = []
oof_test_m2 = []
for i in range(5):
    oof_m2.append(model_m2[i].predict(dval2[i]))
    oof_test_m2.append(model_m2[i].predict(dtest2))

# Average the five fold models' test predictions.
test_m2 = np.mean(oof_test_m2, axis=0)

# sku_hash -> month-2 prediction. The keys are read from X2 / Z2, the very
# frames dval2 / dtest2 were built from, so every prediction is paired with
# its own SKU even when the inner merge that produced X2 / Z2 dropped rows
# that still exist in X / Z.
m2 = {}
for i in range(5):
    m2.update(zip(X2.loc[(X2.month==2) & (X2.idx==i),'sku_hash'], oof_m2[i]))

m2.update(zip(Z2.loc[(Z2.month==2),'sku_hash'], test_m2))

oof_m2 = pd.DataFrame.from_dict(m2, orient='index').reset_index()
oof_m2.columns = ['sku_hash', 'oof_m2']

# Expose the month-2 prediction as a feature for month 3 (inner join).
X3 = pd.merge(X2.copy(), oof_m2, on = 'sku_hash')
Z3 = pd.merge(Z2.copy(), oof_m2, on = 'sku_hash')
features3 = features2 + ['oof_m2']

第三个月情况

# Month 3: same procedure again, with both earlier predictions (oof_m1,
# oof_m2) in the feature set.

dtrain3, dval3, dtest3 = train_test_split(tr = X3, te = Z3, mo = 3, feats = features3, num_folds = 5)

model_m3 = []

for i in range(5):
    model_m3.append(
        xgb.train(
                  param,
                  dtrain3[i],
                  50000,  # upper bound on boosting rounds
                  [(dtrain3[i],'train'),(dval3[i],'eval')],
                  early_stopping_rounds = 200,
                  verbose_eval = False)
    )

# run predictions for the 3 month

oof_m3 = []
oof_test_m3 = []
for i in range(5):
    oof_m3.append(model_m3[i].predict(dval3[i]))
    oof_test_m3.append(model_m3[i].predict(dtest3))

# Average the five fold models' test predictions.
test_m3 = np.mean(oof_test_m3, axis=0)

# sku_hash -> month-3 prediction. The keys are read from X3 / Z3, the very
# frames dval3 / dtest3 were built from, so every prediction is paired with
# its own SKU even when the earlier inner merges dropped rows that still
# exist in X / Z.
m3 = {}
for i in range(5):
    m3.update(zip(X3.loc[(X3.month==3) & (X3.idx==i),'sku_hash'], oof_m3[i]))

m3.update(zip(Z3.loc[(Z3.month==3),'sku_hash'], test_m3))

oof_m3 = pd.DataFrame.from_dict(m3, orient='index').reset_index()
oof_m3.columns = ['sku_hash', 'oof_m3']

# Attach the month-3 prediction so each row carries oof_m1, oof_m2 and oof_m3.
X3 = pd.merge(X3.copy(), oof_m3, on = 'sku_hash')
Z3 = pd.merge(Z3.copy(), oof_m3, on = 'sku_hash')
# For easier evaluation, collapse the three per-month prediction columns into
# a single column: each row takes the prediction made for its own month.
month_to_oof = {1: 'oof_m1', 2: 'oof_m2', 3: 'oof_m3'}

# Initialise with a float: the predictions are floats, and writing floats
# into an int column is an incompatible-dtype assignment that newer pandas
# versions warn about.
Z3['target'] = 0.0
X3['pred_target'] = 0.0

for month, oof_col in month_to_oof.items():
    Z3.loc[Z3.month == month, 'target'] = Z3.loc[Z3.month == month, oof_col]
    X3.loc[X3.month == month, 'pred_target'] = X3.loc[X3.month == month, oof_col]

评估结果

def _rmse(frame):
    """Return the root-mean-square error between `y` and `pred_target` in `frame`."""
    return np.sqrt(np.mean((frame['y'] - frame['pred_target']) ** 2))


# Both columns are on the log(1 + target) scale, so these are log-scale errors.
print(f"month1: {_rmse(X3[X3.month == 1])}")
print(f"month2: {_rmse(X3[X3.month == 2])}")
print(f"month3: {_rmse(X3[X3.month == 3])}")
print(f"overall: {_rmse(X3)}")
month1: 0.4401109610656908
month2: 0.5816163893598695
month3: 0.7147597943809006
overall: 0.5895921884618234

生成结果

# Undo the log(1 + target) transform so predictions are back on the original scale.
Z3['target'] = np.exp(Z3.target)-1
final_sub = Z3[['ID','target']]

# Write and read back through a single path, so that changing base_path can
# never make the read-back look in a different place than the write.
submission_path = os.path.join(base_path, 'silly-raddar-sub4.csv')
final_sub.to_csv(submission_path, index=False)
final = pd.read_csv(submission_path)
final.head()
IDtarget
0ed4c7471eac7e8c6e6718364c2b6e75462eeb47c_1300.303182
1ed4c7471eac7e8c6e6718364c2b6e75462eeb47c_2301.345169
2ed4c7471eac7e8c6e6718364c2b6e75462eeb47c_3212.500844
326b2c4f6281482cccf1e748ef388f1649ecf1c8b_185.048020
426b2c4f6281482cccf1e748ef388f1649ecf1c8b_271.009720
  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 4
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

RunsenLIu

顺便点一个赞

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值