sold_sums = sorted(sold_sums.items(),key = lambda d:d[1] , reverse=True) #按从大到小 id_means = sorted(id_means.items(),key=lambda d:d[1]) #按从小到大
import pandas as pd
from collections import Counter
import re
df = pd.read_excel("make_model_ebayno_市场年代.xlsx")
#处理sold字段--------------
# for i in range(len(df)):
# sold = df.loc[i,"sold"]
# if pd.isnull(sold):
# new_sold = 0
# else:
# new_sold= re.findall("(\d{1,}) Sold",sold)
# if len(new_sold) == 0:
# new_sold = 0
# else:
# new_sold=new_sold[0]
# df.loc[i,"new_sold"] = new_sold
# df.to_excel("make_model_ebayno_市场年代_new.xlsx",index = False)
#-----------------------------------------------------
end_models = df[["vio_end","model"]].drop_duplicates() #去重
result = []
for models in end_models.values:
vio_end = models[0]
model = models[1]
subdf = df[(df.model == model) & (df.vio_end == vio_end)]
y_ends = subdf["ebay_end"].dropna() # 去掉空值
#统计频次
items = Counter(y_ends)
values = items.most_common(3)
#统计sold量和id
sig_ends = y_ends.drop_duplicates() #去重
sold_sums = {}
id_means = {}
for ends in sig_ends:
print(ends)
subdf2 = subdf[subdf.ebay_end == ends]
sold_sum = subdf2["new_sold"].sum()
id_mean = subdf2["ebay_id"].mean()
sold_sums[ends] = sold_sum
id_means[ends] =id_mean
#字典排序
sold_sums = sorted(sold_sums.items(),key = lambda d:d[1] , reverse=True) #按从大到小
id_means = sorted(id_means.items(),key=lambda d:d[1]) #按从小到大
sold_sums_max = sold_sums[0][0]
id_means_min = id_means[0][0]
years = [vio_end, model, len(subdf), sold_sums_max , id_means_min]
for val in values:
years.extend([int(val[0]),round(val[1]/len(subdf),2 )])
result.append(years)
df = pd.DataFrame(result,columns=["vio_end","model","sum","sold_max","id_min","first","f_rate","second","s_rate","third","t_rate"])
df.to_excel("make_model_ebayno_市场统计1107.xlsx",index=False)