def f(nums):
    """Return the length of the longest strictly increasing contiguous run.

    Args:
        nums: A sequence (e.g. a list) of mutually comparable values.

    Returns:
        0 for an empty (falsy) input; otherwise the number of elements in
        the longest stretch where every element is strictly greater than
        the one immediately before it.
    """
    if not nums:
        return 0
    res = 1
    cur_len = 1
    # Compare each element with its predecessor.
    for prev, cur in zip(nums, nums[1:]):
        if prev < cur:
            cur_len += 1
            res = max(cur_len, res)
        else:
            # The run is broken (equal or decreasing): start counting again.
            cur_len = 1
    return res
# For every 'cuts' group: order the rows by 'depth', then print the group
# label together with the length of the longest strictly increasing run of
# 'price' values (as computed by f) in that order.
for name, group in df.groupby('cuts'):
    group = group.sort_values(by='depth')
    s = group['price']
    print(name, f(s.tolist()))
# For every 'color' group, fit price = b0 + b1 * carat by ordinary least
# squares and print the slope b1.
for name, group in df[['carat', 'price', 'color']].groupby('color'):
    n_rows = group.shape[0]
    # Design matrix of shape (2, n_rows): a row of ones (intercept) and the
    # 'carat' values.
    L1 = np.array([np.ones(n_rows), group['carat']]).reshape(2, n_rows)
    L2 = group['price']
    # Solve the normal equations (X X^T) b = X y directly instead of forming
    # the explicit inverse; result keeps the (2, 1) shape [intercept, slope].
    result = np.linalg.solve(L1.dot(L1.T), L1.dot(L2)).reshape(2, 1)
    # Index down to a scalar: '%f' on a 1-element array is deprecated.
    print('颜色为%s时回归系数为:%f' % (name, result[1, 0]))
idx = pd.IndexSlice

# The grouped totals do not depend on the year, so compute them once
# rather than on every loop iteration.
# NOTE(review): .sum() aggregates every non-key column and the code below
# reads the first resulting column — presumably the report count; confirm
# against the frame's schema.
county_totals = df.groupby(['COUNTY', 'YYYY']).sum()
state_totals = df.groupby(['State', 'YYYY']).sum()

# For each year 2010..2017, find the county and the state with the largest
# total and print whether that county's state is also the top state.
for i in range(2010, 2018):
    # idxmax() yields one (key, year) label per column; take the first
    # column positionally, then the key part of the label.
    county = county_totals.loc[idx[:, i], :].idxmax().iloc[0][0]
    # State of the first row whose COUNTY matches. A boolean mask avoids
    # building a query string, which breaks on names containing quotes.
    state = df.loc[df['COUNTY'] == county, 'State'].iloc[0]
    state_true = state_totals.loc[idx[:, i], :].idxmax().iloc[0][0]
    if state == state_true:
        print('在%d年,%s县的报告数最多,它所属的州%s也是报告数最多的' % (i, county, state))
    else:
        print('在%d年,%s县的报告数最多,但它所属的州%s不是报告数最多的,%s州报告数最多' % (i, county, state, state_true))