python123第一周作业错题_bike1234.ipynb

最新推荐文章于 2021-02-19 11:01:28 发布

weixin_39727743

最新推荐文章于 2021-02-19 11:01:28 发布

阅读量154

点赞数

文章标签： python123第一周作业错题

{

"cells": [

{

"cell_type": "code",

"execution_count": 120,

"metadata": {},

"outputs": [],

"source": [

"import numpy as np\n",

"import pandas as pd\n",

"import os"

]

{

"cell_type": "code",

"execution_count": 121,

"metadata": {

"scrolled": true

"outputs": [

{

"data": {

"text/html": [

\n",

" .dataframe thead tr:only-child th {\n",

" text-align: right;\n",

" }\n",

"\n",

" .dataframe thead th {\n",

" text-align: left;\n",

" }\n",

"\n",

" .dataframe tbody tr th {\n",

" vertical-align: top;\n",

" }\n",

"\n",

" \n",

\n",

instant\n",

dteday\n",

season\n",

yr\n",

mnth\n",

holiday\n",

weekday\n",

workingday\n",

weathersit\n",

temp\n",

atemp\n",

hum\n",

windspeed\n",

casual\n",

registered\n",

cnt\n",

\n",

0\n",

1\n",

2011-01-01\n",

1\n",

0\n",

1\n",

0\n",

6\n",

0\n",

2\n",

0.344167\n",

0.363625\n",

0.805833\n",

0.160446\n",

331\n",

654\n",

985\n",

\n",

1\n",

2\n",

2011-01-02\n",

1\n",

0\n",

1\n",

0\n",

2\n",

0.363478\n",

0.353739\n",

0.696087\n",

0.248539\n",

131\n",

670\n",

801\n",

\n",

2\n",

3\n",

2011-01-03\n",

1\n",

0\n",

1\n",

0\n",

1\n",

0.196364\n",

0.189405\n",

0.437273\n",

0.248309\n",

120\n",

1229\n",

1349\n",

\n",

3\n",

4\n",

2011-01-04\n",

1\n",

0\n",

1\n",

0\n",

2\n",

1\n",

0.200000\n",

0.212122\n",

0.590435\n",

0.160296\n",

108\n",

1454\n",

1562\n",

\n",

4\n",

5\n",

2011-01-05\n",

1\n",

0\n",

1\n",

0\n",

3\n",

1\n",

0.226957\n",

0.229270\n",

0.436957\n",

0.186900\n",

82\n",

1518\n",

1600\n",

\n",

"text/plain": [

" instant dteday season yr mnth holiday weekday workingday \\\n",

"0 1 2011-01-01 1 0 1 0 6 0 \n",

"1 2 2011-01-02 1 0 1 0 0 0 \n",

"2 3 2011-01-03 1 0 1 0 1 1 \n",

"3 4 2011-01-04 1 0 1 0 2 1 \n",

"4 5 2011-01-05 1 0 1 0 3 1 \n",

"\n",

" weathersit temp atemp hum windspeed casual registered \\\n",

"0 2 0.344167 0.363625 0.805833 0.160446 331 654 \n",

"1 2 0.363478 0.353739 0.696087 0.248539 131 670 \n",

"2 1 0.196364 0.189405 0.437273 0.248309 120 1229 \n",

"3 1 0.200000 0.212122 0.590435 0.160296 108 1454 \n",

"4 1 0.226957 0.229270 0.436957 0.186900 82 1518 \n",

"\n",

" cnt \n",

"0 985 \n",

"1 801 \n",

"2 1349 \n",

"3 1562 \n",

"4 1600 "

]

"execution_count": 121,

"metadata": {},

"output_type": "execute_result"

}

"source": [

"train = pd.read_csv(\"day.csv\")\n",

"train.head()"

]

{

"cell_type": "code",

"execution_count": 122,

"metadata": {},

"outputs": [

{

"name": "stdout",

"output_type": "stream",

"text": [

" mnth_1 mnth_2 mnth_3 mnth_4 mnth_5 mnth_6 mnth_7 mnth_8 mnth_9 \\\n",

"0 1 0 0 0 0 0 0 0 0 \n",

"1 1 0 0 0 0 0 0 0 0 \n",

"2 1 0 0 0 0 0 0 0 0 \n",

"3 1 0 0 0 0 0 0 0 0 \n",

"4 1 0 0 0 0 0 0 0 0 \n",

"\n",

" mnth_10 ... weathersit_1 weathersit_2 weathersit_3 weekday_0 \\\n",

"0 0 ... 0 1 0 0 \n",

"1 0 ... 0 1 0 1 \n",

"2 0 ... 1 0 0 0 \n",

"3 0 ... 1 0 0 0 \n",

"4 0 ... 1 0 0 0 \n",

"\n",

" weekday_1 weekday_2 weekday_3 weekday_4 weekday_5 weekday_6 \n",

"0 0 0 0 0 0 1 \n",

"1 0 0 0 0 0 0 \n",

"2 1 0 0 0 0 0 \n",

"3 0 1 0 0 0 0 \n",

"4 0 0 1 0 0 0 \n",

"\n",

"[5 rows x 22 columns]\n"

]

}

"source": [

"categorical_features = ['mnth','weathersit','weekday']\n",

"for col in categorical_features:\n",

" train[col] = train[col].astype('object')\n",

"x_train_cat = train[categorical_features]\n",

"x_train_cat = pd.get_dummies(x_train_cat)\n",

"print x_train_cat.head()"

]

{

"cell_type": "code",

"execution_count": 123,

"metadata": {},

"outputs": [

{

"data": {

"text/html": [

\n",

" .dataframe thead tr:only-child th {\n",

" text-align: right;\n",

" }\n",

"\n",

" .dataframe thead th {\n",

" text-align: left;\n",

" }\n",

"\n",

" .dataframe tbody tr th {\n",

" vertical-align: top;\n",

" }\n",

"\n",

" \n",

\n",

temp\n",

atemp\n",

hum\n",

windspeed\n",

\n",

0\n",

0.355170\n",

0.373517\n",

0.828620\n",

0.284606\n",

\n",

1\n",

0.379232\n",

0.360541\n",

0.715771\n",

0.466215\n",

\n",

2\n",

0.171000\n",

0.144830\n",

0.449638\n",

0.465740\n",

\n",

3\n",

0.175530\n",

0.174649\n",

0.607131\n",

0.284297\n",

\n",

4\n",

0.209120\n",

0.197158\n",

0.449313\n",

0.339143\n",

\n",

"text/plain": [

" temp atemp hum windspeed\n",

"0 0.355170 0.373517 0.828620 0.284606\n",

"1 0.379232 0.360541 0.715771 0.466215\n",

"2 0.171000 0.144830 0.449638 0.465740\n",

"3 0.175530 0.174649 0.607131 0.284297\n",

"4 0.209120 0.197158 0.449313 0.339143"

]

"execution_count": 123,

"metadata": {},

"output_type": "execute_result"

}

"source": [

"from sklearn.preprocessing import MinMaxScaler\n",

"mn_x = MinMaxScaler()\n",

"numerical_features = ['temp','atemp','hum','windspeed']\n",

"x_train_num = train[numerical_features]\n",

"temp = mn_x.fit_transform(x_train_num)\n",

"x_train_num = pd.DataFrame(data=temp, columns=numerical_features, index =train.index)\n",

"x_train_num.head()\n"

]

{

"cell_type": "code",

"execution_count": 124,

"metadata": {},

"outputs": [],

"source": [

"x_train = pd.concat([x_train_num, train['holiday']], axis = 1, ignore_index=False)\n",

"FE_train = pd.concat([train['instant'], x_train, train['yr'],train['cnt']], axis = 1)\n",

"FE_train.to_csv('FE_day.csv', index=False)\n"

]

{

"cell_type": "code",

"execution_count": 125,

"metadata": {},

"outputs": [

{

"name": "stdout",

"output_type": "stream",

"text": [

"(365, 8)\n",

"(366, 8)\n"

]

}

"source": [

"#对数据值型特征，用常用统计量观察其分布\n",

"data = pd.read_csv('FE_day.csv')\n",

"train_data = data[data.yr == 0]\n",

"test_data = data[data.yr == 1]\n",

"train_data.to_csv('train_data.csv', index=False)\n",

"test_data.to_csv('test_data.csv', index=False)\n",

"print train_data.shape\n",

"print test_data.shape"

]

{

"cell_type": "code",

"execution_count": 126,

"metadata": {},

"outputs": [

{

"name": "stdout",

"output_type": "stream",

"text": [

"0.706085466961 0.325969065896\n"

]

}

"source": [

"x_test = test_data.iloc[:, list(range(7))]\n",

"y_test = test_data['cnt']\n",

"\n",

"x_train = train_data.iloc[:, list(range(7))]\n",

"y_train = train_data['cnt']\n",

"\n",

"from sklearn.linear_model import LinearRegression\n",

"from sklearn.metrics import r2_score\n",

"lr = LinearRegression()\n",

"lr.fit(x_train,y_train)\n",

"y_test_pred_lr = lr.predict(x_test)\n",

"y_train_pred_lr = lr.predict(x_train)\n",

"print r2_score(y_train,y_train_pred_lr), r2_score(y_test,y_test_pred_lr)\n",

"\n",

"\n"

]

{

"cell_type": "code",

"execution_count": null,

"metadata": {},

"outputs": [],

"source": []

{

"cell_type": "code",

"execution_count": 127,

"metadata": {},

"outputs": [

{

"data": {

"text/html": [

\n",

" .dataframe thead tr:only-child th {\n",

" text-align: right;\n",

" }\n",

"\n",

" .dataframe thead th {\n",

" text-align: left;\n",

" }\n",

"\n",

" .dataframe tbody tr th {\n",

" vertical-align: top;\n",

" }\n",

"\n",

" \n",

\n",

mnth_1\n",

mnth_2\n",

mnth_3\n",

mnth_4\n",

mnth_5\n",

mnth_6\n",

mnth_7\n",

mnth_8\n",

mnth_9\n",

mnth_10\n",

...\n",

weathersit_1\n",

weathersit_2\n",

weathersit_3\n",

weekday_0\n",

weekday_1\n",

weekday_2\n",

weekday_3\n",

weekday_4\n",

weekday_5\n",

weekday_6\n",

\n",

0\n",

1\n",

0\n",

...\n",

0\n",

1\n",

0\n",

1\n",

\n",

1\n",

0\n",

...\n",

0\n",

1\n",

0\n",

1\n",

0\n",

\n",

2\n",

1\n",

0\n",

...\n",

1\n",

0\n",

1\n",

0\n",

\n",

3\n",

1\n",

0\n",

...\n",

1\n",

0\n",

1\n",

0\n",

\n",

4\n",

1\n",

0\n",

...\n",

1\n",

0\n",

1\n",

0\n",

\n",

5 rows × 22 columns

\n",

"text/plain": [

" mnth_1 mnth_2 mnth_3 mnth_4 mnth_5 mnth_6 mnth_7 mnth_8 mnth_9 \\\n",

"0 1 0 0 0 0 0 0 0 0 \n",

"1 1 0 0 0 0 0 0 0 0 \n",

"2 1 0 0 0 0 0 0 0 0 \n",

"3 1 0 0 0 0 0 0 0 0 \n",

"4 1 0 0 0 0 0 0 0 0 \n",

"\n",

" mnth_10 ... weathersit_1 weathersit_2 weathersit_3 weekday_0 \\\n",

"0 0 ... 0 1 0 0 \n",

"1 0 ... 0 1 0 1 \n",

"2 0 ... 1 0 0 0 \n",

"3 0 ... 1 0 0 0 \n",

"4 0 ... 1 0 0 0 \n",

"\n",

" weekday_1 weekday_2 weekday_3 weekday_4 weekday_5 weekday_6 \n",

"0 0 0 0 0 0 1 \n",

"1 0 0 0 0 0 0 \n",

"2 1 0 0 0 0 0 \n",

"3 0 1 0 0 0 0 \n",

"4 0 0 1 0 0 0 \n",

"\n",

"[5 rows x 22 columns]"

]

"execution_count": 127,

"metadata": {},

"output_type": "execute_result"

}

"source": []

{

"cell_type": "code",

"execution_count": 128,

"metadata": {},

"outputs": [

{

"ename": "KeyError",

"evalue": "\"['temp' 'hum' 'windspeed'] not in index\"",

"output_type": "error",

"traceback": [

"\u001b[1;31m\u001b[0m",

"\u001b[1;31mKeyError\u001b[0mTraceback (most recent call last)",

"\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mmn_x\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mMinMaxScaler\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mnumerical_features\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m'temp'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'hum'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'windspeed'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[0mtemp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmn_x\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mnumerical_features\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mx_train_num\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtemp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnumerical_features\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",

"\u001b[1;32mC:\\Users\\62744\\Anaconda2\\lib\\site-packages\\pandas\\core\\frame.pyc\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 1956\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mSeries\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mIndex\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1957\u001b[0m \u001b[1;31m# either boolean or fancy integer index\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1958\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1959\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1960\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_frame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",

"\u001b[1;32mC:\\Users\\62744\\Anaconda2\\lib\\site-packages\\pandas\\core\\frame.pyc\u001b[0m in \u001b[0;36m_getitem_array\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 2000\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2001\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2002\u001b[1;33m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_convert_to_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2003\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2004\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",

"\u001b[1;32mC:\\Users\\62744\\Anaconda2\\lib\\site-packages\\pandas\\core\\indexing.pyc\u001b[0m in \u001b[0;36m_convert_to_indexer\u001b[1;34m(self, obj, axis, is_setter)\u001b[0m\n\u001b[0;32m 1229\u001b[0m \u001b[0mmask\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1230\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0many\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1231\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'%s not in index'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mobjarr\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mmask\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1232\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1233\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0m_values_from_object\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",

"\u001b[1;31mKeyError\u001b[0m: \"['temp' 'hum' 'windspeed'] not in index\""

]

}

"source": []

{

"cell_type": "code",

"execution_count": null,

"metadata": {},

"outputs": [],

"source": [

"fig = plt.figure()\n",

"sns.distplot(data.holiday.values, bins=30, kde=False)\n",

"plt.xlabel('holiday', fontsize=12)\n",

"plt.show()"

]

{

"cell_type": "code",

"execution_count": null,

"metadata": {},

"outputs": [],

"source": []

}

"metadata": {

"kernelspec": {

"display_name": "Python 2",

"language": "python",

"name": "python2"

"language_info": {

"codemirror_mode": {

"name": "ipython",

"version": 2

"file_extension": ".py",

"mimetype": "text/x-python",

"name": "python",

"nbconvert_exporter": "python",

"pygments_lexer": "ipython2",

"version": "2.7.13"

}

"nbformat": 4,

"nbformat_minor": 2

}

一键复制

编辑

Web IDE

原始数据

标准视图

历史

weixin_39727743

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python123第一周作业错题_bike1234.ipynb

{"cells": [{"cell_type": "code","execution_count": 120,"metadata": {},"outputs": [],"source": ["import numpy as np\n","import pandas as pd\n","import os"]},{"cell_type": "code","execution_count": 121,...
复制链接

扫一扫