{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Python数据分析的三剑客"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"import pandas as pd\n",
"\n",
"# matplotlib is the plotting / visualization library.\n",
"# Install it with: pip install matplotlib\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 生成对象"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/plain": [
"张三 88.0\n",
"李四 103.0\n",
"王五 68.0\n",
"老路 134.0\n",
"Jack 99.0\n",
"Name: Python, dtype: float32"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A Series is one-dimensional: a column of values plus an index of labels.\n",
"s = pd.Series(\n",
"    data=[88, 103, 68, 134, 99],\n",
"    index=['张三', '李四', '王五', '老路', 'Jack'],\n",
"    dtype=np.float32,\n",
"    name='Python',\n",
")\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>数学</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" <td>31</td>\n",
" <td>72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>李四</th>\n",
" <td>50</td>\n",
" <td>62</td>\n",
" <td>108</td>\n",
" </tr>\n",
" <tr>\n",
" <th>王五</th>\n",
" <td>79</td>\n",
" <td>51</td>\n",
" <td>98</td>\n",
" </tr>\n",
" <tr>\n",
" <th>老路</th>\n",
" <td>87</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" <td>59</td>\n",
" <td>31</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En 数学\n",
"张三 104 31 72\n",
"李四 50 62 108\n",
"王五 79 51 98\n",
"老路 87 5 2\n",
"Jack 144 59 31"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Seed NumPy's global RNG so that Restart & Run All regenerates the same scores.\n",
"np.random.seed(42)\n",
"\n",
"# 5 rows x 3 columns of random integers drawn from [0, 150).\n",
"df = pd.DataFrame(\n",
"    data=np.random.randint(0, 150, size=(5, 3)),\n",
"    index=['张三', '李四', '王五', '老路', 'Jack'],\n",
"    columns=['Python', 'En', '数学'],\n",
")\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 查看数据"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"68.0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Look up a single value by its index label.\n",
"s['王五']"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>数学</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" <td>31</td>\n",
" <td>72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>李四</th>\n",
" <td>50</td>\n",
" <td>62</td>\n",
" <td>108</td>\n",
" </tr>\n",
" <tr>\n",
" <th>王五</th>\n",
" <td>79</td>\n",
" <td>51</td>\n",
" <td>98</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En 数学\n",
"张三 104 31 72\n",
"李四 50 62 108\n",
"王五 79 51 98"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# head(3) shows the first three rows.\n",
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>数学</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>王五</th>\n",
" <td>79</td>\n",
" <td>51</td>\n",
" <td>98</td>\n",
" </tr>\n",
" <tr>\n",
" <th>老路</th>\n",
" <td>87</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" <td>59</td>\n",
" <td>31</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En 数学\n",
"王五 79 51 98\n",
"老路 87 5 2\n",
"Jack 144 59 31"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# tail(3) shows the last three rows.\n",
"df.tail(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 选择"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KeyError: ('张三', '王五')\n"
]
}
],
"source": [
"# Plain df[...] looks the key up in the column labels, so passing row labels\n",
"# raises KeyError. The error is caught and printed so that the notebook still\n",
"# runs from top to bottom.\n",
"try:\n",
"    df['张三','王五']\n",
"except KeyError as err:\n",
"    print('KeyError:', err)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" <td>31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>李四</th>\n",
" <td>50</td>\n",
" <td>62</td>\n",
" </tr>\n",
" <tr>\n",
" <th>王五</th>\n",
" <td>79</td>\n",
" <td>51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>老路</th>\n",
" <td>87</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" <td>59</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En\n",
"张三 104 31\n",
"李四 50 62\n",
"王五 79 51\n",
"老路 87 5\n",
"Jack 144 59"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A list of labels inside [] selects columns.\n",
"df[['Python','En']]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>数学</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" <td>31</td>\n",
" <td>72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>王五</th>\n",
" <td>79</td>\n",
" <td>51</td>\n",
" <td>98</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" <td>59</td>\n",
" <td>31</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En 数学\n",
"张三 104 31 72\n",
"王五 79 51 98\n",
"Jack 144 59 31"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A slice inside [] selects rows by position; 0::2 takes every second row,\n",
"# starting with the first one.\n",
"df[0::2]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>数学</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" <td>31</td>\n",
" <td>72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" <td>59</td>\n",
" <td>31</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En 数学\n",
"张三 104 31 72\n",
"Jack 144 59 31"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# .loc selects rows by index label.\n",
"df.loc[['张三','Jack']]"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>数学</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" <td>31</td>\n",
" <td>72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" <td>59</td>\n",
" <td>31</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En 数学\n",
"张三 104 31 72\n",
"Jack 144 59 31"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# .iloc also selects rows, but by integer position\n",
"# (the leading i: positions 0, 1, 2, 3, ...).\n",
"df.iloc[[0,4]]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>数学</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" <td>72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" <td>31</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python 数学\n",
"张三 104 72\n",
"Jack 144 31"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Suppose a database holds everyone's scores and we need the Python and 数学\n",
"# scores of 张三 and Jack. Think about how you would write that in SQL;\n",
"# in pandas it is a single .loc[rows, columns] lookup.\n",
"df.loc[['张三','Jack'], ['Python','数学']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 缺失值"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>数学</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" <td>31.0</td>\n",
" <td>72.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>李四</th>\n",
" <td>50</td>\n",
" <td>62.0</td>\n",
" <td>108.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>王五</th>\n",
" <td>79</td>\n",
" <td>51.0</td>\n",
" <td>98.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>老路</th>\n",
" <td>87</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" <td>59.0</td>\n",
" <td>31.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En 数学\n",
"张三 104 31.0 72.0\n",
"李四 50 62.0 108.0\n",
"王五 79 51.0 98.0\n",
"老路 87 NaN NaN\n",
"Jack 144 59.0 31.0"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# pandas marks missing values with NaN (not a number).\n",
"# Indexing with a boolean DataFrame keeps the entries where the mask is True\n",
"# and turns every other entry into NaN.\n",
"cond = df >= 31\n",
"df2 = df[cond]\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>Python</th>\n",
"      <th>En</th>\n",
"      <th>数学</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>张三</th>\n",
"      <td>104</td>\n",
"      <td>31.0</td>\n",
"      <td>72.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>李四</th>\n",
"      <td>50</td>\n",
"      <td>62.0</td>\n",
"      <td>108.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>王五</th>\n",
"      <td>79</td>\n",
"      <td>51.0</td>\n",
"      <td>98.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>老路</th>\n",
"      <td>87</td>\n",
"      <td>60.0</td>\n",
"      <td>60.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>Jack</th>\n",
"      <td>144</td>\n",
"      <td>59.0</td>\n",
"      <td>31.0</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"      Python    En     数学\n",
"张三       104  31.0   72.0\n",
"李四        50  62.0  108.0\n",
"王五        79  51.0   98.0\n",
"老路        87  60.0   60.0\n",
"Jack     144  59.0   31.0"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# fillna returns a new DataFrame with every NaN replaced by 60;\n",
"# df2 itself is left unchanged.\n",
"df2.fillna(60)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>数学</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" <td>31.0</td>\n",
" <td>72.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>李四</th>\n",
" <td>50</td>\n",
" <td>62.0</td>\n",
" <td>108.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>王五</th>\n",
" <td>79</td>\n",
" <td>51.0</td>\n",
" <td>98.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>老路</th>\n",
" <td>87</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" <td>59.0</td>\n",
" <td>31.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En 数学\n",
"张三 104 31.0 72.0\n",
"李四 50 62.0 108.0\n",
"王五 79 51.0 98.0\n",
"老路 87 NaN NaN\n",
"Jack 144 59.0 31.0"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# df2 still holds its NaN values.\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>数学</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" <td>31.0</td>\n",
" <td>72.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>李四</th>\n",
" <td>50</td>\n",
" <td>62.0</td>\n",
" <td>108.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>王五</th>\n",
" <td>79</td>\n",
" <td>51.0</td>\n",
" <td>98.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" <td>59.0</td>\n",
" <td>31.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En 数学\n",
"张三 104 31.0 72.0\n",
"李四 50 62.0 108.0\n",
"王五 79 51.0 98.0\n",
"Jack 144 59.0 31.0"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# dropna drops the rows that contain NaN.\n",
"df2.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>数学</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" <td>31.0</td>\n",
" <td>72.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>李四</th>\n",
" <td>50</td>\n",
" <td>62.0</td>\n",
" <td>108.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>王五</th>\n",
" <td>79</td>\n",
" <td>51.0</td>\n",
" <td>98.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" <td>59.0</td>\n",
" <td>31.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En 数学\n",
"张三 104 31.0 72.0\n",
"李四 50 62.0 108.0\n",
"王五 79 51.0 98.0\n",
"Jack 144 59.0 31.0"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# axis names a dimension, like the x and y axes of a coordinate system.\n",
"# A DataFrame is two-dimensional: rows are axis 0, columns are axis 1.\n",
"df2.dropna(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>数学</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" <td>31.0</td>\n",
" <td>72.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>李四</th>\n",
" <td>50</td>\n",
" <td>62.0</td>\n",
" <td>108.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>王五</th>\n",
" <td>79</td>\n",
" <td>51.0</td>\n",
" <td>98.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>老路</th>\n",
" <td>87</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" <td>59.0</td>\n",
" <td>31.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En 数学\n",
"张三 104 31.0 72.0\n",
"李四 50 62.0 108.0\n",
"王五 79 51.0 98.0\n",
"老路 87 NaN NaN\n",
"Jack 144 59.0 31.0"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# df2 is unchanged: dropna returned a new DataFrame instead of modifying df2.\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>张三</th>\n",
" <td>104</td>\n",
" </tr>\n",
" <tr>\n",
" <th>李四</th>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>王五</th>\n",
" <td>79</td>\n",
" </tr>\n",
" <tr>\n",
" <th>老路</th>\n",
" <td>87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Jack</th>\n",
" <td>144</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python\n",
"张三 104\n",
"李四 50\n",
"王五 79\n",
"老路 87\n",
"Jack 144"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# axis=1 drops the columns that contain NaN instead of the rows.\n",
"df2.dropna(axis=1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}