## pandas简介
Pandas中一共有三种数据结构,分别为:Series、DataFrame和MultiIndex。
Pandas(Python Data Analysis Library)的名称来源于面板数据(panel data)和Python数据分析(data analysis)。它最初由AQR Capital Management于2008年4月开发,并于2009年底开源,目前由专注于Python数据包开发的PyData开发团队继续开发和维护,属于PyData项目的一部分。Pandas最初是作为金融数据分析工具而开发的,因此为时间序列分析提供了很好的支持。
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "9f7e44d2",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "23c5a15f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 0.959924\n",
"1 0.057490\n",
"2 0.395029\n",
"3 0.861239\n",
"4 0.332671\n",
"dtype: float64\n",
"0 0.959924\n",
"1 0.057490\n",
"2 0.395029\n",
"3 0.861239\n",
"dtype: float64\n",
"0 0.959924\n",
"1 0.057490\n",
"2 0.395029\n",
"3 0.861239\n",
"dtype: float64\n",
"0 0.959924\n",
"2 0.395029\n",
"4 0.332671\n",
"dtype: float64\n",
"0 20.000000\n",
"1 20.000000\n",
"2 20.000000\n",
"3 0.861239\n",
"4 0.332671\n",
"dtype: float64\n"
]
}
],
"source": [
"#切片索引\n",
"s=pd.Series(np.random.rand(5))\n",
"print(s)\n",
"print(s[0:4])\n",
"print(s[:-1])\n",
"print(s[::2])\n",
"#修改值\n",
"s[:-2]=20\n",
"print(s)\n"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "949b7461",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 64.755105\n",
"1 50.714969\n",
"2 52.834138\n",
"3 89.628520\n",
"4 69.999119\n",
"dtype: float64\n",
"##############################\n",
"0 64.755105\n",
"1 50.714969\n",
"2 52.834138\n",
"3 89.628520\n",
"4 NaN\n",
"dtype: float64\n",
"##############################\n",
"0 True\n",
"1 False\n",
"2 False\n",
"3 True\n",
"4 False\n",
"dtype: bool <class 'pandas.core.series.Series'>\n",
"##############################\n",
"1 50.714969\n",
"2 52.834138\n",
"dtype: float64\n",
"##############################\n",
"0 False\n",
"1 False\n",
"2 False\n",
"3 False\n",
"4 True\n",
"dtype: bool\n",
"##############################\n",
"0 True\n",
"1 True\n",
"2 True\n",
"3 True\n",
"4 False\n",
"dtype: bool\n",
"##############################\n",
"4 NaN\n",
"dtype: float64\n"
]
}
],
"source": [
"#布尔索引\n",
"np.random.seed(88)\n",
"s=pd.Series(np.random.rand(5)*100)\n",
"print(s)\n",
"print('#'*30)\n",
"s[4]=None\n",
"print(s)\n",
"print('#'*30)\n",
"bol=s>55\n",
"print(bol,type(bol))\n",
"print('#'*30)\n",
"#通过布尔series获取值\n",
"print(s[s<55])\n",
"print('#'*30)\n",
"#查看series的方法\n",
"bol2=s.isnull()\n",
"print(bol2)\n",
"'''isnull():返回False表示没有缺失值,返回True表示有缺失值;s.notnull()函数则相反'''\n",
"print('#'*30)\n",
"bol3=s.notnull()\n",
"print(bol3)\n",
"print('#'*30)\n",
"#获取空值\n",
"print(s[bol2])"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "6791fc89",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 64.755105\n",
"1 50.714969\n",
"2 52.834138\n",
"3 89.628520\n",
"4 69.999119\n",
"5 71.429710\n",
"6 71.733838\n",
"7 22.281946\n",
"8 17.515452\n",
"9 45.684149\n",
"dtype: float64\n",
"##############################\n",
"0 64.755105\n",
"dtype: float64\n",
"9 45.684149\n",
"dtype: float64\n"
]
}
],
"source": [
"#pandas数据结构series技巧----数据查看,重新索引,对齐,增,删,改\n",
"#数据查看\n",
"np.random.seed(88)\n",
"s=pd.Series(np.random.rand(10)*100)\n",
"print(s)\n",
"print('#'*30)\n",
"'''head,tail方法'''\n",
"print(s.head(1))\n",
"print(s.tail(1))\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "c97af1c5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 64.755105\n",
"1 50.714969\n",
"2 52.834138\n",
"3 89.628520\n",
"4 69.999119\n",
"dtype: float64\n",
"##############################\n",
"c NaN\n",
"d NaN\n",
"a NaN\n",
"e NaN\n",
"f NaN\n",
"dtype: float64\n",
"c 11.000000\n",
"d 11.000000\n",
"a 11.000000\n",
"e 11.000000\n",
"f 11.000000\n",
"0 64.755105\n",
"1 50.714969\n",
"2 52.834138\n",
"3 89.628520\n",
"4 69.999119\n",
"5 11.000000\n",
"dtype: float64\n"
]
}
],
"source": [
"#重新索引\n",
"np.random.seed(88)\n",
"s=pd.Series(np.random.rand(5)*100)\n",
"print(s)\n",
"print('#'*30)\n",
"'''重新索引后面value为null'''\n",
"s1=s.reindex(['c','d','a','e','f'])\n",
"print(s1)\n",
"s2=s.reindex(['c','d','a','e','f',0,1,2,3,4,5],fill_value=11)\n",
"print(s2)"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "43356734",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a 64.755105\n",
"b 50.714969\n",
"c 52.834138\n",
"dtype: float64\n",
"##############################\n",
"a 89.628520\n",
"e 69.999119\n",
"f 71.429710\n",
"dtype: float64\n",
"##############################\n",
"a 154.383625\n",
"b NaN\n",
"c NaN\n",
"e NaN\n",
"f NaN\n",
"dtype: float64\n"
]
}
],
"source": [
"#对齐\n",
"np.random.seed(88)\n",
"s1=pd.Series(np.random.rand(3)*100,index=['a','b','c'])\n",
"s2=pd.Series(np.random.rand(3)*100,index=['a','e','f'])\n",
"print(s1)\n",
"print('#'*30)\n",
"print(s2)\n",
"print('#'*30)\n",
"print(s1+s2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "faa8a408",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e32f916a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d54af1d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5fcdd26",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "28345851",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1ff6b29",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb30e137",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "51ac6517",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa8f3c3a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5e09818",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "8fdfb24a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}