9 外部数据获取
# 1. CSV / plain-text files
# Read: read_csv splits on commas by default, while read_table is told to
# do so through delimiter=",".
# NOTE(review): test.csv must already exist for these two reads; the write
# step further down is what (re)creates it.
data = pd.read_csv("test.csv")
r = pd.read_table("test.csv", delimiter=",")
r.head()

# Write: an 80 x 4 frame of random normal samples, labelled with 80 dates
# generated from the start string '27/6/2010', columns named A-D.
data = pd.DataFrame(
    np.random.randn(80, 4),
    columns=["A", "B", "C", "D"],
    index=pd.date_range(start="27/6/2010", periods=80),
)
# index=False keeps the row labels (the dates) out of the file.
data.to_csv("test.csv", index=False)
# 2. Excel files
# Dependencies noted by the author: pip3 install xlwt ; pip3 install xlrd
# Read ("文件" is presumably a placeholder for the real workbook path).
data = pd.read_excel(io="文件")
# Write the frame to a sheet named "Score", leaving out the row index.
# NOTE(review): the .xls target presumably relies on the xlwt engine listed
# above -- confirm the installed pandas version still supports it.
data.to_excel(excel_writer="aa.xls", index=False, sheet_name="Score")
# 3. Read data from MySQL
# Method 1: a raw pymysql connection (not recommended; kept for reference).
# !pip install pymysql  # install pymysql
# import pymysql
# con = pymysql.connect(host="127.0.0.1", user="root", password="123456", port=3306, db="edu")  # connect
# df1 = pd.read_sql_query("select * from dw_user", con)
# df1.head()

# Method 2: a SQLAlchemy engine.
import sqlalchemy
from sqlalchemy import create_engine

# URL template: "mysql+pymysql://<user>:<password>@<host>:<port>/<database>?charset=utf8"
# NOTE(review): the credentials below are hard-coded demo values.
con = create_engine("mysql+pymysql://root:123456@localhost:3306/stu?charset=utf8")  # connect

# Keep the query result in df1: the column assignment further down operates
# on df1, and its only other definition (method 1) is commented out, so
# discarding this result left df1 undefined.
df1 = pd.read_sql("select * from sanguo", con)

# Write to CSV
# df1.to_csv("test1.txt", index=False)

# Write to the database: add a constant column, then (optionally) store it.
df1["学历"] = "本科"
# df1.to_sql("表名", con)

# List the tables that exist in the connected database.
pd.read_sql("show tables", con)
DataFrame 数据框创建
pandas.DataFrame( data, index, columns)
data:一组数据(ndarray,series,tuple, lists, dict 等类型)。
index:索引值,或者可以称为行标签。
columns:列标签,默认为 RangeIndex (0, 1, 2, …, n) 。
10.1.0.1 列表创建
import pandas as pd
import numpy as np
# Build a DataFrame from a list of rows: each inner list is one [id, name]
# record, and the rows get explicit string labels instead of 0..3.
l1 = [
    [1, "a"],
    [2, "bbbb"],
    [3, "d"],
    [4, "c"],
]
df1 = pd.DataFrame(
    data=l1,
    index=["a1", "b2", "c3", "d4"],
    columns=["id", "name"],
)
df1  # bare expression: shows the frame when run interactively
id name
a1 1 a
b2 2 bbbb
c3 3 d
d4 4 c
10.1.0.2 元组创建
# Build a DataFrame from a tuple of (id, name) tuples. No index is passed,
# so the rows are labelled with the default integers.
t1 = (
    (1, "a"),
    (2, "b"),
    (3, "b"),
    (4, "c"),
)
df1 = pd.DataFrame(data=t1, columns=["id", "name"])
df1  # bare expression: shows the frame when run interactively
id name
0 1 a
1 2 b
2 3 b
3 4 c
10.1.0.3 序列Series创建
# Build a DataFrame from Series objects: every Series in the list (s1, s2)
# becomes one row of the frame.
s1 = pd.Series(data=[1, 2, 3, 4])
s2 = pd.Series(data=[1, 2, 3, 4])
df1 = pd.DataFrame(data=[s1, s2])
df1  # bare expression: shows the frame when run interactively
0 1 2 3
0 1 2 3 4
1 1 2 3 4
10.1.0.4 字典创建
# Build a DataFrame from a dict: each key becomes a column name and its
# list supplies that column's values. The bare None entries are missing
# values; note that name[1] is the *string* 'None', not a missing value.
dic = {
    "id": [4, 5, 6, 7, 8, 9],
    "age": [20, 23, 31, 35, None, 10],
    "name": ["a", "None", "c", "d", None, "f"],
}
df = pd.DataFrame(data=dic)
df  # bare expression: shows the frame when run interactively
id age name
0 4 20.0 a
1 5 23.0 None
2 6 31.0 c
3 7 35.0 d
4 8 NaN None
5 9 10.0 f