22. pandas 中 sql 语句
两种:
sqlite3
sqlalchemy
22.1 sqlite3使用案例
>>> snd = pd.read_csv("sndHsPr.csv")
>>> snd
dist roomnum halls AREA floor subway school price
0 chaoyang 1 0 46.06 middle 1 0 48850
1 chaoyang 1 1 59.09 middle 1 0 46540
... ... ... ... ... ... ... ... ...
16208 chaoyang 1 1 42.89 high 1 0 52693
16209 fengtai 2 1 89.90 middle 1 0 46719
16210 rows × 8 columns
>>> import sqlite3
# 在内存中读取
>>> con = sqlite3.connect(":memory:")
>>> con
<sqlite3.Connection at 0x2737b028f10>
# 将读取进来的csv文件 ------> dataframe对象 ------> 可以使用sql语句的表
# 将dataframe对象转化成表
>>> snd.to_sql('snddb',con)
# 使用sql语句来进行查询
>>> pd.read_sql_query('select * from snddb',con)
index dist roomnum halls AREA floor subway school price
0 0 chaoyang 1 0 46.06 middle 1 0 48850
1 1 chaoyang 1 1 59.09 middle 1 0 46540
2 2 haidian 5 2 278.95 high 1 1 71662
... ... ... ... ... ... ... ... ... ...
16207 16207 chaoyang 2 1 61.79 high 1 0 50170
16208 16208 chaoyang 1 1 42.89 high 1 0 52693
16209 16209 fengtai 2 1 89.90 middle 1 0 46719
16210 rows × 9 columns
# 取出dist对象中的数据,并且去重
>>> pd.read_sql('select distinct dist from snddb',con)
dist
0 chaoyang
1 haidian
2 fengtai
3 xicheng
4 dongcheng
5 shijingshan
# 每个地区的均价,按照均价进行排序 升序
>>> pd.read_sql('select dist,avg(price) from snddb group by dist order by
avg(price)',con)
dist avg(price)
0 shijingshan 40286.889574
1 fengtai 42500.904309
2 chaoyang 52800.624651
3 haidian 68757.602261
4 dongcheng 71883.595041
5 xicheng 85674.778545
22.2 sqlalchemy使用案例
>>> from sqlalchemy import create_engine
# 建立连接
>>> engine = create_engine('mysql+pymysql://root:rootadmin@localhost/new_db?charset=utf8')
# 读取csv
>>> sale = pd.read_csv('One.csv',encoding='gbk')
>>> sale
x a
0 1 a
1 1 a
2 1 b
3 2 c
4 3 v
5 4 e
6 6 g
#将dataframe对象填写进入数据库
>>> sale.to_sql('One',engine)
>>> pd.read_sql('select * from One',engine)
index x a
0 0 1 a
1 1 1 a
2 2 1 b
3 3 2 c
4 4 3 v
5 5 4 e
6 6 6 g