import pandas as pd
import os
# os.chdir(r"D:\Python_Training\script_Python\5Preprocessing")
sale=pd.read_csv(r".\sale.csv",encoding="gbk")
sale.head()
year
market
sale
profit
0
2010
东
33912
2641
1
2010
南
32246
2699
2
2010
西
34792
2574
3
2010
北
31884
2673
4
2011
东
31651
2437
5.1 SQL语句介绍
SQL2数据过滤与排序
选择表中指定列
5.1.1 SQL数据库连接
import sqlite3 # sqlite3相当于轻量版,更多功能可使用SQLAlchemy
con = sqlite3.connect(':memory:')# 数据库连接
sale.to_sql('sale', con)# 将DataFrame注册成可用sql查询的表
newTable = pd.read_sql_query("select year, market, sale, profit from sale", con)# 也可使用read_sql
newTable.head()
year
market
sale
profit
0
2010
东
33912
2641
1
2010
南
32246
2699
2
2010
西
34792
2574
3
2010
北
31884
2673
4
2011
东
31651
2437
5.1.2 选择表中所有列
sqlResult = pd.read_sql_query('select * from sale', con)
sqlResult.head()
index
year
market
sale
profit
0
0
2010
东
33912
2641
1
1
2010
南
32246
2699
2
2
2010
西
34792
2574
3
3
2010
北
31884
2673
4
4
2011
东
31651
2437
5.1.3 删除重复的行
pd.read_sql_query("select DISTINCT year from sale", con)
year
0
2010
1
2011
2
2012
5.1.4 选择满足条件的行
pd.read_sql_query("select * from sale where market in ('东','西') and year=2012", con)
index
year
market
sale
profit
0
8
2012
东
31619
2106
1
10
2012
西
32103
2593
5.1.5 对行进行排序
sql ='''select year, market, sale, profit
from sale
order by sale desc'''
pd.read_sql_query(sql, con).head()
year
market
sale
profit
0
2010
西
34792
2574
1
2011
西
34175
2877
2
2010
东
33912
2641
3
2012
南
32443
3124
4
2010
南
32246
2699
5.2纵向连接表
one = pd.read_csv(r"One.csv")
one.to_sql('One', con, index=False)
one.T
0
1
2
3
4
5
6
x
1
1
1
2
3
4
6
a
a
a
b
c
v
e
g
two = pd.read_csv(r"Two.csv")
two.to_sql('Two', con, index=False)
two.T
0
1
2
3
4
x
1
2
3
3
5
b
x
y
z
v
w
5.2.1 union 和 union all
UNION:返回两个结果集的并集
union剔除重复值
union all不剔除重复值
union = pd.read_sql('select * from one UNION select * from two', con)
union_all = pd.read_sql('select * from one UNION ALL select * from two', con)
union.T
0
1
2
3
4
5
6
7
8
9
x
1
1
1
2
2
3
3
4
5
6
a
a
b
x
c
y
v
z
e
w
g
union_all.T
0
1
2
3
4
5
6
7
8
9
10
11
x
1
1
1
2
3
4
6
1
2
3
3
5
a
a
a
b
c
v
e
g
x
y
z
v
w
5.2.2 EXCEPT 和 INTERSECT
EXCEPT: 返回两个结果集的差(即从左查询中返回右查询没有找到的所有非重复值)
INTERSECT: 返回两个结果集的交集(即两个查询都返回的所有非重复值)
exceptTable = pd.read_sql('select * from one EXCEPT select * from two', con)
intersectTable = pd.read_sql('select * from one INTERSECT select * from two', con)
exceptTable.T
pd.read_sql("select * from table1 as a inner join table2 as b on a.id=b.id", con)# pd.read_sql("select * from table1 as a, table2 as b where a.id=b.id", con)
id
a
id
b
0
3
c
3
e
4、左连接
pd.read_sql("select * from table1 as a left join table2 as b on a.id=b.id", con)