SELECT,WHERE,DISTINCT,LIMIT
SQL写法:
SELECT * FROM table_name --取出table中所有数据
SELECT * FROM table_name limit 3 --取出table中的前3行数据
SELECT * FROM table_name WHERE column_name = 'abc' --从table中取出 column 为 ‘abc’ 的所有数据
SELECT DISTINCT column_name from table_name --取出table中以 column 为唯一项的所有行
Pandas写法:
#这里的df_table就是table所有数据,保存在一个dataframe中
df_table #取出table中所有数据
df_table.head(3) #取出table中的前3行数据
df_table[df_table.column_name == 'abc'] #从table中取出 column 为 ‘abc’ 的所有数据
df_table.column_name.unique() #取出table中以 column 为唯一项的所有行
SELECT 时有多个条件时的写法
SQL写法:
-- 选择 column0 = ‘abc’ 并且 column1 = ‘123’ 的所有数据
SELECT * FROM table_name WHERE column_name0 = 'abc' AND column_name1 = '123'
-- 选择 table 的指定列,并且 column0 = ‘abc’ 且 column1 = ‘123’
SELECT column_name2, column_name3, column_name4 FROM table_name WHERE column_name0 = 'abc' AND column_name1 = '123'
Pandas写法:
# 选择 column0 = ‘abc’ 并且 column1 = ‘123’ 的所有数据
df_table[(column_name0 == 'abc') & (column_name1 == '123')]
# 选择 table 的指定列,并且 column0 = ‘abc’ 且 column1 = ‘123’
df_table[(column_name0 == 'abc') & (column_name1 == '123')][['column_name2', 'column_name3', 'column_name4']]
ORDER BY
SQL写法:
SELECT * FROM table_name WHERE column_name0 = 'abc' ORDER BY column_name1
SELECT * FROM table_name WHERE column_name0 = 'abc' ORDER BY column_name1 DESC
Pandas写法:
df_table[df_table.column_name0 == 'abc'].sort_values('column_name1')
df_table[df_table.column_name0 == 'abc'].sort_values('column_name1', ascending=False)
IN... NOT IN
SQL写法:
SELECT * FROM table_name WHERE column_name0 IN ('abc','bcd','cde')
SELECT * FROM table_name WHERE column_name0 NOT IN ('abc', 'bcd', 'cde')
Pandas写法:
df_table[df_table.column0_name.isin(['abc', 'bcd', 'cde'])]
df_table[~df_table.column0_name.isin(['abc', 'bcd', 'ced'])]
GROUP BY,COUNT,ORDER BY
SQL写法:
SELECT column_name0, column_name1, count(*) FROM table_name GROUP BY column_name0,column_name1 ORDER BY column_name0,column_name1
SELECT column_name0, column_name1, count(*) FROM table_name GROUP BY column_name0,column_name1 ORDER BY column_name0,count(*) desc
Pandas写法:
df_table.groupby(['column_name0', 'column_name1']).size()
df_table.groupby(['column_name0', 'column_name1']).size().to_frame('size').reset_index().sort_values(['column_name0', 'size'], ascending=[True, False])
# .to_frame()和 .reset_index()说明
# 因为我们想要按计算字段(大小)排序,所以此字段需要成
# 为DataFrame的一部分。在Pandas中进行分组后,我们得到了一个不同的类型,称为GroupByObject。所以
# 我们需要将其转换回DataFrame。使用 .reset_index(),我们重新启动数据帧的行编号。
HAVING
SQL写法:
SELECT column_name0,count(*)
FROM table_name
WHERE column_name1 = 'abc'
GROUP BY column_name0
HAVING count(*)>100
ORDER BY count(*) DESC
Pandas写法:
df_table[df_table.column_name1 == 'abc'].groupby('column_name0').filter(lamda x:len(x)>100).groupby('column_name0').size().sort_values(ascending=FALSE)
获取第N条数据
SQL写法:
SELECT column_name0 FROM table_name ORDER BY SIZE DESC LIMIT 5
SELECT column_name0 FROM table_name ORDER BY SIZE DESC LIMIT 5 OFFSET 5
Pandas写法:
df_table.nlargest(5, columns='column_name0')
df_table.nlargest(10, columns='column_name0').tail(5)
最大值、最小值、平均值、中值
SQL写法:
SELECT max(column_name0), min(column_name0), mean(column_name0), median(column_name0) FROM table_name
Pandas写法:
df_table.agg({
'column_name0': ['min', 'max', 'mean', 'median']
})
JOIN
SQL写法:
SELECT column_name0, column_name1, column_name2, column_name3
FROM table_name0
JOIN table_name1
ON table_name0.column_name0 = table_name1.column_name0
WHERE table_name1.column_name1 = 'abc'
Pandas写法:
df_table0.merge(
df_table1[df_table1.column_name1=='abc'][['column_name0']],
left_on='column_name0',
right_on='column_name0',
how='inner'
)[['column_name0','column_name1','column_name2','column_name3']]
UNION ALL、UNION
SQL写法:
SELECT name,muni FROM table_name WHERE id='abc' UNION ALL SELECT name,muni FROM table_name WHERE id='bcd'
Pandas写法:
pd.concat(
[
df_table[df_table.id=='abc'][['name', 'muni']],
df_table[df_table.id=='bcd'][['name', 'muni']]
]
)
INSERT
SQL写法:
CREATE TABLE heros(id INTEGER, name TEXT)
INSERT INTO heros VALUES(1, 'abc')
INSERT INTO heros VALUES(2, 'bcd')
INSERT INTO heros VALUES(3, 'cde')
Pandas写法:
df1 = pd.DataFrame({'id':[1,2], 'name':['abc', 'bcd']})
df2 = pd.DataFrame({'id':[3], 'name':['cde']})
pd.concat([df1, df2]).reset_index(drop=True)
UPDATE
SQL写法:
UPDATE apt SET home='abc' WHERE id=1
Pandas写法:
df_apt.loc[df_apt['id']==1, 'home']='abc'
DELETE
SQL写法:
DELETE FROM table_name WHERE type='abc'
Pandas写法:
df_table = df_table[df_table.type != 'abc']
或者:
df_table.drop(df_table[df_table.type=='abc'].index)