Pandas SQL 关联对比
select 操作
sql: pandas:
select * from table
select colA,colB from table df.loc [ : , [ 'colA' , 'colB' ] ] 或 df [ [ 'colA' , ' colB ' ] ] 关键字:loc
where 操作
sql: pandas:
select * from table where colA = 'B' df [ df [ 'colA' ] == ' B ' ]
select * from table where colA = 'A' AND colB = 'B' df [ ( df [ 'colA' ] == ' A ') & ( df [ 'colB' ] == ' B ') ]
select colA,colB from table where colA = 'A' AND colB = 'B' df [ [ 'colA' , 'colB'] ] df2 = df [ ( df [ 'colA' ] == ' A ') & ( df [ 'colB' ] == ' B ') ]
in 操作
sql: pandas:
select * from table where colA in ('A' , 'B') df [ df [ 'colA' ] ]. isin ( [ 'A','B' ] ) ] 关键字:isin
select * from table where colA not in ('A' , 'B') df [ ~ df [ 'colA' ] ]. isin ( [ 'A','B' ] ) ] 关键字:~ 取反
group by 操作
sql: pandas:
select colA , sum( colC ) from table group by colA df.groupby( [ 'colA' ] ) [ 'colC' ].sum( ).reset_index( ) 关键字:reset_index( )的作用是将groupby后的index进行重置,以保持数据二维表结构
order by 操作
sql: pandas:
select colA , sum( colC ) from table group by colA order by sum ( colC ) df.groupby( [ 'colA' ] ) [ 'colC' ].sum( ).reset_index( ).sort_values( by = [ 'colC' ] , ascending = True ) 关键字:sort_values
limit 操作
sql: pandas:
select * from table limit 20 df [ :20] 关键字:df切片
case when 操作
sql: pandas:
select colA, ( case when colC > 60 then '及格' else '叫家长' end ) from table df [ '成绩' ] = df.colC.map (lambda x : '及格' if x > 60 else '叫家长' ) 关键字:map() apply() applymap()
join 操作
sql: pandas:
select A.* , B.* from tableA left join tableB on A.col = B.col df3 = pd.merge(df,df2,how='left',on=[ 'colA' ] 关键字:pd.merge() how参数可以改变join模式 {‘left’,'right','outer','inner'}
union all 操作
sql: pandas:
A union B pd.concat( [ A , B ] )
A union B union all C pd.concat( [ A , B , C ] ) 关键字:pd.concat( )
distinct 操作
sql: pandas:
select distinct colA from table where colB is null df.loc[ df ['colB'].isnull( ) ,'colA' ].unique() 关键字:unique( )
多列算术 操作
sql: pandas:
select distinct colA from table df.loc[ df ['colB'].isnull( ) ,'colA' ].unique() 关键字:unique( )