# pandas and SQL: equivalent operations (after the pandas "Comparison with SQL" guide)

import pandas as pd
import numpy as np
#from pandasql import sqldf

# Load the example "tips" dataset used throughout this walkthrough.
# NOTE: the old URL (raw.github.com/.../master/pandas/tests/data/tips.csv) is
# dead — pandas switched its default branch to `main` and the file now lives
# under doc/data/, which is the path the current "Comparison with SQL" docs use.
url = 'https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/tips.csv'
tips = pd.read_csv(url)
tips.head()  # quick sanity check of the first 5 rows

#   SELECT
#       'total_bill','tip','sex','smoker','time'
#   FROM TIPS
#   LIMIT 5;
# Column projection: pass a list of labels, then take the first 5 rows.
wanted = ['total_bill', 'tip', 'sex', 'smoker', 'time']
tips.loc[:, wanted].head(5)

#    SELECT
#    *
#    FROM TIPS
#    WHERE TIME='Dinner'
#    LIMIT 5;
# Boolean mask = the WHERE clause; head() = LIMIT 5.
is_dinner = tips['time'].eq('Dinner')
tips.loc[is_dinner].head()

#    SELECT *
#    FROM TIPS
#    WHERE TIME ='Dinner' AND TIP >5.00
# SQL AND maps to elementwise & between boolean masks.
is_dinner = tips['time'].eq('Dinner')
big_tip = tips['tip'].gt(5.00)
tips.loc[is_dinner & big_tip].head()

#    SELECT *
#    FROM TIPS
#    WHERE SIZE >=5 OR TOTAL_BILL >45;
# SQL OR maps to elementwise | between boolean masks.
big_party = tips['size'].ge(5)
big_bill = tips['total_bill'].gt(45.00)
tips.loc[big_party | big_bill]


#    SELECT SEX,COUNT(*)
#    FROM TIPS
#    GROUP BY SEX
# Three flavors of grouped counting:
by_sex = tips.groupby('sex')
by_sex.size()                    # COUNT(*): one number per group, counts all rows
by_sex.count()                   # non-null count of EVERY column per group
by_sex['total_bill'].count()     # non-null count of a single column per group

#    SELECT DAY,AVG(TIP),COUNT(*)
#    FROM TIPS
#    GROUP BY DAY;
# FIX: passing numpy callables (np.mean / np.size) to .agg() is deprecated in
# modern pandas, and the old dict form aggregated the grouping key 'day',
# which newer versions reject. Named aggregation with string aliases keeps
# the original output column names ('tip' = mean tip, 'day' = group size).
tips.groupby('day').agg(tip=('tip', 'mean'), day=('tip', 'size'))
    
#    SELECT SMOKER,DAY,COUNT(*),AVG(TIP)
#    FROM TIPS
#    GROUP BY SMOKER,DAY
# FIX: use string aliases instead of the deprecated np.mean / np.size
# callables. Grouping by two keys yields a MultiIndex; the result has a
# two-level column header ('tip' -> 'mean' / 'size').
tips.groupby(['smoker', 'day']).agg({'tip': ['mean', 'size']})



#   JOIN
# Two small frames with partially overlapping 'key' values so every join
# flavor produces a visibly different result.
df1 = pd.DataFrame({'key': list('ABCD'),
                    'value': np.random.randn(4)})
df2 = pd.DataFrame({'key': list('BDDE'),
                    'value': np.random.randn(4)})

# INNER JOIN
#    SELECT *
#    FROM DF1
#        INNER JOIN DF2
#            ON DF1.KEY = DF2.KEY
# 'inner' is merge's default: keep only keys present in both frames.
df1.merge(df2, on='key', how='inner')

# LEFT OUTER JOIN
#    SELECT *
#    FROM DF1
#      LEFT OUTER JOIN DF2
#        ON DF1.KEY = DF2.KEY
# Keep every df1 row; unmatched df2 columns become NaN.
df1.merge(df2, on='key', how='left')

# RIGHT JOIN
#    SELECT *
#    FROM DF1
#      RIGHT OUTER JOIN DF2
#        ON DF1.KEY = DF2.KEY
# Keep every df2 row; unmatched df1 columns become NaN.
df1.merge(df2, on='key', how='right')

# FULL JOIN
#    SELECT *
#    FROM DF1
#      FULL OUTER JOIN DF2
#        ON DF1.KEY = DF2.KEY
# Keep all keys from both sides.
df1.merge(df2, on='key', how='outer')

#UNION
# Two city rankings with one identical row ('Chicago', 1) to show the
# difference between UNION ALL and UNION.
df1 = pd.DataFrame({'city': ['Chicago', 'San Francisco', 'New York City'],
                    'rank': [1, 2, 3]})
df2 = pd.DataFrame({'city': ['Chicago', 'Boston', 'Los Angeles'],
                    'rank': [1, 4, 5]})

#    SELECT CITY,RANK
#    FROM DF1
#    UNION ALL
#    SELECT CITY,RANK
#    FROM DF2
# UNION ALL keeps duplicates -> plain concatenation.
pd.concat([df1, df2])

#    SELECT CITY,RANK
#    FROM DF1
#    UNION
#    SELECT CITY,RANK
#    FROM DF2
# UNION removes duplicate rows -> concat, then drop_duplicates().
pd.concat([df1, df2]).drop_duplicates()

#TOP N ROWS WITH OFFSET(MYSQL)

#    SELECT *
#    FROM TIPS
#    ORDER BY TIP DESC
#    LIMIT 10 OFFSET 5
# Take the OFFSET+LIMIT largest tips, then keep only the last LIMIT of
# them — i.e. rows 6..15 in descending tip order.
offset, limit = 5, 10
tips.nlargest(offset + limit, columns='tip').tail(limit)


#TOP N ROWS PER GROUP (Oracle's ROW_NUMBER() analytic function)

#    SELECT * FROM(
#            SELECT T.*,
#            ROW_NUMBER() OVER( PARTITION BY DAY ORDER BY TOTAL_BILL DESC) AS RN
#            FROM TIPS T
#            )
#    WHERE RN<3
#    ORDER BY DAY,RN
# Emulate ROW_NUMBER(): sort by total_bill descending, then number the rows
# within each 'day' group with cumcount() (+1 so numbering starts at 1, as
# in SQL). Note that .assign() aligns the new 'rn' Series back onto `tips`
# BY INDEX, so the sort inside the assignment does not reorder the frame.
(tips.assign(rn=tips.sort_values(['total_bill'],ascending=False)
                    .groupby(['day'])
                    .cumcount()+1)
        .query('rn<3')
        .sort_values(['day','rn']))

# Same result via rank(method='first'): ties are broken by order of
# appearance, which matches ROW_NUMBER()'s unique per-row numbering.
(tips.assign(rnk=tips.groupby(['day'])['total_bill']
                    .rank(method='first',ascending=False))
        .query('rnk<3')
        .sort_values(['day','rnk']))

#Oracle's RANK() analytic function
#    SELECT * FROM(
#            SELECT
#                T.*,
#                RANK() OVER(PARTITION BY SEX ORDER BY TIP) AS RNK
#            FROM TIPS T
#            WHERE TIP < 2
#            )
#    WHERE RNK < 3
#    ORDER BY SEX,RNK

# rank(method='min') reproduces SQL RANK(): tied values share the lowest
# rank and the following rank(s) are skipped.
# NOTE(review): the rank is computed on the FULL frame and then aligned
# (by index, via .assign) onto the tip<2 subset, whereas the SQL ranks
# AFTER its WHERE clause. For this ascending rank the kept rows get the
# same rank either way (every row ranked below a tip<2 row is itself <2),
# but confirm before reusing with a different filter/ordering.
(tips[tips['tip']<2]
    .assign(rnk_min=tips.groupby(['sex'])['tip']
                        .rank(method='min'))
    .query('rnk_min<3')
    .sort_values(['sex','rnk_min']))

#update

#    UPDATE TIPS
#    SET TIP = TIP*2
#    WHERE TIP<2
# Masked assignment: double the tip only on the rows matching the WHERE
# predicate; the frame is modified in place.
small_tip = tips['tip'] < 2
tips.loc[small_tip, 'tip'] = tips.loc[small_tip, 'tip'] * 2

#DELETE

#    DELETE FROM TIPS
#    WHERE TIP >9
# pandas has no in-place DELETE: select the complement of the predicate
# and rebind the name.
keep_mask = tips['tip'] <= 9
tips = tips.loc[keep_mask]
# (Removed: CSDN page boilerplate — comment widget / "red packet" payment text —
# accidentally captured when this snippet was scraped; it was never part of
# the script.)