study SQL by using pyspark(part one)

#%%
from pyspark import SparkConf,SparkContext
from pyspark import sql
from pyspark.sql import SparkSession
spark = SparkSession.builder.config(conf=SparkConf()).getOrCreate()
sc = spark.sparkContext

# %%
rdd1 = sc.parallelize([(22,'xiaobao'),(23,'chuyu'),(23,'caffe')])
df1 = spark.createDataFrame(rdd1,['age','name'])
# %%
df1.createOrReplaceTempView("mytable")
df1.show()
# %%
spark.sql("select * from mytable where name='xiaobao'").show()
# %%
jdbcDF = spark.read.format("jdbc")\
    .option("driver","com.mysql.jdbc.Driver")\
    .option("url","jdbc:mysql://***.***.**.***:3306/Gadaite")\
    .option("dbtable","audi")\
    .option("user","root")\
    .option("password","*******")\
    .load()
# %%
jdbcDF.createOrReplaceTempView("temptable")
spark.sql("select * from temptable").show(10)
# %%
#所有奥迪A1的数量
spark.sql("select * from temptable").show()
# %%
spark.sql("select * from temptable where engineSize = 2.0").show()
#%%
spark.sql("select * from temptable where year=2018").show()
# %%
spark.sql("select * from temptable where model='A4'").show()
# %%
spark.sql("select count(*) from temptable where trim(model)='A4'").show()

# %%
spark.sql("select * from temptable where trim(model)='A4'").show()

# %%
spark.sql("""
    select * 
    from temptable 
    where trim(model)='A4' 
    order by 
    price limit 15
    """).show()
# %%

#判断是否区分大小写
#结果区分大小写
spark.sql("""
    select * from temptable 
    where trim(transmission)='Manual' 
    limit 15
    """).show()
spark.sql("""
    select * from temptable 
    where trim(transmission)='manual' 
    limit 15
    """).show()
# %%
#-------%%%通配符
spark.sql("""
    select * from temptable 
    where fuelType like 'Pet%'
    """).show()
# %%
spark.sql("""
    select * from temptable 
    where mpg like '%1.4%'
    """).show()
# %%
spark.sql("""
    select * from temptable 
    where not engineSize like '%2.0%'
    """).show()
# %%
spark.sql("""
    select * from temptable 
    where ((engineSize like '%2.0%') 
    and (fuelType = 'Petrol'))
    """).show()
# %%
#组合计算
#此处明显验证出model前面有空格
spark.sql("""
    select Concat(transmission,model) 
    from temptable 
    order by price desc 
    limit 10
    """).show()
# %%
spark.sql("""
    select Concat(transmission,ltrim(model)) 
    from temptable 
    order by price desc 
    limit 10
    """).show()
#%%
spark.sql("""
    select Concat(transmission,'------',ltrim(model)) 
    from temptable 
    order by price desc 
    limit 10
    """).show()
# %%
spark.sql("""
    select Concat(transmission,'------',ltrim(model)) 
    as car_info 
    from temptable 
    order by price desc 
    limit 10
    """).show()
# %%
spark.sql("""
    select model,year,price,mileage,engineSize, 
    price / engineSize as per_engine_price 
    from temptable 
    order by price desc 
    limit 10
    """).show()
# %%
#测试计算
spark.sql("select 3*2,trim(' riven  ')").show()
# %%
"""
    以下不可用,会报错
    spark.sql("select Curdate()")
"""
# %%
spark.sql("select * from temptable").show()
# %%
#转换成大写字符串
spark.sql("""select  
    transmission ,fuelType,
    upper(transmission) as TRANSMISSION,
    UPPER(fuelType) AS FUELTYPE  
    from temptable 
    LIMIT 15 
    """).show()
# %%
#转换成小写字符串
spark.sql("""select  
    transmission ,fuelType,
    lower(transmission) as transmission,
    lower(fuelType) AS fueltype  
    from temptable 
    LIMIT 15 
    """).show()
# %%
#返回字符串的长度
spark.sql("""select  
    transmission ,fuelType,
    length(transmission) as transmission_lens,
    length(fuelType) AS fuelType_lens  
    from temptable 
    LIMIT 15 
    """).show()
#%%
#返回字符串左起的的n个字符
spark.sql("""select  
    transmission ,fuelType,
    left(transmission,2) as transmission_two,
    left(fuelType,3) AS fueltype_3  
    from temptable 
    LIMIT 15 
    """).show()
# %%
#返回字符串右起的n个字符
spark.sql("""select  
    transmission ,fuelType,
    right(transmission,3) as transmission_two,
    right(fuelType,3) AS fueltype_3  
    from temptable 
    LIMIT 15 
    """).show()
#%%
#提取字符串的组成部分,类似于python的切片操作
#substr(a,b,c),y也可用substring
spark.sql("""select  
    transmission ,fuelType,
    substr(transmission,2,2) as transmission_res,
    substr(fuelType,-2,3) AS fueltype_res   
    from temptable 
    LIMIT 15 
    """).show()
spark.sql("""select  
    transmission ,fuelType,
    substring(transmission,2,2) as transmission_res,
    substring(fuelType,-2,3) AS fueltype_res   
    from temptable 
    LIMIT 15 
    """).show()
# %%
# #返回字符串长度的其它函数写法,pyspark不支持
# spark.sql("""select  
#     transmission ,fuelType,
#     len(transmission) as transmission_lens,
#     len(fuelType) AS fuelType_lens  
#     from temptable 
#     LIMIT 15 
#     """).show()
# %%
#soundex描述其语音表示字母数字模式的算法
spark.sql("""select  
    fuelType 
    from temptable 
    where soundex(fuelType) = soundex("Petol") 
    LIMIT 15 
    """).show()
# %%
spark.sql("""
    select * from temptable limit 10
    """).show()
#%%
from pyspark.sql.functions import *
#%%
#日期和时间处理函数
spark.sql("""
    select * from temptable limit 5
    """).show()
# %%
#换个带时间的表
jdbcdf = spark.read.format("jdbc")\
    .option("driver","com.mysql.jdbc.Driver")\
    .option("url","jdbc:mysql://***.***.**.***:3306/Gadaite")\
    .option("dbtable","footbshootouts")\
    .option("user","root")\
    .option("password","*******")\
    .load()
# %%
jdbcdf.createOrReplaceTempView("mytable")
# %%
spark.sql("""select 
    * from mytable limit 20   
    """).show()
# %%
#字段需要时date,不是varchar
# spark.sql("""select 
#     winner from mytable 
#     where datepart(yy,date)=1973
#     """).show()

# %%
spark.sql("""
    select winner,away_team,date,home_team,
    upper(left(away_team,2) || left(winner,3)) 
    as winer_info from mytable 
    limit 15
    """).show()
# %%
spark.sql("""
    select winner,away_team,
    left(away_team,2) || left(winner,3) as winer_info 
    from mytable 
    limit 15
    """).show()
# %%
spark.sql("""
    select winner,away_team,
    left(away_team,2) as winer_info 
    from mytable 
    limit 15
    """).show()
# %%
spark.sql("""
    select winner,away_team,
    left(winner,3) as winer_info 
    from mytable 
    limit 15
    """).show()
# %%
spark.sql("""
    select winner,away_team,
    (away_team || winner) as winer_info 
    from mytable 
    limit 15
    """).show()
# %%
spark.sql("""
    select winner,away_team,
    (away_team || winner) as winer_info 
    from mytable 
    limit 15
    """).show()
# %%
spark.sql("select count(*) as count from mytable").show()
# %%
spark.sql("""
    select winner,away_team,
    (away_team || winner) as winer_info 
    from mytable 
    limit 15
    """).show()
# %%
spark.sql("""
    select winner,away_team,date,home_team,
    upper(left(away_team,2) || "---" || left(winner,3)) 
    as winer_info from mytable 
    limit 15
    """).show()
# %%

就不截图了,仅作记录

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值