pyspark到mysql

1 先上整体代码

import sys
sys.path.append('/data/gtair/tools/DWH')
import pandas as pd
from sqlalchemy import create_engine
engine = create_engine('mysql+pymysql://edu_ws:QbvkrBHKXO@chb2gc-pd-edu-dws.mysql.rds.aliyuncs.com:3306/edu_platform')

df_spark =ss.sql("""
select 

--行为项
pdate as statistic,
---教学行为-核心行为:备课
case when fngroup ='202002' then '编辑课程标准'
when fngroup in ('209001' , '209002' , '209003' , '209004',  '209005' , '209006' , '209007'  ,'209008')   then '上传课程资源'
when fngroup in ('202003', '203003') then '新建/编排教案'
---教学行为-核心行为:课堂授课
when fncode in  ('2070010007','2070010008','2070010009','2070010010') then '浏览前导知识点'
when fncode in  ('2070010014','2070010015','2070010016','2070010017','2070010018','2070010019','2070010020','2070010021',
                 '2070010022','2070010023','2070010024','2070010025','2070010026','2070010027','2070010028') then '执行教案'
when fncode in  ('2070010001','2070010002','2070010003','2070010004','2070010005','2070010006','2070010011','2070010012') then '组织课堂互动'
when fncode ='2070010013' then '使用工具软件'
---教学行为-核心行为:课下辅导
when fncode in  ('2070010150','2070010151','2070010152','2070010153') then '发布课前预习任务'
when fncode in  ('2070010250','2070010251','2070010252','2070010253') then '发布课后作业任务'
when fncode in  ('2070010154','2070010254','2070010354') then '查看学习结果'
when fncode in  ('2070010900','2070010901','2070010902','2070010903','2070010904','2070010905','2070010906','2070010907','2070010908','2070010909',
                 '2070030900','2070030901','2070030902','2070030903','2070030904','2070030905','2070030906','2070030907','2070030908','2070030909') then '批阅作业'
---教学行为-核心行为:考评行为
--'浏览班级成绩'
--'导出考评结果'
---管理行为-核心行为:教务管理
when fngroup ='208001' then '组建班级'
when fngroup ='208002' then '导入学生'
---管理行为核心行为:教学质量分析
--'浏览学情分析、查看课堂记录'
---非业务行为-核心行为:登录等
when fncode ='2010010000' then '登录等' 
else '未知'
end as action_list,

--租户id,院校id,使用课程id,用户id,子行为集合id
get_json_object(query,'$.tenantId') as tenant_id, 
get_json_object(query,'$.schoolId') as colg_idcd, 
get_json_object(query,'$.courseId') as course_id,  
get_json_object(query,'$.userId')   as user_id,  
get_json_object(query,'$.opSetId')  as opset_id, 
--求时间差(单位为秒‘1631557318’)
round((sort_array(collect_set(unix_timestamp(trigertime,'yyyy-MM-dd HH:mm:ss')))[size(sort_array(collect_set(unix_timestamp(trigertime,'yyyy-MM-dd HH:mm:ss'))))-1]-
sort_array(collect_set(unix_timestamp(trigertime,'yyyy-MM-dd HH:mm:ss')))[0])/60,2) as duration_minutes ,
count(*) as pv
from pub.dm_pub_view_userlog_gux_func_add  where query <>'' and pdate='20210914' and pcode ='81978'  and get_json_object(query,'$.utype') ='teacher'

group by pdate,
---教学行为-核心行为:备课
case when fngroup ='202002' then '编辑课程标准'
when fngroup in ('209001' , '209002' , '209003' , '209004',  '209005' , '209006' , '209007'  ,'209008')   then '上传课程资源'
when fngroup in ('202003', '203003') then '新建/编排教案'
---教学行为-核心行为:课堂授课
when fncode in  ('2070010007','2070010008','2070010009','2070010010') then '浏览前导知识点'
when fncode in  ('2070010014','2070010015','2070010016','2070010017','2070010018','2070010019','2070010020','2070010021',
                 '2070010022','2070010023','2070010024','2070010025','2070010026','2070010027','2070010028') then '执行教案'
when fncode in  ('2070010001','2070010002','2070010003','2070010004','2070010005','2070010006','2070010011','2070010012') then '组织课堂互动'
when fncode ='2070010013' then '使用工具软件'
---教学行为-核心行为:课下辅导
when fncode in  ('2070010150','2070010151','2070010152','2070010153') then '发布课前预习任务'
when fncode in  ('2070010250','2070010251','2070010252','2070010253') then '发布课后作业任务'
when fncode in  ('2070010154','2070010254','2070010354') then '查看学习结果'
when fncode in  ('2070010900','2070010901','2070010902','2070010903','2070010904','2070010905','2070010906','2070010907','2070010908','2070010909',
                 '2070030900','2070030901','2070030902','2070030903','2070030904','2070030905','2070030906','2070030907','2070030908','2070030909') then '批阅作业'
---教学行为-核心行为:考评行为
--'浏览班级成绩'
--'导出考评结果'   
---管理行为-核心行为:教务管理
when fngroup ='208001' then '组建班级'
when fngroup ='208002' then '导入学生'
---管理行为核心行为:教学质量分析
--'浏览学情分析、查看课堂记录'
---非业务行为-核心行为:登录等
when fncode ='2010010000' then '登录等'
else '未知'
end,
get_json_object(query,'$.tenantId'), 
get_json_object(query,'$.schoolId'), 
get_json_object(query,'$.courseId'),  
get_json_object(query,'$.userId')  ,
get_json_object(query,'$.opSetId') 
--limit 220 

""").toPandas()
                 
          
pd.io.sql.to_sql(df_spark, 'ads_jxpt_teacher_actions', engine,if_exists='append',index=False)
print('done')

2 hql应用数组
2.1 本sql是解决多行数据中最大时间与最小时间的时间差是多少分钟。原数据样本如下

id	type	version	user_id	trigertime	partition
1	Collector_WebApp	0.3.23	--502c636d3713	2021-09-12 10:10:20.531	20210912
2	Collector_WebApp	0.3.23	--502c636d3713	2021-09-12 10:10:22.531	20210912
3	Collector_WebApp	0.3.23	--502c636d3713	2021-09-12 10:10:23.531	20210912
4	Collector_WebApp	0.3.23	--502c636d3713	2021-09-12 10:10:24.531	20210912
5	Collector_WebApp	0.3.23	--502c636d3713	2021-09-12 10:10:30.531	20210912

2.2 sql思路:
a 转换trigertime字段类型为unix_timestamp,名为trigertime。
b collect_set(trigertime) 行转列,这里也可以用collect_list。此时多个trigertime 变为无序的数组,名为trigertime。
c sort_array(trigertime)使之从小到大有序排列。依然是数组属性。名为trigertime。
d 此时数组中 trigertime[size(trigertime-1]是最大;trigertime[0]是最小。相减即为时间差。/60 转变单位为分钟即可。

round((sort_array(collect_set(unix_timestamp(trigertime,'yyyy-MM-dd HH:mm:ss')))[size(sort_array(collect_set(unix_timestamp(trigertime,'yyyy-MM-dd HH:mm:ss'))))-1]-
sort_array(collect_set(unix_timestamp(trigertime,'yyyy-MM-dd HH:mm:ss')))[0])/60,2) as duration_minutes 

3 df写入mysql

pd.io.sql.to_sql(df_spark, 'ads_jxpt_teacher_actions', engine,if_exists='append',index=False)
print('done')
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

独狐游清湖

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值