KSQL实战之music_demo

--The STREAM and TABLE names are prefixed with `ksql_` to enable you to run this demo
--concurrently with the Kafka Streams Music Demo java application, to avoid conflicting names


--The play-events Kafka topic is a feed of song plays, generated by KafkaMusicExampleDriver
--We need to identify fields in schema, even with Avro, because this is a script (https://github.com/confluentinc/ksql/issues/1031)
CREATE STREAM ksql_playevents (SONG_ID BIGINT, DURATION BIGINT) WITH (KAFKA_TOPIC='play-events', VALUE_FORMAT='AVRO');

--Filter the play events to only accept events where the duration is >= 30 seconds
CREATE STREAM ksql_playevents_min_duration AS SELECT * FROM ksql_playevents WHERE DURATION > 30000;

--The song-feed Kafka topic contains all of the songs available in the streaming service, generated by KafkaMusicExampleDriver
--The messages in this Kafka topic have keys of type Long but TABLE requires keys of type String
--The following three KSQL commands result in a TABLE with Long keys
CREATE STREAM ksql_songfeed (ID BIGINT, ALBUM VARCHAR, ARTIST VARCHAR, NAME VARCHAR, GENRE VARCHAR) WITH (KAFKA_TOPIC='song-feed', VALUE_FORMAT='AVRO');
CREATE STREAM ksql_songfeedwithkey WITH (KAFKA_TOPIC='KSQL_SONGFEEDWITHKEY', VALUE_FORMAT='AVRO') AS SELECT CAST(ID AS STRING) as ID, ALBUM, ARTIST, NAME, GENRE FROM ksql_songfeed PARTITION BY ID;
CREATE TABLE ksql_songtable (ID VARCHAR, ALBUM VARCHAR, ARTIST VARCHAR, NAME VARCHAR, GENRE VARCHAR) WITH (KAFKA_TOPIC='KSQL_SONGFEEDWITHKEY', VALUE_FORMAT='Avro', KEY='ID');

--Join the plays with song as we will use it later for charting
--Also create a fixed key `KEYCOL` for global view across multiple partitions (https://github.com/confluentinc/ksql/issues/1053)
CREATE STREAM ksql_songplays AS SELECT plays.SONG_ID AS ID, ALBUM, ARTIST, NAME, GENRE, DURATION, 1 AS KEYCOL FROM ksql_playevents_min_duration plays LEFT JOIN ksql_songtable songtable ON plays.SONG_ID = songtable.ID;

--Track song play counts in 30 second intervals
CREATE TABLE ksql_songplaycounts30 AS SELECT ID, NAME, GENRE, KEYCOL, COUNT(*) AS COUNT FROM ksql_songplays WINDOW TUMBLING (size 30 second) GROUP BY ID, NAME, GENRE, KEYCOL;
--Convert TABLE to STREAM
CREATE STREAM ksql_songplaycounts30stream (ID BIGINT, NAME VARCHAR, GENRE VARCHAR, KEYCOL INT, COUNT BIGINT) WITH (kafka_topic='KSQL_SONGPLAYCOUNTS30', value_format='AVRO');
--Get all data into a STREAM with a single partition, using the `KEYCOL` field described earlier
CREATE STREAM ksql_songplaycounts30streampart AS SELECT * FROM ksql_songplaycounts30stream WHERE ROWTIME is not null PARTITION BY KEYCOL;

--Track song play counts for all time
CREATE TABLE ksql_songplaycounts AS SELECT ID, NAME, GENRE, KEYCOL, COUNT(*) AS COUNT FROM ksql_songplays GROUP BY ID, NAME, GENRE, KEYCOL;
--Convert TABLE to STREAM
CREATE STREAM ksql_songplaycountsstream (ID BIGINT, NAME VARCHAR, GENRE VARCHAR, KEYCOL INT, COUNT BIGINT) WITH (kafka_topic='KSQL_SONGPLAYCOUNTS', value_format='AVRO');
--Get all data into a STREAM with a single partition, using the `KEYCOL` field described earlier
CREATE STREAM ksql_songplaycountsstreampart AS SELECT * FROM ksql_songplaycountsstream WHERE ROWTIME is not null PARTITION BY KEYCOL;

--Top Five song counts for all time based on ksql_songplaycountsstreampart
--At this time, `TOPK` does not support sorting by one column and selecting the value of another column (https://github.com/confluentinc/ksql/issues/403)
--So the results are just counts but not names of the songs associated with the counts
CREATE TABLE ksql_top5 AS SELECT KEYCOL, TOPK(COUNT,5) FROM ksql_songplaycountsstreampart GROUP BY KEYCOL;
--Top Five songs for each genre based on each WINDOW of ksql_songplaycounts
CREATE TABLE ksql_top5bygenre AS SELECT GENRE, TOPK(COUNT,5) FROM ksql_songplaycountsstreampart GROUP BY GENRE;
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值