Kafka Streams: apply business processing to the data in one topic and write it to another topic (splitting out the category relationships for each id)

1. Write the Flume configuration file

vi eventsattend-flume-kafka.conf

eventattend.sources=eventAttendSource
eventattend.channels=eventAttendChannel
eventattend.sinks=eventAttendSink

eventattend.sources.eventAttendSource.type=spooldir
eventattend.sources.eventAttendSource.spoolDir=/opt/flume160/conf/jobkb09/dataSourceFile/eventAttend
eventattend.sources.eventAttendSource.deserializer=LINE
eventattend.sources.eventAttendSource.deserializer.maxLineLength=320000
eventattend.sources.eventAttendSource.includePattern=eventAttend_[0-9]{4}-[0-9]{2}-[0-9]{2}.csv
eventattend.sources.eventAttendSource.interceptors=head_filter
eventattend.sources.eventAttendSource.interceptors.head_filter.type=regex_filter
eventattend.sources.eventAttendSource.interceptors.head_filter.regex=^event,yes*
eventattend.sources.eventAttendSource.interceptors.head_filter.excludeEvents=true

eventattend.channels.eventAttendChannel.type=file
eventattend.channels.eventAttendChannel.checkpointDir=/opt/flume160/conf/jobkb09/checkPointFile/eventAttend
eventattend.channels.eventAttendChannel.dataDirs=/opt/flume160/conf/jobkb09/dataChannelFile/eventAttend

eventattend.sinks.eventAttendSink.type=org.apache.flume.sink.kafka.KafkaSink
eventattend.sinks.eventAttendSink.batchSize=640
eventattend.sinks.eventAttendSink.brokerList=192.168.116.60:9092
eventattend.sinks.eventAttendSink.topic=eventAttend_raw

eventattend.sources.eventAttendSource.channels=eventAttendChannel
eventattend.sinks.eventAttendSink.channel=eventAttendChannel

For how to run the agent, see this blog post: https://blog.csdn.net/weixin_43434273/article/details/110880886
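For reference, starting the agent typically looks like the sketch below (run from the Flume home directory; the agent name must match the eventattend prefix used in the config, and the conf-file path here is an assumption that may differ in your setup):

./bin/flume-ng agent --name eventattend --conf ./conf/ --conf-file ./conf/jobkb09/eventsattend-flume-kafka.conf -Dflume.root.logger=INFO,console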

2. Use Kafka Streams to process the messages in one topic and write them to another topic for consumption (splitting out the category relationships for each id)

2.1 The records are consumed in the format 12112,1211,yes
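For illustration (with made-up ids), a raw record in eventAttend_raw has the layout event,yes,maybe,invited,no, where each of the last four columns is a space-separated list of user ids. A record such as

123,111 222,333,444,555

would be split into one output record per attendee:

123,111,yes
123,222,yes
123,333,maybe
123,444,invited
123,555,no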

2.2 Code:

package cn.bright.kafka;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.*;
import org.apache.kafka.streams.kstream.KStream;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;

/**
 * @Author Bright
 * @Date 2020/12/18
 * @Description
 */
public class EventAttendStream {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG,"eventattendapp1");
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.116.60:9092");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG,Serdes.String().getClass());

        StreamsBuilder builder = new StreamsBuilder();
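        // Read the raw records that the Flume Kafka sink writes to the eventAttend_raw topic.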
        KStream<Object, Object> ear = builder.stream("eventAttend_raw");

        // Each raw value has the format: event,yes,maybe,invited,no,
        // where the last four columns are space-separated lists of user ids.
        KStream<String, String> eventKStream = ear.flatMap((k, v) -> {
            System.out.println(k + " " + v);
            // The checks below skip empty columns, e.g. records like "121," that carry no attendees.
            String[] split = v.toString().split(",");
            List<KeyValue<String, String>> list = new ArrayList<>();
            if (split.length >= 2 && split[1].trim().length() > 0) {
                String[] yes = split[1].split("\\s+");
                for (String y : yes) {
                    list.add(new KeyValue<String, String>(null, split[0] + "," + y + ",yes"));          // 12112,1211,yes
                }
            }
            if (split.length >= 3 && split[2].trim().length() > 0) {
                String[] maybe = split[2].split("\\s+");
                for (String mb : maybe) {
                    list.add(new KeyValue<String, String>(null, split[0] + "," + mb + ",maybe"));       // 12112,1211,maybe
                }
            }
            if (split.length >= 4 && split[3].trim().length() > 0) {
                String[] invited = split[3].split("\\s+");
                for (String inv : invited) {
                    list.add(new KeyValue<String, String>(null, split[0] + "," + inv + ",invited"));    // 12112,1211,invited
                }
            }
            if (split.length >= 5 && split[4].trim().length() > 0) {
                String[] no = split[4].split("\\s+");
                for (String n : no) {
                    list.add(new KeyValue<String, String>(null, split[0] + "," + n + ",no"));           // 12112,1211,no
                }
            }

            return list;
        });

        eventKStream.to("event_attendees");
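        // Write the split records to event_attendees; the topology is built and started below,
        // and the shutdown hook closes the streams application cleanly on JVM exit.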

        final Topology topo = builder.build();
        final KafkaStreams streams = new KafkaStreams(topo, prop);

        final CountDownLatch latch = new CountDownLatch(1);
        Runtime.getRuntime().addShutdownHook(new Thread("stream"){

            @Override
            public void run() {
                streams.close();
                latch.countDown();
            }
        });
        streams.start();
        try {
            latch.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        System.exit(0);
    }
}
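Once the stream application is running, you can check the target topic with the Kafka console consumer (a quick verification sketch; adjust the script path to your Kafka installation):

kafka-console-consumer.sh --bootstrap-server 192.168.116.60:9092 --topic event_attendees --from-beginning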
