Main Contents
- Using Spark Streaming together with Spark SQL
- Parsing JSON with the FastJsonUtil helper class
Environment
Spark 2.1.0
Scala 2.11.8
Kafka 0.10.0
Maven Dependencies
<repositories>
  <repository>
    <id>cloudera</id>
    <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
  </repository>
</repositories>
<dependencies>
  <!-- Required for StreamingContext / DStream -->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>2.1.0</version>
    <scope>provided</scope>
  </dependency>
  <!-- Required for SparkSession / DataFrame -->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.1.0</version>
    <scope>provided</scope>
  </dependency>
  <!-- Kafka 0.10 connector for the DStream API (KafkaUtils.createDirectStream below);
       not shipped with the Spark distribution, so it must be bundled with the app -->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
    <version>2.1.0</version>
  </dependency>
  <dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.44</version>
  </dependency>
</dependencies>
The FastJsonUtil Helper Class
package com.spark.kafka.utils;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.TypeReference;
import com.alibaba.fastjson.serializer.JSONLibDataFormatSerializer;
import com.alibaba.fastjson.serializer.SerializeConfig;
import com.alibaba.fastjson.serializer.SerializerFeature;
/**
* @author Xiaozhu
* @version V1.0
* @Title: FastJsonUtil.java
* @Package com.spark.kafka.utils
* @Description: fastjson helper class
*/
public class FastJsonUtil {
private static final SerializeConfig config;
static {
config = new SerializeConfig();
config.put(java.util.Date.class, new JSONLibDataFormatSerializer()); // json-lib-compatible date format
config.put(java.sql.Date.class, new JSONLibDataFormatSerializer()); // json-lib-compatible date format
}
private static final SerializerFeature[] features = {SerializerFeature.WriteMapNullValue, // emit null fields
SerializerFeature.WriteNullListAsEmpty, // emit null List fields as [] instead of null
SerializerFeature.WriteNullNumberAsZero, // emit null numeric fields as 0 instead of null
SerializerFeature.WriteNullBooleanAsFalse, // emit null Boolean fields as false instead of null
SerializerFeature.WriteNullStringAsEmpty, // emit null String fields as "" instead of null
SerializerFeature.PrettyFormat // pretty-print the JSON output
};
/**
* Serialize an object to a JSON string using the configured features.
*
* @param object the object to serialize
* @return the JSON string
*/
public static String toJSONString(Object object) {
return JSON.toJSONString(object, config, features);
}
/**
* Serialize an object to a JSON string without the extra features,
* keeping only the json-lib-compatible date format.
*
* @param object the object to serialize
* @return the JSON string
*/
public static String toJSONNoFeatures(Object object) {
return JSON.toJSONString(object, config);
}
/**
* Parse a JSON string into a JSONObject.
*
* @param jsonStr the JSON string
* @return the parsed JSONObject
*/
public static JSONObject toJsonObj(String jsonStr) {
return (JSONObject) JSON.parse(jsonStr);
}
/**
* Deserialize a JSON string into an object of the given class.
*
* @param jsonStr the JSON string
* @param clazz the target class
* @return the deserialized object
*/
public static <T> T toBean(String jsonStr, Class<T> clazz) {
return JSON.parseObject(jsonStr, clazz);
}
/**
* Parse a JSON array string into an Object[].
*
* @param jsonStr the JSON array string
* @return the parsed elements
*/
public static Object[] toArray(String jsonStr) {
return JSON.parseArray(jsonStr).toArray();
}
/**
* Parse a JSON array string into an Object[] of the given element class.
*
* @param jsonStr the JSON array string
* @param clazz the element class
* @return the parsed elements
*/
public static <T> Object[] toArray(String jsonStr, Class<T> clazz) {
return JSON.parseArray(jsonStr, clazz).toArray();
}
/**
* Parse a JSON array string into a List of the given element class.
*
* @param jsonStr the JSON array string
* @param clazz the element class
* @return the parsed list
*/
public static <T> List<T> toList(String jsonStr, Class<T> clazz) {
return JSON.parseArray(jsonStr, clazz);
}
/**
* Convert a bean to a JSONObject.
*
* @param bean the bean to convert
* @return the bean as a JSONObject
*/
public static JSONObject beanToJsonObj(Object bean) {
String jsonStr = JSON.toJSONString(bean);
JSONObject objectJson = (JSONObject) JSON.parse(jsonStr);
return objectJson;
}
/**
* Parse a JSON string into a Map.
*
* @param jsonStr the JSON string
* @return the parsed map
*/
public static Map<?, ?> stringToCollect(String jsonStr) {
Map<?, ?> map = JSONObject.parseObject(jsonStr);
return map;
}
/**
* Serialize a Map to a JSON string.
*
* @param map the map to serialize
* @return the JSON string
*/
public static String collectToString(Map<?, ?> map) {
String jsonStr = JSONObject.toJSONString(map);
return jsonStr;
}
/**
* Write an object's pretty-printed JSON to a file.
*
* @param t the object to write
* @param file the target file
* @throws IOException if writing fails
*/
public static <T> void writeJsonToFile(T t, File file) throws IOException {
String jsonStr = JSONObject.toJSONString(t, SerializerFeature.PrettyFormat);
// try-with-resources ensures the writer is closed even if write() throws
try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file)))) {
bw.write(jsonStr);
}
}
/**
* Write an object's pretty-printed JSON to a file.
*
* @param t the object to write
* @param filename the target file name
* @throws IOException if writing fails
*/
public static <T> void writeJsonToFile(T t, String filename) throws IOException {
writeJsonToFile(t, new File(filename));
}
/**
* Read JSON from a file and deserialize it into an object of the given class.
*
* @param cls the target class
* @param file the source file
* @return the deserialized object
* @throws IOException if reading fails
*/
public static <T> T readJsonFromFile(Class<T> cls, File file) throws IOException {
StringBuilder strBuilder = new StringBuilder();
// try-with-resources ensures the reader is closed even if readLine() throws
try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
String line;
while ((line = br.readLine()) != null) {
strBuilder.append(line);
}
}
return JSONObject.parseObject(strBuilder.toString(), cls);
}
/**
* Read JSON from a file and deserialize it into an object of the given class.
*
* @param cls the target class
* @param filename the source file name
* @return the deserialized object
* @throws IOException if reading fails
*/
public static <T> T readJsonFromFile(Class<T> cls, String filename) throws IOException {
return readJsonFromFile(cls, new File(filename));
}
/**
* Read JSON from a file and deserialize it via a TypeReference (for generic types).
*
* @param typeReference the target type
* @param file the source file
* @return the deserialized object
* @throws IOException if reading fails
*/
public static <T> T readJsonFromFile(TypeReference<T> typeReference, File file) throws IOException {
StringBuilder strBuilder = new StringBuilder();
// try-with-resources ensures the reader is closed even if readLine() throws
try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
String line;
while ((line = br.readLine()) != null) {
strBuilder.append(line);
}
}
return JSONObject.parseObject(strBuilder.toString(), typeReference);
}
/**
* Read JSON from a file and deserialize it via a TypeReference (for generic types).
*
* @param typeReference the target type
* @param filename the source file name
* @return the deserialized object
* @throws IOException if reading fails
*/
public static <T> T readJsonFromFile(TypeReference<T> typeReference, String filename) throws IOException {
return readJsonFromFile(typeReference, new File(filename));
}
}
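For a quick sanity check, here is a minimal sketch of calling FastJsonUtil from Scala; the sample JSON string and field values are made up for illustration:
import com.spark.kafka.utils.FastJsonUtil

object FastJsonUtilDemo {
  def main(args: Array[String]): Unit = {
    val json = """{"ip":"10.0.0.1","evt":"click"}"""
    // JSON string -> JSONObject
    val obj = FastJsonUtil.toJsonObj(json)
    println(obj.getString("ip")) // 10.0.0.1
    // JSON string -> java.util.Map
    val map = FastJsonUtil.stringToCollect(json)
    println(map.get("evt")) // click
    // Map -> JSON string (round trip)
    println(FastJsonUtil.collectToString(map))
  }
}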
LieNiGwSparkStreamingToSql Code
package com.spark.kafka.loganalysis
import com.spark.kafka.domain.{LieniLog, LieniMessageLog}
import com.spark.kafka.utils.FastJsonUtil
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
object LieNiGwSparkStreamingToSql extends Serializable {
def main(args: Array[String]) = {
val conf = new SparkConf()
.setAppName("LieNiGwSparkStreamingToSql")
.setMaster("local[2]")
val ssc: StreamingContext = new StreamingContext(conf, Seconds(5))
ssc.checkpoint("D:/1/checkpoint")
val topics = Array("topic_all").toSet
val brokers: String = "hadoop-02:9092,hadoop-03:9092,hadoop-04:9092"
val kafkaParams = Map[String, Object](
"bootstrap.servers" -> brokers,
"key.deserializer" -> classOf[StringDeserializer],
"value.deserializer" -> classOf[StringDeserializer],
"group.id" -> "qq",
"auto.offset.reset" -> "latest", //latest earliest
"enable.auto.commit" -> (false: java.lang.Boolean)
)
/**
* Create the Kafka direct stream
*/
val stream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(ssc,
LocationStrategies.PreferConsistent,
ConsumerStrategies.Subscribe[String, String](topics, kafkaParams))
/**
* Parse the JSON with the FastJsonUtil helper class.
* In Scala, the last expression of a block is its return value.
*/
val jsonData: DStream[String] = stream.map(x => {
val lieNiLog = FastJsonUtil.toBean(x.value(), classOf[LieniLog])
val lieNiMessageLog = FastJsonUtil.toBean(lieNiLog.getMessage, classOf[LieniMessageLog])
(lieNiMessageLog.getIp + "," + lieNiMessageLog.getEvt)
})
/**
* Process each micro-batch of the DStream with foreachRDD
*/
jsonData.foreachRDD(rdd => {
// Get the singleton instance of SparkSession
val spark = SparkSession.builder()
.config(rdd.sparkContext.getConf)
.config("spark.default.parallelism", "1")
.config("spark.sql.shuffle.partitions", "1")
.getOrCreate()
import spark.implicits._ // needed for the RDD-to-DataFrame conversion (toDF)
// Convert the RDD[String] of "ip,evt" lines into a DataFrame of Record(ip, evt)
val recordsDataFrame = rdd.map(w => w.split(",")).map(w => Record(w(0), w(1))).toDF()
// Register the DataFrame as a temporary view ("table" is a SQL keyword, so use a safer name)
recordsDataFrame.createOrReplaceTempView("records")
// Query the view with SQL and print the result
val resultDataFrame = spark.sql("select * from records")
resultDataFrame.show()
})
ssc.start()
ssc.awaitTermination()
}
case class Record(ip: String, evt: String)
}
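The LieniLog and LieniMessageLog domain classes from com.spark.kafka.domain are not shown in this post. Below is an assumed sketch of their shape only; fastjson binds through JavaBean getters/setters, which @BeanProperty generates on Scala fields, and the real classes may well differ:
import scala.beans.BeanProperty

// Assumed shape: the outer log carries the inner event as a JSON string,
// which the job parses a second time with FastJsonUtil.toBean.
class LieniLog {
  @BeanProperty var message: String = _
}

class LieniMessageLog {
  @BeanProperty var ip: String = _
  @BeanProperty var evt: String = _
}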
The code has been verified and runs.
Thanks for reading; I hope it helps, and thanks for your support!
The FastJsonUtil class was found online and lightly modified; I've forgotten which blogger it originally came from. =-=