Main Contents
- Using Spark Streaming together with Spark SQL
- Parsing JSON with the FastJsonUtil helper class
Environment
Spark 2.1.0
Scala 2.11.8
Kafka 0.10.0
Maven Dependencies
<repositories>
  <repository>
    <id>cloudera</id>
    <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
  </repository>
</repositories>
<dependencies>
  <!-- Required for StreamingContext / DStream -->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>2.1.0</version>
    <scope>provided</scope>
  </dependency>
  <!-- Required for SparkSession / DataFrame -->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.1.0</version>
    <scope>provided</scope>
  </dependency>
  <!-- Kafka 0.10 connector for the DStream API (KafkaUtils.createDirectStream below);
       not shipped with the Spark distribution, so it must be bundled with the app -->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
    <version>2.1.0</version>
  </dependency>
  <dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.44</version>
  </dependency>
</dependencies>
The FastJsonUtil Helper Class
package com.spark.kafka.utils;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.TypeReference;
import com.alibaba.fastjson.serializer.JSONLibDataFormatSerializer;
import com.alibaba.fastjson.serializer.SerializeConfig;
import com.alibaba.fastjson.serializer.SerializerFeature;
/**
* @author Xiaozhu
* @version V1.0
* @Title: FastJsonUtil.java
* @Package com.spark.kafka.utils
* @Description: fastjson helper class
*/
public class FastJsonUtil {
private static final SerializeConfig config;
static {
config = new SerializeConfig();
config.put(java.util.Date.class, new JSONLibDataFormatSerializer()); // json-lib-compatible date format
config.put(java.sql.Date.class, new JSONLibDataFormatSerializer()); // json-lib-compatible date format
}
private static final SerializerFeature[] features = {SerializerFeature.WriteMapNullValue, // emit null fields
SerializerFeature.WriteNullListAsEmpty, // emit null List fields as [] instead of null
SerializerFeature.WriteNullNumberAsZero, // emit null numeric fields as 0 instead of null
SerializerFeature.WriteNullBooleanAsFalse, // emit null Boolean fields as false instead of null
SerializerFeature.WriteNullStringAsEmpty, // emit null String fields as "" instead of null
SerializerFeature.PrettyFormat // pretty-print the JSON output
};
/**
* Serialize an object to a JSON string using the configured features.
*
* @param object the object to serialize
* @return the JSON string
*/
public static String toJSONString(Object object) {
return JSON.toJSONString(object, config, features);
}
/**
* Serialize an object to a JSON string without the extra features,
* keeping only the json-lib-compatible date format.
*
* @param object the object to serialize
* @return the JSON string
*/
public static String toJSONNoFeatures(Object object) {
return JSON.toJSONString(object, config);
}
/**
* Parse a JSON string into a JSONObject.
*
* @param jsonStr the JSON string
* @return the parsed JSONObject
*/
public static JSONObject toJsonObj(String jsonStr) {
return (JSONObject) JSON.parse(jsonStr);
}
/**
* Deserialize a JSON string into an object of the given class.
*
* @param jsonStr the JSON string
* @param clazz the target class
* @return the deserialized object
*/
public static <T> T toBean(String jsonStr, Class<T> clazz) {
return JSON.parseObject(jsonStr, clazz);
}
/**
* Parse a JSON array string into an Object[].
*
* @param jsonStr the JSON array string
* @return the parsed elements
*/
public static Object[] toArray(String jsonStr) {
return JSON.parseArray(jsonStr).toArray();
}
/**
* Parse a JSON array string into an Object[] of the given element class.
*
* @param jsonStr the JSON array string
* @param clazz the element class
* @return the parsed elements
*/
public static <T> Object[] toArray(String jsonStr, Class<T> clazz) {
return JSON.parseArray(jsonStr, clazz).toArray();
}
/**
* Parse a JSON array string into a List of the given element class.
*
* @param jsonStr the JSON array string
* @param clazz the element class
* @return the parsed list
*/
public static <T> List<T> toList(String jsonStr, Class<T> clazz) {
return JSON.parseArray(jsonStr, clazz);
}
/**
* Convert a bean to a JSONObject.
*
* @param bean the bean to convert
* @return the bean as a JSONObject
*/
public static JSONObject beanToJsonObj(Object bean) {
String jsonStr = JSON.toJSONString(bean);
JSONObject objectJson = (JSONObject) JSON.parse(jsonStr);
return objectJson;
}
/**
* Parse a JSON string into a Map.
*
* @param jsonStr the JSON string
* @return the parsed map
*/
public static Map<?, ?> stringToCollect(String jsonStr) {
Map<?, ?> map = JSONObject.parseObject(jsonStr);
return map;
}
/**
* Serialize a Map to a JSON string.
*
* @param map the map to serialize
* @return the JSON string
*/
public static String collectToString(Map<?, ?> map) {
String jsonStr = JSONObject.toJSONString(map);
return jsonStr;
}
/**
* Write an object's pretty-printed JSON to a file.
*
* @param t the object to write
* @param file the target file
* @throws IOException if writing fails
*/
public static <T> void writeJsonToFile(T t, File file) throws IOException {
String jsonStr = JSONObject.toJSONString(t, SerializerFeature.PrettyFormat);
// try-with-resources ensures the writer is closed even if write() throws
try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file)))) {
bw.write(jsonStr);
}
}
/**
* Write an object's pretty-printed JSON to a file.
*
* @param t the object to write
* @param filename the target file name
* @throws IOException if writing fails
*/
public static <T> void writeJsonToFile(T t, String filename) throws IOException {
writeJsonToFile(t, new File(filename));
}
/**
* Read JSON from a file and deserialize it into an object of the given class.
*
* @param cls the target class
* @param file the source file
* @return the deserialized object
* @throws IOException if reading fails
*/
public static <T> T readJsonFromFile(Class<T> cls, File file) throws IOException {
StringBuilder strBuilder = new StringBuilder();
// try-with-resources ensures the reader is closed even if readLine() throws
try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
String line;
while ((line = br.readLine()) != null) {
strBuilder.append(line);
}
}
return JSONObject.parseObject(strBuilder.toString(), cls);
}
/**
* Read JSON from a file and deserialize it into an object of the given class.
*
* @param cls the target class
* @param filename the source file name
* @return the deserialized object
* @throws IOException if reading fails
*/
public static <T> T readJsonFromFile(Class<T> cls, String filename) throws IOException {
return readJsonFromFile(cls, new File(filename));
}
/**
* Read JSON from a file and deserialize it via a TypeReference (for generic types).
*
* @param typeReference the target type
* @param file the source file
* @return the deserialized object
* @throws IOException if reading fails
*/
public static <T> T readJsonFromFile(TypeReference<T> typeReference, File file) throws IOException {
StringBuilder strBuilder = new StringBuilder();
// try-with-resources ensures the reader is closed even if readLine() throws
try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
String line;
while ((line = br.readLine()) != null) {
strBuilder.append(line);
}
}
return JSONObject.parseObject(strBuilder.toString(), typeReference);
}
/**
* Read JSON from a file and deserialize it via a TypeReference (for generic types).
*
* @param typeReference the target type
* @param filename the source file name
* @return the deserialized object
* @throws IOException if reading fails
*/
public static <T> T readJsonFromFile(TypeReference<T> typeReference, String filename) throws IOException {
return readJsonFromFile(typeReference, new File(filename));
}
}
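For a quick sanity check, here is a minimal sketch of calling FastJsonUtil from Scala; the sample JSON string and field values are made up for illustration:
import com.spark.kafka.utils.FastJsonUtil

object FastJsonUtilDemo {
  def main(args: Array[String]): Unit = {
    val json = """{"ip":"10.0.0.1","evt":"click"}"""
    // JSON string -> JSONObject
    val obj = FastJsonUtil.toJsonObj(json)
    println(obj.getString("ip")) // 10.0.0.1
    // JSON string -> java.util.Map
    val map = FastJsonUtil.stringToCollect(json)
    println(map.get("evt")) // click
    // Map -> JSON string (round trip)
    println(FastJsonUtil.collectToString(map))
  }
}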
LieNiGwSparkStreamingToSql Code
package com.spark.kafka.loganalysis
import com.spark.kafka.domain.{LieniLog, LieniMessageLog}
import com.spark.kafka.utils.FastJsonUtil
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
object LieNiGwSparkStreamingToSql extends Serializable {
def main(args: Array[String]) = {
val conf = new SparkConf()
.setAppName("LieNiGwSparkStreamingToSql")
.setMaster("local[2]")
val ssc: StreamingContext = new StreamingContext(conf, Seconds(5))
ssc.checkpoint("D:/1/checkpoint")
val topics = Array("topic_all").toSet
val brokers: String = "hadoop-02:9092,hadoop-03:9092,hadoop-04:9092"
val kafkaParams = Map[String, Object](
"bootstrap.servers" -> brokers,
"key.deserializer" -> classOf[StringDeserializer],
"value.deserializer" -> classOf[StringDeserializer],
"group.id" -> "qq",
"auto.offset.reset" -> "latest", //latest earliest
"enable.auto.commit" -> (false: java.lang.Boolean)
)
/**
* Create the Kafka direct stream
*/
val stream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(ssc,
LocationStrategies.PreferConsistent,
ConsumerStrategies.Subscribe[String, String](topics, kafkaParams))
/**
* Parse the JSON with the FastJsonUtil helper class.
* In Scala, the last expression of a block is its return value.
*/
val jsonData: DStream[String] = stream.map(x => {
val lieNiLog = FastJsonUtil.toBean(x.value(), classOf[LieniLog])
val lieNiMessageLog = FastJsonUtil.toBean(lieNiLog.getMessage, classOf[LieniMessageLog])
(lieNiMessageLog.getIp + "," + lieNiMessageLog.getEvt)
})
/**
* Process each micro-batch of the DStream with foreachRDD
*/
jsonData.foreachRDD(rdd => {
// Get the singleton instance of SparkSession
val spark = SparkSession.builder()
.config(rdd.sparkContext.getConf)
.config("spark.default.parallelism", "1")
.config("spark.sql.shuffle.partitions", "1")
.getOrCreate()
import spark.implicits._ // needed for the RDD-to-DataFrame conversion (toDF)
// Convert the RDD[String] of "ip,evt" lines into a DataFrame of Record(ip, evt)
val recordsDataFrame = rdd.map(w => w.split(",")).map(w => Record(w(0), w(1))).toDF()
// Register the DataFrame as a temporary view ("table" is a SQL keyword, so use a safer name)
recordsDataFrame.createOrReplaceTempView("records")
// Query the view with SQL and print the result
val resultDataFrame = spark.sql("select * from records")
resultDataFrame.show()
})
ssc.start()
ssc.awaitTermination()
}
case class Record(ip: String, evt: String)
}
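The LieniLog and LieniMessageLog domain classes from com.spark.kafka.domain are not shown in this post. Below is an assumed sketch of their shape only; fastjson binds through JavaBean getters/setters, which @BeanProperty generates on Scala fields, and the real classes may well differ:
import scala.beans.BeanProperty

// Assumed shape: the outer log carries the inner event as a JSON string,
// which the job parses a second time with FastJsonUtil.toBean.
class LieniLog {
  @BeanProperty var message: String = _
}

class LieniMessageLog {
  @BeanProperty var ip: String = _
  @BeanProperty var evt: String = _
}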
The code has been verified and runs.
Thanks for reading; I hope it helps, and thanks for your support!
The FastJsonUtil class was found online and lightly modified; I've forgotten which blogger it originally came from. =-=