手机APP分析平台项目(一)
手机APP分析平台项目(二)
手机APP分析平台项目(三)
手机APP分析平台项目(四)
5.1.9 将数据存储到hive和hbase中
启动hive客户端,创建数据库:create database youmeng;
创建日志表:
create external table youmeng.apperrorlog(
createtime string,
appId string,
deviceId string,
appVersion string,
appChannel string,
appPlatform string,
osType string,
deviceStyle string,
errorBrief string,
errorDetail string,
timesecond string
)
PARTITIONED BY (dates String)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/flume/appErrorLog/';
插入数据:alter table youmeng.apperrorlog add partition (dates="20-09-15") location '20-09-15';
删除数据:alter table youmeng.apperrorlog drop partition (dates="20-09-15");
HbaseUtils
package com.hbase;
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import com.entry.AppErrorBatchInfoCount;
import com.entry.AppErrorBatchInfoHotErrorWord;
import net.sf.json.JSONObject;
/**
* 将数据进行转换,之后保存到hbase中
* 提前创建hbase的表: create 'apperrorinfo','youmeng77'
*/
public class HbaseUtils {
// 创建hbase配置对象
private static Configuration conf = HBaseConfiguration.create();
// 通过连接工厂创建连接对象
private static Connection conn = null;
// 获得table对象
private static Table table = null;
static {
// 使用eclipse时必须添加这个,否则无法定位
conf.set("hbase.zookeeper.quorum","node7-1,node7-2,node7-3");
conf.set("hbase.client.scanner.timeout.period", "6000");
conf.set("hbase.rpc.timeout", "6000");
try {
conn = ConnectionFactory.createConnection(conf);
// 数据将要存储到apperrorinfo表中
table = conn.getTable(TableName.valueOf("apperrorinfo"));
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 每个设备的错误出现次数
* @throws IOException
*/
public static void insertCountData(AppErrorBatchInfoCount appErrorBatchInfoCount) throws IOException{
// 获取数据
String time = appErrorBatchInfoCount.getTime();
String diveceId = appErrorBatchInfoCount.getDeviceId();
String appChannel = appErrorBatchInfoCount.getAppChannel();
String appId = appErrorBatchInfoCount.getAppId();
String appPlatform = appErrorBatchInfoCount.getAppPlatform();
String appVersion = appErrorBatchInfoCount.getAppVersion();
String deviceStyle = appErrorBatchInfoCount.getDeviceStyle();
String osType = appErrorBatchInfoCount.getOsType();
long count = appErrorBatchInfoCount.getCount();
// 设备id当做rowkey
byte[] rowkey = Bytes.toBytes(diveceId);
Put put = new Put(rowkey);
// 对数据进行转换,存储到hbase中
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("time"), Bytes.toBytes(time));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("appChannel"), Bytes.toBytes(appChannel));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("appId"), Bytes.toBytes(appId));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("appPlatform"), Bytes.toBytes(appPlatform));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("appVersion"), Bytes.toBytes(appVersion));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("deviceStyle"), Bytes.toBytes(deviceStyle));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("osType"), Bytes.toBytes(osType));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("count"), Bytes.toBytes(count + ""));
table.put(put);
table.close();
}
/**
* 每个设备在定制错误词汇的出现热度
* @throws IOException
*/
public static void insertHotWordData(AppErrorBatchInfoHotErrorWord appErrorBatchInfoHotErrorWord) throws IOException{
// 存储错误词汇
Map<String,Long> wordMap = appErrorBatchInfoHotErrorWord.getWordMap();
// 将jsonObject数据转换为字符串
JSONObject jsonObject = JSONObject.fromObject(wordMap);
String jsonString = jsonObject.toString();
// 获取数据
Long totalNumber = appErrorBatchInfoHotErrorWord.getTotalNumber();
String time = appErrorBatchInfoHotErrorWord.getTime();
String appId = appErrorBatchInfoHotErrorWord.getAppId();
String diveceId = appErrorBatchInfoHotErrorWord.getDeviceId();
String appVersion = appErrorBatchInfoHotErrorWord.getAppVersion();
String appChannel = appErrorBatchInfoHotErrorWord.getAppChannel();
String appPlatform = appErrorBatchInfoHotErrorWord.getAppPlatform();
String osType = appErrorBatchInfoHotErrorWord.getOsType();
String deviceStyle = appErrorBatchInfoHotErrorWord.getDeviceStyle();
// 将diveceId设置为rokey
byte[] rowkey = Bytes.toBytes(diveceId);
Put put = new Put(rowkey);
// 对数据进行转换,存储到hbase中
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("hotWordMap"), Bytes.toBytes(jsonString + ""));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("totalNumber"), Bytes.toBytes(totalNumber + ""));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("time"), Bytes.toBytes(time));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("appId"), Bytes.toBytes(appId));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("appVersion"), Bytes.toBytes(appVersion));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("appChannel"), Bytes.toBytes(appChannel));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("appPlatform"), Bytes.toBytes(appPlatform));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("osType"), Bytes.toBytes(osType));
put.addColumn(Bytes.toBytes(appId), Bytes.toBytes("deviceStyle"), Bytes.toBytes(deviceStyle));
table.put(put);
table.close();
}
}
ErrorCountAnaly
package com.hive;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import com.entry.AppErrorBatchInfoCount;
import com.hbase.HbaseUtils;
/**
* 从hive中获取数据,统计错误信息出现的次数
*/
public class ErrorCountAnaly {
public static void main(String[] args) throws Exception {
// java连接hive
Class.forName("org.apache.hive.jdbc.HiveDriver");
Connection connection = DriverManager.getConnection("jdbc:hive2://node7-4:10000/youmeng","root","123456");
Statement statement = connection.createStatement();
// 查询的sql语句
String sql = "select "
+ "appId,deviceId,appVersion,appChannel,appPlatform,osType,deviceStyle,dates,count(1) as number "
+ "from youmeng.apperrorlog "
+ "where dates = '20-09-15' "
+ "group by appId,deviceId,appVersion,appChannel,appPlatform,osType,deviceStyle,dates";
ResultSet resultSet = statement.executeQuery(sql);
System.out.println("start run sql: " + sql);
// 对查询到的数据进行转换
while (resultSet.next()) {
String appId = resultSet.getString(1);
String deviceId = resultSet.getString(2);
String appVersion = resultSet.getString(3);
String appChannel = resultSet.getString(4);
String appPlatform = resultSet.getString(5);
String osType = resultSet.getString(6);
String deviceStyle = resultSet.getString(7);
String dates = resultSet.getString(8);
String count = resultSet.getString(9);
// 创建appErrorBatchInfoCount对象
AppErrorBatchInfoCount appErrorBatchInfoCount = new AppErrorBatchInfoCount();
// 将整体数据转换为appErrorBatchInfoCount形式
appErrorBatchInfoCount.setAppId(appId);
appErrorBatchInfoCount.setDeviceId(deviceId);
appErrorBatchInfoCount.setAppVersion(appVersion);
appErrorBatchInfoCount.setAppChannel(appChannel);
appErrorBatchInfoCount.setAppPlatform(appPlatform);
appErrorBatchInfoCount.setOsType(osType);
appErrorBatchInfoCount.setDeviceStyle(deviceStyle);
appErrorBatchInfoCount.setTime(dates);
appErrorBatchInfoCount.setCount(Long.valueOf(count));
System.err.println(appErrorBatchInfoCount);
// 将数据导入到hbase
HbaseUtils.insertCountData(appErrorBatchInfoCount);
}
}
}
创建永久函数,注意进入相应的数据库:create function finderrorcount as 'com.hive.MyCoustom' using jar 'hdfs:///home/errorhotword.jar';
创建完永久函数后,需要重新启动服务
删除永久函数:drop function youmeng.finderrorcount;
热点词汇定制错误分析
分析errorDetail里面出现热点词汇的次数
select appId,deviceId,appVersion,appChannel,appPlatform,osType,deviceStyle,dates,youmeng.finderrorcount(errorDetail, '/home/youmeng/hotword/hot.txt') as hotstring from youmeng.appErrorLog where dates = '20-09-15'
MyCoustom
package com.hive;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* 对定制词汇进行统计
* 返回结果(例:java:4====info:2====totalResult:1)
* 将此文件打为jar包,存放在hdfs中(例:hdfs:///home/errorhotword.jar)
*/
public class MyCoustom extends UDF {
public static String evaluate(String errordetail, String fileName) {
HashMap<String, Long> resultMap = new HashMap<String,Long>();
// 存放的都是定制词汇
List<String> result = parseFile(fileName);
for(String temp : result) {
// 将定制词汇转换为小写,然后执行模式匹配
int count = pattern(errordetail.toLowerCase(), temp.toLowerCase());
// 如果统计的定制词汇的次数>0,就将此条数据临时存储到resultMap中,方便进行后续计算
if (count > 0) {
resultMap.put(temp, Long.valueOf(count + ""));
}
}
Set<Entry<String,Long>> set = resultMap.entrySet();
// 记录定制词汇出现的次数
int sum = 0;
String resultString = "";
for(Entry<String, Long> entry : set) {
String keyString = entry.getKey();
Long count = entry.getValue();
// 对统计出来的数据进行拼接 (例:java:4====info:2====)
resultString += keyString + ":" + count + "====";
sum ++;
}
// 如果定制词汇出现的次数>0,则对数据再次进行修改,往后进行追加 (例:java:4====info:2====totalResult:1)
if(sum > 0) {
resultString = resultString + "totalResult:" + sum;
}
return resultString;
}
/*
* 模式匹配
* 匹配包含定制词汇的信息,并返回出现的次数
*/
private static int pattern(String input, String regex) {
// 将给定的正则表达式编译为模式
Pattern pattern = Pattern.compile(regex);
// 创建匹配器,根据此模式匹配给定的输入
Matcher matcher = pattern.matcher(input);
int count = 0;
while (matcher.find()) {
count ++;
}
return count;
}
private static List<String> parseFile(String fileName) {
ArrayList<String> result = new ArrayList<String>();
// 读取文件,拿出定制词汇,放入到result集合
FileReader fileReader = null;
BufferedReader reader = null;
try {
fileReader = new FileReader(fileName);
reader = new BufferedReader(fileReader);
String temp = "";
while((temp=reader.readLine()) != null){
result.add(temp.trim());
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if(fileReader != null) {
try {
fileReader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if(reader != null) {
try {
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
}
}
ErrorHotwordAnaly
package com.hive;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.entry.AppErrorBatchInfoHotErrorWord;
import com.hbase.HbaseUtils;
/**
* 从hive中读取数据,打印出定制的错误词出现的次数
*/
public class ErrorHotwordAnaly {
public static void main(String[] args) throws Exception {
// java连接hive
Class.forName("org.apache.hive.jdbc.HiveDriver");
Connection connection = DriverManager.getConnection("jdbc:hive2://node7-4:10000/youmeng","root","123456");
Statement statement = connection.createStatement();
// 查询的sql语句(其中用到了自定义函数finderrorcount)
// 创建永久函数,注意进入相应的数据库
// create function finderrorcount as 'com.hive.MyCoustom' using jar 'hdfs:///home/errorhotword.jar';
String sql = "select appId,deviceId,appVersion,appChannel,appPlatform,osType,deviceStyle,dates,"
+ "youmeng.finderrorcount(errorDetail,'/home/youmeng/hotword/hot.txt') as hotstring "
+ "from youmeng.apperrorlog "
+ "where dates = '20-09-15'";
ResultSet rs = statement.executeQuery(sql);
// 对查询到的数据进行转换
while (rs.next()) {
String appId = rs.getString(1);
String deviceId = rs.getString(2);
String appVersion = rs.getString(3);
String appChannel = rs.getString(4);
String appPlatform = rs.getString(5);
String osType = rs.getString(6);
String deviceStyle = rs.getString(7);
String date = rs.getString(8);
String hotstring = rs.getString(9);
// 创建appErrorBatchInfoHotErrorWord对象
AppErrorBatchInfoHotErrorWord appErrorBatchInfoHotErrorWord = new AppErrorBatchInfoHotErrorWord();
// 将整体数据转换为appErrorBatchInfoHotErrorWord形式
appErrorBatchInfoHotErrorWord.setAppId(appId);
appErrorBatchInfoHotErrorWord.setDeviceId(deviceId);
appErrorBatchInfoHotErrorWord.setAppVersion(appVersion);
appErrorBatchInfoHotErrorWord.setAppChannel(appChannel);
appErrorBatchInfoHotErrorWord.setAppPlatform(appPlatform);
appErrorBatchInfoHotErrorWord.setOsType(osType);
appErrorBatchInfoHotErrorWord.setDeviceStyle(deviceStyle);
appErrorBatchInfoHotErrorWord.setTime(date);
List list = processHotString(hotstring);
long sum = (Long)list.get(0);
Map<String, Long> reusltMap = (Map<String, Long>)list.get(1);
appErrorBatchInfoHotErrorWord.setTotalNumber(sum);
appErrorBatchInfoHotErrorWord.setWordMap(reusltMap);
System.out.println(appErrorBatchInfoHotErrorWord);
// 将数据导入到hbase
HbaseUtils.insertHotWordData(appErrorBatchInfoHotErrorWord);
}
}
/*
* 对定制的错误词进行转换
* java:4====info:2====totalResult:1
*
*/
private static List processHotString(String hotstring) {
// 存放最终信息:错误出现总数,(错误信息,出现次数)
List result = new ArrayList();
// 存放信息:错误信息,出现次数
Map<String, Long> reusltMap = new HashMap<String, Long>();
Long sum = 0L;
String key = "";
Long count = 0L;
// 数据按照 ==== 进行切分
String[] temparrays = hotstring.split("====");
for(String wordCount : temparrays) {
// 按照 ==== 切分后的数据(java:4),再次按照(:)进行切分
String[] splited = wordCount.split(":");
// 如果第一个字段等于totalResult
if (splited[0].equals("totalResult")) {
// 对出错总和进行累加
sum = Long.valueOf(splited[1]);
} else {
// 错误信息
key = splited[0];
// 出现次数
count = Long.valueOf(splited[1]);
// 将错误信息,出现次数存放到reusltMap中
reusltMap.put(key, count);
}
}
result.add(sum);
result.add(reusltMap);
return result;
}
}