1 在实习公司写了一段约300行的日志分析代码,不得不说写得实在太丑了,问题不少,确实还差得很远!
</pre><pre code_snippet_id="1752919" snippet_file_name="blog_20160708_3_6683349" name="code" class="java">package meachine_learning;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Command-line log analysis that counts {@code "token":"..."} fields found in two log files.
 *
 * <p>Three statistics are printed: the number of distinct tokens over both files, the number
 * of distinct tokens per day (and per week from a chosen start day), and the raw number of
 * token occurrences per day.
 *
 * <p>A line contributes to the per-day statistics only when characters 1-4 (zero-based, i.e.
 * the four characters after the first one) are all digits; those four characters are used as
 * the day key (the interactive prompt describes it as {@code MMdd}, e.g. {@code 0524}).
 * The year is fixed to 2016 wherever a full date is needed.
 *
 * <p>Files are decoded with the platform default charset, exactly as before.
 */
public class countOfToken {

    /** Matches one token field such as {@code "token":"abc123"}; compiled once and reused. */
    private static final Pattern TOKEN_PATTERN = Pattern.compile("\"token\":\"[a-z0-9]*\"");

    /** Year that is prepended to a four-digit day key to build a full {@code yyyyMMdd} date. */
    private static final String YEAR_PREFIX = "2016";

    /** Start day used when nothing usable is typed at the prompt; it is the day the banner announces. */
    private static final String DEFAULT_START_TIME = "0524";

    /**
     * Runs the three reports and prints them to standard output.
     *
     * @param args optional; when at least two arguments are given they are used as the paths
     *             of the first and second log file, otherwise the built-in paths are used
     * @throws IOException    if a log file or standard input cannot be read
     * @throws ParseException if a day key cannot be parsed as a date for the weekly report
     */
    public static void main(String[] args) throws IOException, ParseException {
        String path1 = "C:\\Users\\liuchaoqun\\Desktop\\log\\eros_extract1.log";
        String path2 = "C:\\Users\\liuchaoqun\\Desktop\\log\\eros_extract2.log";
        if (args != null && args.length >= 2) {
            path1 = args[0];
            path2 = args[1];
        }

        // Distinct tokens over both files.
        Set<String> set1 = getCount(path1);
        Set<String> set2 = getCount(path2);
        Set<String> finallySet = Union(set1, set2);
        System.out.println("token去重后统计的结果总数为:" + finallySet.size());

        // Distinct tokens per day, merged over both files and listed in key order.
        System.out.println("第一个文件去重后的结果:<day,set<String>>");
        HashMap<String, Set<String>> map1 = getCountByDay(path1);
        System.out.println("第二个文件去重后的结果:<day,set<String>>");
        HashMap<String, Set<String>> map2 = getCountByDay(path2);
        HashMap<String, Set<String>> res = HashMapUnion(map1, map2);
        List<Map.Entry<String, Set<String>>> resInformation = sortedByKey(res);
        System.out.println("去重后统计结果个数打印:");
        for (Map.Entry<String, Set<String>> day : resInformation) {
            System.out.println("时间:2016" + day.getKey() + ":数量" + day.getValue().size());
        }

        // Distinct tokens per week, starting from a day typed by the user.
        System.out.println("以下是给定该年起始时间再按周统计的结果打印,起始时间为\"0524\":");
        System.out.println("请输入起始时间,比如:0524表示5月24日");
        // System.in is deliberately left open: it is owned by the JVM, not by this method.
        BufferedReader readTime = new BufferedReader(new InputStreamReader(System.in));
        String startTime = readTime.readLine();
        if (startTime != null) {
            startTime = startTime.trim();
        }
        if (startTime == null || startTime.length() == 0) {
            // End of input or a blank line: use the start day announced in the banner above.
            startTime = DEFAULT_START_TIME;
        }
        printByWeek(resInformation, startTime);

        // Raw (non-deduplicated) token occurrences per day.
        System.out.println("以下是对于每天不去重相关结果的统计:");
        System.out.println("统计第一个文件不去重token后的结果:<day,Integer>");
        HashMap<String, Integer> mapA = getCountByDay2(path1);
        System.out.println("统计第二个文件不去重token后的结果:<day,Integer>");
        HashMap<String, Integer> mapB = getCountByDay2(path2);
        HashMap<String, Integer> ans = HashMapUnionByNumber(mapA, mapB);
        List<Map.Entry<String, Integer>> information = sortedByKey(ans);
        System.out.println("不重复最终结果打印:");
        for (Map.Entry<String, Integer> day : information) {
            System.out.println("时间:2016" + day.getKey() + ":数量" + day.getValue());
        }
    }

    /**
     * Prints, for every week that has data, the number of distinct tokens seen in that week.
     *
     * <p>Counting starts at the entry whose key equals {@code startTime} and covers every
     * entry after it in list order. When no key equals {@code startTime}, counting starts at
     * the first entry whose key is not ordered before {@code startTime}, so a start day
     * without any log data no longer produces an empty report. A {@code null}
     * {@code startTime} selects nothing.
     *
     * <p>The week number comes from {@code SimpleDateFormat("w")}, which uses the default
     * locale's week rules. The sets held by {@code infoIds} are only read, never modified.
     *
     * @param infoIds   per-day token sets keyed by four-digit day; expected in ascending key order
     * @param startTime four-digit day key to start from, e.g. {@code "0524"}
     * @throws IOException    never thrown by this implementation; kept for signature compatibility
     * @throws ParseException if {@code "2016" + key} is not a parseable {@code yyyyMMdd} date
     */
    public static void printByWeek(List<Entry<String, Set<String>>> infoIds, String startTime)
            throws IOException, ParseException {
        // Index = week of year as formatted by "w"; 54 slots leave room for weeks 1..53.
        @SuppressWarnings("unchecked")
        Set<String>[] week = new Set[54];
        SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyyMMdd");
        SimpleDateFormat w = new SimpleDateFormat("w");

        for (int i = findStartIndex(infoIds, startTime); i < infoIds.size(); ++i) {
            Entry<String, Set<String>> day = infoIds.get(i);
            Date date = dateFormatter.parse(YEAR_PREFIX + day.getKey());
            int weekNum = Integer.parseInt(w.format(date));
            if (week[weekNum] == null) {
                // Fresh set per week so the caller's per-day sets stay untouched.
                week[weekNum] = new HashSet<String>();
            }
            week[weekNum].addAll(day.getValue());
        }

        System.out.println("这里是从" + YEAR_PREFIX + (startTime == null ? "" : startTime) + "开始统计的结果:");
        for (int i = 0; i < week.length; ++i) {
            if (week[i] != null) {
                System.out.println("这是该年第" + i + "周的统计结果:" + week[i].size());
            }
        }
    }

    /**
     * Collects the distinct token fields found anywhere in a file.
     *
     * @param path path of the log file
     * @return a new set holding every distinct full match, e.g. {@code "token":"abc"}
     * @throws IOException if the file cannot be opened or read
     */
    public static Set<String> getCount(String path) throws IOException {
        Set<String> tokens = new HashSet<String>();
        System.out.println("以行为单位读取文件内容,一次读一整行:");
        BufferedReader reader = openReader(path);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher m = TOKEN_PATTERN.matcher(line);
                while (m.find()) {
                    tokens.add(m.group(0));
                }
            }
        } finally {
            reader.close();
        }
        // Only reached when the whole file was read without an error.
        System.out.println("success!");
        return tokens;
    }

    /**
     * Returns the union of two sets without modifying either argument.
     *
     * @param setA first set
     * @param setB second set
     * @return a new set containing every element of {@code setA} and {@code setB}
     */
    public static Set<String> Union(Set<String> setA, Set<String> setB) {
        Set<String> merged = new HashSet<String>(setB);
        merged.addAll(setA);
        return merged;
    }

    /**
     * Merges two day-to-tokens maps; sets stored under the same day are united.
     *
     * <p>Neither argument nor any set it holds is modified; every set in the result is new.
     *
     * @param setA first map (day key to token set)
     * @param setB second map (day key to token set)
     * @return a new map holding, for every day present in either input, the united token set
     */
    public static HashMap<String, Set<String>> HashMapUnion(HashMap<String, Set<String>> setA,
            HashMap<String, Set<String>> setB) {
        HashMap<String, Set<String>> ans = new HashMap<String, Set<String>>();
        mergeSetsInto(ans, setA);
        mergeSetsInto(ans, setB);
        return ans;
    }

    /**
     * Collects the distinct token fields of a file, grouped by day.
     *
     * <p>A day gets an entry as soon as one line carrying that day key is seen, even when
     * the line contains no token.
     *
     * @param path path of the log file
     * @return a new map from four-digit day key to the distinct full matches of that day
     * @throws IOException if the file cannot be opened or read
     */
    public static HashMap<String, Set<String>> getCountByDay(String path) throws IOException {
        HashMap<String, Set<String>> tokensByDay = new HashMap<String, Set<String>>();
        System.out.println("以行为单位读取文件内容,一次读一整行:");
        BufferedReader reader = openReader(path);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String day = extractDayKey(line);
                if (day == null) {
                    continue;
                }
                Set<String> tokens = tokensByDay.get(day);
                if (tokens == null) {
                    tokens = new HashSet<String>();
                    tokensByDay.put(day, tokens);
                }
                Matcher m = TOKEN_PATTERN.matcher(line);
                while (m.find()) {
                    tokens.add(m.group(0));
                }
            }
        } finally {
            reader.close();
        }
        System.out.println("success!");
        return tokensByDay;
    }

    /**
     * Counts token fields of a file per day without deduplication.
     *
     * <p>A day gets an entry (possibly with count 0) as soon as one line carrying that day
     * key is seen.
     *
     * @param path path of the log file
     * @return a new map from four-digit day key to the number of token matches on that day
     * @throws IOException if the file cannot be opened or read
     */
    public static HashMap<String, Integer> getCountByDay2(String path) throws IOException {
        HashMap<String, Integer> countByDay = new HashMap<String, Integer>();
        System.out.println("以行为单位读取文件内容,一次读一整行:");
        BufferedReader reader = openReader(path);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String day = extractDayKey(line);
                if (day == null) {
                    continue;
                }
                Integer soFar = countByDay.get(day);
                int count = (soFar == null) ? 0 : soFar.intValue();
                Matcher m = TOKEN_PATTERN.matcher(line);
                while (m.find()) {
                    ++count;
                }
                countByDay.put(day, Integer.valueOf(count));
            }
        } finally {
            reader.close();
        }
        System.out.println("success!");
        return countByDay;
    }

    /**
     * Merges two day-to-count maps; counts stored under the same day are added.
     *
     * @param mapA first map (day key to count)
     * @param mapB second map (day key to count)
     * @return a new map holding, for every day present in either input, the summed count
     */
    public static HashMap<String, Integer> HashMapUnionByNumber(HashMap<String, Integer> mapA,
            HashMap<String, Integer> mapB) {
        HashMap<String, Integer> map = new HashMap<String, Integer>();
        addCountsInto(map, mapA);
        addCountsInto(map, mapB);
        return map;
    }

    /** Opens {@code path} for line-by-line reading using the platform default charset. */
    private static BufferedReader openReader(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(new File(path))));
    }

    /**
     * Returns characters 1-4 of {@code line} when all four are ASCII digits, otherwise
     * {@code null}. Requiring four digits keeps non-date prefixes out of the day keys,
     * which would otherwise fail date parsing in the weekly report.
     */
    private static String extractDayKey(String line) {
        if (line.length() < 5) {
            return null;
        }
        for (int i = 1; i < 5; ++i) {
            char c = line.charAt(i);
            if (c < '0' || c > '9') {
                return null;
            }
        }
        return line.substring(1, 5);
    }

    /**
     * Returns the index at which weekly counting starts: the first exact match of
     * {@code startTime}, else the first key not ordered before it, else the list size.
     */
    private static int findStartIndex(List<Entry<String, Set<String>>> infoIds, String startTime) {
        if (startTime == null) {
            return infoIds.size();
        }
        for (int i = 0; i < infoIds.size(); ++i) {
            if (infoIds.get(i).getKey().equals(startTime)) {
                return i;
            }
        }
        for (int i = 0; i < infoIds.size(); ++i) {
            if (infoIds.get(i).getKey().compareTo(startTime) >= 0) {
                return i;
            }
        }
        return infoIds.size();
    }

    /** Adds every set of {@code source} to the set stored under the same key in {@code target}. */
    private static void mergeSetsInto(Map<String, Set<String>> target, Map<String, Set<String>> source) {
        for (Map.Entry<String, Set<String>> entry : source.entrySet()) {
            Set<String> merged = target.get(entry.getKey());
            if (merged == null) {
                merged = new HashSet<String>();
                target.put(entry.getKey(), merged);
            }
            if (entry.getValue() != null) {
                merged.addAll(entry.getValue());
            }
        }
    }

    /** Adds every count of {@code source} to the count stored under the same key in {@code target}. */
    private static void addCountsInto(Map<String, Integer> target, Map<String, Integer> source) {
        for (Map.Entry<String, Integer> entry : source.entrySet()) {
            Integer soFar = target.get(entry.getKey());
            if (soFar == null) {
                target.put(entry.getKey(), entry.getValue());
            } else {
                target.put(entry.getKey(), Integer.valueOf(soFar.intValue() + entry.getValue().intValue()));
            }
        }
    }

    /** Returns the entries of {@code map} as a new list sorted by ascending key. */
    private static <V> List<Map.Entry<String, V>> sortedByKey(Map<String, V> map) {
        List<Map.Entry<String, V>> entries = new ArrayList<Map.Entry<String, V>>(map.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, V>>() {
            @Override
            public int compare(Map.Entry<String, V> o1, Map.Entry<String, V> o2) {
                return o1.getKey().compareTo(o2.getKey());
            }
        });
        return entries;
    }
}
</pre><pre code_snippet_id="1752919" snippet_file_name="blog_20160708_5_1111215" name="code" class="java"> 南无大慈大悲观世音菩萨