import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.time.Duration;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Splits the requests of a web-server access log into "independent visits".
 *
 * <p>A log file such as {@code 2013-05-30.log} contains data like the following:
 *
 * <pre>
 * 27.19.74.143 - - [30/May/2013:17:38:25
 * 27.19.74.143 - - [30/May/2013:17:38:22
 * 27.19.74.143 - - [30/May/2013:17:38:23
 * 27.19.74.143 - - [30/May/2013:17:38:21
 * 27.19.74.143 - - [30/May/2013:17:38:28
 * 27.19.74.143 - - [30/May/2013:16:38:20
 * 27.19.74.143 - - [30/May/2013:18:38:20
 * 27.19.74.143 - - [30/May/2013:16:39:20
 * 27.19.74.143 - - [30/May/2013:16:41:20
 * 27.19.74.143 - - [30/May/2013:17:42:20
 * 27.19.74.143 - - [30/May/2013:19:38:21
 * 27.19.74.143 - - [31/May/2013:12:38:21
 * 27.19.74.143 - - [31/May/2013:16:38:21
 * 27.19.74.143 - - [31/May/2013:16:38:22
 * 27.19.74.143 - - [30/May/2013:14:38:21
 * 27.19.74.143 - - [30/May/2013:14:38:21
 * </pre>
 *
 * <p>The data is an excerpt of a web-server log. Every line carries two pieces of information:
 * <ol>
 *   <li>the IP address of the visitor;</li>
 *   <li>the time of one request made by that visitor.</li>
 * </ol>
 *
 * <p>Definition: consecutive requests belong to one "independent visit"; for example the requests
 * at {@code 30/May/2013:17:38:22} and {@code 30/May/2013:17:38:25} belong to the same visit. When
 * two chronologically adjacent requests are more than 30 minutes apart, they belong to two
 * different independent visits.
 *
 * <p>Requirement: report every independent visit in the data, printing the time of its first
 * request, the time of its last request and the length of stay in milliseconds, separated by tabs.
 * Example output line for the sample data above:
 *
 * <pre>
 * 30/May/2013:16:38:20    30/May/2013:16:41:20    180000
 * </pre>
 */
public class Exam01 {

    /** Timestamp layout used by the log, e.g. {@code 30/May/2013:17:38:25}; month names are English. */
    private static final DateTimeFormatter FORMATTER =
            DateTimeFormatter.ofPattern("dd/MMM/yyyy:HH:mm:ss", Locale.ENGLISH);

    /** Two adjacent requests further apart than this belong to different visits. */
    private static final Duration MAX_GAP = Duration.ofMinutes(30);

    /** Locates the request timestamp that follows the {@code " - - ["} marker of a log line. */
    private static final Pattern TIMESTAMP =
            Pattern.compile(" - - \\[(\\d{2}/[A-Za-z]{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2})");

    /**
     * Parses a log file and prints first the distinct request times, then the independent visits.
     *
     * @param args optional; {@code args[0]} overrides the default path {@code data/logs/2013-05-30.log}
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : "data/logs/2013-05-30.log";
        Exam01 exam01 = new Exam01();
        Set<LocalDateTime> data = exam01.doParseData(path);
        data.forEach(System.out::println);
        Set<String> result = exam01.doStatistics(data.toArray(new LocalDateTime[0]));
        result.forEach(System.out::println);
    }

    /**
     * Reads the log file at {@code path} and collects the request time of every access line.
     *
     * <p>The file is read line by line as UTF-8, so the last line does not need a trailing newline
     * and {@code \r\n} line endings are accepted. Lines that do not contain a
     * {@code " - - [dd/MMM/yyyy:HH:mm:ss"} timestamp (blank lines, for instance) are skipped
     * instead of ending the parse; text following the timestamp on a line is ignored.
     *
     * @param path location of the log file
     * @return the distinct request times in ascending order
     * @throws RuntimeException if the file cannot be read or a timestamp that matches the layout
     *         is not a valid date-time; the underlying exception is attached as the cause
     */
    public Set<LocalDateTime> doParseData(String path) {
        try {
            Set<LocalDateTime> data = new TreeSet<>();
            for (String line : Files.readAllLines(Paths.get(path))) {
                Matcher matcher = TIMESTAMP.matcher(line);
                if (matcher.find()) {
                    data.add(LocalDateTime.parse(matcher.group(1), FORMATTER));
                }
            }
            return data;
        } catch (IOException | DateTimeParseException e) {
            throw new RuntimeException("data parsing exception", e);
        }
    }

    /**
     * Groups request times into independent visits.
     *
     * <p>The input is copied and sorted, and {@code null} elements are dropped, so the caller's
     * array is never modified and need not be ordered. A request with no neighbour within 30
     * minutes forms a visit of its own with a duration of {@code 0}.
     *
     * @param arr request times; may be {@code null} or empty
     * @return one {@code "start<TAB>end<TAB>durationMillis"} line per visit, in chronological
     *         order; a fresh set on every call, empty when there are no requests
     */
    public Set<String> doStatistics(LocalDateTime[] arr) {
        if (arr == null) {
            return new LinkedHashSet<>();
        }
        LocalDateTime[] sorted = Arrays.stream(arr)
                .filter(Objects::nonNull)
                .sorted()
                .toArray(LocalDateTime[]::new);
        return doStatistics(sorted, 0, 1);
    }

    /**
     * Groups the requests from index {@code start} to the end of the array into independent visits.
     *
     * <p>{@code start} is the index of the first request of the visit in progress and {@code end}
     * is the index of the next request to examine; requests in between are taken to belong to that
     * visit already. The usual call is {@code doStatistics(arr, 0, 1)}. The scan is iterative, so
     * the size of the input is not limited by the call stack.
     *
     * @param arr request times in ascending order, without {@code null} elements
     * @param start index of the first request of the current visit
     * @param end index of the next request to compare with its predecessor; values not greater
     *        than {@code start} are treated as {@code start + 1}
     * @return one {@code "start<TAB>end<TAB>durationMillis"} line per visit, in array order; a
     *         fresh set on every call, empty when {@code arr} is {@code null} or {@code start} is
     *         past the last element
     * @throws ArrayIndexOutOfBoundsException if {@code start} is negative
     */
    public Set<String> doStatistics(LocalDateTime[] arr, int start, int end) {
        Set<String> visits = new LinkedHashSet<>();
        if (arr == null || start >= arr.length) {
            return visits;
        }
        int first = start;
        for (int next = Math.max(end, start + 1); next < arr.length; next++) {
            // Compare full durations: truncating to whole minutes would let a gap of
            // 30 min 59 s pass as "not more than 30 minutes".
            if (Duration.between(arr[next - 1], arr[next]).compareTo(MAX_GAP) > 0) {
                visits.add(formatVisit(arr[first], arr[next - 1]));
                first = next;
            }
        }
        // The visit still open when the data runs out ends at the last request.
        visits.add(formatVisit(arr[first], arr[arr.length - 1]));
        return visits;
    }

    /** Renders one visit as {@code start<TAB>end<TAB>durationMillis}. */
    private static String formatVisit(LocalDateTime first, LocalDateTime last) {
        return String.format("%s\t%s\t%s", first.format(FORMATTER), last.format(FORMATTER),
                Duration.between(first, last).toMillis());
    }
}
// A small data-statistics demo.
// Latest recommended article published on 2022-12-13 13:48:26.