// Data statistics mini demo

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.time.Duration;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Exercise: "independent visit" (session) statistics over a web-server access log.
 * <p>
 * A log file such as {@code 2013-05-30.log} contains data like the following:
 * <pre>
 * 27.19.74.143 - - [30/May/2013:17:38:25
 * 27.19.74.143 - - [30/May/2013:17:38:22
 * 27.19.74.143 - - [30/May/2013:17:38:23
 * 27.19.74.143 - - [30/May/2013:17:38:21
 * 27.19.74.143 - - [30/May/2013:17:38:28
 * 27.19.74.143 - - [30/May/2013:16:38:20
 * 27.19.74.143 - - [30/May/2013:18:38:20
 * 27.19.74.143 - - [30/May/2013:16:39:20
 * 27.19.74.143 - - [30/May/2013:16:41:20
 * 27.19.74.143 - - [30/May/2013:17:42:20
 * 27.19.74.143 - - [30/May/2013:19:38:21
 * 27.19.74.143 - - [31/May/2013:12:38:21
 * 27.19.74.143 - - [31/May/2013:16:38:21
 * 27.19.74.143 - - [31/May/2013:16:38:22
 * 27.19.74.143 - - [30/May/2013:14:38:21
 * 27.19.74.143 - - [30/May/2013:14:38:21
 * </pre>
 * The data is an excerpt of a web-server log; every line carries two pieces of information:
 * <ol>
 * <li>the IP of the visitor;</li>
 * <li>the time of one request made by that visitor.</li>
 * </ol>
 * <p>
 * Definition: consecutive requests belong to one "independent visit"; for example the requests at
 * {@code 30/May/2013:17:38:22} and {@code 30/May/2013:17:38:25} belong to the same visit. Two
 * chronologically adjacent requests that are more than 30 minutes apart belong to two different visits.
 * <p>
 * Requirement: find every independent visit in the data and output, for each one, the time of its first
 * request, the time of its last request, and the length of stay in milliseconds. Example output row:
 * <pre>
 * 30/May/2013:16:38:20    30/May/2013:16:41:20    180000
 * </pre>
 */
public class Exam01 {

    /** Timestamp layout used by the log, e.g. {@code 30/May/2013:17:38:25}; month names are English. */
    private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("dd/MMM/yyyy:HH:mm:ss", Locale.ENGLISH);

    /**
     * Matches one log record per line and captures the timestamp token that follows {@code " - - ["}.
     * The token ends at the first whitespace or {@code ]}, so a trailing {@code \r} (CRLF files) or
     * anything after the timestamp (time-zone offset, request line, ...) is not part of the capture.
     */
    private static final Pattern LOG_LINE = Pattern.compile("^.+? - - \\[([^\\s\\]]+)", Pattern.MULTILINE);

    /** Largest gap between two adjacent requests that still keeps them in the same visit. */
    private static final Duration MAX_GAP = Duration.ofMinutes(30);

    /**
     * Demo entry point: parses the sample log, prints the sorted request times, then prints one row per visit.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Exam01 exam01 = new Exam01();
        Set<LocalDateTime> data = exam01.doParseData("data/logs/2013-05-30.log");
        data.forEach(System.out::println);
        Set<String> result = exam01.doStatistics(data.toArray(new LocalDateTime[0]));
        result.forEach(System.out::println);
    }

    /**
     * Reads the log file at {@code path} and extracts the request time of every log record.
     * <p>
     * Lines that are not log records (blank lines, text without {@code " - - ["}) are skipped rather than
     * ending the scan, and the last line is read even when the file has no trailing newline.
     *
     * @param path location of the log file
     * @return the distinct request times in ascending order; empty when the file holds no records
     * @throws RuntimeException if the file cannot be read or a record's timestamp is malformed;
     *                          the underlying failure is attached as the cause
     */
    public Set<LocalDateTime> doParseData(String path) {
        try {
            // Decoding with an explicit charset keeps the result independent of the platform default;
            // malformed bytes are replaced rather than rejected, and timestamps are plain ASCII anyway.
            String input = new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8);
            Set<LocalDateTime> data = new TreeSet<>();
            Matcher matcher = LOG_LINE.matcher(input);
            while (matcher.find()) {
                data.add(LocalDateTime.parse(matcher.group(1), FORMATTER));
            }
            return data;
        } catch (IOException | RuntimeException e) {
            // Every failure keeps surfacing as a plain RuntimeException, now with path and cause attached.
            throw new RuntimeException("data parsing exception: " + path, e);
        }
    }

    /**
     * Groups request times into independent visits and formats one row per visit.
     * <p>
     * The input does not have to be sorted: a sorted copy is used and {@code arr} itself is left untouched.
     * Two adjacent requests stay in the same visit when they are at most 30 minutes apart; a request with
     * no neighbour within 30 minutes forms a visit of its own with a duration of {@code 0}.
     *
     * @param arr request times, in any order; must not contain {@code null}
     * @return rows of the form {@code start<TAB>end<TAB>durationMillis}, in chronological order;
     *         empty when {@code arr} is empty
     * @throws NullPointerException if {@code arr} or one of its elements is {@code null}
     */
    public Set<String> doStatistics(LocalDateTime[] arr) {
        LocalDateTime[] sorted = arr.clone();
        Arrays.sort(sorted);
        return collectVisits(sorted, 0, 1);
    }

    /**
     * Same as {@link #doStatistics(LocalDateTime[])}, but starts inside an array that is already sorted.
     * <p>
     * The usual call is {@code doStatistics(arr, 0, 1)}. Requests at indexes {@code start .. end - 1} are
     * taken as the beginning of the first reported visit; scanning for visit boundaries resumes at
     * {@code end}.
     *
     * @param arr   request times sorted in ascending order
     * @param start index of the first request of the visit being built
     * @param end   index of the next request to examine; values not greater than {@code start} are
     *              treated as {@code start + 1}
     * @return rows of the form {@code start<TAB>end<TAB>durationMillis}, in chronological order;
     *         empty when {@code start} is past the end of {@code arr}
     */
    public Set<String> doStatistics(LocalDateTime[] arr, int start, int end) {
        return collectVisits(arr, start, end);
    }

    /** Single left-to-right pass that closes a visit whenever the gap to the next request exceeds MAX_GAP. */
    private static Set<String> collectVisits(LocalDateTime[] sorted, int start, int end) {
        // A fresh, insertion-ordered set per call: rows come out chronologically and never leak between calls.
        Set<String> visits = new LinkedHashSet<>();
        if (start >= sorted.length) {
            return visits;
        }
        int visitStart = start;
        for (int next = Math.max(end, start + 1); next < sorted.length; next++) {
            LocalDateTime previous = sorted[next - 1];
            // Compare whole Durations: toMinutes() truncates, so 30 min 59 s would not count as "over 30".
            if (Duration.between(previous, sorted[next]).compareTo(MAX_GAP) > 0) {
                visits.add(formatVisit(sorted[visitStart], previous));
                visitStart = next;
            }
        }
        // The visit still open when the data runs out ends at the last request.
        visits.add(formatVisit(sorted[visitStart], sorted[sorted.length - 1]));
        return visits;
    }

    /** Renders one visit as {@code start<TAB>end<TAB>durationMillis}. */
    private static String formatVisit(LocalDateTime first, LocalDateTime last) {
        return String.format("%s\t%s\t%s", first.format(FORMATTER), last.format(FORMATTER),
                Duration.between(first, last).toMillis());
    }

}
 
 

                
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值