项目背景
最近接到一个需求:将数据统计分析后按不同维度输出一个饼图,而这个饼图的区间并不是平均分的,那么问题来了,怎么做可以得到最简洁的实现方法?话不多说,先上代码。
直接处理
首先我们理解这个需求要我们做什么:获取各用户数据信息,先求和所有数据信息,获得总数据量,再求出每个用户数据和,每个范围新建一个集合,再循环筛选,将对应范围内的数据添加进对应集合中,对每个范围的集合进行求和,结合总数据量可算出每个范围内的占比量,并将对应范围和占比添加进返回结果集中,实现代码如下:
/**
 * Builds pie-chart data: for each fixed amount range, the share of the
 * overall total contributed by rows whose amount falls in that range.
 *
 * FIX(review): the original declared {@code QueryModel model} and then
 * redeclared {@code model} as the for-loop variable — a compile error.
 * The 7x copy-pasted bucket/sum/divide logic is collapsed into a
 * table-driven loop; output order and labels are unchanged.
 *
 * @param dto query conditions from the caller
 * @return one UserResDto per range (7 entries, fixed order), each carrying
 *         the range label and its percentage share formatted as "0.00%"
 */
public List<UserResDto> getDataCircleOld(QueryDto dto) {
    QueryModel queryModel = changeDto2Model.change(dto, QueryModel.class);
    // Per-row sums are already computed in the database.
    List<ResModel> resModelList = mapper.getDataFromSql(queryModel);
    // Grand total across all rows; denominator of every proportion.
    long totalCount = resModelList.stream()
            .collect(Collectors.summarizingInt(ResModel::getTotalCount)).getSum();
    // Upper bounds and labels are kept in lockstep; label order defines the
    // output order. The last label has no upper bound (">10000" bucket).
    long[] upperBounds = {200L, 500L, 1000L, 2000L, 6000L, 10000L};
    String[] labels = {"[1,200]", "[201,500]", "[501,1000]",
            "[1001,2000]", "[2001,6000]", "[6001,10000]", "(10000,-]"};
    long[] rangeSums = new long[labels.length];
    for (ResModel res : resModelList) {
        // Default bucket is the last one; this also routes amounts < 1 there,
        // exactly as the original if/else chain's final else did.
        int bucket = labels.length - 1;
        if (res.getAmount() >= 1) {
            for (int i = 0; i < upperBounds.length; i++) {
                if (res.getAmount() <= upperBounds[i]) {
                    bucket = i;
                    break;
                }
            }
        }
        rangeSums[bucket] += res.getTotalCount();
    }
    DecimalFormat df = new DecimalFormat("0.00%");
    List<UserResDto> resultList = new ArrayList<>(labels.length);
    for (int i = 0; i < labels.length; i++) {
        // Guard against an empty result set: report 0.00% rather than
        // throwing ArithmeticException on division by zero.
        BigDecimal proportion = totalCount == 0
                ? BigDecimal.ZERO
                : new BigDecimal(rangeSums[i])
                        .divide(new BigDecimal(totalCount), 4, BigDecimal.ROUND_HALF_UP);
        UserResDto result = new UserResDto();
        result.setRange(labels[i]);
        result.setProportion(df.format(proportion));
        resultList.add(result);
    }
    return resultList;
}
so~~代码冗余量过大,虽然能得到最终的结果,但是仔细观察方法会发现,因为每个范围的计算逻辑都一致,这就导致了很多代码都重复了,解决这个问题我们直接强硬的用反射,但是牺牲性能,节约代码量有点南辕北辙的感觉。思考之下其实我们还是有方法可以解决的。
方法优化
我们可以看到其实为了实现这个需求,我们要做的其实就是三件事:
- 求和总数据量;
- 求出每个范围数据量;
- 计算每个范围数据占比。
那么基于此思考,是否存在一个集合分组的方法呢?分组完了还能求和的方法呢?巧了,还真有。具体实现代码如下:
/**
 * Pie-chart data via Collectors.groupingBy: tag each row with its range
 * label, sum the amounts per range, then attach each range's percentage
 * share of the grand total to every tagged row.
 *
 * NOTE(review): the returned list still holds one element per database row,
 * so the same range label appears many times — callers wanting one entry
 * per range must deduplicate.
 *
 * @param dto query conditions from the caller
 * @return one UserResDto per source row, with range label, amount, and the
 *         range's percentage share formatted as "0.00%"
 */
public List<UserResDto> getDataCircle(QueryDto dto) {
    // FIX: original assigned to 'model' but then used undefined 'queryModel'.
    QueryModel queryModel = changeDto2Model.change(dto, QueryModel.class);
    List<ResModel> resModelList = mapper.getDataFromSql(queryModel);
    // Grand total (denominator). Note this method sums getAmount, unlike the
    // old method which summed getTotalCount.
    long amount = resModelList.stream()
            .collect(Collectors.summarizingInt(ResModel::getAmount)).getSum();
    // Tag every row with its range label.
    List<UserResDto> amountList = new ArrayList<>(resModelList.size());
    for (ResModel res : resModelList) {
        int value = res.getAmount();
        String range;
        if (value < 1 || value > 10000) {
            // The original else branch also routed amounts < 1 here.
            range = "[10001,-]";
        } else if (value <= 200) {
            range = "[1,200]";
        } else if (value <= 500) {
            range = "[201,500]";
        } else if (value <= 1000) {
            range = "[501,1000]";
        } else if (value <= 2000) {
            range = "[1001,2000]";
        } else if (value <= 6000) {
            range = "[2001,6000]";
        } else {
            range = "[6001,10000]";
        }
        amountList.add(new UserResDto(range, value));
    }
    // Group by range label and sum the amounts within each range.
    // FIX: key extractor was UserResDto::setRange (a setter — not valid here).
    Map<String, IntSummaryStatistics> resultMap = amountList.stream().collect(
            Collectors.groupingBy(UserResDto::getRange,
                    Collectors.summarizingInt(UserResDto::getAmount)));
    DecimalFormat df = new DecimalFormat("0.00%");  // hoisted: loop-invariant
    for (UserResDto resDto : amountList) {
        // FIX: was resDto.setRange() (no-arg setter call) and an undefined
        // variable 'sum' as the numerator.
        long total = resultMap.get(resDto.getRange()).getSum();
        BigDecimal divide = amount == 0
                ? BigDecimal.ZERO
                : new BigDecimal(total)
                        .divide(new BigDecimal(amount), 4, BigDecimal.ROUND_HALF_UP);
        resDto.setProportion(df.format(divide));
    }
    return amountList;
}
完美啊,节约代码量四十行,debug,直接测试下。等等,怎么返回结果里面会有这么多重复值。。。这是怎么回事???
仔细看了下,原来是这里出了问题
// NOTE(review): this loop adds one UserResDto per database row, so the list
// ends up with one entry per row rather than one entry per range — this is
// the source of the duplicate range labels in the returned result.
List<UserResDto>amountList=new ArrayList<>();
for (ResModel model : resModelList) {
if(1<=model.getAmount()&&200>= model.getAmount()){
amountList.add(new UserResDto("[1,200]",model.getAmount()));
}else if (201<=model.getAmount()&&500>= model.getAmount()){
amountList.add(new UserResDto("[201,500]",model.getAmount()));
}else if(501<=model.getAmount()&&1000>= model.getAmount()){
amountList.add(new UserResDto("[501,1000]",model.getAmount()));
}else if (1001<=model.getAmount()&&2000>= model.getAmount()){
amountList.add(new UserResDto("[1001,2000]",model.getAmount()));
}else if(2001<=model.getAmount()&&6000>= model.getAmount()){
amountList.add(new UserResDto("[2001,6000]",model.getAmount()));
}else if (6001<=model.getAmount()&&10000>= model.getAmount()){
amountList.add(new UserResDto("[6001,10000]",model.getAmount()));
}else{
amountList.add(new UserResDto("[10001,-]",model.getAmount()));
}
}
因为在添加到结果集的时候是将数据库中的所有数据全部循环并添加到了结果集中了,而我在后续处理数据的时候也是对这个集合进行处理并返回了这个集合,因此导致了出现很多重复的结果。大意了啊。。。
然后直接用Stream自带的distinct()方法去重也无法实现,因为结果集中基本每个元素的总和都不一样,按整个对象去重无法直接完成,那么我们只能自己搞一个针对关键字段的去重了。
/**
 * Returns a stateful predicate that accepts an element only the first time
 * its extracted key is seen — a keyed equivalent of Stream.distinct().
 *
 * One map is created per returned predicate, so each stream pipeline gets
 * its own "seen" state; the map is concurrent, which keeps the predicate
 * safe to use from a parallel stream.
 */
private static <T> Predicate<T> distinctByKey(Function<? super T, ?> keyExtractor) {
    Map<Object, Boolean> observed = new ConcurrentHashMap<>();
    return candidate -> {
        Object key = keyExtractor.apply(candidate);
        // putIfAbsent returns null exactly when the key was not yet present.
        return observed.putIfAbsent(key, Boolean.TRUE) == null;
    };
}
优化后的代码如下:
/**
 * Final version of the pie-chart calculation: tag each row with its range
 * label, sum per range via Collectors.groupingBy, attach each range's
 * percentage share, then deduplicate by range label so exactly one entry
 * per range is returned.
 *
 * @param dto query conditions from the caller
 * @return one UserResDto per range that has at least one row, with range
 *         label and percentage share formatted as "0.00%"
 */
public List<UserResDto> getDataCircle(QueryDto dto) {
    // FIX: original assigned to 'model' but then used undefined 'queryModel'.
    QueryModel queryModel = changeDto2Model.change(dto, QueryModel.class);
    List<ResModel> resModelList = mapper.getDataFromSql(queryModel);
    // Grand total (denominator of every proportion).
    long amount = resModelList.stream()
            .collect(Collectors.summarizingInt(ResModel::getAmount)).getSum();
    // Tag every row with its range label.
    List<UserResDto> amountList = new ArrayList<>(resModelList.size());
    for (ResModel res : resModelList) {
        int value = res.getAmount();
        String range;
        if (value < 1 || value > 10000) {
            // The original else branch also routed amounts < 1 here.
            range = "[10001,-]";
        } else if (value <= 200) {
            range = "[1,200]";
        } else if (value <= 500) {
            range = "[201,500]";
        } else if (value <= 1000) {
            range = "[501,1000]";
        } else if (value <= 2000) {
            range = "[1001,2000]";
        } else if (value <= 6000) {
            range = "[2001,6000]";
        } else {
            range = "[6001,10000]";
        }
        amountList.add(new UserResDto(range, value));
    }
    // Group by range label and sum amounts per range (label -> statistics).
    // FIX: key extractor was UserResDto::setRange (a setter — not valid here).
    Map<String, IntSummaryStatistics> resultMap = amountList.stream().collect(
            Collectors.groupingBy(UserResDto::getRange,
                    Collectors.summarizingInt(UserResDto::getAmount)));
    DecimalFormat df = new DecimalFormat("0.00%");  // hoisted: loop-invariant
    for (UserResDto resDto : amountList) {
        // FIX: was resDto.setRange() (no-arg setter call) and an undefined
        // variable 'sum' as the numerator.
        long total = resultMap.get(resDto.getRange()).getSum();
        BigDecimal divide = amount == 0
                ? BigDecimal.ZERO
                : new BigDecimal(total)
                        .divide(new BigDecimal(amount), 4, BigDecimal.ROUND_HALF_UP);
        resDto.setProportion(df.format(divide));
    }
    // Keep the first entry per range label.
    // FIX: key was UserResDto::getScope, but the field set above is 'range';
    // also removed a stray double semicolon.
    return amountList.stream()
            .filter(distinctByKey(UserResDto::getRange))
            .collect(Collectors.toList());
}
完结
撒花
优化反思
上面有提到Collectors.summarizingInt()这个是个很有意思的方法。
在我完成这个需求查资料的时候发现,jdk1.8的提供了对集合的stream的流处理方式,而stream针对集合的计算提供了summarizingInt、summarizingLong、summarizingDouble三个方法,分别对应int、long、double的各种数据类型,借此我们可以看下summarizingInt的相关源码:
/**
 * Returns a Collector which applies an int-producing mapping function to
 * each input element, and returns summary statistics for the resulting
 * values.
 *
 * Parameters:
 * mapper - a mapping function to apply to each element
 * Type parameters:
 * T - the type of the input elements
 * Returns:
 * a Collector implementing the summary-statistics reduction
 * See also:
 * summarizingDouble(ToDoubleFunction), summarizingLong(ToLongFunction)
 */
public static <T>
Collector<T, ?, IntSummaryStatistics> summarizingInt(ToIntFunction<? super T> mapper) {
return new CollectorImpl<T, IntSummaryStatistics, IntSummaryStatistics>(
IntSummaryStatistics::new,
(r, t) -> r.accept(mapper.applyAsInt(t)),
(l, r) -> { l.combine(r); return l; }, CH_ID);
}
而它的返回结果类型 IntSummaryStatistics 如下:
/**
 * A state object for collecting statistics such as count, min, max, sum,
 * and average.
 *
 * This class is designed to work with (though does not require) streams.
 * For example, you can compute summary statistics on a stream of ints with:
 *
 *   IntSummaryStatistics stats = intStream.collect(IntSummaryStatistics::new,
 *                                                  IntSummaryStatistics::accept,
 *                                                  IntSummaryStatistics::combine);
 *
 * IntSummaryStatistics can be used as a reduction target for a stream.
 * For example:
 *
 *   IntSummaryStatistics stats = people.stream()
 *       .collect(Collectors.summarizingInt(Person::getDependents));
 *
 * This computes, in a single pass, the count of people, as well as the
 * minimum, maximum, sum, and average of their number of dependents.
 *
 * Implementation note:
 * This implementation is not thread safe. However, it is safe to use
 * Collectors.summarizingInt() on a parallel stream, because the parallel
 * implementation of Stream.collect() provides the necessary partitioning,
 * isolation, and merging of results for safe and efficient parallel
 * execution.
 * This implementation does not check for overflow of the sum.
 *
 * @since 1.8
 */
public class IntSummaryStatistics implements IntConsumer {
private long count;
private long sum;
private int min = Integer.MAX_VALUE;
private int max = Integer.MIN_VALUE;
/**
* Construct an empty instance with zero count, zero sum,
* {@code Integer.MAX_VALUE} min, {@code Integer.MIN_VALUE} max and zero
* average.
*/
public IntSummaryStatistics() { }
/**
* Records a new value into the summary information
*
* @param value the input value
*/
@Override
public void accept(int value) {
++count;
sum += value;
min = Math.min(min, value);
max = Math.max(max, value);
}
/**
* Combines the state of another {@code IntSummaryStatistics} into this one.
*
* @param other another {@code IntSummaryStatistics}
* @throws NullPointerException if {@code other} is null
*/
public void combine(IntSummaryStatistics other) {
count += other.count;
sum += other.sum;
min = Math.min(min, other.min);
max = Math.max(max, other.max);
}
/**
* Returns the count of values recorded.
*
* @return the count of values
*/
public final long getCount() {
return count;
}
/**
* Returns the sum of values recorded, or zero if no values have been
* recorded.
*
* @return the sum of values, or zero if none
*/
public final long getSum() {
return sum;
}
/**
* Returns the minimum value recorded, or {@code Integer.MAX_VALUE} if no
* values have been recorded.
*
* @return the minimum value, or {@code Integer.MAX_VALUE} if none
*/
public final int getMin() {
return min;
}
/**
* Returns the maximum value recorded, or {@code Integer.MIN_VALUE} if no
* values have been recorded.
*
* @return the maximum value, or {@code Integer.MIN_VALUE} if none
*/
public final int getMax() {
return max;
}
/**
* Returns the arithmetic mean of values recorded, or zero if no values have been
* recorded.
*
* @return the arithmetic mean of values, or zero if none
*/
public final double getAverage() {
return getCount() > 0 ? (double) getSum() / getCount() : 0.0d;
}
@Override
/**
* {@inheritDoc}
*
* Returns a non-empty string representation of this object suitable for
* debugging. The exact presentation format is unspecified and may vary
* between implementations and versions.
*/
public String toString() {
return String.format(
"%s{count=%d, sum=%d, min=%d, average=%f, max=%d}",
this.getClass().getSimpleName(),
getCount(),
getSum(),
getMin(),
getAverage(),
getMax());
}
}
各位使用的时候要注意这个方法是线程不安全的哦
至此,结束