背景:
城市的数据指标多种多样,人口数量,GDP,土地面积等等
我们有的时候想要找相似的城市,例如 人口体量大体相同 土地面积大体相同 连GDP也大体相同
sql方式:计算在数据库当中进行
在同一年内找相似城市
SELECT
area_code
FROM
(
SELECT
count(*) count,
area_code
FROM
(
SELECT
b.area_code area_code,
b.quota_code quota_code
FROM
(
SELECT
quota_code,
score_value
FROM
score_dat
WHERE
area_code = '420100'#城市码
AND static_year = 2016 #具体的年份
AND quota_code IN ('Z110101','Z170408','Z140606') #各种指标码
) a,
(
SELECT
quota_code,
area_code,
score_value
FROM
score_dat
WHERE
static_year = 2016 #具体的年份
AND quota_code IN ('Z110101','Z170408','Z140606') #各种指标码
) b
WHERE
a.quota_code = b.quota_code
AND a.score_value <= b.score_value * (1 +
(CASE WHEN b.quota_code='Z110101' THEN 0.2 WHEN b.quota_code = 'Z170408' THEN 0.2 WHEN b.quota_code = 'Z140606' THEN 0.2 END))#不同指标码可以设置不同的偏移量
AND a.score_value >= b.score_value * (1 -
(CASE WHEN b.quota_code='Z110101' THEN 0.2 WHEN b.quota_code = 'Z170408' THEN 0.2 WHEN b.quota_code = 'Z140606' THEN 0.2 END))#不同指标码可以设置不同的偏移量
) c
GROUP BY
area_code
) d
WHERE
count >= 3#指标码的个数
结果:
420100,510100
时间:0.06s
数据量20万条数据
把自己也给查询出来了,如果不想显示自己,在sql中添加个!=就可以了
在不同年代中找相似的城市
SELECT
area_code,static_year
FROM
(
SELECT
count(*) count,
area_code,
static_year
FROM
(
SELECT
b.area_code area_code,
b.quota_code quota_code,
b.static_year static_year
FROM
(
SELECT
quota_code,
score_value
FROM
score_data
WHERE
area_code = '420100'#城市码
AND static_year = 2016 #具体的年份
AND quota_code IN ('Z110101','Z170408','Z140606') #各种指标码
) a,
(
SELECT
quota_code,
area_code,
score_value,
static_year
FROM
score_data
WHERE
quota_code IN ('Z110101','Z170408','Z140606') #各种指标码
) b
WHERE
a.quota_code = b.quota_code
AND a.score_value <= b.score_value * (1 +
(CASE WHEN b.quota_code='Z110101' THEN 0.2 WHEN b.quota_code = 'Z170408' THEN 0.2 WHEN b.quota_code = 'Z140606' THEN 0.2 END))#不同指标码可以设置不同的偏移量
AND a.score_value >= b.score_value * (1 -
(CASE WHEN b.quota_code='Z110101' THEN 0.2 WHEN b.quota_code = 'Z170408' THEN 0.2 WHEN b.quota_code = 'Z140606' THEN 0.2 END))#不同指标码可以设置不同的偏移量
) c
GROUP BY
area_code,static_year
) d
WHERE
count >= 3#指标码的个数
420100 2016
420100 2017
510100 2016
510100 2017
时间:0.065s
数据量20万条数据
把自己也给查询出来了,如果不想显示自己,在sql中添加个!=就可以了
代码方式:在程序当中计算
在程序计算的前提是,需要加载整张数据表的数据到内存当中
数据库的数据的数据结构
//保存数据库的数据的数据结构
public class FBStructure {
private Map<String, Map<Integer, List<ScoreGrade>>> indexYearScore;
private Map<String, Map<Integer, List<ScoreGrade>>> cityYearScore;
public Map<String, Map<Integer, List<ScoreGrade>>> getIndexYearScore() {
return indexYearScore;
}
public void setIndexYearScore(Map<String, Map<Integer, List<ScoreGrade>>> indexYearScore) {
this.indexYearScore = indexYearScore;
}
public Map<String, Map<Integer, List<ScoreGrade>>> getCityYearScore() {
return cityYearScore;
}
public void setCityYearScore(Map<String, Map<Integer, List<ScoreGrade>>> cityYearScore) {
this.cityYearScore = cityYearScore;
}
}
用于前台展示的数据结构
//用于前台展示的数据结构
List<Map<String,Object>> frontBackList;
List<String> notExist;
public List<Map<String, Object>> getFrontBackList() {
return frontBackList;
}
public void setFrontBackList(List<Map<String, Object>> frontBackList) {
this.frontBackList = frontBackList;
}
public List<String> getNotExist() {
return notExist;
}
public void setNotExist(List<String> notExist) {
this.notExist = notExist;
}
核心代码:
/**
* 从DB中获取需要的数据结构
* @return
*/
private synchronized FBStructure getStructureByDB(){
long expireTime = SystemConfig.EXPIRE_TIME;
if(expiryMap==null) {
expiryMap = ExpiryCache.getExpiryMap();
}
//从内存当中查询数据
FBStructure fBStructure = (FBStructure)expiryMap.get("fBStructure");
List<ScoreGrade> selectAllScoreGrade = null;
if(fBStructure==null) {
fBStructure = new FBStructure();
//一个用于管理引用的方法 expireTime过期时间
expiryMap.put("fBStructure", fBStructure,expireTime);
//从redis中加载数据
String scoreJson = redisClientUtils.get("score_city", 0);
if(scoreJson==null) {
//从数据库当中查询数据
selectAllScoreGrade = conStaticDao.selectAllScoreGrade();
scoreJson = JacksonUtil.writeValueAsString(selectAllScoreGrade);
redisClientUtils.set("score_city", scoreJson, 0);
}else {
List<Map<String,Object>> scoreMapList = JacksonUtil.readValue(scoreJson, List.class);
selectAllScoreGrade = new LinkedList<ScoreGrade>();
ScoreGrade scoreGrade = null;
for (Map<String, Object> map : scoreMapList) {
scoreGrade = new ScoreGrade();
selectAllScoreGrade.add(scoreGrade);
scoreGrade.setAreaCode( map.get("areaCode").toString());
scoreGrade.setAreaName(map.get("areaName").toString());
scoreGrade.setIndexId(map.get("indexId").toString());
scoreGrade.setScoreValue(Float.parseFloat(map.get("scoreValue").toString()));
scoreGrade.setStaticYear(Integer.parseInt(map.get("staticYear").toString()));
}
}
//indexId:staticYear:scoreGrade
Map<String,Map<Integer,List<ScoreGrade>>> indexYearScore = new HashMap<String,Map<Integer,List<ScoreGrade>>>();
Map<String,Map<Integer,List<ScoreGrade>>> cityYearScore = new HashMap<String,Map<Integer,List<ScoreGrade>>>();
Map<Integer,List<ScoreGrade>> tIdYearScore = null;
Map<Integer,List<ScoreGrade>> rCodeYearScore = null;
List<ScoreGrade> tIdScoreList = null;
List<ScoreGrade> tCodeScoreList = null;
String tIndexId = null;
Integer tStaticYear = null;
String tAreaCode = null;
for (ScoreGrade scoreGrade : selectAllScoreGrade) {
tIndexId = scoreGrade.getIndexId();
tStaticYear = scoreGrade.getStaticYear();
tAreaCode = scoreGrade.getAreaCode();
tIdYearScore = indexYearScore.get(tIndexId);
if(tIdYearScore==null) {
tIdYearScore = new HashMap<Integer,List<ScoreGrade>>();
indexYearScore.put(tIndexId, tIdYearScore);
}
tIdScoreList = tIdYearScore.get(tStaticYear);
if(tIdScoreList==null) {
tIdScoreList = new LinkedList<ScoreGrade>();
tIdYearScore.put(tStaticYear, tIdScoreList);
}
tIdScoreList.add(scoreGrade);
rCodeYearScore = cityYearScore.get(tAreaCode);
if(rCodeYearScore==null) {
rCodeYearScore = new HashMap<Integer,List<ScoreGrade>>();
cityYearScore.put(tAreaCode, rCodeYearScore);
}
tCodeScoreList = rCodeYearScore.get(tStaticYear);
if(tCodeScoreList==null) {
tCodeScoreList = new LinkedList<ScoreGrade>();
rCodeYearScore.put(tStaticYear, tCodeScoreList);
}
tCodeScoreList.add(scoreGrade);
}
fBStructure.setCityYearScore(cityYearScore);
fBStructure.setIndexYearScore(indexYearScore);
}
expiryMap.setExpire("fBStructure", expireTime);
return fBStructure;
}
/**
* 查询相似城市indexIds 指标项数组 offsets偏移量数组 staticYear目标年份
* areaCode城市码 sortIndex用于排序的指标字段 frontBack:true返回排序指标前20名的数据
*frontBack:false返回排序指标后20名的数据 isSync:true查询staticYear年份的城市
*isSync:false查询不同年份的城市
*/
public FBResult selectFrontAndBackCity(String[] indexIds,Integer[] offsets,
Integer staticYear,String areaCode,String sortIndex,Boolean frontBack,Boolean isSync){
FBResult result = new FBResult();
FBStructure fBStructure = this.getStructureByDB();
Map<String, Map<Integer, List<ScoreGrade>>> indexYearScore = fBStructure.getIndexYearScore();
Map<String, Map<Integer, List<ScoreGrade>>> cityYearScore = fBStructure.getCityYearScore();
if(sortIndex == null) {
//默认排序
sortIndex = SystemConfig.SORT_INDEX_ID;
}
if(isSync==null) {
//默认同步
isSync = true;
}
//默认正面城市
if(frontBack==null) {
frontBack = true;
}
int flag =0;
for (String indexId : indexIds) {
if(sortIndex.equals(indexId)){
flag = 1;
break;
}
}
if(flag==0) {
sortIndex = indexIds[0];
}
int index = 0;
Float value = null;
Float offset = null;
//indexId:scoreValue
Map<String,Float> benchmarkingCityMap =new HashMap<String,Float>();
//获取指定年份城市指标
Map<Integer, List<ScoreGrade>> codeYearScore = cityYearScore.get(areaCode);
List<ScoreGrade> codeScoreList = codeYearScore.get(staticYear);
for (ScoreGrade scoreGrade : codeScoreList) {
benchmarkingCityMap.put(scoreGrade.getIndexId(), scoreGrade.getScoreValue());
}
Map<Integer, List<ScoreGrade>> yearScore = null;
Map<String,Map<Integer,Map<String,Object>>> codeYearCountValue = new HashMap<String,Map<Integer,Map<String,Object>>>();
Collection<List<ScoreGrade>> scoreListCollection = null;
List<ScoreGrade> scoreList = null;
String quotaName =null;
List<String> notExist = new LinkedList<String>();
Map<String, String> quotaMap = this.getQuotaMap();
for (String indexId : indexIds) {
value = benchmarkingCityMap.get(indexId);
if(value==null) {
quotaName = quotaMap.get(indexId);
notExist.add(quotaName);
}
}
if(!notExist.isEmpty()) {
result.setNotExist(notExist);
return result;
}
for (String indexId : indexIds) {
index++;
value = benchmarkingCityMap.get(indexId);
offset = Math.abs((offsets[index-1]/(float)100));
yearScore = indexYearScore.get(indexId);
if(isSync) {
scoreList = yearScore.get(staticYear);
inCodeYearCountValue(areaCode, sortIndex, value, offset, scoreList, codeYearCountValue, indexId);
}else {
scoreListCollection = yearScore.values();
for (List<ScoreGrade> list : scoreListCollection) {
inCodeYearCountValue(areaCode, sortIndex, value, offset, list, codeYearCountValue, indexId);
}
}
}
int length = indexIds.length;
Map<String, Map<Integer, Map<String, Object>>> codeYearSortCityValue = filterAndSort(length,
codeYearCountValue);
List<Map<String,Object>> sortList = new LinkedList<Map<String,Object>>();
intoBestYearArea(codeYearSortCityValue, sortList);
List<Map<String, Object>> frontBackList = frontBackSort(frontBack, sortList);
result.setFrontBackList(frontBackList);
return result;
}
/**
* 取最早或最迟的年份
* @param codeYearSortCityValue
* @param sortList
*/
private void intoBestYearArea(Map<String, Map<Integer, Map<String, Object>>> codeYearSortCityValue,
List<Map<String, Object>> sortList) {
Collection<Map<Integer, Map<String, Object>>> yearSortCityValueList = codeYearSortCityValue.values();
Collection<Map<String, Object>> sortCityValues = null;
Iterator<Map<String, Object>> iterator = null;
Map<String, Object> cityValue = null;
for (Map<Integer, Map<String, Object>> yearSortCityValue : yearSortCityValueList) {
sortCityValues = yearSortCityValue.values();
iterator = sortCityValues.iterator();
if(iterator.hasNext()) {
cityValue = iterator.next();
sortList.add(cityValue);
}
}
}
/**
* 筛选并排序返回
* @param length
* @param codeYearCountValue
* @return
*/
private Map<String, Map<Integer, Map<String, Object>>> filterAndSort(Integer length,
Map<String, Map<Integer, Map<String, Object>>> codeYearCountValue) {
Integer count = null;;
Map<String,Map<Integer,Map<String,Object>>> codeYearSortCityValue = new HashMap<String,Map<Integer,Map<String,Object>>>();
Map<Integer,Map<String,Object>> yearSortCityValue = null;
Map<String,Object> cityValue = null;
Map<Integer,Map<String,Object>> yearCountValue = null;
Set<Integer> years = null;
Map<String, Object> countValue = null;
Set<Entry<String, Map<Integer, Map<String, Object>>>> entrySet = codeYearCountValue.entrySet();
String nowAreaCode = null;
for (Entry<String, Map<Integer, Map<String, Object>>> entry : entrySet) {
nowAreaCode = entry.getKey();
yearCountValue = entry.getValue();
years = yearCountValue.keySet();
for (Integer yearKey : years) {
countValue = yearCountValue.get(yearKey);
count = (Integer) countValue.get("count");
//筛选
if(count==length) {
yearSortCityValue = codeYearSortCityValue.get(nowAreaCode);
if(yearSortCityValue==null) {
//排序
yearSortCityValue = new TreeMap<Integer,Map<String,Object>>();
codeYearSortCityValue.put(nowAreaCode, yearSortCityValue);
}
cityValue = yearSortCityValue.get(yearKey);
if(cityValue==null) {
cityValue = new HashMap<String,Object>();
yearSortCityValue.put(yearKey, cityValue);
cityValue.put("areaCode", nowAreaCode);
cityValue.put("value", countValue.get("value"));
cityValue.put("areaName", countValue.get("areaName"));
cityValue.put("staticYear", yearKey);
}
}
}
}
return codeYearSortCityValue;
}
/**
* 指定正反取20条以内数据,并排序
* @param frontBack
* @param sortList
* @return
*/
private List<Map<String, Object>> frontBackSort(final Boolean frontBack, List<Map<String, Object>> sortList) {
Collections.sort(sortList, new Comparator<Map<String,Object>>() {
@Override
public int compare(Map<String, Object> o1, Map<String, Object> o2) {
Float o1F = (Float) o1.get("value");
Float o2F = (Float) o2.get("value");
if(frontBack) {
return o2F.compareTo(o1F);
}else {
return o1F.compareTo(o2F);
}
}
});
int size = sortList.size();
if(size>40) {
size = 40;
}
int end;
if(frontBack&&size%2!=0) {
end = size/2+1;
}else {
end = size/2;
}
List<Map<String, Object>> frontBackList = sortList.subList(0, end);
return frontBackList;
}
/**
* 封装数据结构
* @param areaCode
* @param sortIndex
* @param value
* @param offset
* @param scoreList
* @param codeYearCountValue
* @param indexId
*/
private void inCodeYearCountValue(String areaCode, String sortIndex, Float value, Float offset,
List<ScoreGrade> scoreList, Map<String,Map<Integer,Map<String,Object>>> codeYearCountValue, String indexId) {
Map<Integer,Map<String, Object>> yearCountValue = null;
Map<String, Object> countValue;
Integer count;
Float nowValue = null;
String nowAreaCode = null;
String nowAreaName = null;
Integer nowYear = null;
for (ScoreGrade scoreGrade : scoreList) {
nowValue = scoreGrade.getScoreValue();
nowAreaCode = scoreGrade.getAreaCode();
nowAreaName = scoreGrade.getAreaName();
nowYear = scoreGrade.getStaticYear();
//排除对标城市
if(scoreGrade.equals(areaCode)) {
continue;
}
if(nowValue>=value*(1-offset)&&nowValue<=value*(1+offset)) {
yearCountValue = codeYearCountValue.get(nowAreaCode);
if(yearCountValue==null) {
yearCountValue = new HashMap<Integer,Map<String, Object>>();
codeYearCountValue.put(nowAreaCode, yearCountValue);
}
countValue = yearCountValue.get(nowYear);
if(countValue==null) {
countValue = new HashMap<String,Object>();
yearCountValue.put(nowYear, countValue);
countValue.put("count", 0);
countValue.put("areaName", nowAreaName);
}
count = (Integer) countValue.get("count");
countValue.put("count", count+1);
if(sortIndex.equals(indexId)) {
countValue.put("value", nowValue);
}
}
}
}
后言:
数据库计算方式:
优点:不需要加载数据到内存
缺点:1.数据表过大时,查询会慢到令人无法忍受(可以通过水平分表解决)
2.数据库压力大,并发能力有限
程序计算方式:
优点:速度快,并发承载能力强
缺点:占用内存资源
(20w的表,数据量也才占用几十M的内存(主要看一条数据占多少内存),数据表太大,内存无法放下可以通过分布式计算解决)
两种方式各有优劣,具体使用哪种方式还要看实际情况而定