随着 storm 版本的演进,TimeCacheMap 缓存类已不再被 storm 推荐使用,取而代之的是 RotatingMap。今天就来比较一下这两个类在使用场景上的区别。
看TimeCacheMap
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package backtype.storm.utils;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
/**
* Expires keys that have not been updated in the configured number of seconds.
* The algorithm used will take between expirationSecs and
* expirationSecs * (1 + 1 / (numBuckets-1)) to actually expire the message.
*
* get, put, remove, containsKey, and size take O(numBuckets) time to run.
*
* The advantage of this design is that the expiration thread only locks the object
* for O(1) time, meaning the object is essentially always available for gets/puts.
*/
//deprecated in favor of non-threaded RotatingMap
@Deprecated
public class TimeCacheMap<K, V> {
    // This default ensures things expire at most 50% past the expiration time.
    private static final int DEFAULT_NUM_BUCKETS = 3;

    /** Callback invoked once for each entry that expires out of the cache. */
    public static interface ExpiredCallback<K, V> {
        public void expire(K key, V val);
    }

    private final LinkedList<HashMap<K, V>> _buckets;
    private final Object _lock = new Object();
    private final Thread _cleaner;
    // FIX: was declared with the raw ExpiredCallback type, producing an
    // unchecked call at the expire() site; parameterize it.
    private final ExpiredCallback<K, V> _callback;

    /**
     * Creates the cache and starts the background expiration thread.
     *
     * @param expirationSecs minimum lifetime of an entry, in seconds
     * @param numBuckets     number of rotation buckets; must be >= 2
     * @param callback       invoked for each expired entry; may be null
     * @throws IllegalArgumentException if numBuckets is less than 2
     */
    public TimeCacheMap(int expirationSecs, int numBuckets, ExpiredCallback<K, V> callback) {
        if (numBuckets < 2) {
            throw new IllegalArgumentException("numBuckets must be >= 2");
        }
        _buckets = new LinkedList<HashMap<K, V>>();
        for (int i = 0; i < numBuckets; i++) {
            _buckets.add(new HashMap<K, V>());
        }
        _callback = callback;
        final long expirationMillis = expirationSecs * 1000L;
        final long sleepTime = expirationMillis / (numBuckets - 1);
        _cleaner = new Thread(new Runnable() {
            public void run() {
                try {
                    while (true) {
                        Map<K, V> dead = null;
                        Time.sleep(sleepTime);
                        // Only the O(1) rotation happens under the lock; the
                        // expire callbacks run outside it so the map stays
                        // available for gets/puts while they execute.
                        synchronized (_lock) {
                            dead = _buckets.removeLast();
                            _buckets.addFirst(new HashMap<K, V>());
                        }
                        if (_callback != null) {
                            for (Entry<K, V> entry : dead.entrySet()) {
                                _callback.expire(entry.getKey(), entry.getValue());
                            }
                        }
                    }
                } catch (InterruptedException ex) {
                    // Interruption is the shutdown signal sent by cleanup();
                    // fall out of the loop and let the daemon thread die.
                }
            }
        });
        _cleaner.setDaemon(true);
        _cleaner.start();
    }

    /** Creates the cache with the default bucket count and an expiration callback. */
    public TimeCacheMap(int expirationSecs, ExpiredCallback<K, V> callback) {
        this(expirationSecs, DEFAULT_NUM_BUCKETS, callback);
    }

    /** Creates the cache with the default bucket count and no expiration callback. */
    public TimeCacheMap(int expirationSecs) {
        this(expirationSecs, DEFAULT_NUM_BUCKETS);
    }

    /** Creates the cache with an explicit bucket count and no expiration callback. */
    public TimeCacheMap(int expirationSecs, int numBuckets) {
        this(expirationSecs, numBuckets, null);
    }

    /**
     * Returns true if the key is present in any bucket.
     * Runs in O(numBuckets) time under the internal lock.
     */
    public boolean containsKey(K key) {
        synchronized (_lock) {
            for (HashMap<K, V> bucket : _buckets) {
                if (bucket.containsKey(key)) {
                    return true;
                }
            }
            return false;
        }
    }

    /** Returns the value for {@code key}, or null if absent (or expired). */
    public V get(K key) {
        synchronized (_lock) {
            for (HashMap<K, V> bucket : _buckets) {
                if (bucket.containsKey(key)) {
                    return bucket.get(key);
                }
            }
            return null;
        }
    }

    /**
     * Inserts or refreshes {@code key}: it is placed in the newest bucket and
     * removed from all older ones, resetting its time to expiration.
     */
    public void put(K key, V value) {
        synchronized (_lock) {
            Iterator<HashMap<K, V>> it = _buckets.iterator();
            HashMap<K, V> bucket = it.next();
            bucket.put(key, value);
            while (it.hasNext()) {
                bucket = it.next();
                bucket.remove(key);
            }
        }
    }

    /** Removes {@code key} and returns its value, or null if it was absent. */
    public Object remove(K key) {
        synchronized (_lock) {
            for (HashMap<K, V> bucket : _buckets) {
                if (bucket.containsKey(key)) {
                    return bucket.remove(key);
                }
            }
            return null;
        }
    }

    /** Returns the total number of live entries across all buckets. */
    public int size() {
        synchronized (_lock) {
            int size = 0;
            for (HashMap<K, V> bucket : _buckets) {
                size += bucket.size();
            }
            return size;
        }
    }

    /** Stops the background expiration thread. The map remains usable but no longer expires entries. */
    public void cleanup() {
        _cleaner.interrupt();
    }
}
RotatingMap代码:
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package backtype.storm.utils;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
/**
* Expires keys that have not been updated in the configured number of seconds.
* The algorithm used will take between expirationSecs and
* expirationSecs * (1 + 1 / (numBuckets-1)) to actually expire the message.
*
* get, put, remove, containsKey, and size take O(numBuckets) time to run.
*
* The advantage of this design is that the expiration thread only locks the object
* for O(1) time, meaning the object is essentially always available for gets/puts.
*/
public class RotatingMap<K, V> {
    // This default ensures things expire at most 50% past the expiration time.
    private static final int DEFAULT_NUM_BUCKETS = 3;

    /** Callback invoked once for each entry that is expired by rotate(). */
    public static interface ExpiredCallback<K, V> {
        public void expire(K key, V val);
    }

    private final LinkedList<HashMap<K, V>> _buckets;
    // FIX: was declared with the raw ExpiredCallback type, producing an
    // unchecked call at the expire() site; parameterize it.
    private final ExpiredCallback<K, V> _callback;

    /**
     * Creates the map. Unlike TimeCacheMap this class owns no thread and does
     * no locking: the caller drives expiration by invoking rotate() and is
     * responsible for any synchronization.
     *
     * @param numBuckets number of rotation buckets; must be >= 2
     * @param callback   invoked for each expired entry; may be null
     * @throws IllegalArgumentException if numBuckets is less than 2
     */
    public RotatingMap(int numBuckets, ExpiredCallback<K, V> callback) {
        if (numBuckets < 2) {
            throw new IllegalArgumentException("numBuckets must be >= 2");
        }
        _buckets = new LinkedList<HashMap<K, V>>();
        for (int i = 0; i < numBuckets; i++) {
            _buckets.add(new HashMap<K, V>());
        }
        _callback = callback;
    }

    /** Creates the map with the default bucket count. */
    public RotatingMap(ExpiredCallback<K, V> callback) {
        this(DEFAULT_NUM_BUCKETS, callback);
    }

    /** Creates the map with no expiration callback. */
    public RotatingMap(int numBuckets) {
        this(numBuckets, null);
    }

    /**
     * Expires the oldest bucket: its entries are passed to the callback (if
     * any) and returned. Entries not re-put within numBuckets rotations fall
     * out of the map.
     *
     * @return the map of entries that just expired
     */
    public Map<K, V> rotate() {
        Map<K, V> dead = _buckets.removeLast();
        _buckets.addFirst(new HashMap<K, V>());
        if (_callback != null) {
            for (Entry<K, V> entry : dead.entrySet()) {
                _callback.expire(entry.getKey(), entry.getValue());
            }
        }
        return dead;
    }

    /** Returns true if the key is present in any bucket. O(numBuckets). */
    public boolean containsKey(K key) {
        for (HashMap<K, V> bucket : _buckets) {
            if (bucket.containsKey(key)) {
                return true;
            }
        }
        return false;
    }

    /** Returns the value for {@code key}, or null if absent (or expired). */
    public V get(K key) {
        for (HashMap<K, V> bucket : _buckets) {
            if (bucket.containsKey(key)) {
                return bucket.get(key);
            }
        }
        return null;
    }

    /**
     * Inserts or refreshes {@code key}: it is placed in the newest bucket and
     * removed from all older ones, resetting its rotations-to-expiration.
     */
    public void put(K key, V value) {
        Iterator<HashMap<K, V>> it = _buckets.iterator();
        HashMap<K, V> bucket = it.next();
        bucket.put(key, value);
        while (it.hasNext()) {
            bucket = it.next();
            bucket.remove(key);
        }
    }

    /** Removes {@code key} and returns its value, or null if it was absent. */
    public Object remove(K key) {
        for (HashMap<K, V> bucket : _buckets) {
            if (bucket.containsKey(key)) {
                return bucket.remove(key);
            }
        }
        return null;
    }

    /** Returns the total number of live entries across all buckets. */
    public int size() {
        int size = 0;
        for (HashMap<K, V> bucket : _buckets) {
            size += bucket.size();
        }
        return size;
    }
}
仔细比较这两个类可以发现:除了 TimeCacheMap 在每个方法内加了同步锁并内置了一个清理线程、而 RotatingMap 去掉了锁并把桶的轮换暴露为 rotate() 方法之外,两者核心的数据结构与读写逻辑基本相同。
这两个到底有什么不一样的:
眼尖的读者应该已经发现,TimeCacheMap 的每个方法体内都加上了 synchronized 关键字,这是保证并发安全时最重的同步手段,现在已经不推荐这样使用,应根据不同场景选用不同级别的锁;但在 RotatingMap 类中这个 synchronized 关键字却被删去了,是不是 RotatingMap 已经解决了并发问题、不需要再加 synchronized 了呢?
从实际测试来看,RotatingMap 并没有解决并发问题。
storm 的本意是把同步工作交由开发者自己控制,所以为了让 RotatingMap 适应并发场景,必须对代码进行改造。笔者从事的是广告点击次数统计的工作,因此肯定存在并发问题!
TimeCacheMap 是把 synchronized 关键字加在了每个方法体内部,并发量大时效率过低,因此选择在 RotatingMap 的基础上进行改造。
分析代码可知,RotatingMap 本身不再内置清理线程,需要由外部某个线程周期性调用 rotate() 来淘汰过期缓存;当这个线程与其他读写线程同时操作时就会产生并发问题。由于内部采用的都是非线程安全的基本容器,可以考虑用 java.util.concurrent 包中的并发容器替换,提高并发度;另外内部使用了迭代器,在并发修改时会抛出 ConcurrentModificationException,因此也应一并替换。
大体思路定了,可以动手改造了!
改造后的代码:
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedDeque;
/**
* 缓存管理类 主要负责缓存中的管理,构造方法包含两个参数,一个是桶的数量,一个是键的有效期,已经解决并发问题,可以直接使用
*
* @author cong
*
* @param <K>保存键的值
* @param <V>保存值得类型
*/
/**
 * Concurrency-safe rotating cache. Entries expire between expirationSecs and
 * expirationSecs * (1 + 1 / (numBuckets - 1)) after their last update. A
 * daemon thread performs the rotation, and the buckets are backed by
 * java.util.concurrent containers so callers need no external locking.
 *
 * @param <K> key type
 * @param <V> value type
 */
public class UserCacheTemp<K, V> {
    // This default ensures things expire at most 50% past the expiration time.
    private static final int DEFAULT_NUM_BUCKETS = 3;

    /**
     * Callback invoked once for each entry that expires out of the cache.
     *
     * @param <K> key type
     * @param <V> value type
     */
    public static interface ExpiredCallback<K, V> {
        /**
         * @param key the expired key
         * @param val the value that was associated with it
         */
        public void expire(K key, V val);
    }

    // Buckets ordered newest-first; both the deque and the maps are
    // concurrent so rotation and reads/writes can overlap safely.
    private final ConcurrentLinkedDeque<ConcurrentHashMap<K, V>> _buckets;
    // FIX: was declared with the raw ExpiredCallback type, producing an
    // unchecked call at the expire() site; parameterize it.
    private final ExpiredCallback<K, V> _callback;
    // Daemon thread that rotates the buckets on a fixed period.
    private final Thread _cleaner;

    /**
     * Creates the cache and starts the background rotation thread.
     *
     * @param expirationSecs minimum lifetime of an entry, in seconds
     * @param numBuckets     number of rotation buckets; must be >= 2
     * @param callback       invoked for each expired entry; may be null
     * @throws IllegalArgumentException if numBuckets is less than 2
     */
    public UserCacheTemp(int expirationSecs, int numBuckets,
            ExpiredCallback<K, V> callback) {
        if (numBuckets < 2) {
            throw new IllegalArgumentException("numBuckets must be >= 2");
        }
        _buckets = new ConcurrentLinkedDeque<ConcurrentHashMap<K, V>>();
        for (int i = 0; i < numBuckets; i++) {
            _buckets.add(new ConcurrentHashMap<K, V>());
        }
        _callback = callback;
        final long expirationMillis = expirationSecs * 1000L;
        final long sleepTime = expirationMillis / (numBuckets - 1);
        _cleaner = new Thread(new Runnable() {
            public void run() {
                try {
                    // FIX: dropped the unused local "dead" that shadowed
                    // rotate()'s return value.
                    while (true) {
                        Thread.sleep(sleepTime);
                        rotate();
                    }
                } catch (InterruptedException ex) {
                    // FIX: restore the interrupt flag before the thread
                    // exits, per standard InterruptedException handling.
                    Thread.currentThread().interrupt();
                }
            }
        });
        _cleaner.setDaemon(true);
        _cleaner.start();
    }

    /** Creates the cache with the default bucket count and an expiration callback. */
    public UserCacheTemp(int expirationSecs, ExpiredCallback<K, V> callback) {
        this(expirationSecs, DEFAULT_NUM_BUCKETS, callback);
    }

    /** Creates the cache with an explicit bucket count and no expiration callback. */
    public UserCacheTemp(int expirationSecs, int numBuckets) {
        this(expirationSecs, numBuckets, null);
    }

    /**
     * Creates the cache with the default bucket count and no callback.
     * Added for parity with the TimeCacheMap constructor set.
     */
    public UserCacheTemp(int expirationSecs) {
        this(expirationSecs, DEFAULT_NUM_BUCKETS);
    }

    /**
     * Expires the oldest bucket: its entries are passed to the callback (if
     * any) and returned. Normally driven by the internal cleaner thread.
     *
     * @return the map of entries that just expired
     */
    public ConcurrentHashMap<K, V> rotate() {
        ConcurrentHashMap<K, V> dead = _buckets.removeLast();
        _buckets.addFirst(new ConcurrentHashMap<K, V>());
        if (_callback != null) {
            for (Entry<K, V> entry : dead.entrySet()) {
                _callback.expire(entry.getKey(), entry.getValue());
            }
        }
        return dead;
    }

    /**
     * @param key the key to look up
     * @return true if the key is present in any bucket
     */
    public boolean containsKey(K key) {
        for (ConcurrentHashMap<K, V> bucket : _buckets) {
            if (bucket.containsKey(key)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param key the key to look up
     * @return the associated value, or null if absent (or expired)
     */
    public V get(K key) {
        for (ConcurrentHashMap<K, V> bucket : _buckets) {
            if (bucket.containsKey(key)) {
                return bucket.get(key);
            }
        }
        return null;
    }

    /**
     * Inserts or refreshes {@code key}: it is placed in the newest bucket and
     * removed from all older ones, resetting its time to expiration.
     *
     * @param key   the key to store
     * @param value the value to associate with it
     */
    public void put(K key, V value) {
        Iterator<ConcurrentHashMap<K, V>> it = _buckets.iterator();
        ConcurrentHashMap<K, V> bucket = it.next();
        bucket.put(key, value);
        while (it.hasNext()) {
            bucket = it.next();
            bucket.remove(key);
        }
    }

    /**
     * Removes {@code key} from the cache.
     *
     * @param key the key to remove
     * @return the removed value, or null if it was absent
     */
    public Object remove(K key) {
        for (ConcurrentHashMap<K, V> bucket : _buckets) {
            if (bucket.containsKey(key)) {
                return bucket.remove(key);
            }
        }
        return null;
    }

    /**
     * @return the total number of live entries across all buckets
     */
    public int size() {
        int size = 0;
        for (ConcurrentHashMap<K, V> bucket : _buckets) {
            size += bucket.size();
        }
        return size;
    }
}
进行性能测试发现:
未加同步锁机制的RotatingMap存储速度最快,但是存在并发问题;
加了synchronized关键字的TimeCacheMap,但是性能最低;
而改造后的类性能介于两者之间,且解决了并发问题;
性能比大约是10:5:1;
如果应用程序不存在并发问题建议使用RotatingMap,存在并发问题要求实时性的采用TimeCacheMap,追求性能对实时性要求不敏感的采用改造后的代码!