20 散列表

最新推荐文章于 2023-06-02 14:53:44 发布

名字到底多长

最新推荐文章于 2023-06-02 14:53:44 发布

阅读量379

点赞数

分类专栏：数据结构与问题求解

本文链接：https://blog.csdn.net/mingzidaodiduochang/article/details/8206663

版权

数据结构与问题求解专栏收录该内容

16 篇文章 0 订阅

订阅专栏

散列表的本质就是：用hash函数计算得到名值对，以及当hash函数对不同的键算出相同的名（下标）时解决冲突的方法。最简单的例子是用字符的ASCII码作下标访问长度为128的数组，字符的值直接被当作名访问数组的对应项，即：index = hash(ch)，这里很明显不存在冲突。

hash函数举例（实际上hash函数是很难设计的）：字符串得到hash值代码

package nuaa.hash;

/**
 * Example string hash functions.
 *
 * <p>{@link #hash1} and {@link #hash2} reduce the hash to a table index in
 * {@code [0, tableSize)} for a positive {@code tableSize}; {@link #hash3}
 * returns the raw, unreduced hash value.
 */
public class Hash {
	/**
	 * Treats the key as a base-128 number and reduces it modulo
	 * {@code tableSize} after every character.
	 *
	 * <p>The intermediate product is computed in {@code long} so that
	 * {@code hashValue*128} cannot overflow, even for table sizes close to
	 * {@link Integer#MAX_VALUE}.
	 *
	 * @param key       string to hash; must not be {@code null}
	 * @param tableSize number of buckets; must be positive
	 * @return an index in {@code [0, tableSize)}
	 * @throws ArithmeticException if {@code tableSize} is 0 and the key is non-empty
	 */
	public static int hash1(String key,int tableSize){
		long hashValue = 0;
		for(int i=0;i<key.length();i++){
			// hashValue < tableSize <= Integer.MAX_VALUE, so this fits in a long.
			hashValue = (hashValue*128L+key.charAt(i))%tableSize;
		}
		return (int)hashValue;
	}
	
	/**
	 * Polynomial hash with multiplier 37, reduced modulo {@code tableSize}
	 * once at the end.
	 *
	 * <p>The accumulation is allowed to overflow {@code int}; the final
	 * adjustment maps a negative remainder back into range.
	 *
	 * @param key       string to hash; must not be {@code null}
	 * @param tableSize number of buckets; must be positive
	 * @return an index in {@code [0, tableSize)}
	 * @throws ArithmeticException if {@code tableSize} is 0
	 */
	public static int hash2(String key,int tableSize){
		int hashValue = 0;
		for(int i=0;i<key.length();i++){
			hashValue = hashValue*37+key.charAt(i);
		}
		hashValue = hashValue%tableSize; // % is well defined even after overflow
		if(hashValue<0)                  // overflow may have produced a negative value
			hashValue += tableSize;
		return hashValue;
	}
	
	/**
	 * Polynomial hash with multiplier 31, the same recurrence
	 * {@link String#hashCode()} uses (String additionally caches the result).
	 *
	 * <p>A hash is a one-way function: the original string cannot be
	 * recovered from the returned value.
	 *
	 * <p>Note that {@code tableSize} is not used: the value returned is the
	 * raw hash, which may be negative and is not reduced to a table index.
	 *
	 * @param key       string to hash; must not be {@code null}
	 * @param tableSize ignored by this method
	 * @return the unreduced 32-bit hash of {@code key}
	 */
	public static int hash3(String key,int tableSize){
		int hashValue = 0;
		for(int i=0;i<key.length();i++){
			hashValue = hashValue*31+key.charAt(i);
		}
		
		return hashValue;
	}
}

冲突解决：

首先都是%tableSize，tableSize数组的长度，存储项的方式：

1 图的存储方式：邻接表，冲突就邻接式挂链

2 数组式存储：冲突时可以线性探测，每次加1看该位置是否有元素，到数组末尾就回绕到开头。但这很明显会造成初始聚类（primary clustering）。推导可以得到一次插入平均探测的单元数为 $\frac{1}{2}\left(1+\frac{1}{(1-a)^2}\right)$，其中 a 为负载因子。比如 a 为 0.9 时，由于前次和本次插入的探测不是独立的，实际的平均探测单元数约为 50，而不是各次探测相互独立情况下的 $\frac{1}{1-a}=10$ 个，性能是不可接受的。因此可以采用二次探测。

二次探测：第 i 次冲突时，探测初始位置加上 $i^2$ 处的单元，i 从 1 开始。本质就是初始位置相邻的元素在冲突后寻找的位置序列各不相同，因此不会造成初始聚类。当然它也会造成二次聚类，使每次插入要额外增加不到半次的探测，但这只在高负载的情况下才明显。

插入：数组长度为素数（非素数会造成可供选择的位置偏少，也就是回绕时探测到重复的位置），且负载因子不超过0.5时，二次探测总能找到位置插入元素。

删除：由于是hash操作，因此每个元素都不能动，也不能设置要删除的元素为null，null表示冲突寻找到结尾了，会影响寻找操作，删除操作只能是每个元素设置一个额外的标识，这种删除叫做迟删除。

package nuaa.ds;

import java.util.AbstractCollection;
import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;

/**
 * A set backed by an open-addressing hash table.
 *
 * <p>Collisions are resolved by quadratic probing. Removal is lazy: a removed
 * slot keeps its entry and is only flagged inactive, because clearing it to
 * {@code null} would cut the probe sequence of elements stored after it
 * ({@code null} marks the end of a probe sequence).
 *
 * <p>The table length is always a prime and at most half of the slots are
 * ever non-null; under those two conditions quadratic probing is guaranteed
 * to find either the searched element or an empty slot.
 *
 * <p>A {@code null} element is permitted. Iterators are fail-fast.
 *
 * @param <E> element type
 */
public class HashSet<E> extends AbstractCollection<E> implements Set<E> {
	
	private static final int DEFAULT_TABLE_SIZE = 101;
	private int currentSize = 0; // number of active (not removed) elements
	private int occupied = 0;    // number of non-null slots, active or lazily removed
	private int modCount = 0;    // structural modification counter for fail-fast iterators
	private HashEntry<E>[] array;
	
	/** Creates an empty set with the default table size. */
	public HashSet(){
		this.allocateArray(DEFAULT_TABLE_SIZE);
	}

	/**
	 * Creates a set containing the elements of {@code other}.
	 *
	 * @param other collection whose elements are copied; must not be {@code null}
	 */
	public HashSet(Collection<? extends E> other){
		// The freshly allocated table is already empty, so no clear() is needed.
		allocateArray(nextPrime(other.size()*2));
		for(E e:other)
			add(e);
	}
	
	/** @return the number of active elements in the set */
	@Override
	public int size(){
		return this.currentSize;
	}
	
	/** @return a fail-fast iterator over the active elements, in table order */
	@Override
	public Iterator<E> iterator() {
		return new HashSetIterator();
	}

	/**
	 * @param x element to look for; may be {@code null}
	 * @return {@code true} if an equal, active element is stored
	 */
	@Override
	public boolean contains(Object x){
		return isActive(array,findPos(x));
	}
	
	/** @return {@code true} if slot {@code pos} holds an entry that has not been removed */
	private static boolean isActive(HashEntry<?>[] arr,int pos){
		return arr[pos]!=null && arr[pos].isActive;
	}
	
	/**
	 * Returns the stored element that equals {@code x}.
	 *
	 * @param x element to look up
	 * @return the stored instance, or {@code null} if there is no active match
	 */
	public E getMatch(E x){
		int currentPos = findPos(x);
		if(isActive(array,currentPos)) {
			return array[currentPos].element;
		}
		return null;
	}
	
	/**
	 * Lazily removes {@code x}: the slot is flagged inactive, not cleared.
	 *
	 * @param x element to remove; may be {@code null}
	 * @return {@code true} if the element was present
	 */
	@Override
	public boolean remove(Object x){
		int currentPos = findPos(x);  // findPos always returns an in-range index
		if(!isActive(array,currentPos))
			return false;
		
		array[currentPos].isActive = false;
		currentSize--;
		modCount++;
		
		// Shrink once fewer than one eighth of the slots hold live elements.
		if(currentSize<array.length/8)
			rehash();
		return true;
	}
	
	/** Removes every element and forgets all lazily removed slots. */
	@Override
	public void clear(){
		currentSize = occupied = 0;
		modCount++;
		for(int i=0;i<array.length;i++)
			array[i] = null;
	}
	
	/**
	 * Adds {@code x} unless an equal element is already present.
	 *
	 * @param x element to add; may be {@code null}
	 * @return {@code true} if the set changed
	 */
	@Override
	public boolean add(E x){
		int currentPos = findPos(x);
		if(isActive(array,currentPos))
			return false;
		// Re-using the slot of a lazily removed equal element does not
		// increase the number of non-null slots.
		if(array[currentPos]==null)
			occupied++;
		array[currentPos] = new HashEntry<E>(x,true);
		currentSize++;
		modCount++;
		
		// Rehash when more than half of the slots are non-null. The test uses
		// occupied rather than size() because findPos stops only at null
		// slots, so lazily removed entries lengthen probe sequences too.
		if(occupied>array.length/2) 
			rehash();
		return true;
	}
	
	/**
	 * Rebuilds the table with a prime length of at least four times the
	 * current size (load factor about 0.25), dropping lazily removed entries.
	 */
	private void rehash(){
		HashEntry<E>[] oldArray = array;
		allocateArray(nextPrime(4*size()));
		currentSize = 0;
		occupied = 0;
		
		for(int i=0;i<oldArray.length;i++)
			if(isActive(oldArray,i))
				add(oldArray[i].element);
	}
	
	/**
	 * Finds the slot for {@code x} by quadratic probing.
	 *
	 * <p>Because the table length is prime and at most half of the slots are
	 * non-null, the probe always terminates.
	 *
	 * @param x element to locate; {@code null} hashes to 0
	 * @return the index of the slot holding an element equal to {@code x}
	 *         (active or lazily removed), or of the first {@code null} slot
	 *         on its probe sequence
	 */
	private int findPos(Object x){
		int offset = 1;
		// |hash % length| < length, so Math.abs cannot overflow here.
		int currentPos =
					(x==null) ? 0:Math.abs(x.hashCode()%array.length);
		while(array[currentPos]!=null){
			if(x==null){
				if(array[currentPos].element==null)
					break;         // a null element is already stored here
			}else if(x.equals(array[currentPos].element))
				break;             // already stored: return its position
			
			// i^2 - (i-1)^2 = 2i-1, so adding successive odd offsets
			// visits start+1, start+4, start+9, ...
			currentPos += offset;
			offset += 2;
			if(currentPos>=array.length) // wrap around past the end
				currentPos -= array.length;
		}
		return currentPos;					
	}
	
	/** Replaces the table with a new, empty one of the given length. */
	@SuppressWarnings("unchecked") // the array only ever holds HashEntry<E> created by add
	private void allocateArray(int arraySize){
		array = (HashEntry<E>[]) new HashEntry<?>[arraySize];
	}
	
	/**
	 * @param n lower bound
	 * @return the smallest odd prime that is {@code >= n}, and at least 3
	 */
	private static int nextPrime(int n){
		if(n<3)
			return 3;
		if(n%2==0)
			n += 1;
		while(!isPrime(n))
			n += 2;
		return n;
	}
	
	/**
	 * Trial division by odd numbers up to the square root of {@code n}.
	 *
	 * @param n number to test
	 * @return {@code true} if {@code n} is prime
	 */
	private static boolean isPrime(int n){
		if(n<2)
			return false;
		if(n%2==0)
			return n==2;
		for(int i=3;(long)i*i<=n;i+=2)
			if(n%i==0)
				return false;
		return true;
	}
	
	/** Fail-fast iterator that walks the table and skips null and removed slots. */
	private class HashSetIterator implements Iterator<E>{
		
		private int expectedModCount = modCount;
		private int currentPos = -1; // slot of the element last returned by next()
		private int visited = 0;     // number of live elements already returned
		
		@Override
		public boolean hasNext() {
			if(expectedModCount!=modCount)
				throw new ConcurrentModificationException();
			return visited!=size();
		}
        
		@Override
		public E next() {
			if(!hasNext())
				throw new NoSuchElementException();
			do{
				currentPos++;
			}while(currentPos<array.length&&!isActive(array,currentPos));
			visited++;
			return array[currentPos].element;
		}

		/**
		 * Lazily removes the element last returned by {@link #next()}.
		 *
		 * @throws IllegalStateException if {@code next()} has not been called
		 *         yet, or the element was already removed
		 */
		@Override
		public void remove(){
			if(expectedModCount!=modCount)
				throw new ConcurrentModificationException();
			if(currentPos==-1||!isActive(array,currentPos))
				throw new IllegalStateException();
			array[currentPos].isActive = false;
			currentSize--;
			visited--;
			modCount++;
			expectedModCount++;
		}
	}
	
	/**
	 * One table slot. Slots are never moved or nulled on removal: the position
	 * is computed from the hash, and a null slot would end the probe sequence
	 * of elements inserted after this one. Removal only clears {@link #isActive}.
	 */
	private static class HashEntry<T> implements java.io.Serializable{
		public T element;
		public boolean isActive; // false once the element has been lazily removed
		
		public HashEntry(T e){
			this(e,true);
		}
		public HashEntry(T e,boolean i){
			element = e;
			isActive = i;
		}
	}
}

名字到底多长

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
20 散列表

散列表的本质就是：hash函数计算后得到的名值对已经当hash函数算到相同的名的时候，解决冲突的方法。最简单的就是字符ascii码当下标访问长度为128的数组，字符的值直接被当作名访问数组的对应项，即：index = hash（ch），这里很明显不存在冲突。hash函数举例（实际上hash函数是很难设计的）：字符串得到hash值代码package nuaa.hash;pub
复制链接

扫一扫