Implement a spell checker with Java

11 篇文章 0 订阅


  When you type a word, many systems suggest a list of candidate words so that you can select the one you intended to type. This is a spell checker.

It calculates the edit distance between the typed word and the words in a dictionary. Comparing the typed word against the

whole dictionary every time is too expensive. If we hash the dictionary with the word length as the key and the list of words of that length as the value, the lookup becomes much faster. There are also

other methods to improve performance; here I just use hashing.

  For comparison, the dictionary file can be downloaded from the Internet, or you can use Linux's default word list in the /usr/share/dict/ directory. Here the Levenshtein

distance is used to compute the edit distance between two words.


Below is my Java implementation.


package datastructure.test;


import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;

/**
 * Suggests dictionary words that are close to a given word, measured by
 * Levenshtein (edit) distance.
 *
 * <p>The dictionary is bucketed by word length in a hash table, so a lookup only
 * compares the input against words of similar length instead of scanning the
 * whole dictionary. The bucketed dictionary is cached in a static field that is
 * shared by every instance of this class.
 */
public class SpellChecker {
	/** Exclusive upper bound on the edit distance of a suggestion (distance must be &lt; this value). */
	private static final int DISTANCE_LEN = 2;

	/** Cached dictionary: word length -> all dictionary words of that length. */
	private static Hashtable<Integer, List<String>> ht;

	/**
	 * Creates a checker without loading a dictionary; the dictionary is loaded
	 * lazily on the first lookup that needs it.
	 */
	public SpellChecker() {
	}

	/**
	 * Creates a checker and (re)loads the shared dictionary cache from the given file.
	 *
	 * @param dictFile path of the dictionary file, one word per line
	 * @throws IOException if the dictionary file cannot be read
	 */
	public SpellChecker(String dictFile) throws IOException {
		// The cache is static, so assign it through the class rather than through "this".
		SpellChecker.ht = hashDictionary(dictFile);
	}

	/**
	 * Returns the cached dictionary table, loading it from {@code dictFile} on first use.
	 *
	 * <p>Once the cache is populated, {@code dictFile} is ignored and the cached
	 * table is returned as-is.
	 *
	 * @param dictFile path of the dictionary file, used only when nothing is cached yet
	 * @return the shared table mapping word length to the words of that length
	 * @throws IOException if the dictionary file has to be read and cannot be
	 */
	public static Hashtable<Integer, List<String>> getHashtable(String dictFile) throws IOException {
		if (SpellChecker.ht == null) {
			SpellChecker.ht = hashDictionary(dictFile);
		}
		return SpellChecker.ht;
	}

	/**
	 * Reads the dictionary file and groups its words by length.
	 *
	 * <p>Each line is trimmed; lines that are empty after trimming are skipped.
	 *
	 * @param dictFile path of the dictionary file, one word per line
	 * @return a new table mapping word length to the list of words of that length
	 * @throws IOException if the dictionary file cannot be opened or read
	 */
	public static Hashtable<Integer, List<String>> hashDictionary(String dictFile) throws IOException {
		Hashtable<Integer, List<String>> table = new Hashtable<Integer, List<String>>();
		BufferedReader reader = new BufferedReader(new FileReader(dictFile));

		try {
			String line;
			while ((line = reader.readLine()) != null) {
				String word = line.trim();
				if (word.length() == 0) {
					continue;	// skip the empty line
				}

				Integer key = Integer.valueOf(word.length());
				List<String> bucket = table.get(key);
				if (bucket == null) {
					bucket = new ArrayList<String>();
					table.put(key, bucket);
				}
				bucket.add(word);
			}
		} finally {
			// Always release the file handle, even when reading fails part-way.
			reader.close();
		}

		return table;
	}

	/**
	 * Collects the candidate words for a lookup: every dictionary word whose length
	 * is {@code wordlen} or {@code wordlen + 1}.
	 *
	 * <p>The returned list is a fresh copy, so callers may modify it without
	 * affecting the cached dictionary.
	 *
	 * @param wordlen length of the word being checked
	 * @param dictFile path of the dictionary file, used only if nothing is cached yet
	 * @return the candidate words; empty if the dictionary has no word of either length
	 * @throws IOException if the dictionary file has to be read and cannot be
	 */
	public List<String> getDictList(int wordlen, String dictFile) throws IOException {
		Hashtable<Integer, List<String>> table = getHashtable(dictFile);	// cached dictionary hash table
		List<String> candidates = new ArrayList<String>();

		// Copy instead of aliasing: appending to the cached bucket itself would grow
		// the shared dictionary on every call and yield duplicate suggestions.
		List<String> sameLength = table.get(wordlen);
		if (sameLength != null) {
			candidates.addAll(sameLength);
		}

		List<String> oneLonger = table.get(wordlen + 1);
		if (oneLonger != null) {
			candidates.addAll(oneLonger);
		}

		// NOTE(review): words of length wordlen - 1 are also within one edit of the
		// input but are not considered here; confirm whether that is intended.
		return candidates;
	}

	/**
	 * Filters {@code dictList} down to the words whose Levenshtein distance to
	 * {@code word} is less than {@code DISTANCE_LEN}.
	 *
	 * @param word the word to check
	 * @param dictList the candidate words to compare against
	 * @return the candidates that are close enough to {@code word}, in list order
	 */
	public List<String> spellChecker(String word, List<String> dictList) {
		List<String> result = new ArrayList<String>();
		LevenshteinDistance ld = new LevenshteinDistance();

		for (String dictWord : dictList) {
			if (ld.dynLevenDistance(word, dictWord) < DISTANCE_LEN) {
				result.add(dictWord);
			}
		}

		return result;
	}

	/**
	 * Looks up suggestions for {@code word} using the dictionary in {@code dictFile}
	 * (or the already cached dictionary, if one has been loaded).
	 *
	 * @param word the word to check
	 * @param dictFile path of the dictionary file
	 * @return the dictionary words that are close enough to {@code word}
	 * @throws IOException if the dictionary file has to be read and cannot be
	 */
	public List<String> spellChecker(String word, String dictFile) throws IOException {
		List<String> wordList = getDictList(word.length(), dictFile);
		return spellChecker(word, wordList);
	}

	/**
	 * Demo entry point: prints the suggestions for a sample word.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		//String dictFile = "/usr/share/dict/words.pre-dictionaries-common";
		String dictFile = "/root/myworld/devsapce/wkspace/workspace/JavaTestProject/src/datastructure/test/dictionary.txt";
		try {
			SpellChecker spellchecker = new SpellChecker(dictFile);
			String word = "football";
			List<String> lst = spellchecker.spellChecker(word, dictFile);

			System.out.println("The probably words maybe : ");
			for (String suggestion : lst) {
				System.out.print(suggestion + " ");
			}

		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}


Levenshtein distance:


package datastructure.test;

/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed to
 * turn one string into the other.
 */
public class LevenshteinDistance {

	/**
	 * Demo entry point: prints the distance between two sample words.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String s = "kitten";
		String t = "sitting";

		LevenshteinDistance ld = new LevenshteinDistance();

		//int recurlen = ld.recurLevenDistance(s, t);
		int dynlen = ld.dynLevenDistance(s, t);
		//System.out.println("The length is : " + recurlen);
		System.out.println("The length is : " + dynlen);
		System.out.println(s.length());
	}

	/**
	 * Returns the smallest of three values.
	 *
	 * <p>Ties are handled correctly: strict pairwise comparisons alone would miss
	 * the case where the two smallest arguments are equal, e.g. (2, 2, 3).
	 *
	 * @param a first value
	 * @param b second value
	 * @param c third value
	 * @return the minimum of {@code a}, {@code b} and {@code c}
	 */
	public int minimum(int a, int b, int c){
		return Math.min(a, Math.min(b, c));
	}

	/**
	 * Computes the edit distance recursively on the last characters of both strings.
	 *
	 * <p>No results are memoised, so every call recurses three ways; use
	 * {@link #dynLevenDistance(String, String)} for anything but very short inputs.
	 *
	 * @param s first string
	 * @param t second string
	 * @return the Levenshtein distance between {@code s} and {@code t}
	 */
	public int recurLevenDistance(String s, String t){
		int slen = s.length();
		int tlen = t.length();

		// An empty string is turned into the other one by inserting every character.
		if(slen == 0) return tlen;
		if(tlen == 0) return slen;

		String sPrefix = s.substring(0, slen - 1);
		String tPrefix = t.substring(0, tlen - 1);

		// Align the two last characters; costs nothing when they already match.
		int substitute = recurLevenDistance(sPrefix, tPrefix)
				+ ((s.charAt(slen - 1) == t.charAt(tlen - 1)) ? 0 : 1);
		// Drop the last character of s, then match the rest against all of t.
		int dropFromS = recurLevenDistance(sPrefix, t) + 1;
		// Drop the last character of t, then match all of s against the rest.
		int dropFromT = recurLevenDistance(s, tPrefix) + 1;

		return minimum(dropFromS, dropFromT, substitute);
	}

	/**
	 * Computes the edit distance with dynamic programming.
	 *
	 * <p>{@code distance[i][j]} holds the distance between the first {@code i}
	 * characters of {@code s} and the first {@code j} characters of {@code t}.
	 *
	 * @param s first string
	 * @param t second string
	 * @return the Levenshtein distance between {@code s} and {@code t}
	 */
	public int dynLevenDistance(String s, String t){
		int slen = s.length();
		int tlen = t.length();
		int[][] distance = new int[slen + 1][tlen + 1];

		// Base cases: reaching or leaving the empty prefix costs one edit per character.
		for(int i = 0; i <= slen; i++)
			distance[i][0] = i;
		for(int j = 0; j <= tlen; j++)
			distance[0][j] = j;

		for(int i = 1; i <= slen; i++){
			for(int j = 1; j <= tlen; j++){
				int substitutionCost = (s.charAt(i - 1) == t.charAt(j - 1)) ? 0 : 1;
				distance[i][j] = minimum(distance[i - 1][j] + 1,
						distance[i][j - 1] + 1,
						distance[i - 1][j - 1] + substitutionCost);
			}
		}

		return distance[slen][tlen];
	}
}




  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
To implement a resource pool in C++, you can use the concept of object pooling. Object pooling is a design pattern that allows you to reuse objects instead of creating and destroying them frequently, which can be expensive in terms of memory allocation and deallocation. Here's a basic example of how you can implement a resource pool in C++:

```cpp
#include <queue>
#include <mutex>
#include <condition_variable>

template<typename T>
class ResourcePool {
private:
    std::queue<T*> pool;
    std::mutex mutex;
    std::condition_variable condition;

public:
    ResourcePool(int initialSize) {
        for (int i = 0; i < initialSize; ++i) {
            pool.push(new T());
        }
    }

    ~ResourcePool() {
        while (!pool.empty()) {
            delete pool.front();
            pool.pop();
        }
    }

    T* acquireResource() {
        std::unique_lock<std::mutex> lock(mutex);
        if (pool.empty()) {
            // Wait until a resource is available
            condition.wait(lock);
        }
        T* resource = pool.front();
        pool.pop();
        return resource;
    }

    void releaseResource(T* resource) {
        std::unique_lock<std::mutex> lock(mutex);
        pool.push(resource);
        // Notify waiting threads that a resource is available
        condition.notify_one();
    }
};
```

In this example, the `ResourcePool` class uses a queue to store the resources. The `acquireResource` function is responsible for acquiring a resource from the pool. If the pool is empty, it waits until a resource becomes available using the `std::condition_variable`. The `releaseResource` function is used to release a resource back into the pool. You can customize this implementation based on your specific requirements. For example, you can add additional functions to initialize or resize the pool dynamically.
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值