集合和映射的内容
集合
集合中不允许存在相同的元素。之前我实现的二分搜索树在加入相同元素时是直接进行覆盖,不会产生重复元素,因此可以直接用它作为集合的底层实现。
集合的接口函数
/**
 * A collection of elements in which duplicates are not allowed.
 *
 * @param <E> the element type
 */
public interface Set<E> {

    /**
     * Adds an element to the set.
     *
     * @param e the element to add
     */
    void add(E e);

    /**
     * Removes an element from the set.
     *
     * @param e the element to remove
     */
    void remove(E e);

    /**
     * Tests whether the set holds the given element.
     *
     * @param e the element to look for
     * @return {@code true} if the element is present
     */
    boolean contains(E e);

    /**
     * @return the number of elements in the set
     */
    int getSize();

    /**
     * @return {@code true} if the set holds no elements
     */
    boolean isEmpty();
}
使用二分搜索树实现集合
/**
 * Set implementation that delegates every operation to a binary search tree.
 *
 * @param <E> the element type; must be comparable so the tree can order it
 */
public class BSTSET<E extends Comparable<E>> implements Set<E> {

    // Backing tree; all set operations are forwarded to it.
    private BST<E> bst;

    /** Creates an empty set backed by a fresh tree. */
    public BSTSET() {
        bst = new BST<>();
    }

    /**
     * @return the number of elements reported by the backing tree
     */
    @Override
    public int getSize() {
        return bst.size();
    }

    /**
     * @return {@code true} if the backing tree reports itself empty
     */
    @Override
    public boolean isEmpty() {
        return bst.isEmpty();
    }

    /**
     * Adds an element by forwarding to the backing tree.
     *
     * @param e the element to add
     */
    @Override
    public void add(E e) {
        bst.add(e);
    }

    /**
     * Removes an element by forwarding to the backing tree.
     *
     * @param e the element to remove
     */
    @Override
    public void remove(E e) {
        // NOTE(review): assumes BST exposes remove(E); confirm against the BST class.
        bst.remove(e);
    }

    /**
     * @param e the element to look for
     * @return {@code true} if the backing tree contains the element
     */
    @Override
    public boolean contains(E e) {
        return bst.contains(e);
    }
}
只是将BST中原有的函数重新封装一遍。
映射
Map, 存储一种键值对关系的数据结构
映射的接口函数
/**
 * A structure that stores key/value pairs.
 *
 * <p>The core operations are the usual four: create ({@link #add}),
 * delete ({@link #remove}), update ({@link #set}) and read ({@link #get}).
 *
 * @param <K> the key type
 * @param <V> the value type
 */
public interface Map<K, V> {

    /**
     * @return the number of key/value pairs stored
     */
    int getSize();

    /**
     * @return {@code true} if no pairs are stored
     */
    boolean isEmpty();

    /**
     * Tests whether a key is present.
     *
     * @param key the key to look for
     * @return {@code true} if the key is present
     */
    boolean contains(K key);

    /**
     * Looks up the value stored under a key.
     *
     * @param key the key to look up
     * @return the value associated with the key
     */
    V get(K key);

    /**
     * Stores a value under a key.
     *
     * @param key   the key
     * @param value the value to associate with the key
     */
    void add(K key, V value);

    /**
     * Updates the value stored under a key.
     *
     * @param key      the key to update
     * @param newValue the replacement value
     */
    void set(K key, V newValue);

    /**
     * Removes a key and its value.
     *
     * @param key the key to remove
     * @return the value that was associated with the key
     */
    V remove(K key);
}
使用链表实现的map
import java.util.ArrayList;
// 基于链表的映射实现
/**
 * Map implementation backed by a singly linked list with a dummy head node.
 *
 * <p>Every lookup walks the list from the front, so all keyed operations are
 * linear in the number of stored pairs.
 *
 * @param <K> the key type
 * @param <V> the value type
 */
public class LinkedListMap<K, V> implements Map<K, V> {

    /** One list node holding a key/value pair and a link to the next node. */
    private class Node {
        public K key;
        public V value;
        public Node next;

        public Node(K key, V value, Node next) {
            this.key = key;
            this.value = value;
            this.next = next;
        }

        public Node(K key) {
            this(key, null, null);
        }

        public Node() {
            this(null, null, null);
        }

        @Override
        public String toString() {
            // Plain concatenation tolerates a null key or value
            // (the dummy head has both set to null).
            return key + ": " + value;
        }
    }

    // Sentinel placed before the first real node so insert/remove need no head special case.
    private Node dummyHead;
    private int size;

    /** Creates an empty map. */
    public LinkedListMap() {
        dummyHead = new Node();
        size = 0;
    }

    /**
     * @return the number of key/value pairs stored
     */
    @Override
    public int getSize() {
        return size;
    }

    /**
     * @return {@code true} if no pairs are stored
     */
    @Override
    public boolean isEmpty() {
        return size == 0;
    }

    /**
     * Null-safe key comparison: a stored null key matches only a null lookup key.
     */
    private boolean sameKey(K stored, K key) {
        return stored == null ? key == null : stored.equals(key);
    }

    /**
     * Finds the node holding the given key.
     *
     * @param key the key to look for
     * @return the matching node, or {@code null} if the key is absent
     */
    private Node getNode(K key) {
        for (Node cur = dummyHead.next; cur != null; cur = cur.next) {
            if (sameKey(cur.key, key)) {
                return cur;
            }
        }
        return null;
    }

    /**
     * @param key the key to look for
     * @return {@code true} if the key is present
     */
    @Override
    public boolean contains(K key) {
        return getNode(key) != null;
    }

    /**
     * @param key the key to look up
     * @return the stored value, or {@code null} if the key is absent
     */
    @Override
    public V get(K key) {
        Node node = getNode(key);
        return node == null ? null : node.value;
    }

    /**
     * Inserts a new pair at the front of the list, or overwrites the value
     * when the key is already present.
     *
     * @param key   the key
     * @param value the value to associate with the key
     */
    @Override
    public void add(K key, V value) {
        Node node = getNode(key);
        if (node == null) {
            dummyHead.next = new Node(key, value, dummyHead.next);
            size++;
        } else {
            node.value = value;
        }
    }

    /**
     * Replaces the value of an existing key.
     *
     * @param key      the key to update
     * @param newValue the replacement value
     * @throws IllegalArgumentException if the key is not present
     */
    @Override
    public void set(K key, V newValue) {
        Node node = getNode(key);
        if (node == null) {
            throw new IllegalArgumentException(key + " doesn't exist");
        }
        node.value = newValue;
    }

    /**
     * Unlinks the node holding the given key.
     *
     * @param key the key to remove
     * @return the removed value, or {@code null} if the key was absent
     */
    @Override
    public V remove(K key) {
        // Walk with a trailing pointer so the match can be unlinked in place.
        Node prev = dummyHead;
        while (prev.next != null && !sameKey(prev.next.key, key)) {
            prev = prev.next;
        }
        if (prev.next == null) {
            return null;
        }
        Node delNode = prev.next;
        prev.next = delNode.next;
        delNode.next = null;
        size--;
        return delNode.value;
    }

    /** Word-frequency demo over a text file. */
    public static void main(String[] args) {
        System.out.println("Pride and Prejudice");
        ArrayList<String> words = new ArrayList<>();
        // NOTE(review): "Preide" in the path looks like a typo for "Pride"; confirm the real file name.
        if (FileOperation.readFile("e:/java/Preide and Prejudice.txt", words)) {
            System.out.println("Total words: " + words.size());
            LinkedListMap<String, Integer> map = new LinkedListMap<>();
            for (String word : words) {
                if (map.contains(word)) {
                    map.set(word, map.get(word) + 1);
                } else {
                    map.add(word, 1);
                }
            }
            System.out.println("Total different words: " + map.getSize());
            // Words are stored lower-cased by FileOperation, so look up "pride".
            System.out.println("Frequency of PRIDE: " + map.get("pride"));
        }
    }
}
使用二分搜索树实现的map
import java.util.ArrayList;
// 基于二分搜索树的映射实现
/**
 * Map implementation backed by an (unbalanced) binary search tree keyed on {@code K}.
 *
 * @param <K> the key type; must be comparable so the tree can order it
 * @param <V> the value type
 */
public class BSTMap<K extends Comparable<K>, V> implements Map<K, V> {

    /** One tree node holding a key/value pair and its two children. */
    private class Node {
        public K key;
        public V value;
        public Node left, right;

        public Node(K key, V value) {
            this.key = key;
            this.value = value;
            left = null;
            right = null;
        }
    }

    private Node root;
    private int size;

    /**
     * @return the number of key/value pairs stored
     */
    @Override
    public int getSize() {
        return size;
    }

    /**
     * @return {@code true} if no pairs are stored
     */
    @Override
    public boolean isEmpty() {
        return size == 0;
    }

    /**
     * Inserts a new pair, or overwrites the value when the key is already present.
     *
     * @param key   the key
     * @param value the value to associate with the key
     */
    @Override
    public void add(K key, V value) {
        root = add(root, key, value);
    }

    /**
     * Recursively inserts (key, value) into the subtree rooted at {@code node}.
     *
     * @return the root of the subtree after the insertion
     */
    private Node add(Node node, K key, V value) {
        if (node == null) {
            size++;
            return new Node(key, value);
        }
        // Compare once and branch on the result.
        int cmp = key.compareTo(node.key);
        if (cmp < 0) {
            node.left = add(node.left, key, value);
        } else if (cmp > 0) {
            node.right = add(node.right, key, value);
        } else {
            node.value = value;
        }
        return node;
    }

    /**
     * Finds the node holding {@code key} in the subtree rooted at {@code node}.
     *
     * @return the matching node, or {@code null} if the key is absent
     */
    private Node getNode(Node node, K key) {
        if (node == null) {
            return null;
        }
        int cmp = key.compareTo(node.key);
        if (cmp == 0) {
            return node;
        }
        return getNode(cmp < 0 ? node.left : node.right, key);
    }

    /**
     * @param key the key to look for
     * @return {@code true} if the key is present
     */
    @Override
    public boolean contains(K key) {
        return getNode(root, key) != null;
    }

    /**
     * @param key the key to look up
     * @return the stored value, or {@code null} if the key is absent
     */
    @Override
    public V get(K key) {
        Node node = getNode(root, key);
        return node == null ? null : node.value;
    }

    /**
     * Replaces the value of an existing key.
     *
     * @param key      the key to update
     * @param newValue the replacement value
     * @throws IllegalArgumentException if the key is not present
     */
    @Override
    public void set(K key, V newValue) {
        Node node = getNode(root, key);
        if (node == null) {
            throw new IllegalArgumentException(key + " doesn't exist");
        }
        node.value = newValue;
    }

    /**
     * @return the node with the smallest key in the subtree rooted at {@code node}
     */
    private Node minimum(Node node) {
        if (node.left == null) {
            return node;
        }
        return minimum(node.left);
    }

    /**
     * Removes the smallest node of the subtree rooted at {@code node}.
     *
     * @return the root of the subtree after the removal
     */
    private Node removeMin(Node node) {
        if (node.left == null) {
            Node rightNode = node.right;
            node.right = null;
            size--;
            return rightNode;
        }
        node.left = removeMin(node.left);
        return node;
    }

    /**
     * Removes the pair stored under {@code key}.
     *
     * @param key the key to remove
     * @return the removed value, or {@code null} if the key was absent
     */
    @Override
    public V remove(K key) {
        Node node = getNode(root, key);
        if (node == null) {
            return null;
        }
        root = remove(root, key);
        // The detached node still carries its value.
        return node.value;
    }

    /**
     * Recursively removes the node holding {@code key} from the subtree rooted at {@code node}.
     *
     * @return the root of the subtree after the removal
     */
    private Node remove(Node node, K key) {
        if (node == null) {
            return null;
        }
        int cmp = key.compareTo(node.key);
        if (cmp < 0) {
            node.left = remove(node.left, key);
            return node;
        }
        if (cmp > 0) {
            node.right = remove(node.right, key);
            return node;
        }

        // node is the one to delete.
        if (node.left == null) {
            // No left subtree: the right subtree takes its place.
            Node rightNode = node.right;
            node.right = null;
            size--;
            return rightNode;
        }
        if (node.right == null) {
            // No right subtree: the left subtree takes its place.
            Node leftNode = node.left;
            node.left = null;
            size--;
            return leftNode;
        }

        // Both subtrees exist: replace the node with its successor, i.e. the
        // smallest node of the right subtree (the largest node of the left
        // subtree would work equally well). removeMin performs the size
        // decrement for this case.
        Node successor = minimum(node.right);
        successor.right = removeMin(node.right);
        successor.left = node.left;
        node.left = node.right = null;
        return successor;
    }
}
这是读文件的操作
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Locale;
import java.util.Scanner;
/** File helper that reads a text file and splits it into lower-cased words. */
public class FileOperation {

    /**
     * Reads the file named {@code filename} and appends every word it contains to {@code words}.
     *
     * <p>The tokenizer is deliberately simple (demo quality): a word is a maximal run of
     * characters for which {@link Character#isLetter(char)} is true, converted to lower case.
     *
     * @param filename path of the file to read
     * @param words    list that receives the words, in file order
     * @return {@code true} if the file was read; {@code false} if an argument is null,
     *         the file does not exist, or it cannot be opened
     */
    public static boolean readFile(String filename, ArrayList<String> words) {
        if (filename == null || words == null) {
            System.out.println("filename is null or words is null");
            return false;
        }

        File file = new File(filename);
        if (!file.exists()) {
            return false;
        }

        String contents;
        // try-with-resources closes the scanner and the underlying stream on every path.
        try (Scanner scanner = new Scanner(new BufferedInputStream(new FileInputStream(file)), "UTF-8")) {
            scanner.useLocale(Locale.ENGLISH);
            if (!scanner.hasNextLine()) {
                // Empty file: nothing to tokenize, but the read itself succeeded.
                return true;
            }
            // "\\A" matches only at the start of input, so next() returns the whole file.
            contents = scanner.useDelimiter("\\A").next();
        } catch (IOException ioe) {
            System.out.println("Cannot open " + filename);
            return false;
        }

        // start marks the first letter of the current word; i scans for its end.
        int start = firstCharacterIndex(contents, 0);
        for (int i = start + 1; i <= contents.length(); ) {
            if (i == contents.length() || !Character.isLetter(contents.charAt(i))) {
                words.add(contents.substring(start, i).toLowerCase(Locale.ENGLISH));
                start = firstCharacterIndex(contents, i);
                // Resume scanning right after the first letter of the next word.
                i = start + 1;
            } else {
                i++;
            }
        }
        return true;
    }

    /**
     * Finds the index of the first letter in {@code s} at or after {@code start}.
     *
     * @return the index of that letter, or {@code s.length()} if there is none
     */
    private static int firstCharacterIndex(String s, int start) {
        for (int i = start; i < s.length(); i++) {
            if (Character.isLetter(s.charAt(i))) {
                return i;
            }
        }
        return s.length();
    }
}
还可以使用AVL平衡二叉树实现map。由于AVL树始终保持平衡,增删改查的速度更快、更稳定。
// 通过底层为AVLTree实现AVLMap
/**
 * Map implementation that forwards every operation to an underlying AVL tree.
 *
 * @param <K> the key type; must be comparable so the tree can order it
 * @param <V> the value type
 */
public class AVLMap<K extends Comparable<K>, V> implements Map<K, V> {

    // Backing tree; this class adds no logic of its own.
    private AVLTree<K, V> avl;

    /** Creates an empty map backed by a fresh AVL tree. */
    public AVLMap() {
        this.avl = new AVLTree<>();
    }

    /** @return the size reported by the backing tree */
    @Override
    public int getSize() {
        return this.avl.getSize();
    }

    /** @return whether the backing tree reports itself empty */
    @Override
    public boolean isEmpty() {
        return this.avl.isEmpty();
    }

    /** @return whether the backing tree contains {@code key} */
    @Override
    public boolean contains(K key) {
        return this.avl.contains(key);
    }

    /** @return whatever the backing tree returns for {@code key} */
    @Override
    public V get(K key) {
        return this.avl.get(key);
    }

    /** Forwards the insertion of (key, value) to the backing tree. */
    @Override
    public void add(K key, V value) {
        this.avl.add(key, value);
    }

    /** Forwards the update of {@code key} to the backing tree. */
    @Override
    public void set(K key, V newValue) {
        this.avl.set(key, newValue);
    }

    /** @return whatever the backing tree returns when removing {@code key} */
    @Override
    public V remove(K key) {
        return this.avl.remove(key);
    }
}
后面等学到AVL时写实现原理
下面对这三种底层实现的map进行性能比较
import java.util.ArrayList;
/** Benchmark that runs the same word-frequency count against each Map implementation. */
public class Main {

    /**
     * Counts word frequencies of the given file into {@code map} and times the run.
     *
     * <p>The measured interval includes reading the file as well as the map operations.
     *
     * @param map      an empty map to fill with word counts
     * @param filename path of the text file to read
     * @return elapsed wall-clock time in seconds
     */
    private static double testMap(Map<String, Integer> map, String filename) {
        long startTime = System.nanoTime();

        System.out.println(filename);
        ArrayList<String> words = new ArrayList<>();
        // filename is already a full path; do not prepend another directory.
        if (FileOperation.readFile(filename, words)) {
            System.out.println("Total words: " + words.size());
            for (String word : words) {
                if (map.contains(word)) {
                    map.set(word, map.get(word) + 1);
                } else {
                    map.add(word, 1);
                }
            }
            System.out.println("Total different words: " + map.getSize());
            System.out.println("Frequency of a: " + map.get("a"));
        }

        long endTime = System.nanoTime();
        return (endTime - startTime) / 1000000000.0;
    }

    public static void main(String[] args) {
        String filename = "E:/java/Map/PAP.txt";

        BSTMap<String, Integer> bstMap = new BSTMap<>();
        double time1 = testMap(bstMap, filename);
        System.out.println("BSTMap : " + time1 + " s");

        // Each implementation gets its own fresh, empty map.
        LinkedListMap<String, Integer> linkedListMap = new LinkedListMap<>();
        double time2 = testMap(linkedListMap, filename);
        System.out.println("LinkedListMap : " + time2 + " s");

        AVLMap<String, Integer> avlMap = new AVLMap<>();
        double time3 = testMap(avlMap, filename);
        System.out.println("AVLMap : " + time3 + " s");
    }
}