集合是一种高级的数据结构,它的特点是在集合中不能有相同的元素。
我在之前的文章中已经详细地讲过链表了,这里只贴出代码,不再赘述。
/**
 * @Author: Cui
 * @Date: 2020/12/21
 * @Description: Singly linked list built around a dummy head node, so that
 *               insertion and removal never need a special case for the
 *               first position. Null elements are accepted and are handled
 *               safely by {@link #contains}, {@link #removeElement} and
 *               {@link #toString}.
 */
public class LinkedList<E> {

    /** One link of the chain: an element plus a reference to the next link. */
    private class Node {
        public E e;
        public Node next;

        public Node(E e, Node next) {
            this.e = e;
            this.next = next;
        }

        public Node(E e) {
            this(e, null);
        }

        public Node() {
            this(null, null);
        }

        @Override
        public String toString() {
            // String.valueOf keeps a null element from raising a NullPointerException.
            return String.valueOf(e);
        }
    }

    // Sentinel placed before the first real element; it never stores data.
    private Node dummyHead;
    private int size;

    public LinkedList() {
        dummyHead = new Node();
        size = 0;
    }

    /** Returns the number of elements in the list. */
    public int getSize() {
        return size;
    }

    /** Returns whether the list holds no elements. */
    public boolean isEmpty() {
        return size == 0;
    }

    /**
     * Inserts {@code e} at the 0-based position {@code index}.
     * Not a common linked-list operation; provided for practice.
     *
     * @throws IllegalArgumentException if {@code index} is outside {@code [0, size]}
     */
    public void add(int index, E e) {
        if (index < 0 || index > size) {
            throw new IllegalArgumentException("Add failed. Illegal index.");
        }
        // Walk to the node just before the insertion point.
        Node prev = dummyHead;
        for (int i = 0; i < index; i++) {
            prev = prev.next;
        }
        prev.next = new Node(e, prev.next);
        size++;
    }

    /** Inserts {@code e} at the head of the list. */
    public void addFirst(E e) {
        add(0, e);
    }

    /** Appends {@code e} at the tail of the list. */
    public void addLast(E e) {
        add(size, e);
    }

    /**
     * Returns the element at the 0-based position {@code index}.
     * Not a common linked-list operation; provided for practice.
     *
     * @throws IllegalArgumentException if {@code index} is outside {@code [0, size)}
     */
    public E get(int index) {
        if (index < 0 || index >= size) {
            throw new IllegalArgumentException("Get failed. Illegal index.");
        }
        Node cur = dummyHead.next;
        for (int i = 0; i < index; i++) {
            cur = cur.next;
        }
        return cur.e;
    }

    /** Returns the first element. */
    public E getFirst() {
        return get(0);
    }

    /** Returns the last element. */
    public E getLast() {
        return get(size - 1);
    }

    /**
     * Replaces the element at the 0-based position {@code index} with {@code e}.
     * Not a common linked-list operation; provided for practice.
     *
     * @throws IllegalArgumentException if {@code index} is outside {@code [0, size)}
     */
    public void set(int index, E e) {
        if (index < 0 || index >= size) {
            throw new IllegalArgumentException("Set failed. Illegal index.");
        }
        Node cur = dummyHead.next;
        for (int i = 0; i < index; i++) {
            cur = cur.next;
        }
        cur.e = e;
    }

    /** Returns whether the list contains an element equal to {@code e} (null-safe). */
    public boolean contains(E e) {
        for (Node cur = dummyHead.next; cur != null; cur = cur.next) {
            if (matches(cur.e, e)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes and returns the element at the 0-based position {@code index}.
     * Not a common linked-list operation; provided for practice.
     *
     * @throws IllegalArgumentException if {@code index} is outside {@code [0, size)}
     */
    public E remove(int index) {
        if (index < 0 || index >= size) {
            throw new IllegalArgumentException("Remove failed. Index is illegal.");
        }
        Node prev = dummyHead;
        for (int i = 0; i < index; i++) {
            prev = prev.next;
        }
        Node retNode = prev.next;
        prev.next = retNode.next;
        // Detach the removed node so it no longer references the list.
        retNode.next = null;
        size--;
        return retNode.e;
    }

    /** Removes and returns the first element. */
    public E removeFirst() {
        return remove(0);
    }

    /** Removes and returns the last element. */
    public E removeLast() {
        return remove(size - 1);
    }

    /**
     * Removes the first element equal to {@code e} (null-safe); does nothing
     * when no such element exists.
     */
    public void removeElement(E e) {
        Node prev = dummyHead;
        // Stop on the node whose successor holds the element to delete.
        while (prev.next != null && !matches(prev.next.e, e)) {
            prev = prev.next;
        }
        if (prev.next != null) {
            Node delNode = prev.next;
            prev.next = delNode.next;
            delNode.next = null;
            size--;
        }
    }

    // Null-safe equality between a stored element and a search target.
    private boolean matches(E stored, E target) {
        return stored == null ? target == null : stored.equals(target);
    }

    @Override
    public String toString() {
        StringBuilder res = new StringBuilder();
        for (Node cur = dummyHead.next; cur != null; cur = cur.next) {
            res.append(cur).append("->");
        }
        res.append("NULL");
        return res.toString();
    }
}
实现集合
import java.util.ArrayList;
/**
 * @Author: Cui
 * @Date: 2020/12/21
 * @Description: Set implementation that stores its elements in a
 *               {@code LinkedList} and checks for an existing equal
 *               element before every insertion.
 */
public class LinkedListSet<E> implements Set<E> {

    // Backing storage; uniqueness is enforced in add().
    private LinkedList<E> list;

    public LinkedListSet() {
        this.list = new LinkedList<>();
    }

    /** Adds {@code e} unless the backing list already contains it. */
    @Override
    public void add(E e) {
        if (list.contains(e)) {
            return;
        }
        // Insert at the head of the backing list.
        list.addFirst(e);
    }

    /** Removes {@code e} by delegating to the backing list. */
    @Override
    public void remove(E e) {
        list.removeElement(e);
    }

    /** Returns whether the backing list contains {@code e}. */
    @Override
    public boolean contains(E e) {
        boolean found = list.contains(e);
        return found;
    }

    /** Returns the number of stored elements. */
    @Override
    public int getSize() {
        int count = list.getSize();
        return count;
    }

    /** Returns whether the set holds no elements. */
    @Override
    public boolean isEmpty() {
        boolean empty = list.isEmpty();
        return empty;
    }
}
测试需要用到的文件读取代码
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.Locale;
import java.io.File;
import java.io.BufferedInputStream;
import java.io.IOException;
/**
 * File helper used by the set tests: reads a text file and splits it into
 * lower-cased words.
 */
public class FileOperation {

    /**
     * Reads the file named {@code filename} as UTF-8 and appends every word
     * it contains to {@code words}, lower-cased. A word is a maximal run of
     * characters for which {@link Character#isLetter(char)} is true.
     *
     * @param filename path of the file to read
     * @param words    list that receives the words, in file order
     * @return {@code true} if the file was read; {@code false} if an argument
     *         is null, the file does not exist, or it cannot be opened
     */
    public static boolean readFile(String filename, ArrayList<String> words) {
        if (filename == null || words == null) {
            System.out.println("filename is null or words is null");
            return false;
        }

        File file = new File(filename);
        if (!file.exists()) {
            return false;
        }

        // try-with-resources closes the scanner, and through it the file
        // stream, on every exit path.
        try (Scanner scanner =
                 new Scanner(new BufferedInputStream(new FileInputStream(file)), "UTF-8")) {
            scanner.useLocale(Locale.ENGLISH);

            // Simple tokenization. It ignores many real-world text processing
            // concerns and is meant for demo purposes only.
            if (scanner.hasNextLine()) {
                // The "\\A" delimiter makes next() return the whole remaining input.
                String contents = scanner.useDelimiter("\\A").next();

                int start = firstCharacterIndex(contents, 0);
                for (int i = start + 1; i <= contents.length(); ) {
                    if (i == contents.length() || !Character.isLetter(contents.charAt(i))) {
                        // Explicit locale: lower-casing must not depend on the
                        // platform default locale.
                        String word = contents.substring(start, i).toLowerCase(Locale.ENGLISH);
                        words.add(word);
                        start = firstCharacterIndex(contents, i);
                        i = start + 1;
                    } else {
                        i++;
                    }
                }
            }
        } catch (IOException ioe) {
            System.out.println("Cannot open " + filename);
            return false;
        }

        return true;
    }

    /**
     * Returns the index of the first letter in {@code s} at or after
     * {@code start}, or {@code s.length()} if there is none.
     */
    private static int firstCharacterIndex(String s, int start) {
        for (int i = start; i < s.length(); i++) {
            if (Character.isLetter(s.charAt(i))) {
                return i;
            }
        }
        return s.length();
    }
}
读者测试时,可以在项目的根目录下放置一个用来读取的文本文件。
我在这里放置的是《傲慢与偏见》和《双城记》的英文版。
主函数
/**
 * Prints total and distinct word counts for two novels, using a
 * LinkedListSet to collect the distinct words of each book.
 */
public static void main(String[] args) {
    printWordStats("Pride and Prejudice", "pride-and-prejudice.txt");
    System.out.println();
    printWordStats("A Tale of Two Cities", "a-tale-of-two-cities.txt");
}

// Prints the title, then the word statistics of the given file if it could be read.
private static void printWordStats(String title, String filename) {
    System.out.println(title);
    ArrayList<String> tokens = new ArrayList<>();
    if (!FileOperation.readFile(filename, tokens)) {
        return;
    }
    System.out.println("Total words: " + tokens.size());
    LinkedListSet<String> distinct = new LinkedListSet<>();
    for (int i = 0; i < tokens.size(); i++) {
        distinct.add(tokens.get(i));
    }
    System.out.println("Total different words: " + distinct.getSize());
}
可以很明显感觉到运行时间很慢
使用树来实现,先贴出树实现的代码
import java.util.LinkedList;
import java.util.Queue;
import java.util.Stack;
/**
 * @Author: Cui
 * @Date: 2020/12/14
 * @Description: Binary search tree that stores each distinct element at most
 *               once. Elements are ordered by their natural ordering
 *               ({@link Comparable#compareTo}). The tree is not
 *               self-balancing.
 */
public class BST<E extends Comparable<E>> {

    /** A tree node holding one element and its two child links. */
    private class Node {
        public E e;
        public Node right, left;

        public Node(E e) {
            this.e = e;
            left = null;
            right = null;
        }
    }

    private Node root;
    private int size;

    public BST() {
        root = null;
        size = 0;
    }

    /** Returns the number of elements stored in the tree. */
    public int size() {
        return size;
    }

    /** Returns whether the tree holds no elements. */
    public boolean isEmpty() {
        return size == 0;
    }

    /** Adds {@code e} to the tree; an element already present is ignored. */
    public void add(E e) {
        root = add(root, e);
    }

    // Inserts e into the subtree rooted at node and returns the subtree's
    // (possibly new) root.
    private Node add(Node node, E e) {
        if (node == null) {
            size++;
            return new Node(e);
        }
        int cmp = e.compareTo(node.e);
        if (cmp < 0) {
            node.left = add(node.left, e);
        } else if (cmp > 0) {
            node.right = add(node.right, e);
        }
        // cmp == 0: duplicate, nothing to do.
        return node;
    }

    /** Returns whether the tree contains an element that compares equal to {@code e}. */
    public boolean contains(E e) {
        return contains(root, e);
    }

    private boolean contains(Node node, E e) {
        if (node == null) {
            return false;
        }
        int cmp = e.compareTo(node.e);
        if (cmp == 0) {
            return true;
        } else if (cmp < 0) {
            return contains(node.left, e);
        } else {
            return contains(node.right, e);
        }
    }

    /** Recursive pre-order traversal; prints each element on its own line. */
    public void preOrder() {
        preOrder(root);
    }

    private void preOrder(Node node) {
        if (node == null) {
            return;
        }
        System.out.println(node.e);
        preOrder(node.left);
        preOrder(node.right);
    }

    /** Non-recursive pre-order traversal; prints nothing for an empty tree. */
    public void preOrderNR() {
        if (root == null) {
            return;
        }
        Stack<Node> stack = new Stack<>();
        stack.push(root);
        while (!stack.isEmpty()) {
            Node cur = stack.pop();
            System.out.println(cur.e);
            // Push right first so that the left child is visited first.
            if (cur.right != null) {
                stack.push(cur.right);
            }
            if (cur.left != null) {
                stack.push(cur.left);
            }
        }
    }

    /** Level-order (breadth-first) traversal; prints nothing for an empty tree. */
    public void levelOrder() {
        if (root == null) {
            return;
        }
        Queue<Node> q = new LinkedList<>();
        q.add(root);
        while (!q.isEmpty()) {
            Node cur = q.remove();
            System.out.println(cur.e);
            if (cur.left != null) {
                q.add(cur.left);
            }
            if (cur.right != null) {
                q.add(cur.right);
            }
        }
    }

    /**
     * Returns the smallest element in the tree.
     *
     * @throws IllegalArgumentException if the tree is empty
     */
    public E minimum() {
        if (size == 0) {
            throw new IllegalArgumentException("BST is empty");
        }
        return minimum(root).e;
    }

    // Returns the node holding the minimum of the subtree rooted at node.
    private Node minimum(Node node) {
        if (node.left == null) {
            return node;
        }
        return minimum(node.left);
    }

    /**
     * Returns the largest element in the tree.
     *
     * @throws IllegalArgumentException if the tree is empty
     */
    public E maximum() {
        if (size == 0) {
            throw new IllegalArgumentException("BST is empty");
        }
        return maximum(root).e;
    }

    // Returns the node holding the maximum of the subtree rooted at node.
    private Node maximum(Node node) {
        if (node.right == null) {
            return node;
        }
        // Keep following right links; the maximum is the right-most node.
        return maximum(node.right);
    }

    /**
     * Removes and returns the smallest element.
     *
     * @throws IllegalArgumentException if the tree is empty
     */
    public E removeMin() {
        E ret = minimum();
        root = removeMin(root);
        return ret;
    }

    // Removes the minimum node of the subtree rooted at node and returns the
    // root of the resulting subtree.
    private Node removeMin(Node node) {
        if (node.left == null) {
            Node rightNode = node.right;
            node.right = null;
            size--;
            return rightNode;
        }
        node.left = removeMin(node.left);
        return node;
    }

    /**
     * Removes and returns the largest element.
     *
     * @throws IllegalArgumentException if the tree is empty
     */
    public E removeMax() {
        E ret = maximum();
        root = removeMax(root);
        return ret;
    }

    // Removes the maximum node of the subtree rooted at node and returns the
    // root of the resulting subtree.
    private Node removeMax(Node node) {
        if (node.right == null) {
            Node leftNode = node.left;
            node.left = null;
            size--;
            return leftNode;
        }
        node.right = removeMax(node.right);
        return node;
    }

    /** Removes the element that compares equal to {@code e}, if present. */
    public void remove(E e) {
        root = remove(root, e);
    }

    // Removes e from the subtree rooted at node and returns the root of the
    // resulting subtree.
    private Node remove(Node node, E e) {
        if (node == null) {
            return null;
        }
        int cmp = e.compareTo(node.e);
        if (cmp < 0) {
            node.left = remove(node.left, e);
            return node;
        } else if (cmp > 0) {
            node.right = remove(node.right, e);
            return node;
        } else {
            // Left subtree is empty: the right subtree takes this node's place.
            if (node.left == null) {
                Node rightNode = node.right;
                node.right = null;
                size--;
                return rightNode;
            }
            // Right subtree is empty: the left subtree takes this node's place.
            if (node.right == null) {
                Node leftNode = node.left;
                node.left = null;
                size--;
                return leftNode;
            }
            // Both subtrees exist: replace this node with its successor, the
            // minimum of the right subtree. removeMin already decrements size,
            // which accounts for the one node that leaves the tree.
            Node successor = minimum(node.right);
            successor.right = removeMin(node.right);
            successor.left = node.left;
            node.left = null;
            node.right = null;
            return successor;
        }
    }

    /**
     * Returns the node holding the smallest element that is not less than
     * {@code e}, or {@code null} if every element is less than {@code e}.
     */
    public Node ceil(E e) {
        return ceil(root, e);
    }

    // Searches the subtree rooted at node for the smallest element >= e.
    private Node ceil(Node node, E e) {
        if (node == null) {
            return null;
        }
        int cmp = node.e.compareTo(e);
        // Exact match is the closest possible answer.
        if (cmp == 0) {
            return node;
        }
        // This node is smaller than e, so the answer can only be on the right.
        if (cmp < 0) {
            return ceil(node.right, e);
        }
        // This node is larger than e: a closer candidate may exist on the
        // left; otherwise this node is the answer.
        Node tempNode = ceil(node.left, e);
        if (tempNode != null) {
            return tempNode;
        }
        return node;
    }

    /**
     * Returns the node holding the largest element that is not greater than
     * {@code e}, or {@code null} if every element is greater than {@code e}.
     */
    public Node floor(E e) {
        return floor(root, e);
    }

    // Searches the subtree rooted at node for the largest element <= e.
    private Node floor(Node node, E e) {
        if (node == null) {
            return null;
        }
        int cmp = node.e.compareTo(e);
        // Exact match is the closest possible answer.
        if (cmp == 0) {
            return node;
        }
        // This node is larger than e, so the answer can only be on the left.
        if (cmp > 0) {
            return floor(node.left, e);
        }
        // This node is smaller than e: a closer candidate may exist on the
        // right; otherwise this node is the answer.
        Node tempNode = floor(node.right, e);
        if (tempNode != null) {
            return tempNode;
        }
        return node;
    }

    @Override
    public String toString() {
        StringBuilder res = new StringBuilder();
        generateBSTString(root, 0, res);
        return res.toString();
    }

    // Appends a pre-order description of the subtree rooted at node, which
    // sits at the given depth, to res. Missing children are printed as "null".
    private void generateBSTString(Node node, int depth, StringBuilder res) {
        if (node == null) {
            res.append(generateDepthString(depth) + "null\n");
            return;
        }
        res.append(generateDepthString(depth) + node.e + "\n");
        generateBSTString(node.left, depth + 1, res);
        generateBSTString(node.right, depth + 1, res);
    }

    // Builds the "--" prefix that visualizes the depth of a node.
    private String generateDepthString(int depth) {
        StringBuilder res = new StringBuilder();
        for (int i = 0; i < depth; i++) {
            res.append("--");
        }
        return res.toString();
    }
}
实现的集合
/**
 * @Author: Cui
 * @Date: 2020/12/21
 * @Description: Set implementation that delegates every operation to an
 *               underlying {@code BST}.
 */
public class BSTSet<E extends Comparable<E>> implements Set<E> {

    // Backing tree that stores the set's elements.
    private BST<E> bst;

    public BSTSet() {
        this.bst = new BST<>();
    }

    /** Adds {@code e} by delegating to the backing tree. */
    @Override
    public void add(E e) {
        this.bst.add(e);
    }

    /** Removes {@code e} by delegating to the backing tree. */
    @Override
    public void remove(E e) {
        this.bst.remove(e);
    }

    /** Returns whether the backing tree contains {@code e}. */
    @Override
    public boolean contains(E e) {
        boolean found = this.bst.contains(e);
        return found;
    }

    /** Returns the number of elements in the backing tree. */
    @Override
    public int getSize() {
        int count = this.bst.size();
        return count;
    }

    /** Returns whether the backing tree is empty. */
    @Override
    public boolean isEmpty() {
        boolean empty = this.bst.isEmpty();
        return empty;
    }
}
继续使用刚刚那个测试的例子进行性能比较
/**
 * Compares the BST-backed set with the linked-list-backed set by counting
 * the distinct words of one text file with each of them.
 */
public class Main {

    /**
     * Reads {@code filename}, adds every word to {@code set}, prints the total
     * and distinct word counts, and returns the elapsed time in seconds. The
     * measured time includes reading the file.
     */
    public static double testSet(Set<String> set, String filename) {
        final long begin = System.nanoTime();

        ArrayList<String> tokens = new ArrayList<>();
        boolean loaded = FileOperation.readFile(filename, tokens);
        if (loaded) {
            System.out.println("Total words: " + tokens.size());
            for (int i = 0; i < tokens.size(); i++) {
                set.add(tokens.get(i));
            }
            System.out.println("Total different words: " + set.getSize());
        }

        long elapsedNanos = System.nanoTime() - begin;
        // Convert nanoseconds to seconds.
        return elapsedNanos / 1000000000.0;
    }

    public static void main(String[] args) {
        String filename = "pride-and-prejudice.txt";

        BSTSet<String> treeBacked = new BSTSet<>();
        double treeSeconds = testSet(treeBacked, filename);
        System.out.println("BST Set:" + treeSeconds + "s");

        System.out.println();

        LinkedListSet<String> listBacked = new LinkedListSet<>();
        double listSeconds = testSet(listBacked, filename);
        System.out.println("LinkedList Set:" + listSeconds + "s");
    }
}
可以看出,使用二分搜索树实现的集合统计《傲慢与偏见》大约需要0.1秒,而链表实现的集合大约需要2.2秒,两者相差约22倍。
可以看出二分搜索树的性能在实现集合的这个场景下是比链表要高效很多的。