本文主要介绍如何从 HTML 文件中提取所有标签,并统计每个标签出现的次数:
- 主要还是字符串的操作
- 使用了Map
- 使用了Formatter类对输出格式进行控制,可以固定输出的长度以及设置对齐方式
import java.util.*;
import java.io.*;
/**
 * Counts the HTML start tags found in a file and prints the totals as a table.
 *
 * <p>This is a lightweight, line-by-line character scan, not an HTML parser. A tag is
 * recognised only when its name and the character terminating the name (a space,
 * {@code >} or {@code /}) sit on the same line. Closing tags ({@code </p>}), comments and
 * declarations ({@code <!-- -->}, {@code <!DOCTYPE>}) are ignored. Tag names are
 * case-sensitive, so {@code <DIV>} and {@code <div>} are counted separately.
 */
public class ShowTags {

    /** Charset name used to decode the input file. */
    private static final String ENCODING = "UTF-8";

    /** File scanned by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_PATH = "D:\\eclipse\\WebJava2\\vacation.htm";

    /**
     * Reads the file at {@code filePath} as UTF-8 and counts every start tag in it.
     *
     * <p>This method never throws for a missing or unreadable file: it prints a message to
     * standard output and returns whatever was counted so far (possibly an empty map).
     *
     * @param filePath path of the HTML file to scan; {@code null} is treated as a missing file
     * @return a mutable map from tag name to number of occurrences, never {@code null}
     */
    public static Map<String, Integer> SelectTags(String filePath) {
        Map<String, Integer> map = new HashMap<String, Integer>();
        if (filePath == null) {
            System.out.println("cant find the file!");
            return map;
        }
        File file = new File(filePath);
        if (!file.exists() || !file.isFile()) {
            System.out.println("cant find the file!");
            return map;
        }
        // try-with-resources closes the stream and reader even when readLine() fails;
        // the stream is declared separately so it is released if the reader cannot be built.
        try (InputStream in = new FileInputStream(file);
                BufferedReader reader = new BufferedReader(new InputStreamReader(in, ENCODING))) {
            String line;
            while ((line = reader.readLine()) != null) {
                countTagsInLine(line, map);
            }
        } catch (IOException e) {
            System.out.println("read file error!");
            e.printStackTrace();
        }
        return map;
    }

    /**
     * Scans one line and adds every start tag it contains to {@code counts}.
     *
     * @param line   a single line of text without its line terminator
     * @param counts accumulator mapping tag name to occurrences; updated in place
     */
    private static void countTagsInLine(String line, Map<String, Integer> counts) {
        final int length = line.length();
        for (int open = 0; open < length; open++) {
            // A tag candidate must start with '<'.
            if (line.charAt(open) != '<') {
                continue;
            }
            final int nameStart = open + 1;
            // Scan up to and including the LAST character of the line so that a tag whose
            // '>' ends the line (for example "<html>") is still recognised.
            for (int pos = nameStart; pos < length; pos++) {
                char c = line.charAt(pos);
                // "<!", "</", "<'" and "< " do not introduce a start tag.
                if (pos == nameStart && (c == '!' || c == '/' || c == '\'' || c == ' ')) {
                    break;
                }
                // A quote before any terminator means this is not a tag name;
                // resume the outer scan after the quote.
                if (c == '\'') {
                    open = pos;
                    break;
                }
                // A space, '>' or '/' ends the tag name.
                if (c == ' ' || c == '>' || c == '/') {
                    String name = line.substring(nameStart, pos);
                    if (isCountable(name)) {
                        Integer seen = counts.get(name);
                        counts.put(name, seen == null ? 1 : seen + 1);
                        // Continue the outer scan after the terminator.
                        open = pos;
                    }
                    break;
                }
            }
        }
    }

    /**
     * Tells whether {@code name} should be recorded as a tag name.
     *
     * <p>Rejects the empty name produced by {@code <>} and the stray one-character names
     * {@code \} and {@code "}. The candidates {@code "\r"} and {@code "//"} need no check:
     * {@code readLine()} strips carriage returns and a name can never contain {@code /}
     * because {@code /} terminates it.
     */
    private static boolean isCountable(String name) {
        return !name.isEmpty() && !name.equals("\\") && !name.equals("\"");
    }

    /**
     * Prints a two-column table of tag names and their counts.
     *
     * @param args optional; {@code args[0]} is the file to scan, otherwise a built-in
     *             default path is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        Map<String, Integer> answerMap = SelectTags(path);
        // Deliberately not closed: closing the Formatter would also close System.out.
        Formatter f = new Formatter(System.out);
        // Key and value columns are 20 and 15 characters wide; '-' left-aligns them.
        f.format("%-20s %-15s\n", "key", "value");
        for (Map.Entry<String, Integer> entry : answerMap.entrySet()) {
            f.format("%-20s %-15s\n", entry.getKey(), entry.getValue());
        }
        f.flush();
    }
}