java 解析html标签_Java解析HTML标签Tag

标签:dfa

import java.util.HashMap;

import java.util.Map;

/**
 * Small state-machine parser for a single opening {@code <script ...>} tag.
 *
 * <p>The parser walks the tag text once, collecting {@code name="value"} /
 * {@code name='value'} attribute pairs until it reaches {@code >} or {@code />}.
 * The tag name is matched case-insensitively; attribute names are stored exactly
 * as written. Every attribute must have a quoted value.
 *
 * <p>Usage: construct with the tag text, call {@link #parser()}, then read
 * attributes with {@link #getAttr(String)}.
 */
public class TagParser {

    /** Number of attributes completed so far; also the key of the attribute being parsed. */
    private int index = 0;

    /** Characters of the tag text handed to the constructor. */
    private final char[] tagChar;

    /** Offset into {@link #tagChar} of the next unread character. */
    private int position = 0;

    /** Quote character (' or ") that opened the attribute value currently being read. */
    private char symbol = '"';

    /** Opening token of the tag (matched case-insensitively). */
    public static final String START_SCRIPT = "<script";

    /** Plain closing token of the opening tag. */
    public static final String END_SCRIPT = ">";

    /** Self-closing token of the tag. */
    public static final String END_SCRIPT_1 = "/>";

    public static final String EQ = "=";

    public static final char SPACE = ' ';

    public static final String MUST_SPACE = " ";

    public static final String TAB = "\t";

    public static final String SYMBOL = "'";

    /** Current state of the state machine; {@code null} until {@link #parser()} is first called. */
    private DFAStatus status;

    /** Attributes in the order they were parsed, keyed by their 0-based position. */
    private Map<Integer, Entity> map = new HashMap<Integer, Entity>();

    /** Attribute name to value, filled once the end of the tag has been reached. */
    private Map<String, String> result = new HashMap<String, String>();

    /**
     * Demo entry point: parses a sample tag ten times and prints the {@code src}
     * attribute, the parsed attributes and the elapsed time.
     *
     * @param args unused
     * @throws SymbolError if the sample tag is malformed
     */
    public static void main(String[] args) throws SymbolError {
        String tag = "<script src=\"demo.js\" type='text/javascript' >";
        long start = System.currentTimeMillis();
        for (int i = 0; i < 10; i++) {
            TagParser token = new TagParser(tag);
            token.parser();
            System.out.println(token.getAttr("src"));
            System.out.println(token);
        }
        System.out.println("use time:" + (System.currentTimeMillis() - start));
    }

    /**
     * Creates a parser for the given tag text. Nothing is parsed until
     * {@link #parser()} is called.
     *
     * @param str the tag text, e.g. {@code <script src="a.js">}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public TagParser(String str) {
        if (str == null) {
            throw new NullPointerException("str");
        }
        this.tagChar = str.toCharArray();
    }

    /**
     * Tests whether the unread input begins with {@code str}. A lowercase ASCII
     * letter in {@code str} also matches its uppercase form in the input; every
     * other character must match exactly.
     */
    private boolean startsWith(String str) {
        int length = str.length();
        if (position + length > tagChar.length) {
            return false;
        }
        for (int i = 0; i < length; i++) {
            char expected = str.charAt(i);
            char actual = tagChar[position + i];
            if (actual == expected) {
                continue;
            }
            // 32 is the distance between 'a' and 'A' in ASCII.
            if (isLower(expected) && actual == expected - 32) {
                continue;
            }
            return false;
        }
        return true;
    }

    /**
     * Runs the state machine over the tag text until the closing {@code >} or
     * {@code />} is reached, then publishes the attributes for {@link #getAttr(String)}.
     * Calling this again after a successful parse does nothing.
     *
     * @throws SymbolError if the text is not a well-formed {@code <script ...>} tag,
     *         including when it ends before the tag is closed
     */
    public void parser() throws SymbolError {
        if (status == null) {
            status = DFAStatus.UNSTART;
            skipSpace();
        }
        // Iterative rather than recursive so the stack depth does not grow with
        // the number of attributes.
        while (status != DFAStatus.DONE) {
            switch (status) {
                case UNSTART:
                    if (!startsWith(START_SCRIPT)) {
                        throw syntaxError();
                    }
                    position += START_SCRIPT.length();
                    status = DFAStatus.START;
                    break;
                case START:
                    nextSpace();
                    break;
                case NULL:
                    skipSpace();
                    if (startsWith(END_SCRIPT_1) || startsWith(END_SCRIPT)) {
                        status = DFAStatus.DONE;
                        done();
                    } else {
                        parserName();
                    }
                    break;
                case EQ:
                    parserVal();
                    break;
                case SYMBOL_END:
                    status = DFAStatus.NULL;
                    break;
                default:
                    // SYMBOL_START only exists inside parserVal; seeing it here means
                    // an earlier call failed in the middle of a value.
                    throw syntaxError();
            }
        }
    }

    /** Copies the parsed attributes into {@link #result}; a repeated name keeps its last value. */
    private void done() {
        for (int i = 0; i < index; i++) {
            Entity entity = map.get(i);
            result.put(entity.name, entity.value);
        }
    }

    /**
     * Reads a quoted attribute value and stores it on the attribute registered by
     * {@link #parserName()}. The value must start with {@code '} or {@code "} and
     * end with the same character.
     *
     * @throws SymbolError if the value is not quoted or the closing quote is missing
     */
    private void parserVal() throws SymbolError {
        skipSpace();
        if (position >= tagChar.length
                || (tagChar[position] != '\'' && tagChar[position] != '"')) {
            throw syntaxError();
        }
        symbol = tagChar[position];
        status = DFAStatus.SYMBOL_START;
        position++;

        int valueStart = position;
        while (position < tagChar.length && tagChar[position] != symbol) {
            position++;
        }
        if (position >= tagChar.length) {
            // Input ended before the closing quote.
            throw syntaxError();
        }
        map.get(index).value = new String(tagChar, valueStart, position - valueStart);
        position++; // consume the closing quote
        status = DFAStatus.SYMBOL_END;
        index++;
    }

    /** Returns whether {@code chat} is an uppercase ASCII letter. */
    private boolean isUpper(char chat) {
        return chat >= 'A' && chat <= 'Z';
    }

    /** Returns whether {@code chat} is a lowercase ASCII letter. */
    private boolean isLower(char chat) {
        return chat >= 'a' && chat <= 'z';
    }

    /** Returns whether {@code chat} is an ASCII letter. */
    private boolean isLetter(char chat) {
        return isLower(chat) || isUpper(chat);
    }

    /** Returns whether {@code chat} may appear after the first character of an attribute name. */
    private boolean isNamePart(char chat) {
        return isLetter(chat) || (chat >= '0' && chat <= '9') || chat == '-' || chat == '_';
    }

    /**
     * Reads an attribute name followed by {@code =} and registers a new attribute
     * under the current {@link #index}. The name must start with an ASCII letter and
     * may continue with letters, digits, {@code -} or {@code _}.
     *
     * @throws SymbolError if no name is present or it is not followed by {@code =}
     */
    private void parserName() throws SymbolError {
        int nameStart = position;
        if (position < tagChar.length && isLetter(tagChar[position])) {
            position++;
            while (position < tagChar.length && isNamePart(tagChar[position])) {
                position++;
            }
        }
        if (position == nameStart) {
            throw syntaxError();
        }
        String name = new String(tagChar, nameStart, position - nameStart);
        skipSpace();
        nextEQ();
        map.put(index, new Entity(name));
    }

    /**
     * Consumes the {@code =} between an attribute name and its value.
     *
     * @throws SymbolError if the next character is not {@code =}
     */
    private void nextEQ() throws SymbolError {
        if (!startsWith(EQ)) {
            throw syntaxError();
        }
        position += EQ.length();
        status = DFAStatus.EQ;
    }

    /** Advances past any run of spaces and tabs. */
    private void skipSpace() {
        while (position < tagChar.length
                && (tagChar[position] == SPACE || tagChar[position] == '\t')) {
            position++;
        }
    }

    /**
     * Checks what follows the tag name: either one space or tab (consumed), or the
     * end of the tag (left in place for the caller to handle).
     *
     * @throws SymbolError if anything else follows the tag name
     */
    private void nextSpace() throws SymbolError {
        if (startsWith(MUST_SPACE)) {
            position += MUST_SPACE.length();
        } else if (startsWith(TAB)) {
            position += TAB.length();
        } else if (!startsWith(END_SCRIPT_1) && !startsWith(END_SCRIPT)) {
            throw syntaxError();
        }
        status = DFAStatus.NULL;
    }

    /**
     * Builds the error for the current position. The message names the offending
     * character, or reports end of input when there is no character left to show.
     */
    private SymbolError syntaxError() {
        if (position < tagChar.length) {
            return new SymbolError("语法错误:" + tagChar[position]);
        }
        return new SymbolError("语法错误:unexpected end of input");
    }

    /**
     * Returns the value of an attribute parsed by {@link #parser()}.
     *
     * @param name attribute name, spelled exactly as in the tag
     * @return the attribute value, or {@code null} if the tag has no such attribute
     *         or has not been parsed successfully
     */
    public String getAttr(String name) {
        return result.get(name);
    }

    /** Returns the parsed attributes in {@code {name=value, ...}} form. */
    @Override
    public String toString() {
        return result.toString();
    }

    /** Thrown when the tag text is not well formed. */
    public static class SymbolError extends Exception {

        private static final long serialVersionUID = 2441411373778495898L;

        public SymbolError(String msg) {
            super(msg);
        }
    }

    /** One attribute: its name and, once parsed, its value. */
    public static class Entity {

        public Entity(String name) {
            this.name = name;
        }

        public String name;

        public String value;

        @Override
        public String toString() {
            return "[" + name + ":" + value + "]";
        }
    }

    /** States of the parser's state machine. */
    public enum DFAStatus {
        UNSTART, START, SYMBOL_START, SYMBOL_END, DONE, NULL, EQ
    }
}

标签:dfa

原文地址:http://blog.csdn.net/kevin_luan/article/details/39441309

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值