实现 XML 解析器

1 篇文章 0 订阅
1 篇文章 0 订阅
注意:属性值不支持空格(解析器把空白字符视为属性值的结束)。
package httpclient;

import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;

/**
 * Checked exception raised when the hand-written XML parser meets a
 * character it does not expect at the current position.
 */
class ParseException extends Exception {

    /**
     * Creates the exception.
     *
     * @param message human-readable description of the syntax problem
     */
    public ParseException(String message)
    {
        super(message);
    }
}
/**
 * An {@link InputStream} wrapper with a single-slot push-back buffer.
 *
 * <p>A character handed to {@link #returnChar(char)} is delivered by the next
 * call to {@link #read()}; after that, reads continue from the wrapped stream.
 * Only one character can be held at a time: a second {@code returnChar} call
 * before the first one is read replaces the pending character.
 */
class RInputStream extends InputStream{

    /** The pushed-back value that the next {@link #read()} returns, or {@code null} when empty. */
    Integer buffer;

    /** The stream that supplies data once the push-back slot is empty. */
    private final InputStream in;

    /**
     * @param in the stream to wrap
     */
    public RInputStream(InputStream in) {
        this.in = in;
    }

    /**
     * Pushes one character back so that the next {@link #read()} returns it.
     *
     * @param c the character to hand back
     */
    public void returnChar(char c){
        buffer=(int)c;
    }

    /**
     * Returns the pushed-back character if there is one, otherwise the next
     * value of the wrapped stream.
     *
     * @return the next value, or {@code -1} when the wrapped stream is exhausted
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public int read() throws IOException {

        if(buffer!=null){
            int tmp= buffer;
            buffer=null;
            return tmp;
        }else{
            return in.read();
        }
    }

    /**
     * Closes the wrapped stream and drops any pending pushed-back character.
     *
     * <p>{@link InputStream#close()} does nothing by default, so without this
     * override closing the wrapper would leave the underlying stream open.
     *
     * @throws IOException if closing the wrapped stream fails
     */
    @Override
    public void close() throws IOException {
        buffer=null;
        in.close();
    }

}
public class XmlUtil {
    public static void error(String str){
        System.out.println(str);
    }
    /**
     * Parses a document and returns its root element.
     *
     * <p>The first tag of the input (everything from the first {@code <} up to
     * the next {@code >}) is skipped without being interpreted; it is presumably
     * the XML declaration. The tag that follows is parsed as the root element
     * by {@link #parseBody(RInputStream)}.
     *
     * <p>Syntax problems are reported through {@link #error(String)} and make
     * this method return {@code null}. The stream is closed on every exit
     * path, including the early error returns.
     *
     * @param in the input to read; closed before this method returns
     * @return the root element, or {@code null} if the input could not be parsed
     * @throws IOException if reading the header or closing the stream fails
     */
    public static Element parse(RInputStream in) throws IOException {
        try {
            // Skip the leading header tag: leading whitespace, '<', anything, '>'.
            boolean beforeHeader = true;
            int a;
            while ((a = in.read()) != -1) {
                char c = (char) a;
                if (space(c)) {
                    continue;
                }
                if (beforeHeader) {
                    if (c != '<') {
                        error("最开始字符需要<!");
                        return null;
                    }
                    beforeHeader = false;
                } else if (c == '<') {
                    // Reported, but scanning continues up to the closing '>'.
                    error("头行两个<!");
                } else if (c == '>') {
                    break;
                }
            }

            // The root element's '<' is consumed here; parseBody starts at its name.
            char first = firstNonNull(in);
            if (first != '<') {
                error("需要< 但是发现" + first);
                return null;
            }

            Element element = null;
            try {
                element = parseBody(in);
            } catch (ParseException e) {
                error(e.getMessage());
            } catch (IOException e) {
                error(e.getMessage());
            }
            return element;
        } finally {
            // Runs on the early returns above as well, so the stream never leaks.
            in.close();
        }
    }
    /**
     * Parses one element, including its attributes and its content, and
     * consumes its end tag.
     *
     * <p>The stream must be positioned just after the {@code <} of the start
     * tag. The content is either plain text or a sequence of child elements
     * (parsed recursively); the first non-whitespace character after the start
     * tag decides which.
     *
     * <p>Behaviour visible in this implementation:
     * <ul>
     *   <li>An attribute value runs up to the next whitespace or {@code >};
     *       quote characters are stored as part of the value.</li>
     *   <li>Whitespace inside a plain-text value ends the value; it must then be
     *       followed by the element's end tag.</li>
     *   <li>If the input ends before the end tag, whatever has been collected so
     *       far is returned without an error.</li>
     * </ul>
     *
     * @param in the input, positioned after the opening {@code <}
     * @return the parsed element
     * @throws ParseException if an unexpected character is met or the end tag
     *         name differs from the start tag name
     * @throws IOException if reading fails
     */
    public static Element parseBody(RInputStream in) throws ParseException,IOException{
        boolean leaveBegin=false;          // true once the start tag's '>' has been read
        boolean gotEleName=false;          // element name complete (whitespace seen after it)
        boolean gettingEleName=false;      // currently reading the element name
        boolean gettingValue=false;        // content has started
        boolean isPlainValue=false;        // content is text rather than child elements
        boolean gotAttrName=false;         // current attribute name complete
        boolean gettingAttrName=false;     // currently inside an attribute
        boolean gotEq=false;               // '=' of the current attribute seen
        boolean gettingAttrValue=false;    // currently reading the attribute value
        // false while an attribute has been started but not yet stored in the map
        boolean spaceArrowGenerateAttrHadStore=true;
        char c;
        int a;
        StringBuilder eleName = new StringBuilder();
        ArrayList<Object> value = new ArrayList<Object>();
        StringBuilder plainValue = new StringBuilder();
        HashMap<String, String> attributes = new HashMap<String, String>();
        StringBuilder attrName = new StringBuilder();
        StringBuilder attrValue = new StringBuilder();
        while((a=in.read())!=-1){
            c=(char)a;
            if(leaveBegin){

                if(gettingValue){
                    if(isPlainValue){
                        if(space(c)){
                            // Whitespace ends the text; only "</name>" may follow.
                            char next = firstNonNull(in);
                            if(next!='<'){
                                throw new ParseException("已获取元素值但出现了"+next);
                            }
                            next = firstNonNull(in);
                            if(next!='/'){
                                throw new ParseException("已获取元素值但出现了<"+next);
                            }
                            expectEndTag(in, eleName);
                            break;
                        }else if(c=='>'){
                            throw new ParseException("正在获取字符串元素值但出现了>");
                        }else if(c=='<'){
                            char next = firstNonNull(in);
                            if(next!='/'){
                                throw new ParseException("已获取元素值但出现了<"+next);
                            }
                            expectEndTag(in, eleName);
                            break;
                        }else{
                            plainValue.append(c);
                        }
                    }else{// content is child elements
                        if(space(c)){
                            continue;
                        }else if(c!='<'){
                            // A syntax error, so report it as ParseException like every other one.
                            throw new ParseException("正在获取元素子元素值但出现了"+c);
                        }else{
                            c=firstNonNull(in);
                            if(c=='/'){
                                expectEndTag(in, eleName);
                                break;
                            }else{
                                // First character of a child's name: hand it back and recurse.
                                in.returnChar(c);
                                Element subElementN = parseBody(in);
                                value.add(subElementN);
                            }
                        }
                    }
                }else{// content has not started yet
                    if(space(c)){
                        continue;
                    }else if(c=='>'){
                        throw new ParseException("将要获取元素值但出现了>");
                    }else if(c=='<'){
                        c=firstNonNull(in);
                        if(c=='/'){
                            expectEndTag(in, eleName);
                            break;
                        }else{
                            in.returnChar(c);
                            Element subElement = parseBody(in);
                            value.add(subElement);
                            gettingValue=true;
                        }
                    }else{
                        plainValue.append(c);
                        gettingValue=true;
                        isPlainValue=true;
                    }
                }
            }else { // still inside the start tag
                if(gotEleName){// element name already read
                    if(gettingAttrName){
                        if(gotAttrName){
                            if(gotEq){
                                if(gettingAttrValue){
                                    if(space(c)){
                                        // Whitespace ends the value: store the attribute and reset.
                                        gotAttrName=false;
                                        gettingAttrName=false;
                                        gettingAttrValue=false;
                                        gotEq=false;
                                        spaceArrowGenerateAttrHadStore=true;
                                        attributes.put(attrName.toString(),attrValue.toString());
                                        attrName.delete(0,attrName.length());
                                        attrValue.delete(0,attrValue.length());
                                    }else if(c=='<'){
                                        throw new ParseException("正在获取属性值 但是出现了<");

                                    }else if(c=='>'){
                                        // '>' directly after the value: store the pending attribute.
                                        if(!spaceArrowGenerateAttrHadStore)
                                            attributes.put(attrName.toString(),attrValue.toString());
                                        leaveBegin=true;
                                    }else{
                                        attrValue.append(c);
                                    }
                                }else{// value not started yet
                                    if(space(c)){
                                        continue;
                                    }else if(c=='<' || c=='>'){
                                        throw new ParseException("未获取属性值但出现了"+c);
                                    }else{
                                        attrValue.append(c);
                                        gettingAttrValue=true;
                                    }
                                }
                            }else{// '=' not seen yet
                                if(space(c)){
                                    continue;
                                }else if(c=='<' || c=='>'){
                                    throw new ParseException("已获取属性名未获取属性值,但出现了"+c);
                                }else if (c!='='){
                                    throw new ParseException("以获取属性名 需要= 但出现了"+c);
                                }else{
                                    gotEq=true;
                                }
                            }
                        }else{// attribute name still being read

                            if(space(c)){
                                gotAttrName=true;
                            }else if(c=='='){
                                gotAttrName=true;
                                gotEq=true;
                            }else if(c=='<'|| c=='>'){
                                throw new ParseException("正在获取属性 未获取属性名 但是出现了"+c);
                            }else{
                                attrName.append(c);
                            }
                        }
                    }else{// between attributes
                        if(space(c)){
                            continue;
                        }else if(c=='<'){
                            throw new ParseException("已获取eleName,需要结束标签,但是发现了<");
                        }else if(c=='>'){
                            leaveBegin=true;
                        }else {
                            // First character of a new attribute name.
                            spaceArrowGenerateAttrHadStore=false;
                            attrName.append(c);
                            gettingAttrName=true;
                        }
                    }
                }else{ // element name not complete
                    if(gettingEleName){// reading the element name
                        if(space(c)){
                            gotEleName=true;
                        }else{
                            if(c=='<'){
                                throw new ParseException("正在获取eleName 但出现了"+c);
                            }else if (c=='>'){
                                leaveBegin=true;
                            }else{
                                eleName.append(c);
                            }
                        }
                    }else{// element name not started
                        if (space(c)){
                            continue;
                        }else{
                            if(c=='<' || c=='>'){
                                throw new ParseException("尚未开始获取eleName 但出现了"+c);
                            }else{
                                eleName.append(c);
                                gettingEleName=true;
                            }
                        }
                    }
                }
            }
        }
        Element N = new Element(eleName.toString(), null, attributes);
        N.setPlainValue(isPlainValue);
        if(isPlainValue)
            N.setValue(plainValue.toString());
        else
            N.setValue(value);

        return N;
    }

    /** Reads the rest of an end tag (after "&lt;/") and checks that its name equals {@code eleName}. */
    private static void expectEndTag(RInputStream in, StringBuilder eleName) throws ParseException,IOException{
        String endEleName = parseEnd(in);
        if(!eleName.toString().equals(endEleName)){
            throw new ParseException("前后eleName不同"+eleName.toString()+" "+endEleName);
        }
    }
    /**
     * Skips whitespace and returns the first non-whitespace character.
     *
     * <p>If the input ends first, the last whitespace character read is
     * returned, or a space if nothing could be read at all.
     *
     * @param in the input to read from
     * @return the first non-whitespace character, or a whitespace character at end of input
     * @throws IOException if reading fails
     */
    public static char firstNonNull(RInputStream in) throws IOException {
        char latest = ' ';
        for (int code = in.read(); code != -1; code = in.read()) {
            latest = (char) code;
            if (!space(latest)) {
                return latest;
            }
        }
        return latest;
    }
    /**
     * Reads the name of an end tag, up to and including its {@code >}.
     *
     * <p>The stream is expected to be positioned after the {@code </}.
     * Whitespace is allowed before the name and between the name and the
     * {@code >}. If the input ends early, the characters collected so far are
     * returned.
     *
     * @param in the input to read from
     * @return the end tag's element name
     * @throws ParseException if {@code <} or {@code >} appears where the name
     *         should start, {@code <} appears inside the name, or anything other
     *         than {@code >} follows the name
     * @throws IOException if reading fails
     */
    public static String parseEnd(InputStream in) throws ParseException,IOException{
        StringBuilder name = new StringBuilder();
        int code;

        // Phase 1: skip leading whitespace and take the first character of the name.
        while ((code = in.read()) != -1) {
            char ch = (char) code;
            if (space(ch)) {
                continue;
            }
            if (ch == '>' || ch == '<') {
                throw new ParseException("未获取到eleName 获取< , >已经结束");
            }
            name.append(ch);
            break;
        }
        if (code == -1) {
            return name.toString();
        }

        // Phase 2: collect the remaining name characters until whitespace or '>'.
        while ((code = in.read()) != -1) {
            char ch = (char) code;
            if (space(ch)) {
                break;
            }
            if (ch == '<') {
                throw new ParseException("两个连续的<");
            }
            if (ch == '>') {
                return name.toString();
            }
            name.append(ch);
        }
        if (code == -1) {
            return name.toString();
        }

        // Phase 3: only whitespace may precede the closing '>'.
        while ((code = in.read()) != -1) {
            char ch = (char) code;
            if (space(ch)) {
                continue;
            }
            if (ch != '>') {
                throw new ParseException("需要> d但发现"+ch);
            }
            return name.toString();
        }
        return name.toString();
    }
    /**
     * Tells whether a character counts as whitespace for this parser:
     * space, tab, line feed or carriage return.
     *
     * @param c the character to test
     * @return {@code true} for one of the four whitespace characters
     */
    public static boolean space(char c){
        switch (c) {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                return true;
            default:
                return false;
        }
    }

    /**
     * Demo entry point: parses the class-path resource {@code /pom.xml} and
     * prints the resulting element tree.
     *
     * @param args unused
     * @throws IOException if reading the resource fails
     */
    public static void main(String[] args) throws IOException {
        InputStream resource = XmlUtil.class.getResourceAsStream("/pom.xml");
        if (resource == null) {
            // getResourceAsStream returns null when the resource is absent.
            error("未找到资源 /pom.xml");
            return;
        }
        Element element = parse(new RInputStream(resource));
        if (element == null) {
            // parse has already reported why the document could not be read.
            return;
        }
        element.print(null);
    }
}
package httpclient;

import java.util.HashMap;
import java.util.List;

/**
 * One node of a parsed document: a name, a map of attributes and a value.
 *
 * <p>The value is either a text value (when {@link #isPlainValue()} is
 * {@code true}) or a {@link List} of child {@code Element}s.
 */
public class Element {
    private String elementName;
    /** A text value when {@code isPlainValue} is set, otherwise a list of children. */
    private Object value;
    private HashMap<String,String> attributes;
    private boolean isPlainValue;

    /** @return {@code true} if the value is text rather than child elements */
    public boolean isPlainValue() {
        return isPlainValue;
    }

    /** @param plainValue {@code true} to mark the value as text */
    public void setPlainValue(boolean plainValue) {
        isPlainValue = plainValue;
    }

    /**
     * @param elementName the tag name; may be {@code null}
     * @param value       the text or the list of children; may be {@code null}
     * @param attributes  the attribute map; may be {@code null}
     */
    public Element(String elementName, Object value, HashMap<String, String> attributes) {
        this.elementName = elementName;
        this.value = value;
        this.attributes = attributes;
    }

    /**
     * Finds the first direct child with the given name.
     *
     * @param childName the name to look for
     * @return the matching child, or {@code null} if there is none, if this
     *         element holds text, or if it has no value
     */
    Element getChildByName(String childName){
        if(isPlainValue || childName==null){
            return null;
        }
        for(Object child:children()){
            // equals is called on childName so a child without a name cannot cause a NullPointerException.
            if(child instanceof Element && childName.equals(((Element) child).getElementName())){
                return (Element) child;
            }
        }
        return null;
    }

    /**
     * Prints this element and its descendants to standard output, one item per
     * line, indented with one tab per nesting level. Sibling children are
     * separated by a line of dashes.
     *
     * @param i the indentation depth; {@code null} means 0
     */
    public void print(Integer i){
        int depth = (i==null) ? 0 : i;
        indentation(depth,this.elementName);
        if(this.attributes!=null && !this.attributes.isEmpty())
            indentation(depth,this.attributes.toString());
        if(isPlainValue){
            // String.valueOf tolerates a value that was never set.
            indentation(depth,String.valueOf(this.value));
            return;
        }
        List<?> list = children();
        for(int m=0;m<list.size();m++){
            Object child = list.get(m);
            if(child instanceof Element)
                ((Element) child).print(depth+1);
            if(m!=list.size()-1)
                indentation(depth+1,"-------------");
        }
    }

    /** Returns the value as a list of children, or an empty list when the value is not a list. */
    private List<?> children(){
        if(this.value instanceof List){
            return (List<?>) this.value;
        }
        return java.util.Collections.emptyList();
    }

    /** Prints {@code string} on its own line, preceded by {@code i} tabs (none if {@code i} is not positive). */
    private void indentation(int i,String string){
        for(int j=0;j<i;j++)System.out.print('\t');
        System.out.println(string);
    }

    public String getElementName() {
        return elementName;
    }

    public void setElementName(String elementName) {
        this.elementName = elementName;
    }

    public Object getValue() {
        return value;
    }

    public void setValue(Object value) {
        this.value = value;
    }

    public HashMap<String, String> getAttributes() {
        return attributes;
    }

    public void setAttributes(HashMap<String, String> attributes) {
        this.attributes = attributes;
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值