2021SC@SDUSC
engine部分。
后端核心engine部分架构:
本篇分析JsonParser。
JsonParser是底层JSON解析器,类似于Java中用StAX解析XML的方式,但JsonParser只解析JSON。JsonParser相较于ObjectMapper更底层,因此解析速度更快,但使用起来相对复杂。
一般情况下,要创建JsonParser需先创建JsonFactory。JsonFactory用于创建JsonParser实例,应该包含有几个createParser方法,实现对不同json来源的接收。
比如:
// Build a Jackson parser over an in-memory JSON string.
final String carJson = "{ \"brand\" : \"Mercedes\", \"doors\" : 5 }";
final JsonParser parser = new JsonFactory().createParser(carJson);
创建完JsonParser实例后,可以用其解析json数据。JsonParser工作方式是将JSON分解成一系列标记(token),逐个迭代这些标记进行解析。
分解方法:
// Tokenise an in-memory JSON string and print every token the parser hands back.
final String carJson = "{ \"brand\" : \"Mercedes\", \"doors\" : 5 }";
final JsonParser parser = new JsonFactory().createParser(carJson);

// Keep pulling tokens for as long as the parser does not report itself closed.
while (!parser.isClosed()) {
    System.out.println("jsonToken = " + parser.nextToken());
}
只要JsonParser的isClosed()方法返回false,就说明源json中仍有标记没有被解析。
通过JsonParser的nextToken()方法获得JsonToken,可以检查JsonToken实例的类型。
如果标记指针指向的是字段,JsonParser的getCurrentName()方法返回当前字段名称。getValueAsString() 返回当前标记值的字符串类型,同理 getValueAsInt()返回整型值,其他方法还有返回boolean, short, long, float, double 等类型。
本项目中使用的JsonParser :
package cn.edu.sdu.common;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Small hand-written JSON reader that works on index ranges of the input string.
 *
 * <p>Result mapping: objects become {@code Map<String, Object>} (a {@link HashMap}),
 * arrays become {@code List<Object>}, strings become {@link String}, {@code true} /
 * {@code false} become {@link Boolean}, {@code null} becomes {@code null}, and every
 * other scalar is handed to {@link Double#parseDouble(String)}.
 *
 * <p>Structural characters ({@code : , { } [ ]}) that appear inside string literals are
 * ignored while scanning. String contents are returned exactly as written between the
 * quotes; escape sequences are recognised only to find the end of a string and are not
 * decoded.
 */
public class JsonParser {
    private final String json;

    /**
     * @param json the JSON text to parse; it is stored as-is and parsed lazily by {@link #parse()}
     */
    public JsonParser(String json) {
        this.json = json;
    }

    /**
     * Parses the whole input.
     *
     * @return the parsed value (see the class description for the type mapping)
     * @throws IllegalArgumentException if the input, or a position where a value is expected,
     *         contains only whitespace, or if an object/array bracket is never closed
     * @throws NumberFormatException if a scalar is neither a string, a literal nor a number
     */
    public Object parse() {
        CharsRange trimmedJson = newRange(0, json.length()).trim();
        return processValue(trimmedJson);
    }

    /** Converts an object segment (including its surrounding braces) into a map. */
    private Object processPlainObject(CharsRange range) {
        Map<String, Object> object = new HashMap<>();
        for (Property prop : processProperties(newRange(range.start + 1, range.end - 1))) {
            object.put(prop.name, prop.value);
        }
        return object;
    }

    /**
     * Splits the inside of an object into name/value pairs.
     *
     * @param range the characters between the object's braces
     * @return the properties in source order
     */
    private List<Property> processProperties(CharsRange range) {
        List<Property> properties = new ArrayList<>();
        int nameStartMark = range.start;
        int i = range.start;
        while (i < range.end) {
            char ch = json.charAt(i);
            if (ch == '"') {
                // A quoted key may itself contain ':'; jump over the whole literal.
                i = skipString(i, range.end);
            } else if (ch == ':') {
                CharsRange nameToken = newRange(nameStartMark, i).trim();
                AtomicInteger readCursor = new AtomicInteger();
                CharsRange valueSegment = findNextValue(newRange(i + 1, range.end), readCursor);
                // Drop the first and last character of the name token (its quotes).
                String name = newRange(nameToken.start + 1, nameToken.end - 1).toString();
                properties.add(Property.of(name, processValue(valueSegment)));
                // readCursor sits on the ',' separator (or on range.end); resume right after it.
                i = readCursor.intValue() + 1;
                nameStartMark = i;
            } else {
                i++;
            }
        }
        return properties;
    }

    /** Converts an array segment (including its surrounding brackets) into a list. */
    private List<?> processArray(CharsRange range) {
        return processElements(newRange(range.start + 1, range.end - 1));
    }

    /**
     * Splits the inside of an array into its elements.
     *
     * @param range the characters between the array's brackets
     * @return the parsed elements in source order; empty when the range holds only whitespace
     */
    private List<?> processElements(CharsRange range) {
        List<Object> array = new ArrayList<>();
        if (range.isBlank()) {
            // "[ ]" has no elements; without this guard trimLeft() would reject the blank range.
            return array;
        }
        int elementStartMark = range.start;
        while (elementStartMark < range.end) {
            AtomicInteger readCursor = new AtomicInteger();
            CharsRange elementSegment = findNextValue(newRange(elementStartMark, range.end), readCursor);
            array.add(processValue(elementSegment));
            elementStartMark = readCursor.intValue() + 1;
        }
        return array;
    }

    /**
     * Locates the value that starts at the beginning of {@code chars}.
     *
     * @param chars the range to search; leading whitespace is skipped
     * @param readCursor receives the index of the ',' that follows the value, or
     *        {@code chars.end} when no separator follows
     * @return the value segment, trimmed
     * @throws IllegalArgumentException if {@code chars} contains only whitespace
     */
    private CharsRange findNextValue(CharsRange chars, AtomicInteger readCursor) {
        CharsRange trimChars = chars.trimLeft();
        char first = trimChars.relativeChar(0);
        if (first == '{') {
            return completeSymbolPair(trimChars, readCursor, "{}");
        }
        if (first == '[') {
            return completeSymbolPair(trimChars, readCursor, "[]");
        }
        // For a string value, start looking for the separator only after its closing quote.
        int i = first == '"' ? skipString(trimChars.start, trimChars.end) : trimChars.start + 1;
        while (i < trimChars.end && json.charAt(i) != ',') {
            i++;
        }
        readCursor.set(i);
        return newRange(trimChars.start, i).trim();
    }

    /**
     * Finds the end of a bracketed value by counting nested brackets of the same kind.
     *
     * @param trimChars range whose first character is the opening bracket
     * @param readCursor receives the index of the ',' after the value, or {@code trimChars.end}
     * @param symbolPair two characters: the opening and the closing bracket
     * @return the segment from the opening bracket through its matching closing bracket
     * @throws IllegalArgumentException if the opening bracket is never closed within the range
     */
    private CharsRange completeSymbolPair(CharsRange trimChars, AtomicInteger readCursor, String symbolPair) {
        char leftSymbol = symbolPair.charAt(0);
        char rightSymbol = symbolPair.charAt(1);
        int symbolsScore = 1;
        CharsRange valueSegment = null;
        int i = trimChars.start + 1;
        while (i < trimChars.end) {
            char ch = json.charAt(i);
            if (ch == '"') {
                // Brackets inside string literals must not affect the nesting depth.
                i = skipString(i, trimChars.end);
                continue;
            }
            if (ch == leftSymbol) {
                symbolsScore++;
            } else if (ch == rightSymbol) {
                symbolsScore--;
            }
            i++;
            if (symbolsScore == 0) {
                valueSegment = newRange(trimChars.start, i);
                break;
            }
        }
        if (valueSegment == null) {
            throw new IllegalArgumentException(
                    "unbalanced '" + symbolPair + "' starting at index " + trimChars.start);
        }
        while (i < trimChars.end && json.charAt(i) != ',') {
            i++;
        }
        readCursor.set(i);
        return valueSegment;
    }

    /**
     * Returns the index just past the string literal that opens at {@code openQuoteIndex}.
     * A backslash causes the following character to be skipped, so an escaped quote does
     * not end the literal. Returns {@code limit} when no closing quote is found before it.
     */
    private int skipString(int openQuoteIndex, int limit) {
        int i = openQuoteIndex + 1;
        while (i < limit) {
            char ch = json.charAt(i);
            if (ch == '\\') {
                i += 2;
            } else if (ch == '"') {
                return i + 1;
            } else {
                i++;
            }
        }
        return limit;
    }

    /**
     * Converts a trimmed value segment into its Java representation, dispatching on the
     * first character and then on the literals {@code true}, {@code false} and {@code null}.
     */
    private Object processValue(CharsRange valueSegment) {
        char first = valueSegment.relativeChar(0);
        if (first == '"') {
            return newRange(valueSegment.start + 1, valueSegment.end - 1).toString();
        }
        if (first == '{') {
            return processPlainObject(valueSegment);
        }
        if (first == '[') {
            return processArray(valueSegment);
        }
        if (valueSegment.equalsString("true")) {
            return true;
        }
        if (valueSegment.equalsString("false")) {
            return false;
        }
        if (valueSegment.equalsString("null")) {
            return null;
        }
        return Double.parseDouble(valueSegment.toString());
    }

    /** Name/value pair collected while reading an object. */
    static class Property {
        final String name;
        final Object value;

        Property(String name, Object value) {
            this.name = name;
            this.value = value;
        }

        static Property of(String name, Object value) {
            return new Property(name, value);
        }
    }

    /** Creates a range over {@code [start, end)} of the input string. */
    CharsRange newRange(int start, int end) {
        return new CharsRange(start, end);
    }

    /** Half-open index window {@code [start, end)} over the parser's input string. */
    class CharsRange {
        final int start;
        final int end;

        CharsRange(int start, int end) {
            this.start = start;
            this.end = end;
        }

        /** Returns true when the range is empty or holds only whitespace. */
        boolean isBlank() {
            for (int i = start; i < end; i++) {
                if (!Character.isWhitespace(json.charAt(i))) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Returns a range that starts at the first non-whitespace character.
         *
         * @throws IllegalArgumentException if the range holds no non-whitespace character
         */
        CharsRange trimLeft() {
            for (int i = start; i < end; i++) {
                if (!Character.isWhitespace(json.charAt(i))) {
                    return newRange(i, end);
                }
            }
            throw new IllegalArgumentException("illegal blank string!");
        }

        /**
         * Returns a range that ends right after the last non-whitespace character.
         *
         * @throws IllegalArgumentException if the range holds no non-whitespace character
         */
        CharsRange trimRight() {
            for (int i = end - 1; i >= start; i--) {
                if (!Character.isWhitespace(json.charAt(i))) {
                    return newRange(start, i + 1);
                }
            }
            throw new IllegalArgumentException("illegal blank string!");
        }

        CharsRange trim() {
            return this.trimLeft().trimRight();
        }

        /** Returns the character at {@code index} positions after the start of the range. */
        char relativeChar(int index) {
            return json.charAt(start + index);
        }

        /**
         * Compares the whole range with {@code str}, ignoring case. The lengths must match,
         * so a range that merely begins with {@code str} is not considered equal.
         */
        public boolean equalsString(String str) {
            return end - start == str.length()
                    && json.regionMatches(true, start, str, 0, str.length());
        }

        @Override
        public String toString() {
            return json.subSequence(start, end).toString();
        }
    }
}
构造方法中传入待解析的json字符串:
/**
 * Creates a parser over the given JSON text. The string is only stored here;
 * no parsing happens in the constructor.
 *
 * @param json the JSON text to parse
 */
public JsonParser(String json) {
this.json = json;
}
parse入口方法
return 解析完成的对象
/**
 * Entry point: trims surrounding whitespace from the whole input and converts
 * what remains into a value.
 *
 * @return the parsed value
 */
public Object parse() {
    final int inputLength = json.length();
    return processValue(newRange(0, inputLength).trim());
}
私有方法processPlainObject接收一个CharsRange对象,表示待处理的字符范围(包含首尾花括号的整个json对象),并返回由属性名和属性值组成的Map:
/**
 * Turns an object segment into a map of property names to values.
 * The first and last characters of the range are excluded before the
 * properties are read.
 *
 * @param range the object segment, including its enclosing characters
 * @return a {@code HashMap} holding one entry per property
 */
private Object processPlainObject(CharsRange range) {
    CharsRange body = newRange(range.start + 1, range.end - 1);
    Map<String, Object> result = new HashMap<>();
    for (Property property : processProperties(body)) {
        result.put(property.name, property.value);
    }
    return result;
}
CharsRange作为内部类实现:
/**
 * Half-open index window {@code [start, end)} over the enclosing parser's
 * {@code json} string. Instances are immutable; the trimming methods return
 * new ranges.
 */
class CharsRange {
    final int start;
    final int end;

    CharsRange(int start, int end) {
        this.start = start;
        this.end = end;
    }

    /**
     * Returns a range that starts at the first non-whitespace character.
     *
     * @throws IllegalArgumentException if the range holds no non-whitespace character
     */
    CharsRange trimLeft() {
        for (int i = start; i < end; i++) {
            if (!Character.isWhitespace(json.charAt(i))) {
                return newRange(i, end);
            }
        }
        throw new IllegalArgumentException("illegal blank string!");
    }

    /**
     * Returns a range that ends right after the last non-whitespace character.
     *
     * @throws IllegalArgumentException if the range holds no non-whitespace character
     */
    CharsRange trimRight() {
        for (int i = end - 1; i >= start; i--) {
            if (!Character.isWhitespace(json.charAt(i))) {
                return newRange(start, i + 1);
            }
        }
        throw new IllegalArgumentException("illegal blank string!");
    }

    /** Trims whitespace from both ends. */
    CharsRange trim() {
        return this.trimLeft().trimRight();
    }

    /** Returns the character at {@code index} positions after the start of the range. */
    char relativeChar(int index) {
        return json.charAt(start + index);
    }

    /**
     * Compares the whole range with {@code str}, ignoring case. The lengths must
     * match: a prefix-only comparison would accept text such as {@code truex}
     * as {@code true}.
     */
    public boolean equalsString(String str) {
        return end - start == str.length()
                && json.regionMatches(true, start, str, 0, str.length());
    }

    @Override
    public String toString() {
        return json.subSequence(start, end).toString();
    }
}
processProperties方法逐字符扫描对象内部,以冒号为界匹配属性名和属性值:
/**
 * Splits the inside of an object into name/value pairs.
 *
 * <p>A ':' found outside a quoted string ends the property name. The value that
 * follows is delimited by {@code findNextValue}, which also reports where the
 * next separator is. Characters inside a quoted name are skipped, so a name
 * containing ':' is not split.
 *
 * @param range the characters between the object's braces
 * @return the properties in source order
 */
private List<Property> processProperties(CharsRange range) {
    List<Property> properties = new ArrayList<>();
    int nameStartMark = range.start;
    boolean insideString = false;
    for (int i = range.start; i < range.end; i++) {
        char ch = json.charAt(i);
        if (insideString) {
            if (ch == '\\') {
                i++; // the escaped character can never close the string
            } else if (ch == '"') {
                insideString = false;
            }
            continue;
        }
        if (ch == '"') {
            insideString = true;
            continue;
        }
        if (ch == ':') {
            CharsRange nameToken = newRange(nameStartMark, i).trim();
            AtomicInteger readCursor = new AtomicInteger();
            CharsRange valueSegment = findNextValue(newRange(i + 1, range.end), readCursor);
            // Drop the first and last character of the name token (its quotes).
            final String name = newRange(nameToken.start + 1, nameToken.end - 1).toString();
            final Object value = processValue(valueSegment);
            properties.add(Property.of(name, value));
            // readCursor sits on the separator (or on range.end). Park i there so the loop
            // increment lands on the first character of the next name; skipping one more
            // character would step over its opening quote and desynchronise insideString.
            nameStartMark = readCursor.intValue() + 1;
            i = readCursor.intValue();
        }
    }
    return properties;
}
List存储property相关:
private List<Property> processProperties(CharsRange range)
private List<?> processArray(CharsRange range)
private List<?> processElements
processValue方法使用分支结构,根据值的首字符或字面量(true、false、null)匹配并检测值的类型:
(整个处理部分其实可以优化)
/**
 * Converts a value segment into its Java representation.
 *
 * <p>The first character selects string, object or array handling. Otherwise
 * the segment is compared with {@code true}, {@code false} and {@code null},
 * and anything else is parsed as a double.
 *
 * @param valueSegment the segment holding exactly one value
 * @return a String, Map, List, Boolean, Double or {@code null}
 */
private Object processValue(CharsRange valueSegment) {
    switch (valueSegment.relativeChar(0)) {
        case '"':
            // Strip the first and last character of the segment (the quotes).
            return newRange(valueSegment.start + 1, valueSegment.end - 1).toString();
        case '{':
            return processPlainObject(valueSegment);
        case '[':
            return processArray(valueSegment);
        default:
            break;
    }
    if (valueSegment.equalsString("true")) {
        return true;
    }
    if (valueSegment.equalsString("false")) {
        return false;
    }
    if (valueSegment.equalsString("null")) {
        return null;
    }
    return Double.parseDouble(valueSegment.toString());
}
查看当前的json定义:
主要是文档的名称、作者以及朝代等基础信息,预计后期开发还会进一步修改。根据新调整的xml标准,后期还要重新制定。
{
"document_info":{
"name":"文章名称",
"Author": "作者名称",
"dynasty": "朝代",
"sections": ["Section.id", "Section.id", "Section.id", "......"],
"......": "......"
},
"data":{
"Page": [
{
"id": "",
"image": ""
}
],
"Char": [
{
"id": "",
"page": "Page.id",
"points": [[0,0],[0,0],"..."],
"x":0,
"y":0,
"word":"",
"sentence":"",
"paragraph":"",
"section":""
}
],
"Word": [
{
"id": "",
"page": "Page.id",
"chars": ["Char.id", "Char.id", "Char.id", "......"]
}
],
"Sentence": [
{
"id": "",
"page": "Page.id",
"words": ["Word.id", "Word.id", "Word.id", "......"]
}
],
"Paragraph": [
{
"id": "",
"page": "Page.id",
"sentences": ["Sentence.id", "Sentence.id", "Sentence.id", "......"]
}
],
"Section": [
{
"id": "",
"page": "Page.id",
"paragraphs": ["Paragraph.id", "Paragraph.id", "Paragraph.id", "......"]
}
]
}
}