在项目中可能会出现这样的场景:需要从一段文本中解析出数据,
例如:需要从下文找出标红的数据
FSI/*CX
S KA 909Y22MAR PEK1630 2020HKG0X 333
S CX 806Y23MAR HKG1150 1315ORD0S 77W
01 YOW2+YX2 17758 CNY INCL TAX
*SYSTEM DEFAULT-CHECK OPERATING CARRIER
*INTERLINE AGREEMENT PRICING APPLIED
*ATTN PRICED ON 21JAN14*1307
BJS
XHKG YOW2 NVB NVA22MAR 2PC
CHI YX2 NVB NVA22MAR 2PC
FARE CNY 16480
TAX CNY 90CN CNY 94HK CNY 1094XT
TOTAL CNY 17758
22MAR14BJS KA X/HKG563.99CX CHI Q4.25 2140.91NUC2709.15END R
OE6.081590
XT CNY 106US CNY 31XA CNY 43XY CNY 34YC CNY 880YR
ENDOS 02 *T1
*AUTO BAGGAGE INFORMATION AVAILABLE - SEE FSB
RFSONLN/1E /EFEP_13/FCC=T/
通过下面这个解析类,可以实现我们的功能,主要用到了正则表达式的()捕获功能
package cn.test;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Extracts tax amounts, Q values, the ROE figure and a rate value from a block of
 * multi-line text using regular-expression capture groups.
 *
 * <p>Instances keep no state between calls: every method derives its result solely from the
 * text it is given and returns a fresh object. Earlier revisions accumulated results in
 * instance fields, so calling {@link #getQTax(String)} or {@link #getTax(String)} twice
 * returned duplicated or stale entries, and {@link #getROE(String)} could return the value
 * found by a previous call.
 *
 * <p>All methods treat a {@code null} text like a text with no matches.
 */
public class QTaxParser1 {
    private static final Logger log = LoggerFactory.getLogger(QTaxParser1.class);

    // Patterns are compiled once; all use MULTILINE so that '^' anchors at every line start.

    /** A line that starts with a 1-2 digit day followed by a month abbreviation, e.g. "22MAR14BJS ...". */
    private static final Pattern QTAX_LINE = Pattern.compile(
            "^[0-9]{1,2}(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC).*", Pattern.MULTILINE);

    /** A line that starts with "TAX". */
    private static final Pattern TAX_LINE = Pattern.compile("^TAX.*", Pattern.MULTILINE);

    /** Digits immediately followed by upper-case letters, e.g. "90CN": group 1 = amount, group 2 = code. */
    private static final Pattern AMOUNT_WITH_CODE =
            Pattern.compile("([0-9]+)([A-Z]+) *", Pattern.MULTILINE);

    /** "Q" followed by a decimal number, e.g. "Q4.25": group 1 = the number. */
    private static final Pattern Q_VALUE =
            Pattern.compile("Q([0-9]+\\.{0,1}[0-9]*)", Pattern.MULTILINE);

    /**
     * The letters R, O, E followed by a decimal number, with optional whitespace (including line
     * breaks) between any of the characters, e.g. "END R\nOE6.081590 ". Whitespace is required
     * before the "R" and after the number. Group 1 = the number, possibly containing whitespace.
     */
    private static final Pattern ROE_VALUE = Pattern.compile(
            "\\s+R\\s*O\\s*E\\s*(([0-9]\\s*)+(\\.\\s*){0,1}([0-9]\\s*)*)\\s+", Pattern.MULTILINE);

    /** "=" followed by a decimal number: group 1 = the number. */
    private static final Pattern RATE_VALUE =
            Pattern.compile("=([0-9]+\\.{0,1}[0-9]*)", Pattern.MULTILINE);

    /**
     * Parses the first line starting with "TAX" into a map of tax code to amount.
     *
     * <p>For the line {@code TAX CNY 90CN CNY 94HK CNY 1094XT} the result is
     * {@code {CN=90.0, HK=94.0, XT=1094.0}}. If a code occurs more than once on the line,
     * the last amount wins.
     *
     * @param txt the text to scan; may be {@code null}
     * @return a new mutable map, empty when there is no TAX line or it contains no amounts
     */
    public Map<String, Double> getTax(String txt) {
        Map<String, Double> tax = new HashMap<String, Double>();
        String taxLine = firstMatch(TAX_LINE, txt);
        if (taxLine == null) {
            return tax;
        }
        Matcher m = AMOUNT_WITH_CODE.matcher(taxLine);
        while (m.find()) {
            tax.put(m.group(2), Double.valueOf(m.group(1)));
        }
        return tax;
    }

    /**
     * Collects every {@code Q<number>} value from the first line that starts with a day and
     * month abbreviation (e.g. {@code 22MAR14BJS ... Q4.25 ...}).
     *
     * <p>The text must also contain a line starting with "TAX"; otherwise nothing is extracted.
     *
     * @param txt the text to scan; may be {@code null}
     * @return a new mutable list of the Q values in order of appearance, possibly empty
     */
    public List<Double> getQTax(String txt) {
        List<Double> qTax = new ArrayList<Double>();
        // Q values are only extracted from text that also carries a TAX line.
        if (firstMatch(TAX_LINE, txt) == null) {
            return qTax;
        }
        String qLine = firstMatch(QTAX_LINE, txt);
        if (qLine == null) {
            return qTax;
        }
        Matcher m = Q_VALUE.matcher(qLine);
        while (m.find()) {
            qTax.add(Double.valueOf(m.group(1)));
        }
        return qTax;
    }

    /**
     * Finds the first ROE figure in the text.
     *
     * <p>The letters and digits may be separated by whitespace or split across a line break,
     * as in {@code ...END R} followed on the next line by {@code OE6.081590}; that whitespace
     * is removed before the number is parsed.
     *
     * @param txt the text to scan; may be {@code null}
     * @return the ROE value, or {@code null} when the text contains none
     */
    public Double getROE(String txt) {
        if (txt == null) {
            return null;
        }
        Matcher m = ROE_VALUE.matcher(txt);
        if (!m.find()) {
            return null;
        }
        // The captured number may contain spaces or line breaks; strip them before parsing.
        return Double.valueOf(m.group(1).replaceAll("\\s+", ""));
    }

    /**
     * Reports whether the text contains a line starting with a day and month abbreviation
     * (the same kind of line {@link #getQTax(String)} reads Q values from).
     *
     * @param txt the text to scan; may be {@code null}
     * @return {@code true} if such a line exists
     */
    public boolean isTaxPage(String txt) {
        String matched = firstMatch(QTAX_LINE, txt);
        if (matched == null) {
            return false;
        }
        log.debug("TAX Match:{}", matched);
        return true;
    }

    /**
     * Returns the first number that directly follows an "=" sign in the text.
     *
     * @param txt the text to scan; may be {@code null}
     * @return the number as it appears in the text, or {@code null} when there is none
     */
    public String getRateValue(String txt) {
        if (txt == null) {
            return null;
        }
        Matcher m = RATE_VALUE.matcher(txt);
        return m.find() ? m.group(1) : null;
    }

    /**
     * Returns the whole text of the first match of {@code pattern} in {@code txt}.
     *
     * @return the matched text, or {@code null} when {@code txt} is {@code null} or has no match
     */
    private static String firstMatch(Pattern pattern, String txt) {
        if (txt == null) {
            return null;
        }
        Matcher m = pattern.matcher(txt);
        return m.find() ? m.group() : null;
    }
}
测试用例
package itour.cn.fare.gateway;
import cn.test.QTaxParser1;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
/**
 * Console demo for {@link QTaxParser1}: feeds it a fixed sample text and prints the parsed
 * tax map, Q values and ROE as JSON.
 */
public class QTaxParserTest {

    /** The sample text, one entry per line; trailing spaces are part of the data. */
    private static final String[] SAMPLE_LINES = {
        " FSICH/*CX ",
        "S KA 909Y22MAR PEK1630 2020HKG0X 333 ",
        "S CX 806Y23MAR HKG1150 1315ORD0S 77W ",
        "01 YOW2+YX2 CH 13464 CNY INCL TAX",
        "*SYSTEM DEFAULT-CHECK OPERATING CARRIER ",
        "*INTERLINE AGREEMENT PRICING APPLIED",
        "*ACCOMPANIED VALIDATION-ALL PAX MUST BE TKTD AT SAME TIME ",
        "*VERIFY AGE REQUIREMENTS",
        "*ATTN PRICED ON 21JAN14*1158",
        "BJS",
        "XHKG YOW2 CH25 NVB NVA22MAR 2PC ",
        " CHI YX2 CH25 NVB NVA22MAR 2PC ",
        "FARE CNY 12370 ",
        "TAX EXEMPT CN CNY 106US CNY 988XT",
        "TOTAL CNY 13464 ",
        "22MAR14BJS KA X/HKG422.99CX CHI Q4.25 1605.68NUC2032.92END R",
        "OE6.081590 ",
        "XT CNY 31XA CNY 43XY CNY 34YC CNY 880YR ",
        "ENDOS 02 *T1",
        "*AUTO BAGGAGE INFORMATION AVAILABLE - SEE FSB ",
        "RFSONLN/1E /EFEP_23/FCC=T/"
    };

    /** Joins the sample lines with "\n" separators (no trailing newline). */
    private static String sampleText() {
        StringBuilder joined = new StringBuilder();
        for (int idx = 0; idx < SAMPLE_LINES.length; idx++) {
            if (idx > 0) {
                joined.append("\n");
            }
            joined.append(SAMPLE_LINES[idx]);
        }
        return joined.toString();
    }

    public static void main(String[] args) {
        String txt = sampleText();
        QTaxParser1 parser = new QTaxParser1();

        // Tax code -> amount map parsed from the TAX line.
        String taxJson = JSONObject.fromObject(parser.getTax(txt)).toString();
        System.out.println(taxJson);

        // Q values parsed from the line that starts with the date.
        String qTaxJson = JSONArray.fromObject(parser.getQTax(txt)).toString();
        System.out.println(qTaxJson);

        // ROE figure, which in this sample is split across two lines ("...END R" / "OE6.081590").
        String roeJson = JSONArray.fromObject(parser.getROE(txt)).toString();
        System.out.println(roeJson);
    }
}