// 《搜索引擎零距离》IRQL语言的解析 — "Search Engines Up Close": parsing of the IRQL query language.

package com.rayeen.spider.vertical.util;

import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

import com.rayeen.spider.vertical.ParalleIRVirtualMachine;
import com.rayeen.spider.vertical.algorithm.AddFunction;
import com.rayeen.spider.vertical.algorithm.ClearTagFunction;
import com.rayeen.spider.vertical.algorithm.DoubleParameterFunction;
import com.rayeen.spider.vertical.algorithm.EqNullOperator;
import com.rayeen.spider.vertical.algorithm.EqOperator;
import com.rayeen.spider.vertical.algorithm.EqStringOperator;
import com.rayeen.spider.vertical.algorithm.FullUrlFunction;
import com.rayeen.spider.vertical.algorithm.Function;
import com.rayeen.spider.vertical.algorithm.MaxlengthFunction;
import com.rayeen.spider.vertical.algorithm.Operator;
import com.rayeen.spider.vertical.algorithm.RecursiveFunction;
import com.rayeen.spider.vertical.algorithm.ReplaceFunction;
import com.rayeen.spider.vertical.algorithm.SprintfFunction;
import com.rayeen.spider.vertical.algorithm.UneqNullOperator;
import com.rayeen.spider.vertical.algorithm.UneqOperator;
import com.rayeen.spider.vertical.algorithm.UneqStringOperator;
import com.rayeen.spider.vertical.algorithm.UniParameterFunction;
import com.rayeen.spider.vertical.auxiliary.CrawlResultSetCollection;
import com.rayeen.spider.vertical.auxiliary.SemanticException;
import com.rayeen.spider.vertical.auxiliary.TableMerge;
import com.rayeen.spider.vertical.constant.ArgumentType;
import com.rayeen.spider.vertical.constant.ConfConstant;
import com.rayeen.spider.vertical.constant.ErrorType;
import com.rayeen.spider.vertical.constant.FunctionConstant;

// Parses the "select ... where ..." section of an IRQL query and joins the
// crawl result tables it references into final result rows.
public class ResutTree {

static final Logger LOG = Logger.getLogger(ResutTree.class);

// One prototype instance per IRQL function name; getFunctionInstance()
// creates fresh copies from these (populated in the static block below).
static Map<String, Function> FunctionNameMap = new ConcurrentHashMap<String, Function>();

// Names of single-argument functions (e.g. fullurl(f), cleartag(f)).
static Set<String> uniParameterFunction = new HashSet<String>();

// Names of two-argument functions (e.g. maxlength(f, n), add(f, n)).
static Set<String> doubleParameterFunction = new HashSet<String>();

static {
uniParameterFunction.add(FunctionConstant.FULL_URL);
uniParameterFunction.add(FunctionConstant.CLEAR_TAG);

doubleParameterFunction.add(FunctionConstant.MAX_LENGTH);
doubleParameterFunction.add(FunctionConstant.ADD);

}

/**
 * Creates a fresh instance of the Function registered under the given name,
 * so each show-field gets its own stateful copy.
 *
 * <p>Fixes: guards against an unknown function name (previously a bare
 * NullPointerException), uses a parameterized {@code Class}, and logs
 * instantiation failures through the logger instead of printStackTrace().
 *
 * @param func function name as registered in FunctionNameMap
 * @return a new Function instance, or null if the name is unknown or the
 *         class cannot be instantiated
 */
static Function getFunctionInstance(String func) {

Function prototype = FunctionNameMap.get(func);
if (prototype == null) {
LOG.error("no function registered under name: " + func);
return null;
}

Class<? extends Function> cls = prototype.getClass();
try {
return cls.newInstance();
} catch (InstantiationException e) {
LOG.error("cannot instantiate function " + func, e);
} catch (IllegalAccessException e) {
LOG.error("cannot access constructor of function " + func, e);
}
return null;
}

// Matches a functional show-field such as "func(args) alias" or
// "func(args) alias:toByte": group(1)=function name, group(2)=argument list,
// group(3)=display alias (optionally suffixed with ":<modifier>").
static Pattern FUNC_PATTERN = Pattern
.compile("(\\w+)\\s*\\((.+?)\\)\\s+(\\w+(:\\w+)?)");

// NOTE(review): never read anywhere in this file — possibly dead; confirm
// before removing.
Map<String, Function> fieldFunctionMap = new ConcurrentHashMap<String, Function>();

// Display alias -> function that computes its value; populated by
// getMapResult(), consumed by filterFinalResult().
Map<String, Function> showFieldFunctionMap = new ConcurrentHashMap<String, Function>();

// Operators recognised in the "where" clause. Kept in a LinkedHashMap because
// operator detection iterates in insertion order: "!=" must be tested before
// "=" (every "!=" expression also contains "=").
static Map<String, Operator> operators = new LinkedHashMap<String, Operator>();
static {
operators.put("!=", new UneqOperator());
operators.put("=", new EqOperator());
operators.put("is", new EqNullOperator());
operators.put("not", new UneqNullOperator());

// Register one prototype per IRQL function name.
FunctionNameMap.put(FunctionConstant.FULL_URL, new FullUrlFunction());
FunctionNameMap.put(FunctionConstant.SPRINGTF, new SprintfFunction());
FunctionNameMap
.put(FunctionConstant.RECURSIVE, new RecursiveFunction());
FunctionNameMap.put(FunctionConstant.ADD, new AddFunction());
FunctionNameMap.put(FunctionConstant.REPLACE, new ReplaceFunction());
FunctionNameMap.put(FunctionConstant.CLEAR_TAG, new ClearTagFunction());
FunctionNameMap.put(FunctionConstant.MAX_LENGTH,
new MaxlengthFunction());

}

/**
 * @param rootUrl         root URL of the crawl, used to absolutise relative
 *                        links (see FullUrlFunction handling)
 * @param hierarchyResult hierarchy-table rows keyed by table name
 * @param crawlRSC        shared / independent crawl result tables
 */
public ResutTree(URL rootUrl,
Map<String, Map<String, String>> hierarchyResult,
CrawlResultSetCollection crawlRSC) {

// Keys inside curUniResultTableMap are formed as "tableName->extractName".
this.crawlRSC = crawlRSC;
this.hierarchyResult = hierarchyResult;
this.rootUrl = rootUrl;
}

/**
 * Logs {@code str} at error level and aborts processing.
 *
 * @throws SemanticException always, carrying the supplied message
 */
static void error(String str) throws SemanticException {
LOG.error(str);
throw new SemanticException(str);
}

/**
 * Logs a non-fatal problem and lets processing continue.
 *
 * <p>Fix: this helper previously logged at ERROR level despite being the
 * "warn" counterpart of {@link #error}; log at WARN so severity matches.
 */
static void warn(String str) {
LOG.warn(str);
}

// Root URL of the crawl; consumed when evaluating FullUrlFunction fields.
URL rootUrl;

// Hierarchy ("->") table rows: table name -> single row of field/value pairs.
Map<String, Map<String, String>> hierarchyResult = new HashMap<String, Map<String, String>>();

// Shared and per-page crawl result tables.
CrawlResultSetCollection crawlRSC;

// Merge-target table name -> merge descriptor (fix: was a raw ConcurrentHashMap).
Map<String, TableMerge> mergeMap = new ConcurrentHashMap<String, TableMerge>();

// Triple describing one projected column of the select list:
// page alias, internal field name, and display ("show") field name.
class Pfk {
String p;// page/table alias (e.g. "P1")

String f;// internal field name inside the table

String k;// display field name (the alias written in the select list)

public Pfk(String p, String f, String k) {
super();
this.p = p;
this.f = f;
this.k = k;

// No explicit alias: display under the internal field name.
if (StringUtils.isEmpty(k)) {
this.k = f;
}
}

public String toString() {
return p + ":" + f + ":" + k;
}
}

// One parsed "where" predicate: the operator, its arity (how many table
// fields it references) and the flattened argument list.
//   ONE -> argList = [page, field] (optionally followed by a string literal)
//   TWO -> argList = [page1, field1, page2, field2]
class Pkpk {

Operator operator;

ArgumentType argumentType;

List<String> argList;

/**
 * Different argumentType/operator combinations interpret argList
 * differently — see the class comment for the layouts.
 */
public Pkpk(ArgumentType argmentType, Operator operator,
List<String> argList) {
this.argumentType = argmentType;
this.operator = operator;
this.argList = argList;
}

public String toString() {
return argumentType + ":" + operator + ":" + argList.toString();
}

public List<String> getArgList() {
return argList;
}

public void setArgList(List<String> argList) {
this.argList = argList;
}

// NOTE: the "Argment" spelling is kept — callers throughout this file use it.
public ArgumentType getArgmentType() {
return argumentType;
}

public void setArgmentType(ArgumentType argmentType) {
this.argumentType = argmentType;
}

public Operator getOperator() {
return operator;
}

public void setOperator(Operator operator) {
this.operator = operator;
}

}

/**
 * Applies the "where" predicates to the joined rows and projects each
 * surviving row onto its display field names, evaluating function-typed
 * show fields (fullurl, sprintf, replace, cleartag, ...) along the way.
 *
 * @param mainRows joined candidate rows, keyed "pageAlias.field"
 * @param pfkList projected columns from ordinary tables
 * @param hierarchyPfkList projected columns from hierarchy ("->") tables
 * @param pkpkList parsed where-clause predicates
 * @return one map per surviving row, keyed by display field name
 * @throws SemanticException on duplicate display names
 */
private List<Map> filterFinalResult(List<Map<String, String>> mainRows,
List<Pfk> pfkList, ArrayList<Pfk> hierarchyPfkList,
List<Pkpk> pkpkList) throws SemanticException {

// Maps "pageAlias.field" -> display name.
// NOTE(review): the duplicate check tests fieldMap.containsKey(tmp.k) but
// entries are stored under tmp.p + "." + tmp.f, so duplicate display names
// are unlikely to ever be detected here — confirm intended behaviour.
Map<String, String> fieldMap = new HashMap<String, String>();
for (Pfk tmp : pfkList) {
if (fieldMap.containsKey(tmp.k)) {
error("duplicate show key :" + tmp.k);
} else {
fieldMap.put(tmp.p + "." + tmp.f, tmp.k);
}
}

for (Pfk tmp : hierarchyPfkList) {
if (fieldMap.containsKey(tmp.k)) {
error("duplicate show key :" + tmp.k);
} else {
fieldMap.put(tmp.p + "." + tmp.f, tmp.k);
}
}

List<Map> resultList = new ArrayList<Map>();
for (Map<String, String> res : mainRows) {

// A row survives only if every predicate evaluates to TRUE.
boolean fit = true;
for (Pkpk p : pkpkList) {

// Single-field predicate: field vs NULL or vs string literal.
if (p.getArgmentType() == ArgumentType.ONE) {
String p1 = p.getArgList().get(0);
String f1 = p.getArgList().get(1);
if (!p.getOperator().operator(res.get(p1 + "." + f1))
.equals(ConfConstant.TRUE)) {
fit = false;
break;
}
}

// Two-field predicate: field vs field (join condition).
if (p.getArgmentType() == ArgumentType.TWO) {
String p1 = p.getArgList().get(0);
String f1 = p.getArgList().get(1);

String p2 = p.getArgList().get(2);
String f2 = p.getArgList().get(3);

if (!p.getOperator().operator(res.get(p1 + "." + f1),
res.get(p2 + "." + f2)).equals(ConfConstant.TRUE)) {
fit = false;
break;
}
}

}


if (fit) {

// Project the row: copy plain (non-function) fields under their
// display names.
Map<String, String> tmpMap = new HashMap<String, String>();
for (String key : res.keySet()) {
if (fieldMap.containsKey(key) && !fieldMap.get(key).endsWith(":FUNCTION")) {

String value = res.get(key);

tmpMap.put(fieldMap.get(key), value);
}
}

// Add the fields produced by functions.
for (String key : showFieldFunctionMap.keySet()) {
Function func = showFieldFunctionMap.get(key);
String fieldValue = "";

if (func instanceof FullUrlFunction) {
String field = ((FullUrlFunction) func).getField();
fieldValue = res.get(field);
// tmpMap.remove(ParseUtils.parseFieldShowValue(field));
fieldValue = func.operator(new Object[] { rootUrl,
fieldValue });

} else if (func instanceof SprintfFunction) {
SprintfFunction sf = (SprintfFunction) func;
List<String> fields = sf.getFields();
List<String> args = new ArrayList<String>();
args.add(sf.getFormat());
for (String f : fields) {
args.add(res.get(f));
// tmpMap.remove(ParseUtils.parseFieldShowValue(f));
}
fieldValue = sf.operator(args.toArray());

} else if (func instanceof ReplaceFunction) {
ReplaceFunction rpf = (ReplaceFunction) func;
String field = res.get(rpf.getField());
fieldValue = rpf.operator(new Object[] { field,
rpf.getPatternStr(), rpf.getReplaceStr() });

// tmpMap.remove(ParseUtils.parseFieldShowValue(addf.getField()));
} else if (func instanceof ClearTagFunction) {
ClearTagFunction ctf = (ClearTagFunction) func;
String value = res.get(ctf.getField());
fieldValue = ctf.operator(new Object[] { value });

// tmpMap.remove(ParseUtils.parseFieldShowValue(addf.getField()));
} else if (func instanceof DoubleParameterFunction) {
// More generic matches are tried last: the ordinary-form
// functions such as Add and Maxlength.
// NOTE(review): 'parameter' is fetched but never passed to
// operator(...) below — the function only receives the field
// value. Confirm whether the parameter should be included.
String field = ((DoubleParameterFunction) func)
.getField();
String parameter = ((DoubleParameterFunction) func)
.getParameter();
fieldValue = res.get(field);
fieldValue = func.operator(new Object[] { fieldValue });

} else if (func instanceof RecursiveFunction) {

RecursiveFunction rf = (RecursiveFunction) func;
List<String> args = new ArrayList<String>();

String functions = rf.getFunctions();
args.add(functions);

String[] params = rf.getParams();

for (String f : params) {
// Field name: look up its value in the row.
if (!f.startsWith("\"")) {
args.add(res.get(f));
} else { // Quoted literal: add its content directly.
args.add(ParseUtils.parseStrContent(f));
}
// tmpMap.remove(ParseUtils.parseFieldShowValue(f));
}
fieldValue = rf.operator(args.toArray());

}

// fieldValue=showFieldFunctionMap.get(key).operator(new
// Object[]{value});
tmpMap.put(key, fieldValue);
}

resultList.add(tmpMap);
}

}
return resultList;

}

/**
 * Joins the tables referenced by the projection list (left to right, as an
 * aligned/cartesian product), merges in configured merge-tables and
 * hierarchy tables, then delegates filtering and projection to
 * {@link #filterFinalResult}.
 *
 * @param pageNameMap page alias (e.g. "P1") -> real table name
 * @param pfkList projected columns from ordinary tables
 * @param hierarchyPfkList projected columns from hierarchy ("->") tables
 * @param pkpkList parsed where-clause predicates
 * @return final result rows keyed by display field name
 * @throws SemanticException on invalid aliases or table names
 */
@SuppressWarnings("unchecked")
private List<Map> getFinalResult(Map<String, String> pageNameMap,
List<Pfk> pfkList, ArrayList<Pfk> hierarchyPfkList,
List<Pkpk> pkpkList) throws SemanticException {

long threadId = Thread.currentThread().getId();

// Sort the projection by page alias so that columns of the same table are
// processed consecutively.
Collections.sort(pfkList, new Comparator() {

public int compare(Object o1, Object o2) {
Pfk p1 = (Pfk) o1;
Pfk p2 = (Pfk) o2;
return p1.p.compareToIgnoreCase(p2.p);
}

});
//

// Page alias -> fields of that table appearing in the projection.
Map<String, Set<String>> pkfMap = new HashMap();
for (Pfk tmp : pfkList) {
if (!pkfMap.containsKey(tmp.p)) {
Set<String> set = new HashSet();
set.add(tmp.f);
pkfMap.put(tmp.p, set);
} else {
pkfMap.get(tmp.p).add(tmp.f);
}

}

Map<String, Set<String>> hierarchyPkfMap = new HashMap();
for (Pfk tmp : hierarchyPfkList) {
if (!hierarchyPkfMap.containsKey(tmp.p)) {
Set<String> set = new HashSet();
set.add(tmp.f);
hierarchyPkfMap.put(tmp.p, set);
} else {
hierarchyPkfMap.get(tmp.p).add(tmp.f);
}

}

// Records, per table, the field names referenced by the where-clause
// predicates. May be empty for a given table.
Map<String, Set<String>> pkpkMap = new HashMap();

for (Pkpk tmp : pkpkList) {

if (tmp.getArgmentType() == ArgumentType.ONE) {
if (tmp.getArgList().size() >= 2) {
String page = tmp.getArgList().get(0);
String field = tmp.getArgList().get(1);

if (!pkpkMap.containsKey(page)) {
Set<String> set = new HashSet();
pkpkMap.put(page, set);
}
// First create the set, then add to it — but only if the table
// also appears in the projection.
if (pkfMap.containsKey(page)) {
pkpkMap.get(page).add(field);
}
}

}

if (tmp.getArgmentType() == ArgumentType.TWO) {
// FIXME(review): TWO-argument predicates carry four entries
// (p1,f1,p2,f2 — see getMapResult), so this size()==2 guard can
// never be true for them; and if it ever were, get(2)/get(3)
// below would throw IndexOutOfBoundsException. The condition was
// almost certainly meant to be size() == 4 — confirm and fix.
if (tmp.getArgList().size() == 2) {
String page = tmp.getArgList().get(2);
String field = tmp.getArgList().get(3);

if (!pkpkMap.containsKey(page)) {
Set<String> set = new HashSet();
pkpkMap.put(page, set);
}

if (pkfMap.containsKey(page)) {
pkpkMap.get(page).add(field);
}
}
}

}

String curTableName = "";
String exTableName = "";
// Walk the projection left to right, e.g. "select P1.bcname bcname,
// P2.scame scame, P3.songname songname, P3.downlink downlink", joining
// table after table.

List<Map<String, String>> mainRows = new ArrayList();

for (Pfk pfk : pfkList) {

// One temporary result per projected column.
// Resolve the real table name from the alias.

if (!pageNameMap.containsKey(pfk.p)) {
ParalleIRVirtualMachine.error("invalid pagename:" + pfk.p,
ErrorType.SEMANTIC);
}
String tableName = pageNameMap.get(pfk.p);

// Merged tables are handled in the separate pass below.
if (mergeMap.containsKey(tableName)) {
continue;
}

if (null == tableName) {
ParalleIRVirtualMachine.error("invalid page alias" + pfk.p,
ErrorType.SEMANTIC);
}

curTableName = tableName;

List<Map<String, String>> rows = new ArrayList();

// If the data is not in a shared table, look in the independent
// tables. A name like "downloadPage->down" can never be found among
// the shared tables; a cleaner test would be
// if (tableName.contains("->")).
if (crawlRSC.getGlobalShareResultTableMap(tableName).size() == 0) {

// pageName->ruleName
String[] prPair = tableName.split("->");
if (prPair.length == 2) {

String pageName = prPair[0];
String ruleName = prPair[1];
// Neither the shared-table set nor the independent-table set
// contains that table.
if (crawlRSC.getGlobalUniResultTableMap(pageName, ruleName)
.size() == 0) {
warn("Thread-" + threadId
+ ":invalid uniTable pagename:" + pageName
+ "->" + ruleName);
continue;
} else {

rows = crawlRSC.getGlobalUniResultTableMap(pageName,
ruleName);
}
} else {
warn("invalid pagename:" + tableName + " or match failed");
}
} else {
// The data lives in a shared table.
rows = crawlRSC.getGlobalShareResultTableMap(tableName);
}

if (rows.size() == 0) {
break;
}

// Start processing a new table.
if (StringUtils.isNotEmpty(exTableName)
&& !StringUtils.equalsIgnoreCase(curTableName, exTableName)) {

// Join mainRows with rows (cartesian product).
List<Map<String, String>> tmpRows = new ArrayList();
for (Map<String, String> result : mainRows) {
// New record: left-table row plus right-table row go into the
// temporary table.

for (Map<String, String> map : rows) {
Map<String, String> tpMap = new HashMap();
tpMap.putAll(result);

// If pfk.k carries a ":toByte" suffix it must be stripped
// before the value can be looked up.
// NOTE(review): this branch looks the value up via the
// display name pfk.k, while the same-table branches below
// use the internal name pfk.f — confirm this asymmetry is
// intentional.
String key=pfk.k;
if(pfk.k.endsWith(ConfConstant.TO_BYTE)){
int ix = pfk.k.lastIndexOf(ConfConstant.TO_BYTE);
key= key.substring(0, ix);
}

tpMap.put(pfk.p + "." + pfk.f, map.get(key));

// The where-clause may reference extra fields of this table.
if (pkpkMap.containsKey(pfk.p)) {
Set<String> ext = pkpkMap.get(pfk.p);
for (String f : ext) {
tpMap.put(pfk.p + "." + f, map.get(f));
}
}
// Add one row to the temporary table.
tmpRows.add(tpMap);
}

}

mainRows = tmpRows;// cartesian product done

} else {// Keep adding data of the same table.

if (StringUtils.isEmpty(exTableName)) {

for (Map<String, String> map : rows) {
Map<String, String> tpMap = new HashMap();

// Strip the ":FUNCTION" suffix if present.
String fld=pfk.f;
if(fld.endsWith(ConfConstant.FUNCTOIN_POSTFIX)){
fld=fld.substring(0,fld.length()-ConfConstant.FUNCTOIN_POSTFIX.length() );
}

tpMap.put(pfk.p + "." + pfk.f, map.get(fld));

// The where-clause may reference extra fields of this table.
if (pkpkMap.containsKey(pfk.p)) {
Set<String> ext = pkpkMap.get(pfk.p);
for (String f : ext) {
tpMap.put(pfk.p + "." + f, map.get(f));
}
}

mainRows.add(tpMap);
}
} else {

// Assumes mainRows.size() is a multiple of rows.size(), i.e.
// mainRows was built from earlier products with this same table.
for (int i = 0; i < mainRows.size(); i += rows.size()) {


// Strip the ":FUNCTION" suffix if present.
String fld=pfk.f;
if(fld.endsWith(ConfConstant.FUNCTOIN_POSTFIX)){
fld=fld.substring(0,fld.length()-ConfConstant.FUNCTOIN_POSTFIX.length() );
}


String pf = pfk.p + "." + pfk.f;
for (int j = 0; j < rows.size(); j++) {
mainRows.get(i + j).put(pf, rows.get(j).get(fld));

if (pkpkMap.containsKey(pfk.p)) {
Set<String> ext = pkpkMap.get(pfk.p);
for (String f : ext) {
mainRows.get(i + j).put(pfk.p + "." + f,
rows.get(j).get(f));
}
}
}
}

}

}

exTableName = tableName;

}

// Merged tables can be processed after all ordinary tables are done.
for (String targetKey : mergeMap.keySet()) {

// Find the alias that maps to this merge-target table.
String p = "";
for (String pkey : pageNameMap.keySet()) {
if (pageNameMap.get(pkey).equals(targetKey)) {
p = pkey;
break;
}
}
if (StringUtils.isEmpty(p)) {
ParalleIRVirtualMachine.error(
"invalid page alias:" + targetKey, ErrorType.SEMANTIC);
}

if (StringUtils.isEmpty(targetKey))
continue;

TableMerge merge = mergeMap.get(targetKey);
Set<String> srcTbls = merge.getMergedTable();

// NOTE(review): mainMergeRows is never used below — confirm whether the
// merge result was meant to be accumulated here.
List<Map<String, String>> mainMergeRows = new ArrayList();

// Originally a single empty row was seeded here:
// mainRow.add(new HashMap());

for (String tableName : srcTbls) {

List<Map<String, String>> rows = new ArrayList();

String[] prPair = tableName.split("->");

if (prPair.length == 2) { // independent table
String pageName = prPair[0].trim();
String ruleName = prPair[1].trim();

if (crawlRSC.getGlobalUniResultTableMap(pageName, ruleName)
.size() == 0) {
warn("Thread-" + threadId
+ ":invalid uniTable pagename:" + pageName
+ "->" + ruleName);
continue;
} else {
rows = crawlRSC.getGlobalUniResultTableMap(pageName,
ruleName);

if (mainRows.size() == 0) {

List<Map<String, String>> tmpRows = new ArrayList();

for (Map<String, String> row : rows) {
Map tmpMap = new HashMap();
for (String columName : row.keySet()) {
tmpMap.put(p + "." + columName, row
.get(columName));
}
tmpRows.add(tmpMap);
}
mainRows = tmpRows;
} else {

// cartesian product, e.g. 2x2 => 4 rows
List<Map<String, String>> tmpRows = new ArrayList();

for (Map<String, String> mainColumn : mainRows) {

for (Map<String, String> row : rows) {
Map<String, String> tmpMap = new HashMap();
for (String columName : row.keySet()) {
tmpMap.put(p + "." + columName, row
.get(columName));
}
tmpMap.putAll(mainColumn);
tmpRows.add(tmpMap);
}
}
mainRows = tmpRows;
}

// }
}
} else {// shared table

String pageName = tableName.trim();
if (crawlRSC.getGlobalShareResultTableMap(pageName).size() == 0) {
warn("Thread-" + threadId + ":invalid pagename:"
+ pageName);
continue;
} else {
rows = crawlRSC.getGlobalShareResultTableMap(pageName);

if (mainRows.size() == 0) {
mainRows = rows;
} else {
// NOTE(review): this overlays every shared-table row onto
// every main row in place (last row wins) instead of
// producing a cartesian product — confirm intended.
for (Map<String, String> mainColumn : mainRows) {
for (Map<String, String> column : rows) {
mainColumn.putAll(column);
}
}
}

}
}
}

}

// Reference data directly from hierarchyPfkList.
for (Pfk pfk : hierarchyPfkList) {

// Resolve the table name.
String tableName = pageNameMap.get(pfk.p);

curTableName = tableName;

if (!hierarchyResult.containsKey(tableName)) {
String err = "invalid hierarchy tableName:" + tableName;
err += ",\n if u want to specify a hierarchy tableName, u needn't to write the matchName";
err += ",\n because only one matchName can be used as a hierarchyTable";
error(err);
continue;
}
Map<String, String> row = hierarchyResult.get(tableName);

// Even in hierarchy tables the field names are joined with "." for
// uniformity; function-typed fields must take this into account.
for (int i = 0; i < mainRows.size(); i++) {
String pf = pfk.p + "." + pfk.f;
mainRows.get(i).put(pf, row.get(pfk.f));
}

}

return filterFinalResult(mainRows, pfkList, hierarchyPfkList, pkpkList);

}

/**
 * Entry point: parses one IRQL statement of the form
 * "table:alias,...;select &lt;fields&gt; where &lt;predicates&gt;" and
 * returns the final result rows.
 *
 * @param irql the IRQL statement (without the trailing dao section)
 * @return result rows keyed by display field name
 * @throws SemanticException on any syntactic or semantic error
 */
public List<Map> getMapResult(String irql) throws SemanticException {

// The page-definition section ends at the first ';'.
int pm = irql.indexOf(";");
if (-1 == pm) {
String err = "invalid IRQL format:" + irql;
err += "\nhave u forget to put ';' after Page Define?";
ParalleIRVirtualMachine.error(err, ErrorType.SEMANTIC);
}
String pageStr = irql.substring(0, pm);

// Alias -> real table name (input syntax is "tableName:alias").
Map<String, String> pageNameMap = new HashMap<String, String>();

String[] fields = pageStr.split(",");
for (String field : fields) {
String[] kv = field.split(":");
if (kv.length == 2) {
pageNameMap.put(kv[1].trim(), kv[0].trim());
} else {
error("error pageMap description:" + field);
}
}

// Split the remainder into the select part (prefix) and the where part
// (postfix).
String prefix = "";
String postfix = "";
int w = irql.indexOf("where");
if (w != -1) {
prefix = irql.substring(pm + 1, w).trim();
postfix = irql.substring(w);
} else {
prefix = irql;
}

int s = prefix.indexOf("select");
if (s == -1) {
ParalleIRVirtualMachine.error("miss 'select'", ErrorType.GRAMMER);
}
s = s + "select".length();

String fieldStr = prefix.substring(s).trim();

// Raw field expressions that appear inside function calls.
Set<String> functionalFields = new HashSet();

Matcher m = FUNC_PATTERN.matcher(fieldStr);

boolean found = false;
int last = 0;
while (m.find(last)) {

found = true;

last = m.end();

String field = m.group(0);




String functionName = m.group(1);
Function fun = FunctionNameMap.get(functionName);

String showField = m.group(3);

if (null == fun) {
// NOTE(review): "anme" is a typo for "name" in this user-facing
// message; string literal left unchanged here.
error("unexisting function anme :" + functionName);
}

// Single-argument functions.
if (uniParameterFunction.contains(functionName)) {
UniParameterFunction func = (UniParameterFunction) getFunctionInstance(functionName);

field = m.group(2);

// Guard against a forgotten alias.
if (field.contains(",") || field.contains(")")) {
error("invalid Uniunction format");
}
String tableFieldKey = field.replace("->", ".");

// Append the ":FUNCTION" suffix.
tableFieldKey=tableFieldKey+ConfConstant.FUNCTOIN_POSTFIX;

func.setField(tableFieldKey);




functionalFields.add(field);
showFieldFunctionMap.put(showField, func);
}

if (doubleParameterFunction.contains(functionName)) {
DoubleParameterFunction func = (DoubleParameterFunction) getFunctionInstance(functionName);

String[] args = m.group(2).split(",");

// NOTE(review): args[1] is dereferenced before the length check
// below — a malformed call with fewer than two arguments throws
// ArrayIndexOutOfBoundsException instead of the intended error().
func.setParameter(ParseUtils.parseStrContent(args[1].trim()));
if (args.length != 2) {
error("invalid DoubleParameterFunction format");
}
String tableFieldKey = args[0].replace("->", ".");

// Append the ":FUNCTION" suffix.
tableFieldKey=tableFieldKey+ConfConstant.FUNCTOIN_POSTFIX;


func.setField(tableFieldKey);
functionalFields.add(args[0]);// the field must be recorded among "fields appearing in the result set"
showFieldFunctionMap.put(showField, func);

}

// Three-argument function: replace(field, pattern, replacement).
if (functionName.equals(FunctionConstant.REPLACE)) {
ReplaceFunction rpf = new ReplaceFunction();
String[] args = m.group(2).split(",");
if (args.length != 3) {
error("invalid parameter for replace function");
}

String tableFieldKey = args[0].replace("->", ".");

// Append the ":FUNCTION" suffix.
tableFieldKey=tableFieldKey+ConfConstant.FUNCTOIN_POSTFIX;

rpf.setField(tableFieldKey);

String patternStr = args[1].replace("\\\"", "");
String replaceStr = args[2].replace("\\\"", "");
rpf.setPatternStr(patternStr);
rpf.setReplaceStr(replaceStr);

// NOTE(review): unlike the branches above, this one records the
// suffixed key rather than the raw field expression — confirm.
functionalFields.add(tableFieldKey);

showFieldFunctionMap.put(showField, rpf);
}

// N-argument function: sprintf(format, field...).
if (functionName.equals(FunctionConstant.SPRINGTF)) {
SprintfFunction spf = new SprintfFunction();
String[] args = m.group(2).split(",");

spf.setFormat(ParseUtils.parseStrContent(args[0].trim()));
ArrayList<String> spFields = new ArrayList();
for (int i = 1; i < args.length; i++) {
// Field names unify the shared-, independent- and
// hierarchy-table notations.
String tableFieldKey = args[i].replace("->", ".");


// Append the ":FUNCTION" suffix.
tableFieldKey=tableFieldKey+ConfConstant.FUNCTOIN_POSTFIX;

spFields.add(tableFieldKey);
functionalFields.add(args[i]);
}
spf.setFields(spFields.toArray(new String[0]));
showFieldFunctionMap.put(showField, spf);
}

// Reverse-Polish recursive function.
if (functionName.equals(FunctionConstant.RECURSIVE)) {
RecursiveFunction rf = new RecursiveFunction();
String[] args = m.group(2).split(",");

rf.setFunctions(ParseUtils.parseStrContent(args[0].trim()));

List<String> params = new ArrayList();
for (int i = 1; i < args.length; i++) {
// Field names unify the shared-, independent- and
// hierarchy-table notations.

if (!args[i].trim().startsWith("\"")) {
String tableFieldKey = args[i].replace("->", ".");

// Append the ":FUNCTION" suffix.
tableFieldKey=tableFieldKey+ConfConstant.FUNCTOIN_POSTFIX;

functionalFields.add(tableFieldKey);
params.add(args[i]);
} else {
params.add(args[i]);
}
}
rf.setParams(params.toArray(new String[0]));
showFieldFunctionMap.put(showField, rf);
}

}

// Strip all functional show-fields from the select list, leaving only
// plain fields in fieldStr.
if (found) {
fieldStr = m.replaceAll("");
}

ArrayList<Pfk> pfk = new ArrayList<Pfk>();

ArrayList<Pfk> hierarchyPfk = new ArrayList<Pfk>();

fields = fieldStr.split(",");

Set<String> metNames = new HashSet<String>();

// Functional and plain fields are handled separately; functional fields
// carry no alias of their own. The ":FUNCTION" suffix appended to a
// functional field's internal name keeps it from clobbering the plain
// field's show value: for "cleartag(P.f) f, P.f f2" both f and f2 then
// resolve correctly. filterFinalResult() relies on this suffix in its
// !fieldMap.get(key).endsWith(":FUNCTION") test, and the suffix also
// keeps "P.f" => "f2" from being overwritten in
// fieldMap={"P.f:FUNCTION"=>"f1","P.f"=>"f2"} — two functions clobbering
// each other is harmless.
for (String field : functionalFields) {
field = field.trim();

// Hierarchy ("linked") data-set notation.
if (field.indexOf("->") > 0) {
String[] pf = field.trim().split("->");
if (pf.length == 2) {
String f = ParseUtils.getRealFieldName(pf[1]);
//hierarchyPfk.add(new Pfk(pf[0].trim(), f, f));
hierarchyPfk.add(new Pfk(pf[0].trim(), f+ConfConstant.FUNCTOIN_POSTFIX, f));
} else {
error("error prefix pf:" + field);
}
} else {

String[] pf = field.trim().split("\\.");

if (pf.length == 2) {
String f = ParseUtils.getRealFieldName(pf[1]);
//pfk.add(new Pfk(pf[0].trim(), f, f));
pfk.add(new Pfk(pf[0].trim(), f+ConfConstant.FUNCTOIN_POSTFIX, f));
} else {
error("error prefix pf:" + field);
}
}
}

for (String field : fields) {

if (StringUtils.isBlank(field))
continue;

field = field.trim();

String[] kv = field.trim().split("\\s+");

if (kv.length == 2) {
// Reject duplicate show fields.
if (!metNames.contains(kv[1].trim())) {
metNames.add(kv[1].trim());
} else {
error("duplicate show field:" + kv[1]);
}

// Hierarchy data set, e.g. select P.songName songName,P->downlink downlink;
if (kv[0].indexOf("->") > 0) {
String[] pf = kv[0].split("->");
if (pf.length == 2) {

hierarchyPfk.add(new Pfk(pf[0].trim(), pf[1].trim(),
kv[1].trim()));
} else {
error("error prefix pf:" + kv[0]);
}
} else {

String[] pf = kv[0].trim().split("\\.");
if (pf.length == 2) {

pfk.add(new Pfk(pf[0].trim(), pf[1].trim(), kv[1]
.trim()));
} else {
error("error prefix pf:" + kv[0]);
}
}

} else if (kv.length == 1) { // e.g. select P.songName,P->downlink;
// Hierarchy data set.
if (kv[0].indexOf("->") > 0) {
String[] pf = kv[0].trim().split("->");

if (pf.length == 2) {
String f = ParseUtils.getRealFieldName(pf[1]);
// Reject duplicate show fields.
if (!metNames.contains(f)) {
metNames.add(f);
} else {
error("duplicate show field:" + f);
}

metNames.add(pf[1]);
hierarchyPfk.add(new Pfk(pf[0].trim(), f, f));

} else {
error("error prefix pf:" + kv[0]);
}

} else {

String[] pf = kv[0].trim().split("\\.");

if (pf.length == 2) {

String f = ParseUtils.getRealFieldName(pf[1]);

if (!metNames.contains(f)) {
metNames.add(f);
} else {
error("duplicate show field:" + f);
}
pfk.add(new Pfk(pf[0].trim(), f, pf[1].trim()));
} else {
error("error prefix pf:" + kv[0]);
}
}
} else {
StringBuffer fieldsError = new StringBuffer();
for (String f : kv) {
fieldsError.append(f);
}
error("error fields description:" + fieldsError);
}
}// end foreach fields

w = postfix.indexOf("where");
if (w != -1) {
fieldStr = postfix.substring(w + "where".length()).trim();
} else {
fieldStr = "";
}

ArrayList<Pkpk> pkpk = new ArrayList();

fields = new String[0];

// Filter ("projection") conditions, separated by "and".
if (StringUtils.isNotEmpty(fieldStr)) {
fields = fieldStr.split("and");
}

for (String field : fields) {

field = field.trim();
String optag = "=";
// The comparison operator.
Operator operator = null;

// Detect which operator the expression uses; LinkedHashMap order makes
// "!=" win over "=".
for (String op : operators.keySet()) {
if (field.contains(op)) {
optag = op;
operator = operators.get(op);
break;
}
}

// Supported forms: P1.F1=P2.F2 and P1.F1=NULL.
String[] kv = field.split(optag);
if (kv.length == 2) {// only the two syntaxes above are supported, so this should always hold

String tableKey = kv[0].trim().replace("->", ".");

String[] pf = tableKey.split("\\.");

String p1 = null, f1 = null, p2 = null, f2 = null;
if (pf.length == 2) {
p1 = pf[0].trim();
f1 = pf[1].trim();
} else {
error("error post pf");
}

List args = new CopyOnWriteArrayList();

String targetTableKey = kv[1].trim();
// NULL comparison: single-field predicate.
if (targetTableKey.equals(ConfConstant.NULL)) {

args.addAll(Arrays.asList(new String[] { p1, f1 }));
Pkpk cmpNullOP = new Pkpk(ArgumentType.ONE, operator, args);
pkpk.add(cmpNullOP);
} else if (targetTableKey.startsWith("\"")
&& targetTableKey.endsWith("\"")) {
String str = targetTableKey.substring(1, targetTableKey
.length() - 1);

// (In)equality against a string literal.
if (operator instanceof UneqOperator) {
operator = new UneqStringOperator(str);
} else {
operator = new EqStringOperator(str);
}
args.addAll(Arrays.asList(new String[] { p1, f1, str }));
Pkpk cmpStrOP = new Pkpk(ArgumentType.ONE, operator, args);
pkpk.add(cmpStrOP);

} else {

pf = kv[1].trim().split("\\.");
if (pf.length == 2) {
p2 = pf[0].trim();
f2 = pf[1].trim();
args.addAll(Arrays
.asList(new String[] { p1, f1, p2, f2 }));
} else {
error("error post pf");
}
Pkpk cmpOP = new Pkpk(ArgumentType.TWO, operator, args);
pkpk.add(cmpOP);
}

} else {
error("error fields description");
}
}

return getFinalResult(pageNameMap, pfk, hierarchyPfk, pkpk);

}

/**
 * Ad-hoc demo harness. Builds three in-memory "page" tables shaped like
 * crawl output; the original call into getMapResult(...) remains disabled,
 * so running this method has no observable effect.
 */
public static void main(String[] argv) {

// Source data: page name -> list of rows (column -> value).
Map<String, List<Map<String, String>>> pageMap = new HashMap<String, List<Map<String, String>>>();

// Each fixture: first entry is the page name, remaining entries are rows
// encoded as flat key/value pairs.
String[][][] fixtures = {
// P1
{ { "pagename1" },
{ "bcid", "v_bcid", "bcname", "v_bcname" },
{ "bcid", "v_bcid1", "bcname", "v_bcname1" } },
// P2
{ { "pagename2" },
{ "bcid", "v_bcid", "scid", "v_sci", "scname", "v_scname" },
{ "bcid", "v_bcid1", "scid", "v_sci1", "scname", "v_scname1" },
{ "bcid", "v_bcid1", "scid", "v_sci2", "scname", "v_scname2" } },
// P3
{ { "pagename3" },
{ "scid", "v_sci", "scname", "v_scname", "songname", "v_songname1", "downlink", "v_downlink1" },
{ "scid", "v_sci", "scname", "v_scname", "songname", "v_songname2", "downlink", "v_downlink2" },
{ "scid", "v_sci1", "scname", "v_scname", "songname", "v_songname3", "downlink", "v_downlink3" },
{ "scid", "v_sci1", "scname", "v_scname", "songname", "v_songname4", "downlink", "v_downlink4" } } };

for (String[][] fixture : fixtures) {
List<Map<String, String>> rows = new ArrayList<Map<String, String>>();
for (int r = 1; r < fixture.length; r++) {
Map<String, String> row = new HashMap<String, String>();
for (int c = 0; c < fixture[r].length; c += 2) {
row.put(fixture[r][c], fixture[r][c + 1]);
}
rows.add(row);
}
pageMap.put(fixture[0][0], rows);
}

// Hierarchy-table placeholder, unused while the demo call stays disabled.
Map<String, Map<String, String>> hierarchy = new HashMap<String, Map<String, String>>();

// Original (disabled) usage, kept for reference:
// ResutTree resutTree = new ResutTree(pageMap, hierarchy, curUniResultTableMap);
// String str = "pagename1:P1,pagename2:P2,pagename3:P3;select P1.bcname"
// + " bcname,P2.scname sncame,P3.songname songname,P3.downlink"
// + " downlink where P1.bcid=P2.bcid and P2.scid=P3.scid; dao->insert";
// int semi = str.lastIndexOf(";");
// String dao = str.substring(semi + 1);
// String irql = str.substring(0, semi);
// ArrayList<Map> am = resutTree.getMapResult(irql);
}

// Injects the table-merge configuration consumed by getFinalResult().
// Note: stores the caller's map by reference (no defensive copy).
public void setMergeMap(Map<String, TableMerge> mergeMap) {
this.mergeMap = mergeMap;
}

}
/*
 * Trailing residue from the web page this file was copied from (CSDN blog
 * like/bookmark/payment-widget text); not part of the program. Preserved
 * below, commented out so the file parses:
 *
 *   • 0
 *     点赞
 *   • 0
 *     收藏
 *     觉得还不错? 一键收藏
 *   • 0
 *     评论
 * 评论
 * 添加红包
 *
 * 请填写红包祝福语或标题
 *
 * 红包个数最小为10个
 *
 * 红包金额最低5元
 *
 * 当前余额3.43前往充值 >
 * 需支付:10.00
 * 成就一亿技术人!
 * 领取后你会自动成为博主和红包主的粉丝 规则
 * hope_wisdom
 * 发出的红包
 * 实付
 * 使用余额支付
 * 点击重新获取
 * 扫码支付
 * 钱包余额 0
 *
 * 抵扣说明:
 *
 * 1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
 * 2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。
 *
 * 余额充值
 */