最近做了一个项目,需要把一些网站的链接转换成其对应 wap 站点的链接。比如把凡客诚品的商品链接转换成对应 wap 站点的地址,这样就可以在手持设备上直接访问。
这里把规则配置和转换代码备份一下。
<?xml version="1.0" encoding="GBK"?>
<rule>
  <!-- Replacing the whole content of this document with the single word "allnot" turns every conversion off. -->

  <!-- Simple fast-match rules; lowest priority. -->
  <!-- Use them when the URL has a static layout and the main parameter sits at a fixed position. -->
  <!--
    name    : label of the rule; identifies the target site the rule adapts.
    ${1}    : parameter placeholder. It may appear in the input pattern and in the target URL;
              only one parameter is supported for now.
    pattern : format to match. ${1} marks where the parameter sits. Home-page patterns are fairly fixed
              and may carry a little regex such as (/*); product-link patterns must not contain regex.
    target  : template of the generated URL; free-form.
    Note: the order of the simple rules is their matching priority. Put the rules that must win first;
          product links normally go before the home page of the same site.
  -->

  <!-- Taobao -->
  <simple name="淘宝集市">
    <pattern><![CDATA[http://www.taobao.com(/*)]]></pattern>
    <target><![CDATA[http://m.taobao.com]]></target>
  </simple>
  <simple name="淘宝商城">
    <pattern><![CDATA[http://www.tmall.com(/*)]]></pattern>
    <target><![CDATA[http://m.taobao.com]]></target>
  </simple>

  <!-- 360buy (Jingdong) -->
  <simple name="京东商品">
    <pattern><![CDATA[http://www.360buy.com/product/${1}.html]]></pattern>
    <target><![CDATA[http://m.360buy.com/product/${1}.html]]></target>
  </simple>
  <simple name="京东首页">
    <pattern><![CDATA[http://www.360buy.com(/*)]]></pattern>
    <target><![CDATA[http://m.360buy.com]]></target>
  </simple>

  <!-- Newegg -->
  <simple name="新蛋商品">
    <pattern><![CDATA[http://www.newegg.com.cn/Product/${1}.htm]]></pattern>
    <target><![CDATA[http://m.newegg.com.cn/Product/${1}.htm]]></target>
  </simple>
  <simple name="新蛋首页">
    <pattern><![CDATA[http://www.newegg.com.cn(/*)]]></pattern>
    <target><![CDATA[http://m.newegg.com.cn]]></target>
  </simple>

  <!-- Dangdang; the two product rules differ only in the case of "product.aspx" -->
  <simple name="当当商品">
    <pattern><![CDATA[http://product.dangdang.com/product.aspx?product_id=${1}]]></pattern>
    <target><![CDATA[http://m.dangdang.com/product.php?pid=${1}]]></target>
  </simple>
  <simple name="当当商品">
    <pattern><![CDATA[http://product.dangdang.com/Product.aspx?product_id=${1}]]></pattern>
    <target><![CDATA[http://m.dangdang.com/product.php?pid=${1}]]></target>
  </simple>
  <simple name="当当首页">
    <pattern><![CDATA[http://www.dangdang.com(/*)]]></pattern>
    <target><![CDATA[http://m.dangdang.com]]></target>
  </simple>

  <!-- Vancl -->
  <simple name="凡客商品">
    <pattern><![CDATA[http://item.vancl.com/${1}.html]]></pattern>
    <target><![CDATA[http://m.vancl.com/style/StyleHome/0/${1}/0/.mvc]]></target>
  </simple>
  <simple name="凡客首页">
    <pattern><![CDATA[http://www.vancl.com(/*)]]></pattern>
    <target><![CDATA[http://m.vancl.com]]></target>
  </simple>

  <!-- Mbaobao; the catalogues are not fully aligned, some PC products cannot be found on the wap site -->
  <simple name="麦包包商品">
    <pattern><![CDATA[http://item.mbaobao.com/pshow-${1}.html]]></pattern>
    <target><![CDATA[http://m.mbaobao.com/index.php/goods/detail/itemid/${1}]]></target>
  </simple>
  <simple name="麦包包首页">
    <pattern><![CDATA[http://www.mbaobao.com(/*)]]></pattern>
    <target><![CDATA[http://m.mbaobao.com/index.php/]]></target>
  </simple>

  <!-- Yihaodian; most specific product pattern first -->
  <simple name="一号店商品">
    <pattern><![CDATA[http://www.yihaodian.com/product/detail.do?productID=${1}&merchantID=]]></pattern>
    <target><![CDATA[http://m.yihaodian.com/product/${1}_1]]></target>
  </simple>
  <simple name="一号店商品">
    <pattern><![CDATA[http://www.yihaodian.com/product/detail.do?productID=${1}]]></pattern>
    <target><![CDATA[http://m.yihaodian.com/product/${1}_1]]></target>
  </simple>
  <simple name="一号店商品">
    <pattern><![CDATA[http://www.yihaodian.com/product/${1}]]></pattern>
    <target><![CDATA[http://m.yihaodian.com/product/${1}]]></target>
  </simple>
  <simple name="一号店首页">
    <pattern><![CDATA[http://www.yihaodian.com(/*)]]></pattern>
    <target><![CDATA[http://m.yihaodian.com/index]]></target>
  </simple>

  <!-- Taoxie -->
  <!-- The pattern used to be "http://www.taoxie.com/(/*)", which required a trailing slash and
       therefore never matched the bare home-page URL; it now follows the other home-page rules. -->
  <simple name="淘鞋网首页">
    <pattern><![CDATA[http://www.taoxie.com(/*)]]></pattern>
    <target><![CDATA[http://m.taoxie.cn/index.aspx]]></target>
  </simple>

  <!-- Amazon -->
  <simple name="亚马逊首页">
    <pattern><![CDATA[http://www.amazon.cn(/*)]]></pattern>
    <target><![CDATA[http://m.amazon.cn]]></target>
  </simple>

  <!-- Vipshop -->
  <simple name="唯品会首页">
    <pattern><![CDATA[http://www.vipshop.com(/*)]]></pattern>
    <target><![CDATA[http://m.vipshop.com/index.php]]></target>
  </simple>

  <!-- Special conversion rules; higher priority than the simple rules. -->
  <!-- Use them when the URL is dynamic and the position of the main parameter varies. -->
  <!--
    name    : label of the rule.
    pattern : regular expression the whole URL has to match.
    before  : string(s) found in front of the parameter. Several values are separated by || and
              carry priority: the further to the front, the earlier it is tried.
    after   : string(s) found behind the parameter. The value NULL means the parameter may run up
              to the very end of the URL.
    isAfter : whether to start from the tail of the URL, i.e. cut with "after" first.
              true  : the text behind the parameter is fixed, the text in front is not;
              false : the text in front of the parameter is fixed, the text behind is not.
    target  : template of the generated URL; free-form.
    Note: the order of the special rules is their matching priority; rules nearer the top win.
  -->

  <!-- Taobao -->
  <!-- "&" is listed before "/" so that a slash inside a later query parameter cannot swallow
       the parameters that follow the id. -->
  <special name="淘宝商品" isAfter="false">
    <pattern><![CDATA[http://item.taobao.com/item.htm\?id=(.*)]]></pattern>
    <target><![CDATA[http://a.m.taobao.com/i${1}.htm]]></target>
    <before><![CDATA[id=]]></before>
    <after><![CDATA[&||/||NULL]]></after>
  </special>

  <!-- Amazon -->
  <special name="亚马逊商品" isAfter="false">
    <pattern><![CDATA[http://www.amazon.cn(.*)/(dp|gp/product)/(.*)]]></pattern>
    <target><![CDATA[http://www.amazon.cn/gp/aw/d/${1}]]></target>
    <before><![CDATA[/gp/product/||/dp/]]></before>
    <after><![CDATA[/||NULL]]></after>
  </special>
  <!-- asin is a query parameter, so "&" has to end it when further parameters follow. -->
  <special name="亚马逊商品" isAfter="false">
    <pattern><![CDATA[http://www.amazon.cn/mn/detailApp/?(.*)asin=(.*)]]></pattern>
    <target><![CDATA[http://www.amazon.cn/gp/aw/d/${1}]]></target>
    <before><![CDATA[asin=]]></before>
    <after><![CDATA[&||/||NULL]]></after>
  </special>

  <!-- Taoxie -->
  <special name="淘鞋网商品" isAfter="false">
    <pattern><![CDATA[http://www.taoxie.com/commodity-(.+)(-{0,1})(.*).htm]]></pattern>
    <target><![CDATA[http://m.taoxie.cn/detail.aspx?id=${1}]]></target>
    <before><![CDATA[commodity-]]></before>
    <after><![CDATA[-||.htm]]></after>
  </special>

  <!-- 360buy sub-shops such as book.360buy.com or mvd.360buy.com -->
  <!-- The former "[^www]*" was a character class ("any character except w"), not "anything but
       www": sub-domains containing a "w" were rejected. A negative look-ahead states the intent. -->
  <special name="京东子商城" isAfter="false">
    <pattern><![CDATA[http://(?!www\.)([^./]+)\.360buy\.com/(.*)\.html]]></pattern>
    <target><![CDATA[http://m.360buy.com/product/${1}.html]]></target>
    <before><![CDATA[.360buy.com/]]></before>
    <after><![CDATA[.html||NULL]]></after>
  </special>

  <!-- Vipshop -->
  <special name="唯品会商品" isAfter="false">
    <pattern><![CDATA[http://www.vipshop.com/detail-(.*).html(.*)]]></pattern>
    <target><![CDATA[http://m.vipshop.com/merchandise.php?a=detail&id=${1}]]></target>
    <before><![CDATA[detail-]]></before>
    <after><![CDATA[-||NULL]]></after>
  </special>
</rule>
package com.taobao.wireless.decider.s.manager.impl;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import com.alibaba.citrus.service.resource.Resource;
import com.alibaba.citrus.service.resource.ResourceLoadingService;
import com.taobao.common.tools.MiniCms;
import com.taobao.minicms.serve.IMinicmsCallBack;
import com.taobao.wireless.decider.s.domain.ConvertResult;
import com.taobao.wireless.decider.s.manager.UrlConvertManager;
/**
* URL转换器
*
* @author yanyuan.qzs@taobao.com
*/
public class DefaultUrlConvertManager implements UrlConvertManager, IMinicmsCallBack {
// ~~~ Logger
private static final Logger logger = LoggerFactory.getLogger(DefaultUrlConvertManager.class);
/**
 * Path of the local rule file. init() hands it to the resource loading service
 * when the configuration obtained from MiniCms cannot be applied.
 */
private String configFiltPath;
/**
 * Resource loading service used to open the local rule file.
 */
@Autowired
private ResourceLoadingService resourceLoadingService;
/**
 * Parsed conversion rules. convert() walks this list front to back and stops at the
 * first rule that yields a non-blank result, so list order is rule priority.
 */
private List<UrlConfig> configUrlList;
// ~~~ Static Fields
// Regex form of the placeholder ("${" + one digit + "}"); used to turn a simple pattern into a regex.
private static final String PLACE_HOLD_REG = "\\$\\{\\d\\}";
// Literal placeholder looked up in patterns and substituted in target templates.
private static final String PLACE_HOLD = "${1}";
// Target returned by convert() when the input URL is blank.
private static final String TARGET_URL_DEFAULT = "http://m.etao.com";
// MiniCms key of the pushed rule configuration; also the file name callback() reacts to.
private static final String CMS_CONFIG_NAME = "decider_url_convert";
// Configuration content (compared case-insensitively in update()) that discards all rules.
private static final String CMS_CONFIG_ALL_NOT_CONVERT = "allnot";
/**
 * Initializes the rule set.
 * <p>
 * The configuration stored in MiniCms under {@code CMS_CONFIG_NAME} is tried first. When
 * {@link #update(String)} rejects it, the rules are read from the local file designated by
 * {@code configFiltPath}. A failure of the fallback is logged and does not propagate.
 */
public void init() {
    // Prefer the configuration pushed through MiniCms.
    String minicmsConfig = MiniCms.get(CMS_CONFIG_NAME);
    if (update(minicmsConfig)) {
        return;
    }
    // The pushed configuration was unusable: fall back to the bundled default file.
    Resource resource = resourceLoadingService.getResource(configFiltPath);
    InputStream localStream = null;
    try {
        localStream = resource.getInputStream();
        initByInputStream(localStream);
    } catch (Exception e) {
        logger.error("解析配置文件发生异常!", e);
    } finally {
        // The stream was opened here, so it is released here whatever the parser did with it.
        if (null != localStream) {
            try {
                localStream.close();
            } catch (Exception e) {
                logger.warn("close url convert config stream failed", e);
            }
        }
    }
}
/**
 * Replaces the current rules with the ones described by {@code config}.
 * <ul>
 * <li>Blank content is rejected and the current rules stay untouched.</li>
 * <li>The word {@code allnot} (any case, surrounding whitespace ignored) removes every rule,
 * so that nothing is converted any more.</li>
 * <li>Anything else is parsed as the XML rule document.</li>
 * </ul>
 *
 * @param config new configuration content
 * @return true when the configuration was applied; false when it was blank or could not be parsed
 */
public boolean update(String config) {
    // Blank content cannot be used as a configuration.
    if (StringUtils.isBlank(config)) {
        logger.error("无法使用空的配置内容更新url转换配置。获取到 config = " + config);
        return false;
    }
    // "allnot" forces "convert nothing". An empty list is published rather than null so that
    // readers never have to deal with a null rule list; pushed content often ends with a newline,
    // hence the trim.
    if (StringUtils.equalsIgnoreCase(StringUtils.trim(config), CMS_CONFIG_ALL_NOT_CONVERT)) {
        configUrlList = new ArrayList<UrlConfig>();
        return true;
    }
    try {
        // Parse from characters, not bytes: config.getBytes() would encode with the platform
        // charset, which need not be the encoding named in the XML declaration (GBK in the
        // shipped rule file). With a Reader the parser does no decoding of its own.
        Document document = new SAXReader().read(new StringReader(config));
        init(document);
        return true;
    } catch (Exception e) {
        logger.error("更新解析配置文件发生异常!", e);
        return false;
    }
}
/**
 * Parses the XML rule document from a stream and installs the rules it describes.
 * <p>
 * Failures are deliberately NOT swallowed here. Both callers wrap this call in a
 * {@code try/catch (Exception)} that logs the problem and reports it (return value of
 * {@code update}, fallback in {@code init}); catching the exception at this level made
 * those callers believe that every load succeeded.
 *
 * @param inputStream stream positioned at the start of the XML rule document; not closed here
 * @throws Exception when the document cannot be parsed or the rules cannot be extracted
 */
private void initByInputStream(InputStream inputStream) throws Exception {
    SAXReader saxReader = new SAXReader();
    Document document = saxReader.read(inputStream);
    init(document);
}
/**
 * Extracts the rules from a parsed rule document and installs them.
 * <p>
 * {@code rule/special} entries are collected before {@code rule/simple} entries, which is what
 * gives special rules priority over simple ones. Entries with a blank pattern or target are
 * skipped. The new list is built on the side and published in one assignment at the end, so a
 * failure while reading the document leaves the previously installed rules in place.
 *
 * @param document parsed rule document; {@code null} installs an empty rule list
 */
@SuppressWarnings("unchecked")
private void init(Document document) {
    List<UrlConfig> parsed = new ArrayList<UrlConfig>();
    if (null != document) {
        // special node configuration
        List<Element> snodes = document.selectNodes("rule/special");
        if (null != snodes) {
            for (Element n : snodes) {
                // attributeValue/elementText return null for a missing attribute or child
                // instead of failing, so an incomplete entry is simply skipped below.
                String isAfter = n.attributeValue("isAfter");
                String name = n.attributeValue("name");
                String configUrl = n.elementText("pattern");
                String template = n.elementText("target");
                String before = n.elementText("before");
                String after = n.elementText("after");
                if (StringUtils.isNotBlank(configUrl)
                        && StringUtils.isNotBlank(template)) {
                    // For special rules the pattern already is the regex, so it serves
                    // both as match expression and as configured URL.
                    UrlConfig uc = new UrlConfig(name, configUrl, configUrl,
                            true, getSplitSortString(before),
                            getSplitSortString(after),
                            Boolean.valueOf(isAfter), template);
                    parsed.add(uc);
                }
            }
        }
        // common node configuration
        List<Element> nodes = document.selectNodes("rule/simple");
        if (null != nodes) {
            for (Element n : nodes) {
                String name = n.attributeValue("name");
                String configUrl = n.elementText("pattern");
                String template = n.elementText("target");
                if (StringUtils.isNotBlank(configUrl)
                        && StringUtils.isNotBlank(template)) {
                    UrlConfig uc = new UrlConfig(name, configUrl, template, false);
                    // Simple patterns are turned into a regex once, at load time.
                    uc.setMatch(getFormatPatternStr(configUrl, PLACE_HOLD_REG));
                    parsed.add(uc);
                }
            }
        }
    }
    // Publish only the completely built list.
    configUrlList = parsed;
}
/**
 * Splits a {@code ||}-separated marker list, keeping the order of the entries.
 * <p>
 * The two-character sequence {@code ||} is the separator as a whole. (The former
 * {@code StringUtils.split(str, "||")} treats its second argument as a set of separator
 * characters, so a single {@code |} inside a marker split it as well.)
 *
 * @param str marker list, e.g. {@code "/||&||NULL"}; may be null
 * @return the markers in their original order, or null when {@code str} is null
 */
private static String[] getSplitSortString(String str) {
    return StringUtils.splitByWholeSeparator(str, "||");
}
/*
 * MiniCms push notification: when the changed file is the url convert configuration,
 * fetch its current content and apply it through update(String).
 *
 * (non-Javadoc)
 * @see com.taobao.minicms.serve.IMinicmsCallBack#callback(java.lang.String)
 */
@Override
public void callback(String fileName) {
    // Notifications about other MiniCms files are not our business.
    if (!StringUtils.equals(CMS_CONFIG_NAME, fileName)) {
        return;
    }
    if (logger.isWarnEnabled()) {
        logger.warn("recieve url convert config :" + fileName);
    }
    boolean applied = update(MiniCms.get(CMS_CONFIG_NAME));
    if (logger.isWarnEnabled()) {
        logger.warn("update cms config url convert isSuccess: " + applied);
    }
}
/*
 * Converts a PC site URL into the URL of the matching wap site.
 *
 * - blank url            : result carries the default target (TARGET_URL_DEFAULT), no rule name
 * - no rules installed   : result carries the url unchanged, no rule name
 * - first rule that hits : result carries the converted url and the name of that rule
 * - no rule hits         : result carries the (blank) outcome of the last rule tried, no rule name
 *
 * (non-Javadoc)
 * @see com.taobao.core.s.manager.UrlConvertManager#convert(java.lang.String)
 */
public ConvertResult convert(String url) {
    if (StringUtils.isBlank(url)) {
        return new ConvertResult(TARGET_URL_DEFAULT, null);
    }
    // Read the field once: update() may swap the list while a conversion is running.
    List<UrlConfig> rules = configUrlList;
    // "||", not "&&": with "&&" a null list was dereferenced (NullPointerException after an
    // "allnot" push) and an empty list was never recognised.
    if (null == rules || rules.isEmpty()) {
        return new ConvertResult(url, null);
    }
    String result = null;
    // Apply the configured rules in priority order; the first non-blank result wins.
    for (UrlConfig uc : rules) {
        if (uc.isEspecial()) {
            result = convertSpecial(url, uc);
        } else {
            result = convertCommon(url, uc);
        }
        if (StringUtils.isNotBlank(result)) {
            return new ConvertResult(result, uc.getName());
        }
    }
    return new ConvertResult(result, null);
}
/**
 * Converts a URL with a "special" rule, i.e. one whose parameter position is not fixed.
 * <p>
 * The URL must match the rule's regular expression as a whole. The parameter is then cut out
 * in two steps: the URL is split at a primary marker, and the remainder is cut at a secondary
 * marker. With {@code isAfter == false} the primary markers are the {@code before} strings
 * (text following the first occurrence is kept) and the secondary markers the {@code after}
 * strings; with {@code isAfter == true} the roles are swapped and the cuts are made from the
 * tail of the URL. The secondary marker {@code NULL} (any case) means "the parameter runs to
 * the end of the remainder".
 * <p>
 * Markers are tried in configuration order and the first one that yields a non-blank
 * parameter wins, as documented for the rule file.
 *
 * @param url URL to convert
 * @param uc rule to apply
 * @return the target URL; null when the URL does not match, the rule has no primary markers,
 *         or the template needs a parameter and none could be extracted
 */
private static String convertSpecial(String url, UrlConfig uc) {
    if (!url.matches(uc.getMatch())) {
        return null;
    }
    boolean fromTail = uc.isAfter();
    String[] primary = fromTail ? uc.getAfter() : uc.getBefore();
    String[] secondary = fromTail ? uc.getBefore() : uc.getAfter();
    if (null == primary) {
        return null;
    }
    if (null == secondary) {
        // No secondary markers configured: nothing can be cut out, but the lookup must not fail.
        secondary = new String[0];
    }
    String value = "";
    for (String marker : primary) {
        if (!url.contains(marker)) {
            continue;
        }
        String rest = fromTail ? StringUtils.substringBeforeLast(url, marker)
                : StringUtils.substringAfter(url, marker);
        String candidate = "";
        for (String boundary : secondary) {
            if (rest.contains(boundary)) {
                String cut = fromTail ? StringUtils.substringAfterLast(rest, boundary)
                        : StringUtils.substringBefore(rest, boundary);
                if (StringUtils.isNotBlank(cut)) {
                    candidate = cut;
                    break;
                }
            } else if (StringUtils.equalsIgnoreCase(boundary, "NULL")) {
                // The parameter reaches the edge of the URL.
                candidate = rest;
                break;
            }
        }
        if (StringUtils.isNotBlank(candidate)) {
            // Stop at the first marker that delivers a value; continuing would let a
            // lower-priority marker overwrite it.
            value = candidate;
            break;
        }
    }
    return copyParam(uc.getTemplate(), value);
}
/**
 * Converts a URL with a "simple" rule, i.e. one whose parameter sits at a fixed position.
 * <p>
 * The parameter starts at the offset at which the placeholder stands in the configured
 * pattern. It ends at the end of the URL when the pattern ends with the placeholder, otherwise
 * at the first occurrence, at or behind the start offset, of the pattern text that follows
 * the placeholder.
 *
 * @param url URL to convert
 * @param uc rule to apply
 * @return the target URL; null when the URL does not match the rule or when the template
 *         needs a parameter and none could be extracted
 */
private static String convertCommon(String url, UrlConfig uc) {
    if (!url.matches(uc.getMatch())) {
        return null;
    }
    String configUrl = uc.getConfigUrl();
    // Start offset of the parameter.
    int begin = StringUtils.indexOf(configUrl, PLACE_HOLD);
    if (begin < 0) {
        // Pattern without placeholder (home-page rules): there is nothing to extract.
        return copyParam(uc.getTemplate(), null);
    }
    // End offset of the parameter.
    int end;
    if (StringUtils.endsWith(configUrl, PLACE_HOLD)) {
        end = url.length();
    } else {
        String suffix = configUrl.substring(begin + PLACE_HOLD.length());
        // Search from the parameter start: an earlier occurrence of the suffix (for instance
        // a "/" inside "http://") must not be taken for the end of the parameter.
        end = url.indexOf(suffix, begin);
        if (end < 0) {
            // The regex is looser than the literal pattern; without the literal suffix
            // there is no reliable end position.
            return null;
        }
    }
    String value = StringUtils.substring(url, begin, end);
    return copyParam(uc.getTemplate(), value);
}
/**
 * Fills the extracted parameter into a target template.
 *
 * @param template target template, with or without the placeholder
 * @param value extracted parameter value
 * @return the template itself when it has no placeholder; null when it has one but
 *         {@code value} is blank; otherwise the template with the placeholder replaced
 */
private static String copyParam(String template, String value) {
    if (!template.contains(PLACE_HOLD)) {
        return template;
    }
    return StringUtils.isBlank(value)
            ? null
            : StringUtils.replace(template, PLACE_HOLD, value);
}
/**
 * Turns a configured simple pattern into the regular expression used for matching.
 * <p>
 * Every {@code ?} is escaped, every match of {@code replace} becomes the group {@code (.+)},
 * and {@code (.*)} is appended so that anything may follow the configured part. No other
 * character is escaped.
 *
 * @param src configured pattern
 * @param replace regular expression that finds the placeholder in {@code src}
 * @return the regular expression for {@code String.matches}
 */
private static String getFormatPatternStr(String src, String replace) {
    String questionMarksEscaped = StringUtils.replace(src, "?", "\\?");
    return questionMarksEscaped.replaceAll(replace, "(.+)") + "(.*)";
}
/**
 * @return the path of the local rule file
 */
public String getConfigFiltPath() {
return configFiltPath;
}
/**
 * @param configFiltPath path of the local rule file, read by init() when the MiniCms
 *        configuration cannot be applied
 */
public void setConfigFiltPath(String configFiltPath) {
this.configFiltPath = configFiltPath;
}
/**
 * Manual smoke test: runs one special rule and one simple rule against Taobao item URLs
 * and prints both results.
 */
public static void main(String[] args) {
    // Other inputs that were used for experiments:
    // String input = "http://www.amazon.cn/gp/product/B0036CU8OC";
    // String input = "http://www.amazon.cn/mn/detailApp?uid=477-5569909-2850306&ref=RK&asin=B0040EIHJI";
    // String input = "http://www.taoxie.com/commodity-19662-1.htm";
    String specialUrl = "http://item.taobao.com/item.htm?id=7719538630&s=2";
    UrlConfig specialRule = new UrlConfig();
    specialRule.setEspecial(true);
    specialRule.setAfter(false);
    specialRule.setConfigUrl("");
    specialRule.setMatch("http://item.taobao.com/item.htm\\?id=(.*)");
    specialRule.setTemplate("http://a.m.taobao.com/i${1}.htm");
    specialRule.setBefore(getSplitSortString("id="));
    specialRule.setAfter(getSplitSortString("/||&||NULL"));
    System.out.println(DefaultUrlConvertManager.convertSpecial(specialUrl, specialRule));

    // Simple rule: the pattern is converted to its regex form first.
    String simpleUrl = "http://item.taobao.com/item.htm?id=5272733064&ad_id=&am_id=&cm_id=&pm_id=";
    String simplePattern = "http://item.taobao.com/item.htm?id=${1}&";
    UrlConfig simpleRule = new UrlConfig();
    simpleRule.setConfigUrl(simplePattern);
    simpleRule.setMatch(DefaultUrlConvertManager.getFormatPatternStr(simplePattern, PLACE_HOLD_REG));
    simpleRule.setTemplate("http://a.m.taobao.com/i${1}.htm");
    System.out.println(DefaultUrlConvertManager.convertCommon(simpleUrl, simpleRule));
}
}
/**
* 配置对象
*
* @author yanyuan.qzs
*/
class UrlConfig {
/**
* 名称
*/
private String name;
/**
* 匹配格式
*/
private String match;
/**
* 配置源URL格式
*/
private String configUrl;
/**
* 是否差异化转换
*/
private boolean isEspecial;
/**
* 目标参数前面的str[]
*/
private String[] before;
/**
* 目标参数后面的str[]
*/
private String[] after;
/**
* 使用after做基准
*/
private boolean isAfter;
/**
* 目标URL生成模板
*/
private String template;
public UrlConfig() {
super();
}
public UrlConfig(String name,String match, String configUrl, boolean isEspecial,
String[] before, String[] after, boolean isAfter, String template) {
super();
this.name = name;
this.match = match;
this.configUrl = configUrl;
this.isEspecial = isEspecial;
this.before = before;
this.after = after;
this.isAfter = isAfter;
this.template = template;
}
public UrlConfig(String name, String configUrl, String template, boolean isEspecial) {
super();
this.name = name;
this.configUrl = configUrl;
this.template = template;
this.isEspecial = isEspecial;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getMatch() {
return match;
}
public void setMatch(String match) {
this.match = match;
}
public String getConfigUrl() {
return configUrl;
}
public void setConfigUrl(String configUrl) {
this.configUrl = configUrl;
}
public String getTemplate() {
return template;
}
public void setTemplate(String template) {
this.template = template;
}
public boolean isEspecial() {
return isEspecial;
}
public void setEspecial(boolean isEspecial) {
this.isEspecial = isEspecial;
}
public String[] getBefore() {
return before;
}
public void setBefore(String[] before) {
this.before = before;
}
public String[] getAfter() {
return after;
}
public void setAfter(String[] after) {
this.after = after;
}
public boolean isAfter() {
return isAfter;
}
public void setAfter(boolean isAfter) {
this.isAfter = isAfter;
}
}
检测样本:
http://www.tmall.com/?sid=5b380c886d65dc29d0338a5a201e398d,
http://www.taobao.com,
http://item.taobao.com/item.htm?id=7719538630,
http://item.taobao.com/item.htm?id=5272733064&ad_id=&am_id=&cm_id=&pm_id=,
http://www.360buy.com/product/437741.html,
http://www.360buy.com/product/1000513506.html,
http://www.vipshop.com/sh.html,
http://www.vipshop.com/detail-791891-0-3.html,
http://www.newegg.com.cn/Product/A28-800-4ER.htm,
http://www.newegg.com.cn/Product/12-c03-081.htm,
http://product.dangdang.com/product.aspx?product_id=22476442#ref=www-0-H,
http://product.dangdang.com/Product.aspx?product_id=60121683,
http://item.vancl.com/0029646.html?ref=sr_1_10_14b63acbf943465689a5b8a7b8ecac89,
http://item.vancl.com/0041439.html?ref=shirts_7_0041439,
http://item.mbaobao.com/pshow-1101017703.html?l=1f41,
http://item.mbaobao.com/pshow-1205000303.html?s=gl_4f_6,
http://www.yihaodian.com/product/detail.do?productID=1108577&merchantID=3,
http://www.yihaodian.com/product/1178936_3,
http://www.taoxie.com/commodity-18011.htm,
http://www.taoxie.com/commodity-16161-1.htm,
http://www.amazon.cn/%E7%91%9E%E5%A3%AB-%E6%AD%A3%E5%93%81-Longines-%E6%B5%AA%E7%90%B4-%E5%90%8D%E5%8C%A0-%E8%87%AA%E5%8A%A8%E6%9C%BA%E6%A2%B0%E7%94%B7%E8%A1%A8L2-518-4-78-6/dp/B005KXPOWQ/ref=sr_1_1?s=watch&ie=UTF8&qid=1315378677&sr=1-1,
http://www.amazon.cn/s/ref=amb_link_29479872_2?ie=UTF8&n=888502051&pf_rd_m=A1AJ19PSB66TGU&pf_rd_s=center-1&pf_rd_r=0FBW6Y9N0XD72GZM1PVB&pf_rd_t=101&pf_rd_p=60331932&pf_rd_i=899254051,
http://book.360buy.com/10617807.html,
http://mvd.360buy.com/20045418.html,