获取meta里的keywords及description的方法

通过jericho包获取meta里的keywords及description的方法一:
		// 获取meta里的keywords和description
// Ask the parsed source for all elements tagged META.
List segments = source.findAllElements(Tag.META);

getKeywordsDesc(segments);

if (null != segments) {
    String keywordsStr = null;
    String descriptionStr = null;
    int sumSegments = segments.size();
    for (int i = 0; i < sumSegments; i++) {
        String metaStr = segments.get(i).toString();
        if (null == metaStr || "".equals(metaStr.trim())) {
            continue;
        }

        // Description: the tag text mentions both "description" and "content".
        if (metaStr.indexOf("description") != -1
                && metaStr.indexOf("content") != -1) {
            // Strip quotes and slashes so only the raw attribute text remains.
            // NOTE(review): this also removes any '/' inside the content value
            // itself (e.g. URLs) - confirm that is acceptable.
            metaStr = metaStr.replace("\"", "").replace("/", "");
            descriptionStr = metaStr.substring(metaStr.indexOf("content"));
            // Keep what follows the first '=' and drop the last character
            // (the closing '>' of the tag).
            descriptionStr = descriptionStr.substring(
                    descriptionStr.indexOf("=") + 1,
                    descriptionStr.length() - 1);
            descriptionStr = TextHtml.html2text(descriptionStr);
            // Only store the description when this tag actually supplied one;
            // previously the setter ran for every meta tag, including with a
            // null value before any description had been found.
            parserBean.setDescription(removeTag(descriptionStr));
        }

        // Keywords: the tag text mentions both "keywords" and "content".
        if (metaStr.indexOf("keywords") != -1
                && metaStr.indexOf("content") != -1) {
            metaStr = metaStr.replace("\"", "").replace("/", "");
            keywordsStr = metaStr.substring(metaStr.indexOf("content"));
            keywordsStr = keywordsStr.substring(
                    keywordsStr.indexOf("=") + 1,
                    keywordsStr.length() - 1);
            keywordsStr = TextHtml.html2text(keywordsStr);
            parserBean.setKeywords(removeTag(keywordsStr));
        }
    } // end for
}


方法二:
/**
 * Reads the keywords and description out of a list of META tags and stores
 * them in the {@code keyowrds} and {@code description} fields of this object.
 *
 * <p>Each list entry is converted with {@code toString()} and lower-cased
 * before matching, so the stored values are lower-case as well. A field is
 * only assigned when a value could actually be extracted; tags that mention
 * the name but do not match either supported attribute order are skipped
 * instead of causing a {@link NullPointerException}.
 *
 * @param segments the META tag elements to inspect; {@code null} is ignored
 */
private void getKeywordsDesc(List segments) {
    if (null == segments) {
        return;
    }
    int sumSegments = segments.size();
    for (int i = 0; i < sumSegments; i++) {
        Object element = segments.get(i);
        if (null == element) {
            continue;
        }
        String segment = element.toString().toLowerCase();
        if ("".equals(segment.trim())) {
            continue;
        }

        // keywords meta tag
        if (segment.indexOf("keywords") > 0
                && segment.indexOf("content") > 0) {
            String keywords = extractMetaContent(segment, "keywords");
            if (null != keywords) {
                // NOTE(review): the field name is spelled "keyowrds" in the
                // original code; it is declared outside this block, so the
                // spelling is kept as-is.
                this.keyowrds = keywords;
            }
        }

        // description meta tag
        if (segment.indexOf("description") > 0
                && segment.indexOf("content") > 0) {
            String description = extractMetaContent(segment, "description");
            if (null != description) {
                this.description = description;
            }
        }
    }
}

/**
 * Extracts the content value of the META tag with the given name, trying the
 * {@code name ... content} attribute order first and then
 * {@code content ... name}.
 *
 * @param segment  the lower-cased text of one META tag
 * @param metaName the value of the name attribute to look for
 * @return the extracted value with '/' and '"' characters removed, or
 *         {@code null} when neither attribute order matches
 */
private String extractMetaContent(String segment, String metaName) {
    String value = Regex("< *meta *name *= *\"? *" + metaName
            + " *\"? *content *= *\"?(.*) *\"? */? *>", segment);
    if (null == value) {
        value = Regex("< *meta *content *= *\"?(.*) *\"? *name *= *\"? *"
                + metaName + " *\"? */? *>", segment);
    }
    if (null == value) {
        return null;
    }
    String cleaned = removeTag(value);
    if (null == cleaned) {
        return null;
    }
    // The greedy capture can include the closing quote and the '/' of a
    // self-closing tag, so both characters are stripped from the result.
    return cleaned.replace("/", "").replace("\"", "");
}

	/**
	 * Applies a case-insensitive regular expression to the given text and
	 * returns the first capture group of the last match found.
	 *
	 * @param patternStr the regular expression; must define at least one group
	 * @param segment    the text to search
	 * @return capture group 1 of the last match, or {@code null} if nothing matched
	 */
	private String Regex(String patternStr, String segment) {
		Matcher matcher = Pattern.compile(patternStr, Pattern.CASE_INSENSITIVE)
				.matcher(segment);
		String lastCapture = null;
		// Walk through every match; a later match overwrites an earlier one.
		for (boolean found = matcher.find(); found; found = matcher.find()) {
			lastCapture = matcher.group(1);
		}
		return lastCapture;
	}



用htmlparser获取meta里面的keywords及description

// Build a parser for the given URL and collect every node that is a MetaTag.
Parser parser = new Parser(url);
NodeFilter metaOnly = new NodeClassFilter(MetaTag.class);
NodeList metaNodes = parser.extractAllNodesThatMatch(metaOnly);

// Print each meta tag as "<name>:<content>".
for (int i = 0; i < metaNodes.size(); i++) {
    MetaTag meta = (MetaTag) metaNodes.elementAt(i);
    String name = meta.getAttribute("name");
    String content = meta.getAttribute("content");
    System.out.println(name + ":" + content);
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值