整理搜狗2008版新闻分类语料库和全网新闻分类语料库,并转存至MySQL数据库

该博客介绍了如何将搜狗2008版新闻分类语料库和全网新闻分类语料库整理并转存至MySQL数据库。通过分析URL前缀,确定新闻类别如汽车、财经、IT、健康等,并提供了相应的Java代码示例。
摘要由CSDN通过智能技术生成

packagezju.dawn.ai.corpus.sogou;publicfinalclassCategoryDetector {privatestaticString category=null;publicstaticString detectCategory(String url) {//xinhuanet.if(url.startsWith("http://www.xinhuanet.com/auto/")) {

category="car";

}elseif(url.startsWith("http://www.xinhuanet.com/fortune")) {

category="finance";

}elseif(url.startsWith("http://www.xinhuanet.com/internet/")) {

category="IT";

}elseif(url.startsWith("http://www.xinhuanet.com/health/")) {

category="health";

}elseif(url.startsWith("http://www.xinhuanet.com/sports")) {

category="sports";

}elseif(url.startsWith("http://www.xinhuanet.com/travel")) {

category="travel";

}elseif(url.startsWith("http://www.xinhuanet.com/edu")) {

category="education";

}elseif(url.startsWith("http://www.xinhuanet.com/employment")) {

category="employment";

}elseif(url.startsWith("http://www.xinhuanet.com/life")) {

category="culture";

}elseif(url.startsWith("http://www.xinhuanet.com/mil")) {

category="military";

}elseif(url.startsWith("http://www.xinhuanet.com/olympics/")) {

category="olympics";

}elseif(url.startsWith("http://www.xinhuanet.com/society")||url.startsWith("http://www.xinhuanet.com/local/")||url.startsWith("http://www.xinhuanet.com/world")) {

category="society";

}elseif(url.startsWith("http://www.xinhuanet.com/house")) {

category="house";

}elseif(url.startsWith("http://www.xinhuanet.com/ent")) {

category="ent";

}elseif(url.startsWith("http://www.xinhuanet.com/lady")) {

category="lady";

}elseif(url.startsWith("http://www.xinhuanet.com/school")) {

category="school";

}//chinaif(url.startsWith("http://auto.china.com/")) {

category="car";

}elseif(url.startsWith("http://caifu.china.com/")) {

category="finance";

}elseif(url.startsWith("http://tech.china.com/zh_cn/news/net/")) {

category="IT";

}elseif(url.startsWith("http://health.china.com/")) {

category="health";

}elseif(url.startsWith("http://sports.china.com/")) {

category="sports";

}elseif(url.startsWith("http://goo66.china.com/")) {

category="travel";

}elseif(url.startsWith("http://edu.533.com/")) {

category="education";

}elseif(url.startsWith("http://culture.china.com/")) {

category="culture";

}elseif(url.startsWith("http://military.china.com/")) {

category="military";

}elseif(url.startsWith("http://2008.china.com/")) {

category="olympics";

}elseif(url.startsWith("http://news.china.com/zh_cn/social/")||url.startsWith("http://news.china.com/zh_cn/domestic/")||url.startsWith("http://news.china.com/zh_cn/international/")) {

category="society";

}elseif(url.startsWith("http://china.soufun.com/")) {

category="house";

}elseif(url.startsWith("http://fun.china.com/zh_cn/star/")) {

category="ent";

}elseif(url.startsWith("http://meirong.533.com/")) {

category="lady";

}elseif(url.startsWith("http://edu.533.com/news/xiaoyuan/")) {

category="school";

}//sina.com.cnif(url.startsWith("http://auto.sina.com.cn/")) {

category="car";

}elseif(url.startsWith("http://finance.sina.com.cn/")) {

category="finance";

}elseif(url.startsWith("http://tech.sina.com.cn/it/")) {

category="IT";

}elseif(url.startsWith("http://sina.kangq.com/")) {

category="health";

}elseif(url.startsWith("http://sports.sina.com.cn/")) {

category="sports";

}elseif(url.startsWith("http://tour.sina.com.cn/")) {

category="travel";

}elseif(url.startsWith("http://edu.sina.com.cn/j/")) {//employment和education不能互换.category="employment";

}elseif(url.startsWith("http://edu.sina.com.cn/")) {

category="education";

}elseif(url.startsWith("http://cul.book.sina.com.cn/")) {

category="culture";

}elseif(url.startsWith("http://mil.news.sina.com.cn/")) {

category="military";

}elseif(url.startsWith("http://2008.sina.com.cn/")) {

category="olympics";

}elseif(url.startsWith("http://news.sina.com.cn/society/")||url.startsWith("http://news.sina.com.cn/china/")||url.startsWith("http://news.sina.com.cn/world/")) {

category="society";

}elseif(url.startsWith("http://house.sina.com.cn/")) {

category="house";

}elseif(url.startsWith("http://ent.sina.com.cn/")) {

category="ent";

}elseif(url.startsWith("http://eladies.sina.com.cn/")) {

category="lady";

}elseif(url.startsWith("http://edu.sina.com.cn/y/")) {

category="school";

}//163.comif(url.startsWith("http://auto.163.com/")) {

category="car";

}elseif(url.startsWith("http://money.163.com/")) {

category="finance";

}elseif(url.startsWith("http://tech.163.com/it/")) {

category="IT";

}elseif(url.startsWith("http://163.39.net/")) {

category="health";

}elseif(url.startsWith("http://sports.163.com/")) {

category="sports";

}elseif(url.startsWith("http://war.163.com/")) {

category="military";

}elseif(url.startsWith("http://2008.163.com/")) {

category="olympics";

}elseif(url.startsWith("http://news.163.com/shehui/")||url.startsWith("http://news.163.com/domestic/")||url.startsWith("http://news.163.com/world/")) {

category="society";

}elseif(url.startsWith("http://house.163.com/")) {

category="house";

}elseif(url.startsWith("http://ent.163.com/")) {

category="ent";

}elseif(url.startsWith("http://lady.163.com/")) {

category="lady";

}//qq.comif(url.startsWith("http://auto.qq.com/")) {

category="car";

}elseif(url.startsWith("http://finance.qq.com/")) {

category="finance";

}elseif(url.startsWith("http://tech.qq.com/a/")) {

category="IT";

}elseif(url.startsWith("http://sports.qq.com/")) {

category="sports";

}elseif(url.startsWith("http://edu.qq.com/job/")) {//employment和education不能互换.category="employment";

}elseif(url.startsWith("http://edu.qq.com/")) {

category="education";

}elseif(url.startsWith("http://cul.qq.com/")) {

category="culture";

}elseif(url.startsWith("http://mil.qq.com/")) {

category="military";

}elseif(url.startsWith("http://news.qq.com/a/")) {

category="society";

}elseif(url.startsWith("http://2008.qq.com/")) {

category="olympics";

}elseif(url.startsWith("http://house.qq.com/")) {

category="house";

}elseif(url.startsWith("http://ent.qq.com/")) {

category="ent";

}elseif(url.startsWith("http://lady.qq.com/")) {

category="lady";

}elseif(url.startsWith("http://campus.qq.com/")) {

category="school";

}//sohu.comif(url.startsWith("http://auto.sohu.com/")) {

category="car";

}elseif(url.startsWith("http://business.sohu.com/")) {

category="finance";

}elseif(url.startsWith("http://it.sohu.com/")) {

category="IT";

}elseif(url.startsWith("http://health.sohu.com/")) {

category="health";

}elseif(url.startsWith("http://sports.sohu.com/")) {

category="sports";

}elseif(url.startsWith("http://travel.sohu.com/")) {

category="travel";

}elseif(url.startsWith("http://learning.sohu.com/")) {

category="education";

}elseif(url.startsWith("http://career.sohu.com/")) {

category="employment";

}elseif(url.startsWith("http://cul.sohu.com/")) {

category="culture";

}elseif(url.startsWith("http://news.sohu.com/")) {

category="society";

}elseif(url.startsWith("http://mil.news.sohu.com/")) {

category="military";

}elseif(url.startsWith("http://2008.sohu.com/")) {

category="olympics";

}elseif(url.startsWith("http://house.sohu.com/")) {

category="house";

}elseif(url.startsWith("http://yule.sohu.com/")) {

category="ent";

}elseif(url.startsWith("http://women.sohu.com/")) {

category="lady";

}returncategory;

}publicstaticvoidmain(String args[]) {

category=CategoryDetector.detectCategory("http://edu.sina.com.cn/");

System.out.println(category);

}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值