import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
/**
 * Demo: asks Baidu image search what a picture probably shows and prints the
 * guessed keywords.
 *
 * <p>The class depends on two helper types that are not part of this file:
 * {@code HttpClient} (the original author's GET/POST utility) and
 * {@code StringSubClass} (the original author's "text between two markers"
 * utility). Replace them with your own implementations.
 */
public class BaiduShiTuDemo {

    /**
     * Looks up the keywords for a sample image and prints one keyword per line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] keyWord = whatTheFuckImage("http://d.hiphotos.baidu.com/image/pic/item/c9fcc3cec3fdfc0389d07a7ed13f8794a5c2264f.jpg");
        // whatTheFuckImage returns null when nothing could be extracted;
        // iterating over null would throw a NullPointerException.
        if (keyWord == null) {
            System.err.println("No keywords could be extracted for the image.");
            return;
        }
        for (String string : keyWord) {
            System.out.println(string);
        }
    }

    /**
     * Queries Baidu's image search page for the given image URL and extracts
     * the guessed keywords from the returned HTML.
     *
     * @param imageUrl publicly reachable URL of the image to identify
     * @return the keyword array, or {@code null} when the URL is {@code null},
     *         the request yields no content, or the expected markup is missing
     */
    static String[] whatTheFuckImage(String imageUrl) {
        if (imageUrl == null) {
            // URLEncoder.encode would throw a NullPointerException on null input.
            return null;
        }
        // The original author's GET/POST helper; use your own HTTP client here.
        HttpClient hc = new HttpClient();
        // The original author's substring helper: extracts the text between two markers.
        StringSubClass ss = new StringSubClass();

        String temp;
        try {
            temp = hc.get("http://image.baidu.com/n/pc_search?queryImageUrl=" + URLEncoder.encode(imageUrl, "utf-8") + "&fm=result&pos=&uptype=drag",
                    "utf-8", null, true);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            // Without a response there is nothing to parse; do not fall through with null.
            return null;
        }
        if (temp == null) {
            // NOTE(review): assumes the helper may return null on a failed request - confirm.
            return null;
        }

        // Cut out the content that follows the "guess-info-text" marker up to its closing </div>.
        temp = ss.subStringOne(temp, "guess-info-text", "</div>");
        if (temp == null) {
            return null;
        }

        // One entry per keyword link inside that fragment.
        String[] tempData = ss.subStringAll(temp, "guess-info-word-link", "/a>");
        if (tempData == null) {
            // NOTE(review): assumes the helper may return null when nothing matches - confirm.
            return null;
        }
        for (int i = 0; i < tempData.length; i++) {
            // Keep only the link text, i.e. whatever sits between '>' and '<'.
            tempData[i] = ss.subStringOne(tempData[i], ">", "<");
        }
        return tempData;
    }
}
通过 HTTP GET 请求访问上面的网址获取响应内容,其中与识图结果相关的 HTML 片段一般如下:
<div id="guessInfo" class="guess-info clearfix">
<div class="guess-info-title">您搜索的图片可能是:</div>
<div class="guess-info-text">
<a class="guess-info-word-link" href="http://www.baidu.com/s?word=%E7%BE%8E%E5%A5%B3%E5%9B%BE" data-word-index="0" target="_blank">美女图</a>
<a class="guess-info-word-link" href="http://www.baidu.com/s?word=%E5%A5%B3%E7%A5%9E" data-word-index="1" target="_blank">女神</a>
<a class="guess-info-word-link" href="http://www.baidu.com/s?word=%E7%BE%8E%E8%83%B8" data-word-index="2" target="_blank">美胸</a>
</div>
直接截取 class 为 guess-info-text 的 <div>,再提取其中每个 <a> 标签内的文本,就可以得到关键字了。