Java代码示例
package javaTest2077;//记得修改自己的package
import java.io.BufferedReader;//accelate the speed of reading
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.*;
import java.util.IllegalFormatCodePointException;
import java.util.regex.*;
public class Request {

    /**
     * Compiled once and reused: captures the quoted value that follows the
     * "ASN归属地" key in the ip138.com lookup page. Group 1 is the location
     * text without the surrounding quotes.
     */
    private static final Pattern ASN_LOCATION =
            Pattern.compile("\"ASN归属地\"\\s*:\\s*\"(.*?)\"");

    /**
     * Looks up the geographic location of an IP address via ip138.com.
     *
     * @param ip the IP address to look up; may be null or empty
     * @return the "ASN归属地" value extracted from the response, or null when
     *         the ip is missing, the request fails, or no match is found
     */
    public static String getAddressByIp(String ip) {
        if (ip == null || ip.equals("")) {
            return null;
        }
        String httpUrl = "https://www.ip138.com/iplookup.asp";
        String thisUrl = httpUrl + "?ip=" + ip + "&action=2";
        System.out.println(thisUrl);
        try {
            URL url = new URL(thisUrl);
            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            // The site rejects requests that don't look like a browser, so a
            // Referer and User-Agent header are required.
            connection.setRequestMethod("GET");
            connection.setRequestProperty("Referer", thisUrl);
            connection.setRequestProperty("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36 SLBrowser/7.0.0.12151 SLBChan/30");
            connection.connect();
            StringBuilder sbf = new StringBuilder();
            // try-with-resources closes the reader even when reading throws;
            // the original leaked it on any exception before reader.close().
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), "gbk"))) {
                String strRead;
                while ((strRead = reader.readLine()) != null) {
                    sbf.append(strRead).append("\r\n");
                }
            } finally {
                connection.disconnect();
            }
            Matcher matcher = ASN_LOCATION.matcher(sbf.toString());
            if (matcher.find()) {
                // Return the first captured location. The original called
                // matcher.group() AFTER the find() loop was exhausted, which
                // always threw IllegalStateException and made the method
                // return null via the catch block.
                String location = matcher.group(1);
                System.out.println(location);
                return location;
            }
        } catch (Exception e) {
            // Best-effort lookup: report failure but keep the cause visible
            // instead of silently discarding the exception.
            System.out.println("获取IP地址失败");
            e.printStackTrace();
        }
        return null;
    }

    public static void main(String[] args) throws Exception {
        // getAddressByIp is static; no instance is needed.
        getAddressByIp("137.172.142.47");
    }
}
Python代码示例
import re
import requests

# IP address to look up; "action=2" selects ip138's JSON-ish lookup page.
ip = "223.172.142.47"
url = f"https://www.ip138.com/iplookup.asp?ip={ip}&action=2"
# ip138.com rejects requests that don't look like a browser, so send a
# browser User-Agent and a Referer pointing back at the lookup URL.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36',
    'Referer': url
}
try:
    # The network call and the decode are what can actually fail, so they
    # belong inside the try block — the original guarded only re.findall,
    # which never raises here, so the error message could never trigger.
    # timeout prevents the request from hanging indefinitely.
    result = requests.get(url=url, headers=header, timeout=10)
    result.raise_for_status()  # treat HTTP 4xx/5xx as failure, not empty output
    text = result.content.decode('gbk')  # the page is GBK-encoded, not UTF-8
    print(re.findall('"ASN归属地":"(.*?)"', text))
except (requests.RequestException, UnicodeDecodeError):
    print('获取ip所在地区失败!')
两者都是用正则表达式提取数据
你们觉得哪种写法的爬虫比较好呢?