import sys
import os
import requests
from bs4 import BeautifulSoup
import socket
import re
import pandas as pd
import random
import time
# Look up the location text for an IP address on ip.chinaz.com.
def matchip(ip):
    """Fetch the lookup page for ``ip`` and record the location text found.

    Requests ``http://ip.chinaz.com/<ip>``, parses the response with
    BeautifulSoup, and scans the ``span`` tags selected by
    ``class_='Whwtdhalf w50-0'``. For every such tag whose text does not
    contain the header label "IP的物理位置", ``ip`` is appended to the
    module-level list ``l_ip`` and the tag text to ``l_location``.

    Args:
        ip: Address to look up; it is converted with ``str()`` to build
            the URL.

    Returns:
        None. Results are appended to ``l_ip`` and ``l_location``, which
        must already exist at module level when this function is called.
        If the HTTP request fails, a message is written to stderr and
        nothing is appended for this address.
    """
    url = "http://ip.chinaz.com/" + str(ip)
    # NOTE: requesting too frequently may trigger the site's anti-scraping
    # measures; consider throttling here, e.g.
    # time.sleep(random.uniform(1, 3.5)).
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        wbdata = requests.get(url, timeout=10).text
    except requests.RequestException as err:
        # Skip this address instead of aborting the remaining lookups.
        print("request failed for", url, err, file=sys.stderr)
        return
    soup = BeautifulSoup(wbdata, 'lxml')
    for tag in soup.find_all('span', class_='Whwtdhalf w50-0'):
        text = tag.get_text()
        # str.find() returns -1 (truthy) on a miss and 0 (falsy) on a match
        # at index 0, so it cannot serve as a boolean; test membership.
        if "IP的物理位置" in text:
            continue
        l_ip.append(ip)
        l_location.append(text)
if __name__ == '__main__':
    # Module-level accumulators that matchip() appends to.
    l_ip = []
    l_location = []

    # Read the addresses to look up from the `user_ip` column of in.csv.
    source = pd.read_csv('in.csv')
    for count, address in enumerate(source.user_ip, start=1):
        # 1-based progress indicator.
        print('iter', count)
        matchip(address)

    # Write the collected (ip, location) pairs to out.csv.
    result = pd.DataFrame({'ip': l_ip, '地区': l_location})
    result.to_csv('out.csv')
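# Input data format: the `user_ip` column of in.csv holds one IP address per row, e.g. 192.168.0.1
# The output format is as follows: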