废话不多说，直接上代码！
# coding:utf-8
import urllib
import requests
import bs4
import json
import re
def get_IP():
"""获取代理IP
"""
url = "http://www.xicidaili.com/nn/"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',}
session = requests.session()
html = session.get(url, headers = headers).text
table = bs4.BeautifulSoup(html, 'lxml')
IP_lists = table.find('table', attrs={
'id':'ip_list'}).find_all('tr')
ip_list = []
for IP_list in IP_lists[1:]:
lists = IP_list.find_all('td')
ip = {
'ip':