Fetching proxy IPs for web scraping

import random
import time

import requests
from bs4 import BeautifulSoup

# A browser-like User-Agent; without one the site may reject the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
}

def get_ip_list(url, ip_list):
    print("Fetching proxy list...")
    res = requests.get(url, headers=headers)
    soup = BeautifulSoup(res.text, 'lxml')
    # The proxy table is the third <table> on a 66ip.cn listing page.
    tab = soup.find_all('table')[2]
    for tr in tab.find_all('tr')[1:]:  # skip the header row
        tds = tr.find_all('td')
        ip_list.append(tds[0].text + ':' + tds[1].text)  # "ip:port"
    print("Proxy list fetched successfully.")
    return ip_list

def get_random_ip(ip_list):
    print("Picking a random proxy...")
    proxy_list = ['http://' + ip for ip in ip_list]
    proxy_ip = random.choice(proxy_list)
    proxies = {'http': proxy_ip}
    print("Proxy set successfully.")
    return proxies

urls = ['http://www.66ip.cn/{}.html'.format(x) for x in range(1, 5)]
ip_list = []
for url in urls:
    get_ip_list(url, ip_list)
    time.sleep(1)  # pause between pages to avoid hammering the site
proxies = get_random_ip(ip_list)
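Free proxies scraped this way are frequently dead or very slow, so it is worth verifying a proxy before crawling through it. The sketch below is a minimal illustration, assuming ip_list has been filled as above; the httpbin.org/ip test endpoint, the 5-second timeout, and the example.com target are illustrative assumptions, not part of the original script.

import random
import requests

def check_proxy(ip, test_url='http://httpbin.org/ip', timeout=5):
    # Returns True if the proxy answers a test request in time.
    # test_url and timeout are assumed values; any stable endpoint works.
    try:
        res = requests.get(test_url,
                           proxies={'http': 'http://' + ip},
                           timeout=timeout)
        return res.status_code == 200
    except requests.RequestException:
        return False

# Keep only proxies that actually respond, then crawl through one of them.
alive = [ip for ip in ip_list if check_proxy(ip)]
if alive:
    proxies = {'http': 'http://' + random.choice(alive)}
    # example.com stands in for whatever page you actually want to crawl.
    page = requests.get('http://example.com', headers=headers,
                        proxies=proxies, timeout=5)
    print(page.status_code)
else:
    print("No working proxies found.")

Checking proxies one by one is slow for long lists; mapping check_proxy over a concurrent.futures.ThreadPoolExecutor is a common speedup, omitted here to keep the example short.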