在Python中使用代理IP池可以通过以下步骤实现:
- 导入所需的库:
import requests from bs4 import BeautifulSoup
- 获取代理IP列表:
def get_proxy_list():
    """Scrape ``ip:port`` proxy addresses from an HTML table page.

    Downloads the listing page, walks every ``<tr>`` element and reads the
    first two ``<td>`` cells of each row as the IP address and the port.

    Returns:
        list[str]: Proxy addresses formatted as ``ip:port``, in page order.
        Empty when the page contains no usable rows.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (connection failure, timeout, or an HTTP error status).
    """
    url = 'http://www.example.com/proxy'  # URL of the proxy IP list page

    # requests waits indefinitely unless a timeout is given, so bound it.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a proxy table.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    proxy_list = []
    for row in soup.find_all('tr'):
        columns = row.find_all('td')
        if len(columns) < 2:
            # Rows without at least two <td> cells carry no ip/port pair.
            continue
        ip = columns[0].text.strip()
        port = columns[1].text.strip()
        # Skip blank cells and non-numeric ports so junk rows never become
        # malformed proxy addresses.
        if ip and port.isdigit():
            proxy_list.append(f'{ip}:{port}')
    return proxy_list
- 随机选择一个代理IP:
import random


def get_random_proxy(proxy_list):
    """Return one proxy picked uniformly at random from *proxy_list*.

    Args:
        proxy_list: Non-empty sequence of proxy addresses (for example the
            ``ip:port`` strings produced by a scraper).

    Returns:
        One element of *proxy_list*.

    Raises:
        IndexError: If *proxy_list* is empty. This is the same exception type
            ``random.choice`` raises, but with a message that names the
            actual problem.
    """
    if not proxy_list:
        raise IndexError('proxy_list is empty: no proxy available to choose from')
    return random.choice(proxy_list)
- 使用代理IP发送请求:
def send_request(url, proxy, timeout=10):
    """Fetch *url* through *proxy* and return the response body.

    Args:
        url: Target URL to request.
        proxy: Proxy address as ``host:port``. A value that already contains
            a scheme (``scheme://host:port``) is used unchanged.
        timeout: Seconds to wait for the proxy/server before giving up;
            passed straight to ``requests.get``.

    Returns:
        str | None: The response text, or ``None`` when the request failed
        (the error is printed).
    """
    # The scheme in a proxy URL says how to talk to the proxy itself, not to
    # the target site. Ordinary HTTP proxies tunnel HTTPS via CONNECT and must
    # be addressed as http://; using https:// would attempt a TLS handshake
    # with the proxy and fail.
    proxy_url = proxy if '://' in proxy else f'http://{proxy}'
    proxies = {
        'http': proxy_url,
        'https': proxy_url,
    }
    try:
        # Without a timeout a dead proxy would block this call forever.
        response = requests.get(url, proxies=proxies, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(e)
        return None
    # Process the response data
    return response.text
- 完整示例代码:
import random

import requests
from bs4 import BeautifulSoup


def get_proxy_list():
    """Scrape ``ip:port`` proxy addresses from an HTML table page.

    Downloads the listing page, walks every ``<tr>`` element and reads the
    first two ``<td>`` cells of each row as the IP address and the port.

    Returns:
        list[str]: Proxy addresses formatted as ``ip:port``, in page order.
        Empty when the page contains no usable rows.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (connection failure, timeout, or an HTTP error status).
    """
    url = 'http://www.example.com/proxy'  # URL of the proxy IP list page

    # requests waits indefinitely unless a timeout is given, so bound it.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a proxy table.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    proxy_list = []
    for row in soup.find_all('tr'):
        columns = row.find_all('td')
        if len(columns) < 2:
            # Rows without at least two <td> cells carry no ip/port pair.
            continue
        ip = columns[0].text.strip()
        port = columns[1].text.strip()
        # Skip blank cells and non-numeric ports so junk rows never become
        # malformed proxy addresses.
        if ip and port.isdigit():
            proxy_list.append(f'{ip}:{port}')
    return proxy_list


def get_random_proxy(proxy_list):
    """Return one proxy picked uniformly at random from *proxy_list*.

    Raises:
        IndexError: If *proxy_list* is empty.
    """
    if not proxy_list:
        raise IndexError('proxy_list is empty: no proxy available to choose from')
    return random.choice(proxy_list)


def send_request(url, proxy, timeout=10):
    """Fetch *url* through *proxy* and return the response body.

    Args:
        url: Target URL to request.
        proxy: Proxy address as ``host:port``. A value that already contains
            a scheme (``scheme://host:port``) is used unchanged.
        timeout: Seconds to wait for the proxy/server before giving up;
            passed straight to ``requests.get``.

    Returns:
        str | None: The response text, or ``None`` when the request failed
        (the error is printed).
    """
    # The scheme in a proxy URL says how to talk to the proxy itself, not to
    # the target site. Ordinary HTTP proxies tunnel HTTPS via CONNECT and must
    # be addressed as http://; using https:// would attempt a TLS handshake
    # with the proxy and fail.
    proxy_url = proxy if '://' in proxy else f'http://{proxy}'
    proxies = {
        'http': proxy_url,
        'https': proxy_url,
    }
    try:
        # Without a timeout a dead proxy would block this call forever.
        response = requests.get(url, proxies=proxies, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(e)
        return None
    # Process the response data
    return response.text


# Usage example
proxy_list = get_proxy_list()
if proxy_list:
    proxy = get_random_proxy(proxy_list)
    url = 'http://www.example.com'
    response = send_request(url, proxy)
    if response:
        print(response)
else:
    # An empty pool would otherwise crash inside get_random_proxy.
    print('No proxies found; nothing to request.')