# -*- coding: utf-8 -*-
# @Time: 2020/1/2 19:03
# @Author: gaoyanshun
# @ Site:
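# @File: 获取西刺代理 (fetch Xici proxies)
# @ Explain: scrape the Xici free-proxy listing page and build a small proxy list
# Current maintainer: gaoyanshun
# Change log: time, username, change description. Most recent entry first.
# Environment: Python 2.7, requests, bs4 (with the lxml parser), re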
import requests
from bs4 import BeautifulSoup
import re
# Browser-like User-Agent string used for the proxy-list request.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)'

# HTTP headers handed to requests; currently only the User-Agent is set.
headers = {}
headers['User-Agent'] = user_agent
def getListProxies(limit=10, url="http://www.xicidaili.com/nn", timeout=10):
    """Scrape a proxy listing page and return the proxies found on it.

    The page at ``url`` is fetched with the module-level ``headers`` and
    parsed with BeautifulSoup/lxml.  For every matching table row, the text
    of the second and third ``<td>`` cells is joined as ``"<cell1>:<cell2>"``
    and used as the proxy address.

    :param limit: maximum number of proxies to return (default 10, the
        value that was previously hard-coded).
    :param url: listing page to scrape (default: the original Xici URL).
    :param timeout: seconds to wait for the HTTP request before
        ``requests`` raises a timeout error, instead of blocking forever.
    :return: list of dicts shaped ``{'http': addr, 'https': addr}``, the
        mapping format accepted by the ``proxies=`` argument of requests.
        May be empty if no usable rows are found.
    :raises requests.RequestException: on connection failures or timeout.
    """
    session = requests.session()
    try:
        page = session.get(url, headers=headers, timeout=timeout)
        html = page.text
    finally:
        # Release pooled connections even when the request fails.
        session.close()

    soup = BeautifulSoup(html, 'lxml')
    proxyList = []
    # NOTE(review): the empty alternative in this pattern matches any class
    # value, so this effectively selects <tr> elements by their class
    # attribute rather than by "odd" specifically -- presumably to skip a
    # class-less header row; verify against the live page markup.
    taglist = soup.find_all('tr', attrs={'class': re.compile("(odd)|()")})
    for trtag in taglist:
        # Cap the number of proxies collected.
        if len(proxyList) >= limit:
            break
        tdlist = trtag.find_all('td')
        # Rows without at least three cells cannot supply both parts.
        if len(tdlist) < 3:
            continue
        host = tdlist[1].string
        port = tdlist[2].string
        # .string is None for empty cells or cells with nested markup.
        if not host or not port:
            continue
        address = host.strip() + ':' + port.strip()
        proxyList.append({'http': address, 'https': address})
    return proxyList
# Fetch the proxy list once and report how many entries were scraped.
res = getListProxies()
# Single-argument print(...) produces the same output on Python 2 and 3,
# unlike the bare `print x` statement, which is a syntax error on Python 3.
print(len(res))
print(res)
# Fetch Xici proxy IPs to build a proxy pool
# (web-page residue: "latest recommended article published 2020-02-15 19:14:37")