#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @time :2020-03-23 11:53:22
import random
import requests
from bs4 import BeautifulSoup
import re
import os.path
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)'
headers = {'User-Agent': user_agent}
def getListProxies():
    session = requests.session()
    page = session.get("http://www.xicidaili.com/nn", headers=headers)
    soup = BeautifulSoup(page.text, 'lxml')
    proxyList = []
    taglist = soup.find_all('tr', attrs={'class': re.compile("(odd)|()")})
    for trtag in taglist:
        tdlist = trtag.find_all('td')
        # Column 1 holds the IP address and column 2 the port
        proxy = {'http': 'http://' + tdlist[1].string + ':' + tdlist[2].string,
                 'https': 'https://' + tdlist[1].string + ':' + tdlist[2].string}
        # url = "http://ip.chinaz.com/getip.aspx"  # URL for testing whether an IP works (the site no longer seems to be reachable)
        # try:
        #     print('proxy is ', proxy)
        #     response = session.get(url, proxies=proxy, timeout=5)
        #     print(response)
        #     proxyList.append(proxy)
        #     if len(proxyList) == 3:
        #         break
        # except Exception:
        #     continue
        proxyList.append(proxy)
        # Cap the number of proxy IPs collected
        if len(proxyList) >= 10:
            break
    return proxyList
def test_ip():
    url = 'http://httpbin.org/get'
    # IP = getListProxies()[random.randint(0, 10)]
    # print(IP)
    # res = requests.get(url, proxies=IP)
    # print(res.text)
    IP_list = getListProxies()
    while True:
        # Refetch the proxy list once every candidate has been tried
        if len(IP_list) == 0:
            IP_list = getListProxies()
        try:
            IP = IP_list.pop()
            print(IP)
            res = requests.get(url, proxies=IP, timeout=5)
            print(res.text)
            # Return the first proxy that successfully reaches httpbin.org
            return IP
        except requests.RequestException:
            continue
if __name__ == '__main__':
    test_ip()
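As an aside, random is imported above but only used in commented-out code; the sketch below is not part of the original script (fetch_with_random_proxy is a hypothetical helper name) and shows one way to rotate through the scraped proxies by picking one at random per request, reusing the httpbin.org test URL from test_ip().
# Illustrative sketch (assumption, not in the original): pick a proxy at random
# per request and drop it from the pool if it fails.
def fetch_with_random_proxy(url='http://httpbin.org/get'):
    proxies = getListProxies()
    while proxies:
        proxy = random.choice(proxies)
        try:
            return requests.get(url, proxies=proxy, timeout=5)
        except requests.RequestException:
            proxies.remove(proxy)  # discard the dead proxy and try another
    return None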
Free proxies:
https://ip.jiangxianli.com/country/%E7%BE%8E%E5%9B%BD?country=%E7%BE%8E%E5%9B%BD
from lxml import etree  # needed for etree.HTML below

def get_g_proxyip():
    url = 'https://ip.jiangxianli.com/country/%E7%BE%8E%E5%9B%BD?country=%E7%BE%8E%E5%9B%BD'
    Headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36',
    }
    t_list = []
    try:
        res = requests.get(url, headers=Headers)
        html = etree.HTML(res.text)
        data_list = html.xpath('//table[@class="layui-table"]//tr')
        # Skip the header row; column 1 is the IP and column 2 the port
        for data in data_list[1:]:
            ip = ''.join(data.xpath('./td[1]/text()')) + ':' + ''.join(data.xpath('./td[2]/text()'))
            t_list.append({'http': 'http://' + ip, 'https': 'https://' + ip})
    except Exception as e:
        print(e)
        # Retry instead of silently returning an empty list
        return get_g_proxyip()
    return t_list
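A small usage sketch, not part of the original (get_working_proxies and its limit parameter are hypothetical): it filters the list returned by get_g_proxyip() down to proxies that actually answer, reusing the httpbin.org check from test_ip() above.
# Illustrative sketch: keep only the proxies from get_g_proxyip() that respond.
def get_working_proxies(limit=3):
    working = []
    for proxy in get_g_proxyip():
        try:
            requests.get('http://httpbin.org/get', proxies=proxy, timeout=5)
            working.append(proxy)
            if len(working) >= limit:
                break
        except requests.RequestException:
            continue
    return working

print(get_working_proxies())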