批量获取代理ip地址

#coding=utf-8
#作者:须尽欢
# Collect proxy IP addresses from youdaili.net and record the ones that work
import urllib
import urllib2
import re
from bs4 import BeautifulSoup
import os
import socket

# Browser-style User-Agent string sent with the scraping requests below.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
# Header mapping passed as the `headers` argument of urllib2.Request.
headers = {'User-Agent': user_agent}
def _fetch_html(url):
    """Download *url* using the module-level ``headers`` and decode it as UTF-8."""
    response = urllib2.urlopen(urllib2.Request(url, headers=headers))
    try:
        return response.read().decode('utf-8')
    finally:
        # Release the connection even when reading or decoding fails.
        response.close()


def GetProxyIp():
    """Scrape candidate proxies from the first article listed on youdaili.net.

    Fetches the HTTP-proxy index page, follows the first article link that
    matches the expected URL shape, and collects the text of every ``<p>``
    element of that article, keeping only the part before the first ``@``.
    (Presumably each entry reads ``ip:port@...`` -- TODO confirm against the
    live page.)  Paragraphs that are not proxy entries are returned as well;
    they are expected to be weeded out by the later usability check.

    Returns:
        list: the collected strings, in page order.  Empty when the index
        page contains no matching article link.

    Raises:
        urllib2.URLError: if either page cannot be fetched.
        UnicodeDecodeError: if a page is not valid UTF-8.
    """
    url_proxy = 'http://www.youdaili.net/Daili/http/'
    index_html = _fetch_html(url_proxy)

    # Raw string with escaped dots so "." only matches a literal dot.
    pattern = (r'<a href="http://www\.youdaili\.net/Daili/http/(\d+)\.html"'
               r' .*?>.*?</a>')
    article_ids = re.findall(pattern, index_html)
    if not article_ids:
        # Nothing to follow (empty listing or changed page layout): report
        # "no proxies" instead of failing with an IndexError.
        return []

    proxy_urls = url_proxy + article_ids[0] + '.html'
    print(proxy_urls)

    soup = BeautifulSoup(_fetch_html(proxy_urls))
    proxy = []
    for paragraph in soup.select('p'):
        ip_port = paragraph.string
        if ip_port is None:
            # .string is None for paragraphs without a single text child.
            continue
        proxy.append(ip_port.split(u'@')[0])
    return proxy
# Check which of the collected proxy IPs are actually usable
def UsefulIp(proxy):
    """Probe each candidate proxy and record the ones that respond.

    For every entry a request to a fixed test URL is routed through
    ``http://<entry>``.  Entries for which the request completes are appended
    (one per line) to ``ip_proxy.txt`` in the current directory and printed;
    entries that fail in any way are skipped.

    Args:
        proxy: iterable of ``host:port`` strings to test.

    Returns:
        None.

    Raises:
        IOError: if ``ip_proxy.txt`` cannot be opened or written.
    """
    url = "http://ip.chinaz.com/getip.aspx"
    # The context manager closes the file on every exit path, including
    # KeyboardInterrupt in the middle of a slow probe.
    with open("ip_proxy.txt", "a") as f:
        # NOTE: this is a process-wide default and stays in effect after the
        # function returns.
        socket.setdefaulttimeout(3)
        for ip in proxy:
            try:
                proxy_temp = {"http": "http://" + ip}
                response = urllib.urlopen(url, proxies=proxy_temp)
                try:
                    response.read()
                finally:
                    # Do not leave the proxied connection open.
                    response.close()
            except Exception:
                # Best effort: any failure (timeout, refused connection,
                # malformed entry, ...) just means the proxy is unusable.
                continue
            # Outside the try block so a failed write is reported instead of
            # being mistaken for a dead proxy.
            f.write(ip + '\n')
            print(ip)
if __name__ == '__main__':
    # Script entry point: scrape the candidate proxies, then validate them.
    UsefulIp(GetProxyIp())
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值