#! /usr/bin/env python3
# -*- coding:utf-8 -*-
"""Fetch proxy IP addresses from a given URL.

Currently only a single page can be fetched.
"""
__author__ = 'ken'
import urllib.request;
import urllib.parse;
import re;
import os;
# Absolute path of the directory that contains this source file.
curPath = os.path.dirname(os.path.abspath(__file__))
class proxy:
    """Download a web page that lists proxy servers.

    Only one page is retrieved per call. The step that would extract the
    proxy addresses from the downloaded page is still a placeholder.
    """

    def __init__(self):
        """Create a fetcher; no instance state is kept."""
        pass

    def getIp(self, siteUrl):
        """Fetch ``siteUrl`` and decode the response body to text.

        Args:
            siteUrl: Address of the page to download; passed unchanged to
                the opener's ``open`` method.

        Returns:
            None. The decoded page is only held locally for the
            not-yet-written processing step.

        Raises:
            Whatever ``opener.open`` raises when the page cannot be
            retrieved (for example ``urllib.error.URLError``).
        """
        # 1. Build an opener with cookie support.
        opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor()
        )
        # 2. Send the request and read the payload. The ``with`` block
        #    closes the response even if reading fails, so the underlying
        #    connection is not leaked.
        with opener.open(siteUrl) as response:
            raw_body = response.read()
            charset = response.headers.get_content_charset()
        # 3. Convert the bytes to text. ``read()`` always yields bytes on
        #    Python 3, so no ``str`` special case is needed.
        request_html = self._decode_body(raw_body, charset)
        # 4. Data processing
        # ......

    @staticmethod
    def _decode_body(raw_body, charset):
        """Decode ``raw_body`` with ``charset``, defaulting to UTF-8.

        Undecodable bytes are replaced rather than raising, so a single bad
        byte does not abort the whole fetch.
        """
        try:
            return raw_body.decode(charset or 'utf-8', errors='replace')
        except LookupError:
            # The server declared a codec Python does not know about.
            return raw_body.decode('utf-8', errors='replace')
if __name__ == '__main__':
    # Usage example: download one page of the proxy listing.
    example_url = "http://www.youdaili.net/Daili/http/3464_3.html"
    proxy().getIp(example_url)
# Reposted from: https://my.oschina.net/kenblog/blog/486509