urllib2.urlopen()
urllib2是使用各种协议完成打开url的一个扩展包。最简单的使用方式是调用urlopen方法,比如
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
和urllib中不同的是第三个参数为timeout了,所以代理只能在外面设置了。
import urllib2
content_stream = urllib2.urlopen('http://www.baidu.com/')
content = content_stream.read()
print content
而
request = urllib2.Request( url = 'http://www.ideawu.net/?act=send', headers = {'Content-Type' : 'text/xml'}, data = data)
#!/usr/bin/python
#-*-coding:utf-8-*-
import httplib,urllib; #加载模块
#定义需要进行发送的数据
params = urllib.urlencode({'title':'标题','content':'文章'});
#定义一些文件头
headers = {"Content-Type":"application/x-www-form-urlencoded",
"Connection":"Keep-Alive","Referer":"http://mod.qlj.sh.cn/sing/post.php"};
#与网站构建一个连接
conn = httplib.HTTPConnection("http://mod.qlj.sh.cn/sing/");
#开始进行数据提交 同时也可以使用get进行
conn.request(method="POST",url="post.php",body=params,headers=headers);
#返回处理后的数据
response = conn.getresponse();
#判断是否提交成功
if response.status == 302:
print "发布成功!";
else:
print "发布失败";
#关闭连接
conn.close();<span id="more-998"></span>
不使用COOKIES 简单提交
import urllib2, urllib
data = {'name' : 'www', 'password' : '123456'}
f = urllib2.urlopen(
url = 'http://www.ideawu.net/',
data = urllib.urlencode(data)
)
print f.read()#读取全部返回内容
print f.info() #取响应header头所有信息
假设信息如下。
Date: Wed, 26 Aug 2009 08:46:03 GMT
Server: Apache/2.2.9 (Unix) PHP/5.2.6
X-Powered-By: PHP/5.2.6
X-Pingback: http://www.ideawu.net/index.php/XXXX
Content-Type: text/html
Connection: close
Content-Length: 31206
那么,如果只取header某一部分信息,如'Content-Type'部分,用:
# Print only the Content-Type value from the headers of the response `f`.
print f.info().getheader('Content-Type')
使用COOKIES 复杂
import urllib2

# Build one opener with a cookie processor and send both requests through it.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())

# First request: the login URL.
f = opener.open('http://www.ideawu.net/?act=login&name=user01')

# Second request: an XML payload, sent through the same opener.
data = '<root>Hello</root>'
request = urllib2.Request(
    'http://www.ideawu.net/?act=send',
    data,
    {'Content-Type' : 'text/xml'})
opener.open(request)
一个小例子:
一、打开一个网页获取所有的内容
from urllib import urlopen
doc = urlopen("http://www.baidu.com").read()
print doc
二、获取Http头
from urllib import urlopen
doc = urlopen("http://www.baidu.com")
print doc.info()
print doc.info().getheader('Content-Type')
三、使用代理
1 查看环境变量
print ""n".join(["%s=%s" % (k, v) for k, v in os.environ.items()])
print os.getenv("http_proxy")
2 设置环境变量
import os

# Assign through os.environ instead of calling os.putenv(): putenv() changes
# the process environment but does not update os.environ, so a later
# os.getenv("http_proxy") in this same script would not see the new value.
# Replace <port> with the proxy's real port number.
os.environ["http_proxy"] = "http://proxyaddr:<port>"
3 使用代理
# The calls below are alternatives, not a sequence: each one shows a different
# value for the `proxies` argument. `some_url` is a placeholder that is not
# defined in this snippet, and `urllib` must already be imported.

# Use http://www.someproxy.com:3128 for http proxying
proxies = {'http': 'http://www.someproxy.com:3128'}
filehandle = urllib.urlopen(some_url, proxies=proxies)
# Don't use any proxies
filehandle = urllib.urlopen(some_url, proxies={})
# Use proxies from environment - both versions are equivalent
filehandle = urllib.urlopen(some_url, proxies=None)
filehandle = urllib.urlopen(some_url)
详细出处参考:http://www.jb51.net/article/15720.htm
urllib2 post
# -*- coding: cp936 -*-
import urllib2
import urllib
def postHttp(name=None,tel=None,address=None,
price=None,num=None,paytype=None,
posttype=None,other=None):
url="http://www.xxx.com/dog.php"
#定义要提交的数据
postdata=dict(name=name,tel=tel,address=address,price=price,
num=num,paytype=paytype,posttype=posttype,other=other)
#url编码
postdata=urllib.urlencode(postdata)
#enable cookie
request = urllib2.Request(url,postdata)
response=urllib2.urlopen(request)
print response
urllib
import httplib, urllib
params = urllib.urlencode({'@number': 12524, '@type': 'issue', '@action': 'show'})
headers = {"Content-type": "application/x-www-form-urlencoded","Accept": "text/plain"}
try:
conn = httplib.HTTPConnection("bugs.python.org")
conn.request("POST", "", params, headers)
response = conn.getresponse()
print response.status, response.reason
data = response.read()
except Exception as ex:
pass
finally:
conn.close()