http://www.jb51.net/article/15720.htm
一、打开一个网页获取所有的内容
# Fetch a web page and print its whole body.
# NOTE: `from urllib import urlopen` is the Python 2 location of urlopen.
from urllib import urlopen

response = urlopen("http://www.baidu.com")
try:
    # read() with no size argument returns the whole response body.
    doc = response.read()
finally:
    # Close explicitly so the connection is released even if read() raises.
    response.close()
print(doc)
二、获取HTTP头
# Open a URL and inspect the HTTP response headers (Python 2 urllib API).
from urllib import urlopen

doc = urlopen("http://www.baidu.com")
try:
    # info() returns the header container of the response; fetch it once
    # and reuse it for both prints.
    headers = doc.info()
    print(headers)  # every header of the response
    print(headers.getheader('Content-Type'))  # one header, looked up by name
finally:
    # Release the connection even if a lookup above raises.
    doc.close()
三、使用代理
1. 查看环境变量
# Show the whole process environment, then the proxy setting specifically.
import os

# One "NAME=value" line per environment variable.
print("\n".join(["%s=%s" % (k, v) for k, v in os.environ.items()]))
# getenv() returns None when http_proxy is not set.
print(os.getenv("http_proxy"))
2. 设置环境变量
# Set the http_proxy environment variable for the current process.
import os

# Assign through os.environ instead of calling os.putenv(): putenv() changes
# the underlying process environment but does not update the os.environ
# mapping, so os.getenv("http_proxy") in this process would not see the new
# value. Assigning to os.environ updates both.
# "<port>" is a placeholder; substitute the real proxy port.
os.environ["http_proxy"] = "http://proxyaddr:<port>"
3. 使用代理
# Ways to control proxy use through the `proxies` argument of
# urllib.urlopen (Python 2 API). Each call below is an independent
# alternative; `filehandle` is simply rebound each time.
# `some_url` is a placeholder that is not defined in this snippet; bind it
# to a real URL before running.
import urllib

# Use http://www.someproxy.com:3128 for http proxying
proxies = {'http': 'http://www.someproxy.com:3128'}
filehandle = urllib.urlopen(some_url, proxies=proxies)
# Don't use any proxies
filehandle = urllib.urlopen(some_url, proxies={})
# Use proxies from environment - both versions are equivalent
filehandle = urllib.urlopen(some_url, proxies=None)
filehandle = urllib.urlopen(some_url)