# -*- coding: utf-8 -*-
"""Log in to www.xinxianwang.com and print a summary of the "my" page.

Python 2 script (urllib2 / cookielib). It:

1. builds a cookie-aware opener with a browser-like User-Agent,
2. POSTs the login form and prints the response body,
3. saves the cookie jar to disk,
4. fetches /my/ with the same opener and prints the page title, the
   ``div.login_info`` element, and that element with its tags stripped.
"""
import cookielib
import re
import sys
import urllib
import urllib2
from contextlib import closing

from bs4 import BeautifulSoup

# Python 2-only hack kept from the original: makes implicit str<->unicode
# conversions use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding("utf-8")

USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"

# Set up cookie handling: every request made through `opener` shares `cj`.
cj = cookielib.LWPCookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
# Add default headers sent with every request made through the opener.
opener.addheaders = [("User-Agent", USER_AGENT)]
urllib2.install_opener(opener)

# Login endpoint.
url = 'http://www.xinxianwang.com/login/login.asp'
# POST data ('xxx' values are placeholders for real credentials).
values = {
    'username': 'xxx ',
    'password': 'xxx',
    'k': 'Fri Oct 23 2015 23:39:12 GMT+0800 (中国标准时间)40000',
}
# Extra headers for the login POST. Deliberately NOT included:
#   * Content-Length - urllib2 computes it from the encoded body; a
#     hard-coded value would not match the real length.
#   * Accept-Encoding: gzip - urllib2 does not decompress response bodies,
#     so a compressed reply would be unreadable.
#   * User-Agent - already supplied by opener.addheaders.
#   * Connection - urllib2 sets this header itself.
post_header = {
    "Referer": "http://www.xinxianwang.com/login/",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Content-Type": "application/x-www-form-urlencoded",
}
data = urllib.urlencode(values)
# The original built post_header but never attached it to the request.
req = urllib2.Request(url, data, post_header)
with closing(opener.open(req)) as response:
    page = response.read()
print(page)

# Persist the cookies. ignore_discard=True also writes cookies flagged as
# session-only, which save() skips by default.
# Raw string so the backslash in the Windows path is never read as an escape.
cj.save(r'H:\python_learn/wangyi.txt', ignore_discard=True)

# Visit "my home page" with the same (now logged-in) opener.
with closing(opener.open('http://www.xinxianwang.com/my/')) as my_response:
    mm = my_response.read()
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(mm, "html.parser")
print(soup.title)
tt = soup.find("div", attrs={"class": "login_info"})
print(tt)
# Strip every <...> tag to leave only the element's text.
p = re.compile(r'<[^>]+>')
if tt is None:
    # find() returns None when the element is absent; report that instead
    # of running the regex over the literal string "None".
    sys.stderr.write("div.login_info not found on the page (login may have failed)\n")
else:
    print(p.sub("", str(tt)))
# Execution result: