#-*-coding:utf8-*-
import requests
from lxml import etree
# Scrape the post text from a Weibo (weibo.cn) page using a logged-in cookie.

# Session cookie for the logged-in account. Copy it from a packet capture
# (e.g. Fiddler) or from the Network tab of the browser's developer tools.
# NOTE(review): this dict is passed as ``cookies=``, so requests treats it as
# a single cookie named "Cookie" whose value is the pasted string. Confirm
# that is what the server accepts; a raw header string is normally sent via
# ``headers=`` instead.
cook = {"Cookie": "此处请填写你获取到的Cookie"}

# Replace with the Weibo page to scrape (the URL as shown after logging in).
url = 'http://weibo.cn/u/xxxxxxxx'

# Fetch the page with the login cookie. The timeout keeps the script from
# hanging forever on a stalled connection, and raise_for_status() stops on an
# HTTP error instead of silently parsing an error page.
response = requests.get(url, cookies=cook, timeout=10)
response.raise_for_status()

# Mind the difference between ``.content`` and ``.text``: ``.content`` is the
# raw byte string, ``.text`` is the decoded unicode string. The bytes are
# handed to lxml directly; the alternative is to take ``.text`` and re-encode
# it first, e.g. bytes(bytearray(html, encoding='utf-8')).
html = response.content

selector = etree.HTML(html)
content = selector.xpath('//span[@class="ctt"]')
for each in content:
    # string(.) returns the concatenated text of the element and all of its
    # descendants, i.e. the post text without the inline markup.
    text = each.xpath('string(.)')
    # Parenthesised single-argument print works on both Python 2 and 3.
    print(text)
设置 cookie 很简单:把 cookie 字典传给 requests.get 的 cookies 参数即可,例如 html = requests.get(url, cookies=cook).content。
上述代码还要注意:
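content 返回的是 bytes 类型的字节串(未解码的原始响应体)
text 返回的是 unicode 类型的字符串(requests 已按响应编码解码)
如果使用 text,则需要先将 unicode 转回字节串再交给 etree.HTML(若页面带有 XML 编码声明,lxml 不接受 unicode 字符串),例如 bytes(bytearray(html, encoding='utf-8')),或者更简单地写成 html.encode('utf-8')。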