使用的是python 2.7
python装不了requests:
在百度上查到的原因大致是:我的电脑用户名是中文,而 Python 2.7 默认按 ASCII 编码处理字符串,遇到含中文的路径就会出错
解决办法:在 Python 安装目录(我的是 C:\Python27)下的 Lib\site-packages 文件夹内新建一个名为“sitecustomize.py”的文件,并在文件里写入:
import sys
sys.setdefaultencoding('gbk')
保存即可
‘NoneType’ object is not iterable错误
出现这个错误的原因是被遍历的变量的值为 None,比如:
File "XXXXXXXXX.py", line 33, in main
for card in list_cards:
TypeError: 'NoneType' object is not iterable
这里是因为 list_cards 的值是 None,所以要追查 list_cards 的来源,这里我用到了调试(debug)功能
如何使用可查看:Python如何用自带的IDLE进行调试DEBUG
调试后发现,接口返回的用户主页微博内容有两层结构(cards 嵌套在 data 里面):
然后通过开发者工具查看也是如此:
然后将原来代码:
list_cards = ob_json.get('cards')
修改为 list_cards = ob_json.get('data').get('cards'),修改后的完整代码如下:
# _*_ coding:utf-8 _*_
from lxml import html
import requests
import json
import re
class Tool:
    """Regex-based helper that deletes HTML markup from a text fragment."""

    # <img> tags and runs of 1-7 spaces, plus one more single-character
    # alternative.
    # NOTE(review): that last alternative looks like it may originally have
    # been an HTML entity such as '&nbsp;' -- confirm against the author's file.
    removeImg = re.compile('<img.*?>| {1,7}| ')
    # Opening and closing anchor tags (the link text itself is kept).
    removeAddr = re.compile('<a.*?>|</a>')
    # Block-level tags; they are deleted, not turned into line breaks.
    replaceLine = re.compile('<tr>|<div>|</div>|</p>')
    # Any tag still left after the patterns above.
    removeTag = re.compile('<.*?>')

    # `self` is used by instance methods, `cls` by class methods.
    @classmethod
    def replace(cls, x):
        """Return *x* with all pattern matches deleted and both ends stripped.

        The patterns are applied in a fixed order: removeImg, removeAddr,
        replaceLine, removeTag.
        """
        ordered_patterns = (
            cls.removeImg,
            cls.removeAddr,
            cls.replaceLine,
            cls.removeTag,
        )
        for pattern in ordered_patterns:
            x = pattern.sub('', x)
        # Drop leading/trailing whitespace left behind by the removals.
        return x.strip()
class Weibo(object):
    """Print a user's posts and their hot comments from the m.weibo.cn API."""

    def _fetch_data(self, url):
        """GET *url*, parse the body as JSON and return its 'data' object.

        Returns an empty dict when the response carries no dict-valued 'data'
        member, so callers can chain ``.get()`` without hitting
        "'NoneType' object has no attribute 'get'".

        Raises whatever ``requests.get`` raises on network failure, and
        ``ValueError`` from ``json.loads`` when the body is not valid JSON.
        """
        response = requests.get(url)
        payload = json.loads(response.text)
        data = payload.get('data') if isinstance(payload, dict) else None
        return data if isinstance(data, dict) else {}

    def get_weibo(self, id, page):
        """Return the list of cards on *page* of user *id*'s timeline.

        Returns None when the response has no 'data' / 'cards' member.
        The parameter name ``id`` shadows the builtin; it is kept so existing
        keyword callers keep working.
        """
        url = 'https://m.weibo.cn/api/container/getIndex?uid={}&type=uid&value={}&containerid=107603{}&page={}'.format(id,id,id,page)
        return self._fetch_data(url).get('cards')

    def get_comments(self, id, page):
        """Return the hot comments on *page* for the post with this *id*.

        Returns None when the response has no 'data' / 'hot_data' member.
        """
        url = 'https://m.weibo.cn/api/comments/show?id={}&page={}'.format(id,page)
        return self._fetch_data(url).get('hot_data')

    def _print_hot_comments(self, weibo_id):
        """Print page 1 of the hot comments of one post, numbered from 1."""
        # get_comments may return None (no hot comments); iterate nothing then.
        list_comments = self.get_comments(weibo_id, 1) or []
        for index, comment in enumerate(list_comments, 1):
            user = comment.get('user') or {}
            # An empty or missing source is shown as "unknown".
            source = comment.get('source') or u'未知'
            raw_text = comment.get('text') or u''
            if raw_text.strip():
                # XPath string(.) yields the text content without any tags.
                text = html.fromstring(raw_text).xpath('string(.)')
            else:
                # lxml refuses to parse an empty document, so skip parsing.
                text = u''
            # %-formatting tolerates missing (None) fields, where string
            # concatenation would raise TypeError.
            header = u'%d :** %s 时间%s 点赞%s 来源%s' % (
                index,
                user.get('screen_name'),
                comment.get('created_at'),
                comment.get('like_counts'),
                source,
            )
            # A parenthesised single argument prints identically as a
            # Python 2 print statement and as a Python 3 print() call.
            print(header)
            print(text + '\n')

    def main(self, uid, page):
        """Print every post on *page* of user *uid*, each with its hot comments."""
        list_cards = self.get_weibo(uid, page)
        if not list_cards:
            return
        for card in list_cards:
            # Only cards with card_type 9 are handled; they are the ones
            # read for an 'mblog' entry.
            if card.get('card_type') != 9:
                continue
            mblog = card.get('mblog')
            if not mblog:
                continue
            text = Tool.replace(mblog.get('text') or u'')
            print('******')
            print(u'微博:' + text + '\n')
            # Use self rather than a module-level instance, so the class also
            # works when instantiated under any other name.
            self._print_hot_comments(mblog.get('id'))
            # NOTE(review): the source's indentation was lost; the separator
            # is assumed to close each post -- confirm.
            print('=============')
if __name__ == '__main__':
    # Script entry point: print page 1 of the posts of one hard-coded user id.
    # The instance is bound to the module-level name `weibo`.
    weibo = Weibo()
    weibo.main(uid='1192329374', page=1)