import requests
import os
import re
import csv
import time
import json
# Request headers sent with every call; the User-Agent literal is split across
# lines only for readability (adjacent literals concatenate to one string).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.2; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/52.0.2743.116 Safari/537.36'
    ),
}

# Placeholder cookie value: replace it with your own cookies before running.
# If requests start failing in ways that cannot otherwise be explained,
# try refreshing the cookies first.
# NOTE(review): this mapping is passed as `cookies=` to requests, which treats
# each key as a cookie name -- confirm a raw Cookie header string works here.
Cookies = {"Cookie": "你的cookies"}
# User profile lookup; also yields the uid / fid / oid parameters that the
# other requests in this file need.
def get_user_info(usr_id):
    """Fetch a user's profile from the m.weibo.cn container API.

    Args:
        usr_id: Value substituted into the ``value=`` query parameter.

    Returns:
        dict with the keys ``nickname``, ``mblog_num``, ``verified``,
        ``verified_reason``, ``gender``, ``urank``, ``mbrank``,
        ``followers_count``, ``follow_count``, ``uid``, ``fid``, ``cardid``,
        ``containerid`` and ``oid``. ``uid``, ``fid``, ``oid`` and ``cardid``
        are all ``''`` when the ``toolbar_menus`` entry of the response does
        not have the expected shape.

    Raises:
        AttributeError: If the response has no ``data`` / ``userInfo`` /
            ``tabsInfo`` object.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    url = 'https://m.weibo.cn/api/container/getIndex?type=uid&value={usr_id}'.format(usr_id=usr_id)
    # A timeout keeps a stalled connection from blocking the script forever.
    resp = requests.get(url, headers=headers, cookies=Cookies, timeout=10)
    jsondata = resp.json()

    data = jsondata.get('data')
    user_info = data.get('userInfo')

    nickname = user_info.get('screen_name')
    mblog_num = user_info.get('statuses_count')
    verified = user_info.get('verified')
    verified_reason = user_info.get('verified_reason')
    gender = user_info.get('gender')
    urank = user_info.get('urank')  # user level
    mbrank = user_info.get('mbrank')
    followers_count = user_info.get('followers_count')
    follow_count = user_info.get('follow_count')

    # All four ids come out of `toolbar_menus`, so they are looked up together
    # and blanked together: either the menu structure is usable or none of the
    # ids is reported.
    try:
        menus = user_info.get('toolbar_menus')
        uid = menus[0].get('params').get('uid')
        action_log = menus[1].get('actionlog')
        fid = action_log.get('fid')
        oid = menus[2].get('params').get('menu_list')[0].get('actionlog').get('oid')
        cardid = action_log.get('cardid')
    except (AttributeError, IndexError, KeyError, TypeError):
        uid = ''
        fid = ''
        oid = ''
        cardid = ''

    containerid = data.get('tabsInfo').get('tabs')[0].get('containerid')

    Info = {
        'nickname': nickname, 'mblog_num': mblog_num,
        'verified': verified, 'verified_reason': verified_reason,
        'gender': gender, 'urank': urank, 'mbrank': mbrank,
        'followers_count': followers_count,
        'follow_count': follow_count, 'uid': uid, 'fid': fid,
        'cardid': cardid, 'containerid': containerid, 'oid': oid,
    }
    print(Info)
    return Info
# Collect every hot post of a user (post text, per-post id used for comments,
# repost count, comment count, ...).
def mblog_list(uid, oid):
    """Page through a container listing and collect the posts it contains.

    Args:
        uid: Value substituted into the ``value=`` query parameter of each
            page request.
        oid: Value substituted into the ``containerid=`` query parameter.

    Returns:
        list of dicts, one per post, with the keys ``created_at``, ``id``,
        ``text`` (with ``<span ...>`` and ``<a ...>`` elements stripped),
        ``reposts_count``, ``comments_count`` and ``attitudes_count``.

    Raises:
        KeyError / TypeError: If the first response lacks
            ``data.cardlistInfo.total``.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    base_url = 'https://m.weibo.cn/api/container/getIndex?containerid={oid}'
    page_url = 'https://m.weibo.cn/api/container/getIndex?containerid={oid}&type=uid&value={uid}&page={page}'

    # Compiled once; both patterns are applied to every post's text.
    span_pattern = re.compile(r'<span .*?</span>')
    link_pattern = re.compile(r"<a .*?</a>")

    Mblog_list = []

    resp = requests.get(base_url.format(oid=oid), headers=headers,
                        cookies=Cookies, timeout=10)
    resp.encoding = 'gbk'
    response = resp.json()

    # Total number of posts reported for this container.
    total = response['data']['cardlistInfo']['total']
    print(total)

    # Number of pages to request, assuming 10 posts per page. This asks for
    # one extra page when `total` is an exact multiple of 10.
    page_num = int(int(total) / 10) + 1

    for page in range(1, page_num + 1):
        p_url = page_url.format(oid=oid, uid=uid, page=page)
        # No encoding override here: each page is decoded with the encoding
        # requests determines for that response.
        page_resp = requests.get(p_url, headers=headers, cookies=Cookies,
                                 timeout=10)
        page_data = page_resp.json()

        try:
            for card in page_data['data']['cards']:
                try:
                    mblog = card['mblog']
                    dirty_text = mblog['text']  # raw text still carries markup
                    without_spans = span_pattern.sub('', dirty_text)
                    mblog_data = {
                        'created_at': mblog['created_at'],
                        'id': mblog['id'],
                        'text': link_pattern.sub('', without_spans),
                        'reposts_count': mblog['reposts_count'],
                        'comments_count': mblog['comments_count'],
                        'attitudes_count': mblog['attitudes_count'],
                    }
                except (KeyError, TypeError):
                    # Card without a usable 'mblog' payload: skip it.
                    continue
                Mblog_list.append(mblog_data)
                print(' ' * 10, mblog_data)
            print('................')
        except (KeyError, TypeError):
            # Page without a 'data'/'cards' list: nothing to collect, but
            # still fall through to the pause below.
            pass

        # Pause between page requests, including after a page that failed.
        time.sleep(1)

    return Mblog_list
def main():
    """Look up one hard-coded user and crawl that user's post listing.

    Prints the resolved ``uid`` / ``oid`` pair, runs ``mblog_list`` on it and
    prints a separator line when done. Returns nothing.
    """
    # Other user ids that can be swapped in:
    # user_id = '1655128924'
    # user_id = '2736225585'
    # user_id = '2386831995'
    user_id = '1282005885'
    user_info = get_user_info(user_id)
    uid = user_info.get('uid')
    oid = user_info.get('oid')
    print(uid, oid)
    mblog_list(uid, oid)
    print('............')


# Run the crawl only when executed as a script, so importing this module does
# not trigger network requests.
if __name__ == '__main__':
    main()