# -*- coding: UTF-8 -*-
"""Crawl every weibo post of one user through the m.weibo.cn container API
and dump the plain-text posts to weibo1.txt."""
__author__ = 'zy'
__date__ = '2019/1/30 0030 16:20'

import json
import math
import time

import requests
from bs4 import BeautifulSoup  # kept for the commented-out parser variant in get_content
from lxml import etree

# --- target account (filled in by hand) ---
uid = 5462875448
user_name = '热爱学习好宝宝'
containerid = 1076035462875448

url = 'https://m.weibo.cn/api/container/getIndex?'

headers = {
    "Host": "m.weibo.cn",
    "Referer": "https://m.weibo.cn/u/5258700147",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
    "cookie": ""}

# Query string for getIndex; 'page' is rewritten on every request below.
params = {"uid": uid,
          "luicode": "10000011",
          'lfid': "100103type=1&q=" + user_name,
          "type": "uid",
          "value": uid,
          "containerid": containerid,
          "page": 1}

# since_id: 4328148851867106

total = 299  # total post count, read manually from the profile page — TODO confirm
pages = math.ceil(total / 10)  # the API serves roughly 10 cards per page

miaomiao = []  # accumulates plain-text posts (plus '\n' separators)
def get_content(html):
    """Strip HTML markup from a weibo text fragment and return plain text.

    The API returns mblog['text'] as an HTML fragment (links, emoji <span>s).
    The previous xpath '/html/body/text()[0]' kept only the first text node —
    everything after the first inline tag was silently dropped — and raised
    IndexError on markup-only input. XPath string() concatenates all text
    nodes instead.
    """
    # bs = BeautifulSoup(html, "html.parser")
    # bs.textarea.get_text()  # alternative parser, kept for reference
    oc = etree.HTML(html)
    if oc is None:
        # lxml returns None for empty/unparseable input; fall back to raw text
        return html
    return oc.xpath('string()')
# Fetch each page of cards; indentation reconstructed from the flattened
# paste — per-page sleep/diagnostics is the assumed layout (TODO confirm
# against the original blog post).
for page in range(pages):
    params['page'] = page + 1  # API pages are 1-based
    res = requests.get(url, params=params, headers=headers)
    cards = json.loads(res.content)['data']['cards']
    for card in cards:
        # print(card)
        try:
            # Only the lookup can raise KeyError: non-post cards
            # (ads, recommendations) carry no 'mblog' entry.
            weibo = card['mblog']['text']
        except KeyError:
            print('keyerror')
            continue
        try:
            weibo = get_content(weibo)
        except Exception:
            # Best-effort HTML stripping: on any parse failure keep the
            # raw fragment instead of dropping the post.
            pass
        miaomiao.append(weibo)
        miaomiao.append('\n')
        print(weibo)
    # Be polite to the API between pages.
    time.sleep(1)
    print('正在休息一秒')
    print(page)
    print(params)
# Persist everything collected; the '\n' separator entries in the list turn
# into blank lines between posts after the join.
with open('weibo1.txt', 'w', encoding='utf-8') as out_file:
    joined = "\n".join(miaomiao)
    out_file.write(joined)
# 微博爬虫 api 版本 (weibo crawler, API version)
# 最新推荐文章于 2024-05-16 15:28:38 发布 — stray text copied from the source
# blog page; commented out so the script parses.