微博爬虫api版本

19 篇文章 0 订阅
# -*- coding: UTF-8 -*-
_author_ = 'zy'
_date_ = '2019/1/30 0030 16:20'

# Account to crawl: numeric user id, display name, and the container id used
# by the timeline endpoint (here the digits 107603 followed by the user id).
uid = 5462875448
user_name = '热爱学习好宝宝'
containerid = 1076035462875448

# Query-string template for the getIndex endpoint.  The "{...}" values are
# placeholders only; they are replaced with real values before any request
# is sent.  Key order is kept as-is because it determines the order of the
# parameters in the generated URL.
params = dict(
    uid="{uid}",
    luicode="10000011",
    lfid='100103type=1&q=热爱学习好宝宝',
    type="uid",
    value="{uid}",
    containerid="{containerid}",
    page="{page}",
)
#
#since_id: 4328148851867106
import requests
import json

# Mobile-site API endpoint; the query string is appended from `params`.
url = 'https://m.weibo.cn/api/container/getIndex?'

# HTTP headers sent with every request.
# NOTE(review): the user id in Referer (5258700147) differs from the `uid`
# being crawled -- confirm whether that is intentional.
# The cookie is left empty; fill it in if the endpoint requires a login.
headers = dict([
    ("Host", "m.weibo.cn"),
    ("Referer", "https://m.weibo.cn/u/5258700147"),
    (
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
    ),
    ("cookie", ""),
])

import math

# Replace the placeholder values in the query template with the real
# account data.  All four keys already exist, so their order is unchanged.
params.update({
    'uid': uid,
    'value': uid,
    'lfid': f"100103type=1&q={user_name}",
    'containerid': containerid,
})

# Hard-coded item count; presumably the number of posts on the account,
# with 10 posts returned per page -- TODO confirm against the API.
total = 299
pages = math.ceil(total / 10)

#params={'uid': 5462875448, 'luicode': '10000011', 'lfid': '100103type=1&q=热爱学习好宝宝', 'type': 'uid', 'value': 5462875448, 'containerid': 1076035462875448, 'page': 1}

# NOTE: json is already imported earlier in this file; this repeat is harmless.
import json
import time
# Accumulates the text of every collected post (each followed by a '\n'
# entry); the list is joined and written to disk at the end of the script.
miaomiao=[]
# NOTE(review): BeautifulSoup is not used by any live code visible here.
from bs4 import BeautifulSoup
from lxml import etree
def get_content(html):
    """Return the plain text of an HTML fragment with all markup removed.

    The previous implementation evaluated ``/html/body/text()`` and took
    element ``[0]``, i.e. only the first text node that is a *direct* child
    of ``<body>``.  Text nested inside inline tags (links, emoji spans, ...)
    was dropped, and an ``IndexError`` was raised whenever no direct text
    node existed.  This version concatenates every text node in document
    order instead.

    :param html: HTML fragment (for example the ``text`` field of a post).
    :return: The concatenated text content.  Empty input is returned
        unchanged, as is input for which the parser yields no tree.
    """
    if not html:
        # Nothing to parse; lxml rejects empty documents.
        return html
    oc = etree.HTML(html)
    if oc is None:
        # Some inputs (e.g. whitespace only) may parse to no tree at all.
        return html
    return ''.join(oc.itertext())

# Walk through every result page, collect the text of each post, and finally
# dump everything to weibo1.txt.
for page in range(pages):
    # The API pages are 1-based while the loop index is 0-based.
    params['page'] = page + 1

    try:
        # A timeout keeps a stalled connection from hanging the crawl forever.
        res = requests.get(url, params=params, headers=headers, timeout=10)
        payload = json.loads(res.content)
    except (requests.RequestException, ValueError) as err:
        # Network failure or a non-JSON answer (e.g. an anti-crawler page):
        # stop paging, but still write what has been collected so far.
        print('request failed on page', page + 1, err)
        break

    # A response without data/cards is treated as an empty page rather than
    # crashing the whole run with a KeyError.
    data = payload.get('data') if isinstance(payload, dict) else None
    cards = data.get('cards') if isinstance(data, dict) else None

    for card in cards or []:
        #print(card)
        try:
            weibo = card['mblog']['text']
        except KeyError:
            # Not every card carries a post (no 'mblog' / 'text' entry).
            print('keyerror')
            continue

        try:
            weibo = get_content(weibo)
        except Exception:
            # Best effort: keep the raw text when it cannot be parsed.
            pass

        miaomiao.append(weibo)
        miaomiao.append('\n')
        print(weibo)

    # Pause between pages to avoid hammering the server.
    print('正在休息一秒')
    time.sleep(1)
    print(page)
    print(params)

with open('weibo1.txt', 'w', encoding='utf-8') as f:
    f.write("\n".join(miaomiao))

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值