python微博根据用户名搜索爬取该用户userId

最新推荐文章于 2024-04-30 19:24:04 发布

杰骜不驯丶

最新推荐文章于 2024-04-30 19:24:04 发布

阅读量3.2k

点赞数 5

分类专栏： python 文章标签：微博用户id 微博搜索 python python抓取微博用户

本文链接：https://blog.csdn.net/qq_24542767/article/details/103505612

版权

python 专栏收录该内容

4 篇文章 1 订阅

订阅专栏

根据微博用户名搜索爬取该用户userId并返回

import urllib.request
from urllib import parse

from bs4 import BeautifulSoup


# 微博根据用户名查找userId


# userName 用户名
# pageNum 查询页码，每页20条数据。第一页传1，第二页传2，以此类推。
def getUserId(userName, pageNum=1):
    """Search Weibo users by nickname and return the exactly matching user's id.

    Fetches one page of the Weibo user-search results, scans every
    ``<a class="name">`` anchor and compares its text with ``userName``.
    The id is taken from the last path segment of the matching anchor's
    profile link (e.g. ``//weibo.com/u/5288987897`` -> ``5288987897``).

    Args:
        userName: Nickname to look for; it is URL-encoded before being sent.
        pageNum: 1-based result page, as an int or a numeric string.
            Defaults to the first page. (The original note says each page
            holds 20 results -- not verified here.)

    Returns:
        ``"userId=<id>"`` when an anchor whose text equals ``userName`` is
        found and its link yields an id; ``"userID抓取失败"`` when the anchor
        matches but no id can be extracted from its link; ``"没有找到数据"``
        when no anchor on the page matches.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` if the request fails.
    """
    # The nickname must be URL-encoded; str() lets callers pass the page as
    # either an int or a string.
    search_url = ("https://s.weibo.com/user/&nickname=" + parse.quote(userName)
                  + "&page=" + str(pageNum))
    req = urllib.request.Request(search_url)
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(req) as webpage:
        html = webpage.read()
    soup = BeautifulSoup(html, 'html.parser')
    # A BeautifulSoup object is always truthy, so this was printed on every call.
    print("找到html")

    # Result anchors look like:
    # <a class="name" href="//weibo.com/u/5288987897" ...>nick<em>name</em></a>
    for a in soup.find_all('a', class_='name'):
        # get_text() joins the text of nested <em> highlight tags as well.
        nickname = a.get_text(strip=True)
        print("搜到用户名=" + nickname)
        if nickname != userName:
            continue

        print("匹配到该用户")
        # Tolerate an anchor without an href instead of raising KeyError.
        href = a.get('href') or ""
        print("用户个人主页链接=" + href)
        # Use only the URL path so a query string or trailing slash cannot
        # leak into the id.
        path_parts = parse.urlsplit(href).path.rstrip("/").split("/")
        print(path_parts)
        userId = path_parts[-1]
        if userId:
            return "userId=" + userId
        return "userID抓取失败"

    return "没有找到数据"


if __name__ == "__main__":
    # Demo run: look up a placeholder nickname on the first results page
    # and print whatever status string the lookup returns.
    userName = "用户名"
    lookup_result = getUserId(userName, "1")
    print(lookup_result)

杰骜不驯丶

关注

5
点赞
踩
15

收藏

觉得还不错? 一键收藏
5
评论
python微博根据用户名搜索爬取该用户userId

根据微博用户名搜索爬取该用户userId并返回。 import urllib.request; from urllib import parse; from bs4 import BeautifulSoup  # 微博根据用户名查找userId  # userName 用户名  # pageNum 查询页数，每页20个数据。默认为第一页值为1，第二页值为2，以此类推。 def getUs...
复制链接

扫一扫