python爬取微博用户关注和粉丝的公开基本信息,阿里P7大牛手把手教你

本文介绍了如何使用Python爬取微博用户关注和粉丝的公开信息,包括关注数、粉丝数、性别和所在地。通过设置随机User-Agent避免反爬,数据保存至MongoDB数据库,并绘制男女比例扇形图和用户所在地条形图进行分析。此外,还使用进程池提高爬虫效率,限制爬取页数以防止无效请求。
摘要由CSDN通过智能技术生成

wb.cookie_test()

test.py

import random

import pymongo

import requests

from time import sleep

import matplotlib.pyplot as plt

from multiprocessing import Pool

返回随机的User-Agent

def get_random_ua():

user_agent_list = [

“Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1”

“Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/”

“536.11”,

“Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6”,

“Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6”,

“Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1”,

“Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5”,

“Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5”,

“Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3”,

“Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3”,

"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 "

“Safari/536.3”,

“Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3”,

“Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3”,

“Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3”,

“Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3”,

“Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3”,

“Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3”,

“Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24”,

“Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24”

]

return {

“User-Agent”: random.choice(user_agent_list)

}

返回关注数和粉丝数

def get():

res = requests.get(“https://m.weibo.cn/profile/info?uid=5720474518”)

return res.json()[‘data’][‘user’][‘follow_count’], res.json()[‘data’][‘user’][‘followers_count’]

获取内容并解析

def get_and_parse1(url):

res = requests.get(url)

cards = res.json()[‘data’][‘cards’]

info_list = []

try:

for i in cards:

if “title” not in i:

for j in i[‘card_group’][1][‘users’]:

user_name = j[‘screen_name’] # 用户名

user_id = j[‘id’] # 用户id

fans_count = j[‘followers_count’] # 粉丝数量

sex, add = get_user_info(user_id)

info = {

“用户名”: user_name,

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值