无聊,做个微博用户关系图

最近要学的太多,还有一堆期末大作业等着做,实在是脑阔疼。好不容易闲了一点,用networkx做个关系图玩一下

思路

爬取微博关注列表,然后再去遍历关注列表,得到每个用户的信息,以及他们的关注,这样用户关系就能丰富起来,画的图也就更复杂

代码

import requests
import re
import json
import networkx as nx
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['YouYuan']  # CJK-capable font so Chinese node labels render
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign displayable under a CJK font

# Fetch a URL as text, with retries.
def getHtmlText(url, code='UTF-8'):
    """GET *url* and return the response body as text.

    Retries up to 10 times on any request failure (HTTP 4xx/5xx included,
    via raise_for_status).

    :param url: target URL.
    :param code: encoding forced onto the response (default 'UTF-8').
    :return: response text, or ``None`` when every attempt fails — the
        original fell off the loop and returned None implicitly; callers
        must handle that before json.loads.
    """
    # Hoisted out of the retry loop — it never changes between attempts.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6726.400 QQBrowser/10.2.2265.400',
    }
    for _ in range(10):
        try:
            r = requests.get(url, headers=header, timeout=5)
            r.raise_for_status()  # turn HTTP error status into an exception
        except requests.RequestException:
            # Narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit and hid real bugs.
            print("get获取失败,重连中")
            continue
        r.encoding = code
        return r.text
    return None  # all retries exhausted


# Fetch a user's profile.
def getUserInfo(uid):
    """Query the m.weibo.cn container API for *uid*'s profile.

    :return: dict with keys ``id``, ``name``, ``gender`` ('男'/'女')
        and ``desc``.
    """
    url = 'https://m.weibo.cn/api/container/getIndex?type=uid&value={}'.format(uid)
    payload = json.loads(getHtmlText(url))
    profile = payload['data']['userInfo']
    return {
        'id': profile['id'],
        'name': profile['screen_name'],
        'gender': '女' if profile['gender'] == 'f' else '男',
        'desc': profile['description'],
    }


# Fetch a user's follow list.
def getInterestList(uid, num):
    """Return up to *num* followees of *uid* as ``{'id': ...}`` dicts.

    Bug fixed: the original condition
    ``cards['title'] == '她的全部关注' or '他的全部关注'`` was always true,
    because a non-empty string literal is truthy — every titled card
    group was scanned. A membership test expresses the intent.

    :param uid: user id whose follow list is fetched.
    :param num: cap on followees taken per matching card group.
    """
    url = 'https://m.weibo.cn/api/container/getIndex?containerid=231051_-_followers_-_{}&page=1'.format(uid)
    data = json.loads(getHtmlText(url))
    interest_list = []
    for cards in data['data']['cards']:
        if 'title' in cards and cards['title'] in ('她的全部关注', '他的全部关注'):
            # slice caps the count exactly as the original i < num counter did
            for card in cards['card_group'][:num]:
                interest_list.append({'id': card['user']['id']})
    return interest_list


# Depth-first crawl: collect user info (and, above the leaf level, the
# follow lists) down to `floor` levels of recursion.
def deepSearchList(list, uid, floor, num):
    """Depth-first crawl of the Weibo follow graph starting at *uid*.

    :param list: accumulator dict mapping str(uid) -> {'userInfo': ...,
        'interestList': ...}; mutated in place and also returned.
        NOTE(review): the parameter name shadows the builtin ``list``.
    :param uid: user id to expand. NOTE(review): stored under ``str(uid)``
        but then read back as ``list[uid]`` — this only works because
        every caller already passes *uid* as a str; confirm before reuse.
    :param floor: remaining recursion depth; at 0 only the profile is
        fetched, not the follow list.
    :param num: cap on how many followees are expanded per user.
    :return: the accumulator dict.
    """
    if floor == 0:
        # Leaf level: record the profile only, never recurse further.
        # print(list.keys())
        if uid in list.keys():
            print('{}有重复'.format(uid))  # already crawled — skip duplicate
            return list
        else:
            # print(list.keys())
            # print(uid in list.keys())
            list[str(uid)] = dict()
            list[uid]['userInfo'] = getUserInfo(uid)
            print('{}\t{}\t{}\t{}'.format(uid, list[uid]['userInfo']['name'], list[uid]['userInfo']['gender'],
                                          list[uid]['userInfo']['desc']))
            return list
    # Fully expanded already (profile AND follow list present) — skip.
    elif uid in list.keys() and 'interestList' in list[uid].keys():
        # print('interestList' in list[uid].keys())
        #print('{}有重复'.format(uid))
        return list
    else:
        # New (or leaf-only) entry: fetch profile + follow list, then
        # recurse one level shallower into each followee.
        list[str(uid)] = dict()
        list[uid]['userInfo'] = getUserInfo(uid)
        list[uid]['interestList'] = getInterestList(uid, num)
        print('{}\t{}\t{}\t{}'.format(uid, list[uid]['userInfo']['name'],
                                      list[uid]['userInfo']['gender'],
                                      list[uid]['userInfo']['desc']))
        i = 0
        # `i < num` re-caps the recursion even though getInterestList
        # already limits the list length to num.
        for interestList in list[uid]['interestList']:
            if i < num:
                list = deepSearchList(list, str(interestList['id']), floor - 1, num)
                # with open('./list.json', 'w', encoding='utf-8') as f:
                #     f.write(json.dumps(list, ensure_ascii=False))
                i += 1
        return list


if __name__ == '__main__':
    uid = '******'  # the Weibo uid to start crawling from

    # Resume from the cache file written by a previous run, if present.
    try:
        with open(f'./{uid}.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
    except:
        data = dict()

    # Crawl 10 levels deep, expanding up to 20 followees per user,
    # then persist the merged result back to the cache file.
    data = deepSearchList(data, uid, 10, 20)
    with open(f'./{uid}.json', 'w', encoding='utf-8') as f:
        f.write(json.dumps(data, ensure_ascii=False))

    # Draw the directed follow graph: node size grows with in-degree,
    # node colour encodes gender (blue = 男, pink = otherwise).
    plt.figure(figsize=(20, 10), dpi=100)
    graph = nx.DiGraph()
    size_of = dict()
    color_of = dict()
    for entry in data.values():
        name = entry['userInfo']['name']
        graph.add_node(name)
        size_of[name] = 0
        color_of[name] = 'lightblue' if entry['userInfo']['gender'] == '男' else 'pink'
    for entry in data.values():
        if 'interestList' in entry:
            source = entry['userInfo']['name']
            for followee in entry['interestList']:
                target = data[str(followee['id'])]['userInfo']['name']
                graph.add_edge(source, target)
                size_of[target] += 1
    nx.draw(graph,
            pos=nx.spring_layout(graph),
            with_labels=True,
            node_size=[n * n * 250 + 100 for n in size_of.values()],
            node_color=list(color_of.values()),
            width=0.6,
            font_size=10)
    plt.savefig(f"./{uid}微博关系图.png", dpi=100)
    plt.show()

在这里插入图片描述
如果觉得图中文字看不清楚,可以打开保存下来的图片查看,会清楚很多。
在这里插入图片描述


2020/3/30更新

有读者说遇到了get失败的问题,我去试了确实存在这个问题,因为反爬导致的,所以这里更新一下代码,主要就是添加了sleep和更改一下header

import requests
import re
import json
import time
import networkx as nx
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['YouYuan']  # CJK-capable font so Chinese node labels render
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign displayable under a CJK font

# Fetch a URL as text, with retries and anti-scraping back-off.
def getHtmlText(url, code='UTF-8'):
    """GET *url* and return the response body as text.

    Retries up to 15 times, sleeping 40s between attempts to appease
    Weibo's rate limiting.

    :param url: target URL.
    :param code: encoding forced onto the response (default 'UTF-8').
    :return: response text, or ``None`` when every attempt fails — the
        original fell off the loop and returned None implicitly; callers
        must handle that before json.loads.
    """
    # Hoisted out of the retry loop — it never changes between attempts.
    header = {
        'upgrade-insecure-requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
    for _ in range(15):
        try:
            r = requests.get(url, headers=header, timeout=5)
            r.raise_for_status()  # turn HTTP error status into an exception
        except requests.RequestException:
            # Narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit and hid real bugs.
            print("get获取失败,重连中")
            time.sleep(40)  # back off before the next attempt
            continue
        r.encoding = code
        return r.text
    return None  # all retries exhausted

def getHtmlText2(url, code='UTF-8'):
    """GET *url* as text using XHR-style headers (for the follow-list API).

    Same retry/back-off scheme as :func:`getHtmlText`, but sends the
    referer/XSRF headers the followers endpoint expects.

    :param url: target URL.
    :param code: encoding forced onto the response (default 'UTF-8').
    :return: response text, or ``None`` after 15 failed attempts — the
        original returned None implicitly; callers must handle that.
    """
    # Hoisted out of the retry loop — it never changes between attempts.
    header = {
        'referer': 'https://m.weibo.cn/sw.js',
        'X-Requested-With': 'XMLHttpRequest',
        'X-XSRF-TOKEN': 'ef39f9',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
    for _ in range(15):
        try:
            r = requests.get(url, headers=header, timeout=5)
            r.raise_for_status()  # turn HTTP error status into an exception
        except requests.RequestException:
            # Narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit and hid real bugs.
            print("get获取失败,重连中")
            time.sleep(40)  # back off before the next attempt
            continue
        r.encoding = code
        return r.text
    return None  # all retries exhausted


# Fetch a user's profile.
def getUserInfo(uid):
    """Query the m.weibo.cn container API for *uid*'s profile.

    :return: dict with keys ``id``, ``name``, ``gender`` ('男'/'女')
        and ``desc``.
    """
    url = 'https://m.weibo.cn/api/container/getIndex?type=uid&value={}'.format(uid)
    payload = json.loads(getHtmlText(url))
    profile = payload['data']['userInfo']
    return {
        'id': profile['id'],
        'name': profile['screen_name'],
        'gender': '女' if profile['gender'] == 'f' else '男',
        'desc': profile['description'],
    }


# Fetch a user's follow list.
def getInterestList(uid, num):
    """Return up to *num* followees of *uid* as ``{'id': ...}`` dicts.

    Bug fixed: the original condition
    ``cards['title'] == '她的全部关注' or '他的全部关注'`` was always true,
    because a non-empty string literal is truthy — every titled card
    group was scanned. A membership test expresses the intent.

    :param uid: user id whose follow list is fetched.
    :param num: cap on followees taken per matching card group.
    """
    url = 'https://m.weibo.cn/api/container/getIndex?containerid=231051_-_followers_-_{}&page=1'.format(uid)
    data = json.loads(getHtmlText2(url))
    interest_list = []
    for cards in data['data']['cards']:
        if 'title' in cards and cards['title'] in ('她的全部关注', '他的全部关注'):
            # slice caps the count exactly as the original i < num counter did
            for card in cards['card_group'][:num]:
                interest_list.append({'id': card['user']['id']})
    return interest_list


# Depth-first crawl: collect user info (and, above the leaf level, the
# follow lists) down to `floor` levels of recursion.
def deepSearchList(list, uid, floor, num):
    """Depth-first crawl of the Weibo follow graph starting at *uid*.

    :param list: accumulator dict mapping str(uid) -> {'userInfo': ...,
        'interestList': ...}; mutated in place and also returned.
        NOTE(review): the parameter name shadows the builtin ``list``.
    :param uid: user id to expand. NOTE(review): stored under ``str(uid)``
        but then read back as ``list[uid]`` — this only works because
        every caller already passes *uid* as a str; confirm before reuse.
    :param floor: remaining recursion depth; at 0 only the profile is
        fetched, not the follow list.
    :param num: cap on how many followees are expanded per user.
    :return: the accumulator dict.
    """
    if floor == 0:
        # Leaf level: record the profile only, never recurse further.
        # print(list.keys())
        if uid in list.keys():
            print('{}有重复'.format(uid))  # already crawled — skip duplicate
            return list
        else:
            # print(list.keys())
            # print(uid in list.keys())
            list[str(uid)] = dict()
            list[uid]['userInfo'] = getUserInfo(uid)
            print('{}\t{}\t{}\t{}'.format(uid, list[uid]['userInfo']['name'], list[uid]['userInfo']['gender'],
                                          list[uid]['userInfo']['desc']))
            return list
    # Fully expanded already (profile AND follow list present) — skip.
    elif uid in list.keys() and 'interestList' in list[uid].keys():
        # print('interestList' in list[uid].keys())
        #print('{}有重复'.format(uid))
        return list
    else:
        # New (or leaf-only) entry: fetch profile + follow list, then
        # recurse one level shallower into each followee.
        list[str(uid)] = dict()
        list[uid]['userInfo'] = getUserInfo(uid)
        list[uid]['interestList'] = getInterestList(uid, num)
        print('{}\t{}\t{}\t{}'.format(uid, list[uid]['userInfo']['name'],
                                      list[uid]['userInfo']['gender'],
                                      list[uid]['userInfo']['desc']))
        i = 0
        # `i < num` re-caps the recursion even though getInterestList
        # already limits the list length to num.
        for interestList in list[uid]['interestList']:
            if i < num:
                list = deepSearchList(list, str(interestList['id']), floor - 1, num)
                # with open('./list.json', 'w', encoding='utf-8') as f:
                #     f.write(json.dumps(list, ensure_ascii=False))
                i += 1
        return list


if __name__ == '__main__':
    uid = input("请输入微博的uid:")  # starting Weibo uid

    # Resume from the cache file written by a previous run, if present.
    try:
        with open(f'./{uid}.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
    except:
        data = dict()

    # Crawl 2 levels deep, expanding up to 6 followees per user,
    # then persist the merged result back to the cache file.
    data = deepSearchList(data, uid, 2, 6)
    with open(f'./{uid}.json', 'w', encoding='utf-8') as f:
        f.write(json.dumps(data, ensure_ascii=False))

    # Draw the directed follow graph: node size grows with in-degree,
    # node colour encodes gender (blue = 男, pink = otherwise).
    plt.figure(figsize=(20, 10), dpi=100)
    graph = nx.DiGraph()
    size_of = dict()
    color_of = dict()
    for entry in data.values():
        name = entry['userInfo']['name']
        graph.add_node(name)
        size_of[name] = 0
        color_of[name] = 'lightblue' if entry['userInfo']['gender'] == '男' else 'pink'
    for entry in data.values():
        if 'interestList' in entry:
            source = entry['userInfo']['name']
            for followee in entry['interestList']:
                target = data[str(followee['id'])]['userInfo']['name']
                graph.add_edge(source, target)
                size_of[target] += 1
    nx.draw(graph,
            pos=nx.spring_layout(graph),
            with_labels=True,
            node_size=[n * n * 250 + 100 for n in size_of.values()],
            node_color=list(color_of.values()),
            width=0.6,
            font_size=10)
    plt.savefig(f"./{uid}微博关系图.png", dpi=100)
    plt.show()

运行结果:
在这里插入图片描述
在这里插入图片描述
想要更多用户关系可以自己去代码中调参数,但是有些账号没有uid就会返回错误,这个要注意。

  • 8
    点赞
  • 39
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 17
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 17
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

shelgi

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值