"""采用 Python Selenium + 无头浏览器,获取知乎粉丝数排行榜 TOP 50 的个人主页基础数据。"""
from time import sleep
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Build the Chrome configuration, then start the shared browser session used
# by the rest of the script.
chrome_options = Options()
for _flag in ("--headless", "--disable-gpu"):
    chrome_options.add_argument(_flag)

driver = webdriver.Chrome(options=chrome_options)
def get_one_info(driver, url, wait_seconds=1):
    """Load one profile page and collect the counters shown on it.

    Args:
        driver: A Selenium WebDriver (or any object exposing ``get(url)`` and
            ``find_elements(by, value)`` whose results have a ``.text``).
        url: Address of the profile page to open.
        wait_seconds: How long to pause after ``driver.get`` before reading
            the page. Defaults to 1, the delay the script has always used.

    Returns:
        A dict that always contains "用户" (the url) and "日期" (local
        timestamp, ``YYYY-MM-DD HH:MM:SS``). When at least seven "Tabs-meta"
        elements are found, the last seven are stored under "回答", "视频",
        "提问", "文章", "专栏", "想法" and "收藏". When exactly two
        "NumberBoard-itemValue" elements are found they are stored under
        "关注了" and "关注者". Otherwise a diagnostic line is printed and the
        corresponding keys are left out.
    """
    driver.get(url)
    # Presumably gives the page time to finish rendering its counters.
    sleep(wait_seconds)

    # "class name" is the locator strategy string behind By.CLASS_NAME.
    # find_elements(by, value) exists in Selenium 3 and 4, whereas the old
    # find_elements_by_class_name helper was removed in Selenium 4.3.
    nums1 = [el.text for el in driver.find_elements("class name", "Tabs-meta")]
    nums2 = [
        el.text
        for el in driver.find_elements("class name", "NumberBoard-itemValue")
    ]

    rlt = {"用户": url}

    tab_labels = ("回答", "视频", "提问", "文章", "专栏", "想法", "收藏")
    if len(nums1) >= len(tab_labels):
        # Only the trailing seven values are used; earlier ones are ignored.
        rlt.update(zip(tab_labels, nums1[-len(tab_labels):]))
    else:
        print(url,"nums1 少于7个",nums1)

    if len(nums2) == 2:
        rlt["关注了"], rlt["关注者"] = nums2
    else:
        print(url,"nums2异常",nums2)

    # strftime instead of str(now)[:-7]: str() omits the fractional part when
    # microsecond == 0, so slicing off 7 characters would then cut into the
    # time itself.
    rlt["日期"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return rlt
# Profile pages to scrape, visited in the order listed.
# NOTE(review): the module docstring says "TOP 50" but this list holds 51
# entries — confirm which one is intended.
urls = [
    "https://www.zhihu.com/people/haili-9-70/",
    "https://www.zhihu.com/people/zhi-hu-ri-bao-51-41",
    "https://www.zhihu.com/people/liu-kan-shan-78",
    "https://www.zhihu.com/people/ding-xiang-yi-sheng",
    "https://www.zhihu.com/people/zhang-jia-wei",
    "https://www.zhihu.com/people/zhi-ke-ji-13",
    "https://www.zhihu.com/people/knowyourself-1",
    "https://www.zhihu.com/people/kaifulee",
    "https://www.zhihu.com/people/zhouyuan",
    "https://www.zhihu.com/people/zhang-xiao-bei",
    "https://www.zhihu.com/people/warfalcon",
    "https://www.zhihu.com/people/lisanshui1230",
    "https://www.zhihu.com/people/tian-ji-shun",
    "https://www.zhihu.com/people/jixin",
    "https://www.zhihu.com/people/ma-bo-yong",
    "https://www.zhihu.com/people/sizhuren",
    "https://www.zhihu.com/people/imike",
    "https://www.zhihu.com/people/raymond-wang",
    "https://www.zhihu.com/people/ChenZhangyu",
    "https://www.zhihu.com/people/excited-vczh",
    "https://www.zhihu.com/people/zhu-xuan-86",
    "https://www.zhihu.com/people/lisongwei",
    "https://www.zhihu.com/people/xia-chu-fang",
    "https://www.zhihu.com/people/dong-ji-zai-hang-zhou",
    "https://www.zhihu.com/people/gejinyuban",
    "https://www.zhihu.com/people/guo-zi-501",
    "https://www.zhihu.com/people/gao-ke-69",
    "https://www.zhihu.com/people/chenqin",
    "https://www.zhihu.com/people/magie",
    "https://www.zhihu.com/people/chenbailing",
    "https://www.zhihu.com/people/wang-ni-ma-94",
    "https://www.zhihu.com/people/thejennyyy",
    "https://www.zhihu.com/people/cai-tong",
    "https://www.zhihu.com/people/zhou-xiao-nong",
    "https://www.zhihu.com/people/qiong-you-jin-nang",
    "https://www.zhihu.com/people/mali",
    "https://www.zhihu.com/people/bo-cai-28-7",
    "https://www.zhihu.com/people/cheng-yi-nan",
    "https://www.zhihu.com/people/lens-27",
    "https://www.zhihu.com/people/commando",
    "https://www.zhihu.com/people/nordenbox",
    "https://www.zhihu.com/people/binka",
    "https://www.zhihu.com/people/zhen-shi-gu-shi-ji-hua",
    "https://www.zhihu.com/people/he-ming-ke",
    "https://www.zhihu.com/people/ccat",
    "https://www.zhihu.com/people/talich",
    "https://www.zhihu.com/people/feifeimao",
    "https://www.zhihu.com/people/zhong-guo-ke-pu-bo-lan",
    "https://www.zhihu.com/people/pan-fan-65",
    "https://www.zhihu.com/people/gong-qing-tuan-zhong-yang-67",
    "https://www.zhihu.com/people/divinites",
]
# Scrape every profile in order, storing each result under its 1-based
# position in `urls`.
rlts = {}
number = 0
try:
    for number, url in enumerate(urls, start=1):
        rlt = get_one_info(driver, url)
        rlts[number] = rlt
finally:
    # Always shut the browser down, even if a page raised part-way through;
    # otherwise the headless Chrome process would be left running.
    driver.quit()