python 爬虫 表格_零基础自学爬虫(3)爬取结果保存为表格-附Python源代码

"""采用 Python Selenium + 无头浏览器,获取知乎粉丝数排行榜 TOP 50 的个人主页基础数据。"""

from time import sleep

from datetime import datetime

from selenium import webdriver

from selenium.webdriver.chrome.options import Options

# Build the Chrome launch options and start the shared browser session.
# "--headless" runs Chrome without a visible window; "--disable-gpu" is
# passed alongside it as a second command-line switch.
chrome_options = Options()
for _flag in ("--headless", "--disable-gpu"):
    chrome_options.add_argument(_flag)

driver = webdriver.Chrome(options=chrome_options)

# Result keys for the last seven "Tabs-meta" counters, in page order.
_TAB_FIELDS = ("回答", "视频", "提问", "文章", "专栏", "想法", "收藏")


def _texts_by_class(driver, class_name):
    """Return the ``.text`` of every element carrying *class_name*."""
    # "class name" is the locator-strategy string behind selenium's
    # ``By.CLASS_NAME``. Passing it to ``find_elements`` works on both
    # Selenium 3 and 4, whereas ``find_elements_by_class_name`` was removed
    # in Selenium 4.3.
    return [element.text for element in driver.find_elements("class name", class_name)]


def get_one_info(driver, url, *, wait=1):
    """Collect the counters shown on one profile page.

    Args:
        driver: A Selenium WebDriver, or any object offering ``get(url)`` and
            ``find_elements(by, value)`` whose results expose ``.text``.
        url: Address of the profile page; also stored under ``"用户"``.
        wait: Seconds to pause after loading the page before reading its
            elements. Defaults to 1, the delay that used to be hard-coded.

    Returns:
        dict: Always contains ``"用户"`` and ``"日期"``. The seven tab
        counters are present only when at least seven ``Tabs-meta`` elements
        were found, and ``"关注了"`` / ``"关注者"`` only when exactly two
        ``NumberBoard-itemValue`` elements were found. Otherwise a diagnostic
        line is printed and those keys are omitted.
    """
    driver.get(url)
    sleep(wait)

    tab_counts = _texts_by_class(driver, "Tabs-meta")
    follow_counts = _texts_by_class(driver, "NumberBoard-itemValue")

    rlt = {"用户": url}

    if len(tab_counts) >= 7:
        # Only the trailing seven entries are used; any extra leading
        # elements are ignored.
        rlt.update(zip(_TAB_FIELDS, tab_counts[-7:]))
    else:
        print(url, "nums1 少于7个", tab_counts)

    if len(follow_counts) == 2:
        rlt["关注了"], rlt["关注者"] = follow_counts
    else:
        print(url, "nums2异常", follow_counts)

    # Format explicitly: slicing str(datetime.now())[:-7] truncates the
    # seconds whenever the microsecond part happens to be zero.
    rlt["日期"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return rlt

# Profile pages to scrape, in ranking order (51 entries). Each address is
# the shared profile prefix plus the user's slug; the first slug keeps its
# trailing slash.
urls = [
    "https://www.zhihu.com/people/" + slug
    for slug in (
        "haili-9-70/",
        "zhi-hu-ri-bao-51-41",
        "liu-kan-shan-78",
        "ding-xiang-yi-sheng",
        "zhang-jia-wei",
        "zhi-ke-ji-13",
        "knowyourself-1",
        "kaifulee",
        "zhouyuan",
        "zhang-xiao-bei",
        "warfalcon",
        "lisanshui1230",
        "tian-ji-shun",
        "jixin",
        "ma-bo-yong",
        "sizhuren",
        "imike",
        "raymond-wang",
        "ChenZhangyu",
        "excited-vczh",
        "zhu-xuan-86",
        "lisongwei",
        "xia-chu-fang",
        "dong-ji-zai-hang-zhou",
        "gejinyuban",
        "guo-zi-501",
        "gao-ke-69",
        "chenqin",
        "magie",
        "chenbailing",
        "wang-ni-ma-94",
        "thejennyyy",
        "cai-tong",
        "zhou-xiao-nong",
        "qiong-you-jin-nang",
        "mali",
        "bo-cai-28-7",
        "cheng-yi-nan",
        "lens-27",
        "commando",
        "nordenbox",
        "binka",
        "zhen-shi-gu-shi-ji-hua",
        "he-ming-ke",
        "ccat",
        "talich",
        "feifeimao",
        "zhong-guo-ke-pu-bo-lan",
        "pan-fan-65",
        "gong-qing-tuan-zhong-yang-67",
        "divinites",
    )
]

# Scrape every profile in turn and collect the per-user dicts in ``rlts``,
# keyed by 1-based position in ``urls``.
rlts = {}
number = 0  # stays 0 when ``urls`` is empty; otherwise ends at len(urls)

try:
    for number, url in enumerate(urls, start=1):
        rlt = get_one_info(driver, url)
        rlts[number] = rlt
finally:
    # Always shut the browser down, even if a page load or lookup raised;
    # otherwise the headless Chrome process is left running.
    driver.quit()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值