练习---爬取知乎某个用户所写文章的标题、链接、内容简介

最新推荐文章于 2024-04-27 00:26:31 发布

Croyance_M

最新推荐文章于 2024-04-27 00:26:31 发布

阅读量1k

点赞数

分类专栏：爬虫学习

本文链接：https://blog.csdn.net/Croyance_M/article/details/89884872

版权

爬虫学习专栏收录该内容

22 篇文章 4 订阅

订阅专栏

import requests
import openpyxl
from pypinyin import lazy_pinyin
import os

def toPy(name):
    """Convert a (Chinese) name to a hyphen-separated pinyin string.

    The items returned by ``lazy_pinyin(name)`` are joined with ``-``.

    Args:
        name: The text to convert.

    Returns:
        The hyphen-joined pinyin string. When ``lazy_pinyin`` returns no
        items (e.g. for an empty name) the result is an empty string
        rather than an ``IndexError``.
    """
    return '-'.join(lazy_pinyin(name))

def _fetch_articles(url, page_size=20, timeout=10):
    """Return ``[title, link, excerpt]`` rows for every article behind *url*.

    Pages through the API ``page_size`` items at a time until the response
    reports ``paging.is_end`` or a page comes back empty.

    Raises:
        requests.HTTPError: If a page request returns an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    # Request settings that do not change between pages.
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36'}
    include = 'data[*].comment_count,suggest_edit,is_normal,thumbnail_extra_info,thumbnail,can_comment,comment_permission,admin_closed_comment,content,voteup_count,created,updated,upvoted_followees,voting,review_info,is_labeled,label_info;data[*].author.badge[?(type=best_answerer)].topics'

    rows = []
    offset = 0
    while True:
        params = {
            'include': include,
            'offset': str(offset),
            'limit': str(page_size),
            'sort_by': 'voteups',
        }
        # A timeout keeps a stalled connection from hanging the script.
        res = requests.get(url, params=params, headers=headers, timeout=timeout)
        res.raise_for_status()
        payload = res.json()
        articles = payload['data']

        rows.extend([a['title'], a['url'], a['excerpt']] for a in articles)

        # Also stop on an empty page so a response that never sets
        # ``is_end`` cannot cause an endless loop.
        if payload['paging']['is_end'] or not articles:
            break
        offset += page_size
    return rows


def _write_sheet(name, path, rows):
    """Write *rows* under a header row into a sheet titled *name* at *path*.

    An existing workbook gets a new sheet appended; otherwise a new
    workbook is created (together with its parent directory if missing).
    """
    if os.path.exists(path):
        wb = openpyxl.load_workbook(path)
        sheet = wb.create_sheet()
    else:
        parent = os.path.dirname(path)
        if parent:
            # Saving fails if the target folder does not exist yet.
            os.makedirs(parent, exist_ok=True)
        wb = openpyxl.Workbook()
        sheet = wb.active
    sheet.title = name
    sheet.append(['标题', '链接', '简介'])
    for row in rows:
        sheet.append(row)
    wb.save(path)


def get(name, path):
    """Scrape a Zhihu user's articles and save them to an Excel workbook.

    For each article the title, link and excerpt are collected, printed,
    and written to a sheet named after the user.

    Args:
        name: The user's display name; its pinyin form is used as the
            member identifier in the API URL.
        path: Path of the ``.xlsx`` file to create or extend.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    # Not every user's profile identifier is the pinyin of their display
    # name; looking up each user's real URL first is more reliable.
    url = 'https://www.zhihu.com/api/v4/members/' + toPy(name) + '/articles?'
    all_a = _fetch_articles(url)

    print(all_a)

    _write_sheet(name, path, all_a)
	

# Example run: export one Zhihu user's articles to a local workbook.
name='丁香医生'
path='C:\\Users\\Xpeng\\Desktop\\爬取到的表格\\知乎.xlsx'

# Only scrape when executed as a script, so that importing this module
# does not trigger network requests or file writes.
if __name__ == '__main__':
    get(name,path)

这段代码的不足之处在于：并不是所有用户的主页链接都是其昵称的拼音，因此按拼音拼接 url 并不总能成功；更可靠的方法还是逐个找出用户实际的 url，再用它去获取数据。

Croyance_M

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
练习---爬取知乎某个用户所写文章的标题、链接、内容简介

import requests import openpyxl from pypinyin import lazy_pinyin import os #汉字转拼音 def toPy(name): names=lazy_pinyin(name) py=names[0] for n in names[1:]: py=py+'-'+n return py def get(name,pa...
复制链接

扫一扫