循环中注意递增公式的位置(以爬取**文章为例)
一、先取出再递增(取出了两页内容)
import requests
from bs4 import BeautifulSoup
# Pagination demo, variant 1: print the fetched page first, advance the
# offset afterwards. The loop requests offset 10 and offset 30, and both
# pages are printed before the stop condition ends the loop.
url1 = 'https://www.……articles'  # URL
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}  # request headers
offset = 10  # initial value
while True:  # loop to fetch several pages
    params = {
        'include': 'data[*].comment_count,suggest_edit,is_normal,thumbnail_extra_info,thumbnail,can_comment,comment_permission,admin_closed_comment,content,voteup_count,created,updated,upvoted_followees,voting,review_info,is_labeled,label_info;data[*].author.badge[?(type=best_answerer)].topics',
        'offset': str(offset),
        'limit': '10',
        'sort_by': 'created',
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    res = requests.get(url1, headers=headers, params=params, timeout=10)
    articles1 = res.json()
    articles = articles1['data']
    for i in articles:
        title = [i['title']]
        print(title)
    # Increment once per fetch, and only after the current page was printed.
    # NOTE(review): with limit=10 a step of 20 skips offsets 20-29 - confirm
    # this is intended.
    offset = offset + 20
    if offset > 30:
        break
二、先递增再取出(只取出一页内容)
import requests
from bs4 import BeautifulSoup
# Pagination demo, variant 2 (the pitfall): the offset is advanced and the
# stop condition is checked BEFORE the fetched page is printed. On the
# second pass the page at offset 30 is downloaded, the offset becomes 50,
# and the loop breaks before that page is printed - so only one page of
# titles appears and the second request is wasted.
url1 = 'https://www.……articles'  # URL
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}  # request headers
offset = 10  # initial value
while True:  # loop to fetch several pages
    params = {
        'include': 'data[*].comment_count,suggest_edit,is_normal,thumbnail_extra_info,thumbnail,can_comment,comment_permission,admin_closed_comment,content,voteup_count,created,updated,upvoted_followees,voting,review_info,is_labeled,label_info;data[*].author.badge[?(type=best_answerer)].topics',
        'offset': str(offset),
        'limit': '10',
        'sort_by': 'created',
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    res = requests.get(url1, headers=headers, params=params, timeout=10)
    articles1 = res.json()
    articles = articles1['data']
    # Incrementing here, ahead of the print loop, is the mistake being
    # demonstrated: the loop stops before the second page is printed.
    offset = offset + 20
    if offset > 30:
        break
    # NOTE(review): the original indentation was lost; this loop is assumed
    # to sit inside the while loop after the break check. Placing it after
    # the while loop would also print exactly one page - confirm.
    for i in articles:
        title = [i['title']]
        print(title)