def caution(homepage, aim_url):
    """Report whether a site's robots.txt permits crawling a given URL.

    Downloads ``<homepage>/robots.txt``, parses it, and checks ``aim_url``
    against the rules that apply to the wildcard user agent ``*``. The
    verdict is printed (the refusal in red via ANSI escape codes) and is
    also returned so callers can act on it.

    Args:
        homepage: Base URL of the site; trailing slashes are tolerated.
        aim_url: URL whose crawl permission is being checked.

    Returns:
        True if the parsed rules allow fetching ``aim_url``, else False.

    Raises:
        urllib.error.URLError: If robots.txt cannot be downloaded.
        UnicodeDecodeError: If robots.txt is not valid UTF-8.
    """
    from urllib.robotparser import RobotFileParser
    from urllib.request import urlopen

    # Strip trailing slashes so 'https://host/' does not turn into
    # 'https://host//robots.txt'.
    url_robots = homepage.rstrip('/') + '/robots.txt'

    # The context manager closes the response even if read/decode fails.
    with urlopen(url_robots) as response:
        lines = response.read().decode('utf-8').splitlines()

    rp = RobotFileParser()
    rp.parse(lines)
    res = rp.can_fetch('*', aim_url)
    if res:
        print('恭喜,此网站可以爬取了')
    else:
        print('\033[1;31;0m切记,此网站不可爬取!!!!! \033[0m')
    return res
# Sample run: the site base URL and one page on it to check against robots.txt.
homepage, aim_url = (
    'https://www.zhihu.com/',
    'https://www.zhihu.com/question/287426676/answer/1774597016',
)
caution(homepage=homepage, aim_url=aim_url)
# 君子协议测试 (robots.txt "gentleman's agreement" test)
# 最新推荐文章于 2022-04-19 00:17:24 发布 (latest recommended article published 2022-04-19 00:17:24)