import requests
def GetRobotsHtml(url, timeout=10):
    """Download the text served at ``url`` and return it.

    Args:
        url: Address to fetch (for example a site's ``robots.txt``).
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        The decoded response body as ``str`` on success. If the request
        fails (connection error, timeout, invalid URL, or a 4xx/5xx status)
        the caught ``requests.RequestException`` instance is returned
        instead of being raised, so callers that need to distinguish
        success from failure must check the type of the result.
    """
    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # server that accepts the connection but never answers.
        response = requests.get(url, timeout=timeout)
        # Convert 4xx/5xx responses into an HTTPError.
        response.raise_for_status()
    except requests.RequestException as error:
        return error
    # Decode with the encoding detected from the body itself rather than
    # relying only on what the response headers declare.
    response.encoding = response.apparent_encoding
    return response.text
if __name__ == '__main__':
    # Script entry point: fetch bilibili's robots.txt and print whatever
    # GetRobotsHtml hands back.
    target = 'https://www.bilibili.com/robots.txt'
    print(GetRobotsHtml(target))
User-agent: *
Disallow: /include/
Disallow: /mylist/
Disallow: /member/
Disallow: /images/
Disallow: /ass/
Disallow: /getapi
Disallow: /search
Disallow: /account
Disallow: /badlist.html
Disallow: /m/
- 由结果可知,bilibili 的 robots.txt 规则适用于所有爬虫(User-agent: *):允许爬虫对其进行爬取,但通过 Disallow 明确列出了不可爬取的路径。