# Crawl Maoyan (maoyan.com)
import os
import re
from urllib.parse import urljoin

import bs4
import requests
from bs4 import BeautifulSoup
def get_html(url):
    """Download *url* and return the response body as text.

    The request carries a browser-like ``User-Agent`` header and a timeout
    so that an unresponsive server cannot hang the crawl.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response text when the server answers with HTTP 200; otherwise
        ``None``, after printing a failure message for the URL.
    """
    # The User-Agent must be sent as an HTTP header (hyphenated name);
    # passing it via ``params`` would only append it to the query string.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 "
            "SE 2.X MetaSr 1.0"
        ),
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        # Covers connection errors, timeouts and malformed/unsupported URLs
        # without swallowing KeyboardInterrupt or programming errors.
        print(url, " 爬取失败!")
        return None

    if response.status_code != 200:
        print(url, " 爬取失败!")
        return None
    return response.text
def html_params(html, base_url="https://maoyan.com/board/4"):
    """Print every hyperlink found in *html* and request each one.

    Args:
        html: HTML text of a page. ``None`` or an empty string (for example
            the result of a failed download) is accepted and yields no work.
        base_url: URL used to resolve relative ``href`` values into absolute
            ones. Defaults to the board page that ``main`` downloads.

    Returns:
        None. The downloaded sub-pages are not kept; failures are reported
        by ``get_html``.
    """
    if not html:
        # Nothing to parse when the parent page could not be downloaded.
        return

    soup = BeautifulSoup(html, "html.parser")
    for anchor in soup.find_all("a", href=True):
        # hrefs are frequently site-relative (e.g. "/films/1"); they must be
        # made absolute before they can be requested.
        url = urljoin(base_url, anchor["href"])
        if not url.startswith(("http://", "https://")):
            # Skip links such as "javascript:" or "mailto:" that cannot be
            # fetched over HTTP.
            continue
        print('子链接:', url)
        get_html(url)
def main():
    """Download the Maoyan board page and follow the links it contains.

    Prints the completion message only when the board page itself was
    downloaded; if that first request fails there is nothing to process.
    """
    url = "https://maoyan.com/board/4"
    html = get_html(url)
    if html is None:
        # get_html returns None when the page could not be retrieved;
        # passing that on to the parser would fail.
        return
    html_params(html)
    print("爬取完成")
# Start the crawl only when this file is executed as a script, so that
# importing the module does not trigger network requests.
if __name__ == "__main__":
    main()