《Python无师自通》第十九章 版本控制

最新推荐文章于 2023-12-02 23:01:08 发布

qq灰尘

最新推荐文章于 2023-12-02 23:01:08 发布

阅读量133

点赞数

本文链接：https://blog.csdn.net/qq_43169516/article/details/104070643

版权

import urllib.request
from bs4 import BeautifulSoup

class Scraper:
    """Print the links on one web page whose URL contains "html".

    Attributes:
        site: URL of the page that scrape() downloads.
    """

    def __init__(self, site):
        """Remember which page to scrape.

        Args:
            site: URL of the page to download; it is passed unchanged to
                urllib.request.urlopen() when scrape() runs.
        """
        self.site = site

    def scrape(self):
        """Download self.site and print every anchor href containing "html".

        Each matching URL is printed on its own line, preceded by an empty
        line. Anchors that have no href attribute are skipped. Nothing is
        returned, and any exception raised by urlopen() or by the parser is
        not caught here.
        """
        # Read the body inside a context manager so the HTTP response (and
        # its underlying connection) is closed even if read() fails.
        with urllib.request.urlopen(self.site) as response:
            html = response.read()

        soup = BeautifulSoup(html, "html.parser")
        for tag in soup.find_all("a"):
            url = tag.get("href")
            # tag.get() yields None for an <a> without href; the None check
            # must come first so the substring test never sees None.
            if url is not None and "html" in url:
                print("\n" + url)

# Script entry: scrape the Google News front page as soon as this file runs
# and print the links whose URL contains "html".
news = "https://news.google.com/"
Scraper(site=news).scrape()