一、准备工作
安装 requests 库和 bs4 库（已经安装过的可以忽略）
pip install requests
pip install bs4
二、实践爬取源码
# -*- coding: utf-8 -*-
"""
@ description:学习python3.8.5 文章爬取
@ author: chz
@ datetime: 2021-04-20 23:01:47
"""
import os
import io
import sys
import requests
from bs4 import BeautifulSoup
# Change the encoding of standard output to GB18030.
# reconfigure() updates the existing stream in place, so its other settings
# (such as line buffering) are kept; wrapping sys.stdout.buffer in a second
# TextIOWrapper would create a stream with default settings instead.
sys.stdout.reconfigure(encoding='gb18030')
def urlBS(url):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'}
resp = requests