from urllib import request
from bs4 import BeautifulSoup
# Download the Jianshu home page and collect its article-title links.
url = r'https://www.jianshu.com/'
# Send a desktop-browser User-Agent instead of urllib's default one
# (presumably the site rejects the default agent -- TODO confirm).
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}
page = request.Request(url, headers=headers)
# The context manager closes the HTTP response as soon as the body has been
# read, and the timeout keeps an unresponsive server from blocking forever.
with request.urlopen(page, timeout=30) as response:
    page_info = response.read().decode('utf-8')
soup = BeautifulSoup(page_info, 'html.parser')
# Every <a> element whose CSS class is "title".
titles = soup.find_all('a', 'title')
# Open (or create) titles.txt on the E: drive in write mode and store one
# scraped article title per line.
# `with` closes the file even when a write fails, and unlike the previous
# try/finally it cannot raise NameError when open() itself fails.
# The encoding is explicit because the titles are Chinese text and the
# platform default codec may be unable to encode them.
with open(r'E:\titles.txt', 'w', encoding='utf-8') as file:
    for tmp in titles:
        # get_text() instead of .string: .string is None for a tag with more
        # than one child, which made `None + '\n'` raise TypeError.
        file.write(tmp.get_text() + '\n')
# python中BeautifulSoup的find用法:find以及find_all