本周代码练习顺利通过
贪心算法
而且一直想用 Python 写爬虫
附上代码
爬取书城三国演义
数据解析:bs4
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def _fetch_soup(url, headers, timeout=10):
    """Download ``url`` and return the page parsed with BeautifulSoup/lxml.

    Args:
        url: Address of the page to fetch.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The parsed ``BeautifulSoup`` document.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response (via ``raise_for_status``).
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page.
    response.raise_for_status()
    # Hand over the raw bytes so the parser works out the encoding itself.
    return BeautifulSoup(response.content, 'lxml')


if __name__ == '__main__':
    # Fetch the book's index page, follow every chapter link found in it,
    # and append each chapter as "title:content" to one UTF-8 text file.
    url = 'https://www.shicimingju.com/book/sanguoyanyi.html'
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/100.0.4896.60 Safari/537.36 Edg/100.0.1185.29'
        ),
    }

    soup = _fetch_soup(url, headers)
    # Presumably the table-of-contents entries, one <li> per chapter.
    li_lst = soup.select('.book-mulu > ul > li')

    # The context manager closes the file even if a request or parse step
    # raises part-way through the book.
    with open('sanguoyanyi.text', 'w', encoding='utf-8') as fp:
        for li in li_lst:
            link = li.a
            if link is None or not link.get('href'):
                # An entry without a link has no chapter page to fetch.
                continue

            # get_text() still yields text when the <a> has nested markup,
            # where .string would be None and break the concatenation below.
            title = link.get_text()
            # Resolve against the index URL so the scheme and host stay
            # consistent and both relative and absolute hrefs work.
            detail_url = urljoin(url, link['href'])

            detail_soup = _fetch_soup(detail_url, headers)
            div_tag = detail_soup.find('div', class_='chapter_content')
            if div_tag is None:
                # Stop with a clear message rather than an opaque
                # AttributeError on None; a silent skip would leave a gap
                # in the saved book.
                raise RuntimeError(
                    'chapter content not found at ' + detail_url
                )

            fp.write(title + ':' + div_tag.text + '\n')
            print(title + '爬取成功')

    print('三国演义已全部爬取成功!!!')