# -*- coding: UTF-8 -*-
"""Simple scraper example: fetch joke listing pages from qiushibaike.com
and write every joke's text into a numbered HTML table (output2.html)."""
import urllib.request

from bs4 import BeautifulSoup

# Build the listing-page URLs for pages 1..PAGE_COUNT.
PAGE_COUNT = 14
urls = ["http://www.qiushibaike.com/8hr/page/%d/" % page
        for page in range(1, PAGE_COUNT + 1)]
print(urls)

# Fetch each page and collect its joke elements (<div class="content">).
jokes_per_page = []
for page_url in urls:
    request = urllib.request.Request(page_url)
    # Browser-like user-agent header — presumably the site blocks the
    # default urllib agent; TODO confirm it is still required.
    request.add_header("user-agent", "Mozilla/5.0")
    response = urllib.request.urlopen(request)
    try:
        html_cont = response.read()
    finally:
        # Close the connection explicitly instead of leaking 14 sockets.
        response.close()
    soup = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')
    jokes_per_page.append(soup.find_all('div', class_="content"))

# Write the collected jokes out as a simple HTML table.
# `with` guarantees the file is flushed and closed even on error.
with open('output2.html', 'w', encoding='utf-8') as fout:
    fout.write("<html>")
    fout.write('<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />')
    fout.write("<body>")
    fout.write("<table>")
    fout.write("<tr>Just for fun</tr>")
    count = 1  # running joke number across all pages, starting at 1
    for page_jokes in jokes_per_page:
        for joke in page_jokes:
            fout.write("<tr>")
            fout.write("<td>%d : %s</td>" % (count, joke.get_text()))
            fout.write("</tr>")
            count = count + 1
    fout.write("</table>")
    fout.write("</body>")
    fout.write("</html>")
python 爬虫简单例子
最新推荐文章于 2022-03-02 11:39:31 发布