# Main features:
# 1. Using the requests module;
# 2. Using regular expressions.
import requests
import re
# Download the first "hot" page of qiushibaike.com and print every joke found
# on it: its position on the page, its text, its author, and the two counters
# from the entry's stats block.

PAGE_URL = "http://www.qiushibaike.com/hot/page/1"
REQUEST_TIMEOUT = 10  # seconds; requests waits indefinitely when no timeout is given

r = requests.get(PAGE_URL, timeout=REQUEST_TIMEOUT)
# Stop on an HTTP error status instead of running the patterns over an error page.
r.raise_for_status()
content = r.content.decode('utf-8')

# Matches one complete joke entry. The pattern has no capture groups, so
# findall() yields the full HTML text of each entry.
pattern = re.compile(
    r'<div class="article block untagged mb15.*?'
    r'<div class="author clearfix">.*?</div>'   # author info
    r'.*?<div class="content">.*?</div>'        # joke text
    r'.*?<div class="stats">.*?</div>'          # like / comment counters
    r'.*?</div>',
    re.S,  # let "." match newlines: an entry spans several lines of HTML
)
# Field extractors, applied to the HTML of a single entry.
contentPattern = re.compile(r'<div class="content">.*?<span>(.*?)</span>.*?</div>', re.S)
authorPattern = re.compile(r'<div class="author clearfix">.*?<h2>(.*?)</h2>', re.S)
commentPattern = re.compile(r'<div class="stats">.*?<i class="number">(.*?)</i>.*?<i class="number">(.*?)</i>.*?</div>', re.S)

items = pattern.findall(content)
print(str(items))
print('--------------------------------')

# number is the 1-based position of the entry among all matched entries.
for number, item in enumerate(items, 1):
    content_match = contentPattern.search(item)
    author_match = authorPattern.search(item)
    stats_match = commentPattern.search(item)
    if not (content_match and author_match and stats_match):
        # The entry lacks one of the expected fields; skip it rather than
        # fail with an IndexError/AttributeError halfway through the page.
        continue

    print(str(number))
    # <br/> tags inside the joke text become real line breaks.
    print(content_match.group(1).strip().replace('<br/>', '\n'))
    print("作者:" + author_match.group(1).strip())
    likes, comments = stats_match.groups()
    print("点赞:" + likes + "次 " + "评论:" + comments + "条")