# 爬取i春秋社区精华页文章标题 (Scrape article titles from the i春秋 community featured-posts page)
import requests
import re
# Captures the link text of each article heading on the portal page.
# Compiled once at import time so repeated calls reuse the same pattern object.
_TITLE_PATTERN = re.compile(
    r'target="blank" class="ui_colorG" style="color: #555555;">(.+?)</a></h3>'
)

# Upper bound, in seconds, on how long the request may wait for the server.
# Without it ``requests.get`` can block forever on an unresponsive host.
_REQUEST_TIMEOUT_SECONDS = 10


def _extract_titles(html):
    """Return every article title matched by ``_TITLE_PATTERN`` in *html*, in match order."""
    return _TITLE_PATTERN.findall(html)


def getHtml():
    """Fetch the ichunqiu forum portal page and print each article title found.

    The page at ``https://bbs.ichunqiu.com/portal.php`` is requested with
    browser-like headers, its body is decoded as UTF-8, and every title
    captured by ``_TITLE_PATTERN`` is printed on its own line.

    Returns:
        None. The titles are written to standard output only.

    Raises:
        requests.exceptions.RequestException: if the request fails, including
            ``Timeout`` when the server does not answer within
            ``_REQUEST_TIMEOUT_SECONDS`` seconds.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    url = 'https://bbs.ichunqiu.com/portal.php'
    headers = {
        'Host': 'bbs.ichunqiu.com',
        'Connection': 'close',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8',
    }
    # A timeout turns a stalled connection into an exception instead of a hang.
    response = requests.get(url=url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)
    # Decode the raw bytes explicitly rather than relying on requests' charset guess.
    html = response.content.decode('utf8')
    for title in _extract_titles(html):
        print(title)
# Script entry point: run the scraper only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    getHtml()
# 爬取美女图 (Scrape pictures of beautiful women)