最近在自学 Python 爬虫入门,出于好奇想动手尝试一下。
环境:Python 3.8
目标:抓取 i春秋论坛门户页上的视频、文章标题
代码如下:
import requests
import re
def getTitle():
    """Fetch the ichunqiu forum portal page and print its headline titles.

    Sends a GET request with browser-like headers to
    ``https://bbs.ichunqiu.com/portal.php``, decodes the response body as
    UTF-8, extracts the link text of every anchor matching the title
    pattern, and prints each one prefixed with a 1-based counter.

    Returns:
        None. The titles are written to standard output.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            does not complete within the timeout.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    url = 'https://bbs.ichunqiu.com/portal.php'
    headers = {
        'Host': 'bbs.ichunqiu.com',
        'Connection': 'close',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8',
    }
    # requests never times out by default; without an explicit timeout an
    # unresponsive server would make the script hang indefinitely.
    response = requests.get(url=url, headers=headers, timeout=10)
    html = response.content.decode('utf-8')

    # Captures the text between the styled title anchor and its closing
    # </a></h3>; non-greedy so each match stops at the first closing tag.
    title_pattern = r'target="blank" class="ui_colorG" style="color: #555555;">(.+?)</a></h3>'
    for num, title in enumerate(re.findall(title_pattern, html), start=1):
        print(num, title)
# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    getTitle()
运行结果如下: