import requests
import re
# Page whose markup is downloaded and scraped below.
url = 'https://news.qq.com/'

# Request headers carrying a desktop-browser User-Agent.
# The value has to be one string: a single-quoted literal cannot span
# physical lines, so it is assembled from adjacent literals in parentheses.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/70.0.3538.9 Safari/537.36'
    ),
}

# Without a timeout, requests waits indefinitely on an unresponsive server;
# 10 seconds bounds both the connect and the read phase.
response = requests.get(url=url, headers=headers, timeout=10)
# Headlines are extracted in three narrowing regex passes over the page markup.
# Raw strings keep the regex escapes (\d, \D) from being parsed as (invalid)
# string escapes. [\d\D] matches any character, newlines included, so a
# capture may span several lines of markup.
root_pattern = r'<div class="Q-tpWrap">([\d\D]*?)</div>'
two_pattern = r'<em class="f14 l24">([\d\D]*?)</em>'
# NOTE(review): the `?` makes the `a` optional, so `</>` would also terminate
# a match, and the leading `.` matches any character rather than a literal
# dot. Presumably `\.html">(.*?)</a>` was intended -- confirm before changing;
# the pattern is kept as-is so existing results do not change.
three_pattern = r'.html">(.*?)</a?>'

root_html = response.text

# Pass 1: contents of every <div class="Q-tpWrap">, concatenated.
first_html = ''.join(re.findall(root_pattern, root_html))
# Pass 2: contents of every <em class="f14 l24"> inside those, concatenated.
two_html = ''.join(re.findall(two_pattern, first_html))
# Pass 3: the text captured between `.html">` and the closing tag, as a list.
three_html = re.findall(three_pattern, two_html)
# a=1  (for debugging)
# print(two_html)
# print(three_html)
# def my_news():
# for x in range(len(three_html)):
# print('%d:' % (x + 1) + three_html[x])
# Write the extracted entries to a new file, one per line, numbered from 1
# in the form "<n> :<text>".
# The encoding is given explicitly: without it open() uses the platform's
# locale encoding, which may be unable to represent non-ASCII text and
# would then raise UnicodeEncodeError.
with open('tengxun.txt', 'w', encoding='utf-8') as fb:
    fb.writelines(
        '%d :' % index + title + '\n'
        for index, title in enumerate(three_html, start=1)
    )