import urllib.request
import re
def get_new_qq(url, pat, file, *, encoding="gbk"):
    """Download a page, collect every regex match, save the matches, and return them.

    Args:
        url: Address of the page to download (anything ``urlopen`` accepts).
        pat: Regular-expression source applied to the decoded page text.
        file: Path of the text file that receives ``str()`` of the match
            list. The file is overwritten on every call.
        encoding: Character set used to decode the response body. Defaults
            to ``"gbk"``; bytes that cannot be decoded are dropped.

    Returns:
        The list produced by ``re.findall`` for ``pat`` on the page text.
    """
    # Use a context manager so the response (and its socket) is closed
    # deterministically instead of whenever the object gets collected.
    with urllib.request.urlopen(url) as response:
        raw = response.read()

    data = raw.decode(encoding, "ignore").strip()
    res = re.findall(pat, data)

    # Write UTF-8 explicitly: the platform default encoding may be unable to
    # represent the matched text and would raise UnicodeEncodeError.
    with open(file, "w", encoding="utf-8") as f:
        f.write(str(res))
    return res
if __name__ == '__main__':
    # Script entry point: run get_new_qq against the qq.com front page with a
    # pattern that captures the text of <li><a ...>...</a></li> items, saving
    # the result to a file one directory up, then print the returned matches.
    headlines = get_new_qq(
        url="https://www.qq.com/?pgv_ref=1",
        pat="<li><a .*>(.*)</a></li>",
        file="../腾讯新闻.txt",
    )
    print(headlines)
# 通过 urllib 简易爬取腾讯新闻内容的方法封装
# (A simple urllib-based helper for scraping Tencent News headlines.)
# 最新推荐文章于 2024-05-10 12:36:22 发布