有些小瑕疵:不知道为什么,爬取到的标题个数和摘要个数都不是整个页面的全部,而且两者的数量也不相等。不过这是第一次做出来,先简单记录一下~
import urllib.request
import re
import os
def url_open(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request carries a desktop-browser ``User-Agent`` header instead of
    urllib's default one.

    Args:
        url: The URL to fetch, as a string.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = urllib.request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
    )
    # Open the Request object rather than the bare URL string; otherwise the
    # header added above is silently dropped.
    # The ``with`` block closes the response even if read/decode raises.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode('utf-8')
    return html
def get_title