import parsel
# Fetch the listing page and parse the returned HTML.
response = requests.get(url=url, headers=headers, cookies=cookie)
selector = parsel.Selector(response.text)
# Collect the href of every <a> inside an <h4> under the .article-list element.
urls = selector.css('.article-list h4 a::attr(href)').getall()
for html_url in urls:
    print(html_url)
# At this point the URL of every article on the page has been obtained.
# The extraction/parsing approach is the same as above.
# NOTE(review): this requests `url` again rather than an individual article
# link such as `html_url` -- confirm which address is intended here.
response = requests.get(url, headers=headers, cookies=cookie)
# response.text is the response body as text (a string).
# Anti-scraping measures were encountered at this step, so the body is printed
# for inspection.
print(response.text)
# Extract the article portion of the page.
sel = parsel.Selector(response.text)