获取标题
>>> response.css(".entry-header h1").extract()
['<h1>写代码容易,编程并不容易</h1>']
>>> response.css(".entry-header h1::text").extract()
['写代码容易,编程并不容易']
获取时间
# Publication date: first text node under the meta paragraph with the "·"
# separator removed. The separator is dropped BEFORE stripping so whitespace
# that sat between the date and the "·" does not survive in the result.
# extract_first("") yields "" instead of raising IndexError when nothing matches.
create_time = (
    response.css("p.entry-meta-hide-on-mobile ::text")
    .extract_first("")
    .replace("·", "")
    .strip()
)
获取点赞人数
# Up-vote count. Falls back to 0 when the node is missing or its text is not
# a plain integer (e.g. blank), instead of raising IndexError / ValueError.
praise_text = response.css(".vote-post-up h10 ::text").extract_first("")
try:
    praise_num = int(praise_text)
except ValueError:
    praise_num = 0
获取收藏人数
# Bookmark count: first run of digits in the bookmark button text.
# Always an int; 0 when the node is missing or the text carries no digits,
# so the raw button text is never left behind as the "count".
fav_text = response.css(".bookmark-btn ::text").extract_first("")
match_re = re.match(r'.*?(\d+).*', fav_text)
fav_num = int(match_re.group(1)) if match_re else 0
获取评论数
# Comment count: first run of digits in the comment link's span text.
# The result is an int on every path (0 when the node is missing or has no
# digits), rather than a str on match and an int otherwise.
comments_text = response.css("a[href='#article-comment'] span::text").extract_first("")
match_re = re.match(r'.*?(\d+).*', comments_text)
comments_num = int(match_re.group(1)) if match_re else 0
获取内容
# Article body: serialized markup of the first node matching "div.entry".
entry_blocks = response.css("div.entry").extract()
content = entry_blocks[0]
获取标签
# Text of every link inside the meta paragraph.
meta_link_texts = response.css('p.entry-meta-hide-on-mobile a ::text').extract()
# Filter the tags: drop any entry whose stripped text ends with "评论"
# ("comments") -- presumably the comment-count link; verify against the page.
tag_list = [
    link_text
    for link_text in meta_link_texts
    if not link_text.strip().endswith("评论")
]
# Store the remaining tags as one comma-separated string.
tags = ",".join(tag_list)