核心算法代码分享如下:
# Extract the fields of one scraped post (two counters, comment URL, like count,
# publish time), normalise the publish time, and yield them as a tuple.
# NOTE(review): this excerpt has lost its original indentation and relies on
# names defined outside it (`article`, `num`, `user_name`, `article_content`,
# `time`). The `yield` below means it belongs inside a generator function whose
# header is not shown here; the block nesting must be restored before it can run.
try:
# Text of the link in the first <li> of every "card-act" bar; the entry for
# the current post is picked with `num`.
# NOTE(review): every expression here starts with `//`, which in XPath selects
# from the document root rather than relative to `article` -- presumably why the
# results are indexed with `num`; confirm against the caller.
repeat = article.xpath('//div[@class="card-act"]/ul/li[1]/a/text()')
repeat = repeat[num]
# Text of the link in the second <li> of the same bar.
comment = article.xpath('//div[@class="card-act"]/ul/li[2]/a/text()')
comment = comment[num]
# href values of the links inside <p class="txt"> elements.
comment_url = article.xpath('//div//p[@class="txt"]/a/@href')
# Prints the whole href list (looks like leftover debug output).
print(comment_url)
# The href is prefixed with "https:". Note the index is `num - 1`, not `num`;
# the reason for the offset is not visible in this excerpt.
comment_url = "https:" + comment_url[num - 1]
# Text of the "woo-like-count" span inside the button of the third <li>.
support = article.xpath('//div[@class="card-act"]/ul/li[3]/a/button/span[@class="woo-like-count"]/text()')
support = support[num]
# Time
publish_time = article.xpath('//div//div[@class="from"]/a[@target="_blank"]/text()')
publish_time = publish_time[num].strip()
# Advance the shared index by two for the next use; why the step is 2 is not
# shown here -- TODO confirm against the page structure.
num += 2
# Seconds
try:
# "<N>秒前" (N seconds ago): convert to an absolute local timestamp string.
if "秒" in publish_time:
publish_time = publish_time.replace("秒前", '').strip()
# Subtract that many seconds from the current timestamp, then convert the
# result to a date string
timestamp = time.time()
# int() raises ValueError if the remaining text is not a plain integer.
timestamp -= int(publish_time)
publish_time = time.strftime("%Y-%m-%d %H:%M", time.localtime(timestamp))
# "<N>分钟前" (N minutes ago): same conversion, with minutes scaled to seconds.
elif "分钟" in publish_time:
try:
# Keep only the first space-separated token after removing "分钟前".
publish_time = publish_time.replace("分钟前", '').strip().split(' ')[0]
except Exception as e:
# Fallback without the split.
publish_time = publish_time.replace("分钟前", '').strip()
timestamp = time.time()
timestamp -= int(publish_time) * 60
publish_time = time.strftime("%Y-%m-%d %H:%M", time.localtime(timestamp))
# "今天 ..." (today): replace the word with today's local date.
elif "今天" in publish_time:
now_time = time.localtime()
publish_time = publish_time.replace("今天", '').strip()
localtime = time.strftime("%Y-%m-%d", now_time)
publish_time = localtime + ' ' + publish_time
# Dates written with 月/日 (month/day): turn "月" into "-" and drop "日".
if '月' in publish_time:
publish_time = publish_time.replace('月', '-')
publish_time = publish_time.replace('日', '')
# An explicit 年 (year) also becomes "-".
if '年' in publish_time:
publish_time = publish_time.replace('年', '-')
# Otherwise the current year and "-" are prepended.
# NOTE(review): with the indentation lost it is unclear which `if` this `else`
# pairs with -- presumably the '年' check, nested inside the '月' branch; confirm.
else:
publish_time = str(time.localtime(time.time())[0]) + '-' + publish_time
# Emit one record as a 7-tuple; `user_name` and `article_content` come from
# code outside this excerpt.
yield user_name, publish_time, article_content, repeat, comment, comment_url, support
# A ValueError (e.g. from the int() conversions above) is printed and ignored.
except ValueError as e:
print(e)
pass
# An IndexError (e.g. from indexing the XPath result lists) is printed and
# ignored.
except IndexError as e:
print(e)
pass
既有适合小白学习的零基础资料,也有适合 3 年以上经验的小伙伴深入学习提升的进阶课程,涵盖了 95% 以上的大数据知识点,真正体系化!
由于文件比较多,这里只是将部分目录截图出来。全套资料包含大厂面经、学习笔记、源码讲义、实战项目、大纲路线、讲解视频,并且后续会持续更新。