先把图上了:
1、首先从最新文章列表页爬取最新文章的链接
import json
import re
import urllib.request
from lxml import etree
import random
import requests
import time
import os
# Load the crawl records saved by a previous run. On a first run the file
# does not exist yet, so fall back to an empty record set instead of
# crashing with FileNotFoundError.
try:
    with open('spiRecords.json', 'r') as fel:
        paperRecords = json.load(fel)
except FileNotFoundError:
    paperRecords = {}

# Value stored under 'lastLst' by an earlier run (presumably the list of
# links already seen -- confirm against the code that writes it). Default to
# an empty list when the key is absent or the stored JSON is not an object.
try:
    lastLst = paperRecords['lastLst']
except (KeyError, TypeError):
    lastLst = []

# NOTE(review): the date is pinned to a fixed day; the disabled line below
# switches to the current local date -- confirm which one is intended.
dateStr = '2018-05-02'
#dateStr = time.strftime('%Y-%m-%d',time.localtime(time.time()))

# Convert the date parts to integers so month and day lose their leading
# zeros, then prefix each with 'a' to build string keys (strY is used below
# to index paperRecords; strM/strD presumably index the nested levels).
toYear, toMonth, toDay = map(int, dateStr.split('-'))
strY = 'a' + str(toYear)
strM = 'a' + str(toMonth)
strD = 'a' + str(toDay)
try:
if paperRecords[strY]:
pass