# Get the URL of the download link.
# getdownurl(url): fetch the page at `url`, pull a comma-separated argument
# list out of it with a regex, assemble a wanfangdata "downLoad.do" request
# URL from those arguments, fetch that URL, and return the first match of a
# second regex in the response.
# Returns None (bare `return`) when the first regex finds nothing.
# `get_html` and `re` are not defined in this block; they must come from
# elsewhere in the file.
def getdownurl(url):
text=get_html(url).text
# NOTE(review): the pattern literal below is truncated in this copy of the
# file (the raw string is never closed) -- the original regex must be restored
# before this function can run.
re0=r'
firurl=re.findall(re0,text)
print(firurl)
# No match on the page: give up and implicitly return None.
if len(firurl)==0:
return
# Only the first match is used; it is coerced to a string before splitting.
strurl=str(firurl[0])
print(strurl)
# Split the matched text into its comma-separated fields.
tpurl=re.split(',',strurl)
endstp=[]
#print(tpurl)
for ul in tpurl:
# Strips single quotes from both ends of the field. The second strip repeats
# the first and has no further effect.
# NOTE(review): the second call may have been meant to strip a different
# character -- confirm.
elem=ul.strip('\'').strip('\'')
endstp.append(elem)
# Debug output; endstp[0] always exists because re.split returns at least one
# element.
print(endstp,type(endstp[0]))
head='http://www.wanfangdata.com.cn/search/downLoad.do?page_cnt='
# Builds the query string by plain concatenation of fields 0..6, so this
# raises IndexError when the match yields fewer than 7 fields, and the values
# are not URL-encoded.
# NOTE(review): endstp[0] is used for both `page_cnt` and `type` -- confirm
# this is intended.
geturl=head+endstp[0]+"&language="+endstp[2]+"&resourceType="+endstp[6]+"&source="+endstp[3]+ "&resourceId="+endstp[1]+"&resourceTitle="+endstp[4]+"&isoa="+endstp[5]+"&type="+endstp[0]
print(geturl)
# NOTE(review): this pattern is empty in this copy of the file. An empty
# pattern matches at every position, so findall yields only empty strings and
# the function would return ''. The original regex was presumably lost --
# restore it.
re1=r''
text=get_html(geturl).text
# Prints a blank line.
print()
sucurl=re.findall(re1,text)
print(sucurl)
# Returns the first match; with a non-empty pattern this raises IndexError
# when the response contains no match.
return sucurl[0]
# Download all the PDF files.
def downloadAllPdf(key):
row=getNum(key)
pages=search_key(key)
allurl=[]
num=0
for page in pages:
allurl=get_url(page)