```python
class Ieee2():
    """Search IEEE Xplore for a keyword and download the matching PDFs.

    Workflow (each step calls the next one):
    ``main`` -> ``get_page`` -> ``get_id`` -> ``down_pdf``.

    The keyword is read from standard input when the object is constructed.
    PDFs and a ``title, author`` CSV index are written under ``OUT_DIR``.
    """

    # Output locations (same paths the original script used).
    OUT_DIR = './ieee_Quan'
    CSV_PATH = './ieee_Quan/ieee.csv'
    # Seconds to wait on a request before giving up instead of hanging forever.
    TIMEOUT = 30
    # Characters that are not allowed in file names on common file systems.
    _INVALID_FILENAME_CHARS = frozenset('\\/:*?"<>|')
    # Keep generated names comfortably below typical file-name length limits.
    _MAX_FILENAME_CHARS = 150

    def __init__(self):
        """Prompt for the search keyword and prepare the request payload."""
        self.url = 'https://ieeexplore.ieee.org/rest/search'
        self.key = input('please input a word')
        self.data = {
            'highlight': 'true',
            'matchPubs': 'true',
            'newsearch': 'true',
            'queryText': self.key,
            'returnFacets': ["ALL"],
            'returnType': "SEARCH",
        }
        # Content-Length is intentionally not set here: requests derives it
        # from the encoded body, and a fixed value is wrong for most queries.
        # 'br' is not advertised because decoding brotli needs an optional
        # package; gzip/deflate are always decodable.
        self.headers = {
            'Accept': 'application/json,text/plain,*/*',
            'Accept-Encoding': 'gzip,deflate',
            'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
            'Connection': 'keep-alive',
            'Content-Type': 'application/json',
            'Referer': 'https://ieeexplore.ieee.org/search/searchresult.jsp?newsearch=true&queryText=mechanical',
            # If requests get rejected, fill in a complete user-agent string.
            'User-Agent': 'Mozilla/5.0 ',
        }

    @staticmethod
    def _safe_filename(title, fallback='untitled'):
        """Return *title* made safe for use as a file name.

        Path separators, reserved punctuation and control characters are
        replaced with ``_``; surrounding spaces/dots are stripped; the result
        is truncated. *fallback* is returned when nothing usable remains.
        """
        cleaned = ''.join(
            '_' if ch in Ieee2._INVALID_FILENAME_CHARS or ord(ch) < 32 else ch
            for ch in str(title or '')
        )
        cleaned = cleaned.strip(' .')[:Ieee2._MAX_FILENAME_CHARS].rstrip(' .')
        return cleaned or fallback

    def _parse_records(self):
        """Decode ``self.response`` once and return its ``records`` list.

        Returns an empty list when the payload has no ``records`` entry,
        so callers do not fail with ``KeyError`` in that case.
        """
        payload = json.loads(self.response)
        records = payload.get('records') if isinstance(payload, dict) else None
        return records or []

    def get_page(self):
        """POST the search query, store the JSON text, then call ``get_id``.

        Sets ``self.response`` (raw response text) and ``self.n`` (number of
        records in the response).
        """
        # NOTE(review): TLS verification is disabled (verify=False), as in the
        # original script. Silence the resulting warning *before* the request.
        requests.packages.urllib3.disable_warnings()
        self.response = requests.post(
            url=self.url,
            data=json.dumps(self.data),
            headers=self.headers,
            verify=False,
            timeout=self.TIMEOUT,
        ).text
        time.sleep(2)
        self.n = len(self._parse_records())
        print('搜索到'+str(self.n)+'篇文章')
        self.get_id()

    def get_id(self):
        """Collect article number, title and authors, then call ``down_pdf``.

        Fills ``self.ids``, ``self.titles`` and ``self.authors`` (parallel
        lists) from every record in ``self.response``.
        """
        self.ids = []
        self.titles = []
        self.authors = []
        # Parse the response a single time instead of once per field access.
        for record in self._parse_records():
            self.ids.append(record['articleNumber'])
            self.titles.append(record['articleTitle'])
            # Tolerates a record without an 'authors' entry (assumption: the
            # field can be absent; an empty list is stored in that case).
            self.authors.append(record.get('authors', []))
        self.down_pdf()

    def down_pdf(self):
        """Download each collected article and append it to the CSV index."""
        if not self.ids:
            return
        # May need a complete user-agent string.
        headers = {'User-Agent': 'Mozilla/5.0 '}
        os.makedirs(self.OUT_DIR, exist_ok=True)
        with open(self.CSV_PATH, 'a+', newline='', encoding='utf-8') as csv_fp:
            writer = csv.writer(csv_fp)
            for article_id, title, author in zip(self.ids, self.titles, self.authors):
                # Download URL for this article number.
                new_url = 'https://ieeexplore.ieee.org/stampPDF/getPDF.jsp?tp=&arnumber={}&ref='.format(article_id)
                print(new_url)
                text = requests.get(new_url, headers=headers, timeout=self.TIMEOUT).content
                # Titles can contain characters that are illegal in file
                # names; fall back to the article number if nothing is left.
                imgName = self._safe_filename(title, fallback=str(article_id)) + '.pdf'
                imgPath = os.path.join(self.OUT_DIR, imgName)
                # Save the response body as binary.
                with open(imgPath, 'wb') as fp:
                    fp.write(text)
                print(imgName, '打印成功')
                # Record the original (unsanitized) title and the authors.
                writer.writerow([title, author])

    def main(self):
        """Create the output directory and CSV header, then run the crawl."""
        os.makedirs(self.OUT_DIR, exist_ok=True)
        # Write the header only for a new/empty file so repeated runs do not
        # interleave header rows with data rows.
        if not os.path.exists(self.CSV_PATH) or os.path.getsize(self.CSV_PATH) == 0:
            with open(self.CSV_PATH, 'a+', newline='', encoding='utf-8') as fp:
                writer = csv.writer(fp)
                writer.writerow(['title', 'author'])
        self.get_page()
# Run the crawler only when executed as a script, so that importing this
# module does not prompt for input or start network requests.
if __name__ == '__main__':
    ieee2 = Ieee2()
    ieee2.main()
```

爬取 IEEE
最新推荐文章于 2024-05-12 16:39:05 发布