import requests
import urllib.request
from bs4 import BeautifulSoup
from lxml import etree
import json
def get_links(html):
    """Extract every href from the anchor tags in *html* and record each one.

    Each non-empty link is printed to stdout and appended (JSON-encoded)
    to the output file via write_to_file().

    Args:
        html: The HTML document (str) to parse.
    """
    global links_list
    # NOTE(review): links_list is never appended to anywhere in view;
    # kept only so existing readers of the global still see a list.
    links_list = []
    soup = BeautifulSoup(html, "lxml")
    for anchor in soup.find_all('a'):
        # Tag.get() returns None when the attribute is absent instead of
        # raising, so the original bare try/except was unnecessary.
        value = anchor.get('href')
        if value:
            print(value)
            write_to_file(value)
def links():
    """Fetch the IEEE S&P 2019 accepted-papers page and extract its links.

    Returns:
        Whatever get_links() returns (None in the current implementation).
    """
    # Use the response as a context manager so the HTTP connection is
    # closed deterministically instead of leaking the socket.
    url = 'http://www.ieee-security.org/TC/SP2019/program-papers.html'
    with urllib.request.urlopen(url) as response:
        return get_links(response.read().decode('utf-8'))
def write_to_file(content):
    """Append *content* as a single JSON-encoded line to '19allssplink.txt'.

    Args:
        content: Any JSON-serializable value (here, a link string).
    """
    with open('19allssplink.txt', 'a', encoding='utf-8') as f:
        # The original debug print JSON-encoded the content a second time
        # just to show its type; removed. ensure_ascii=False keeps any
        # non-ASCII characters human-readable in the file.
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
if __name__ == '__main__':
    # Run the scrape only when executed as a script, not when imported.
    links()
# A Record | Scraping paper links
# Latest recommended article published 2021-03-23 12:24:50