# 一. 获取数据 (Part 1: fetch the data)
import arxiv
import sqlite3
from datetime import datetime, timedelta
import time
# Fetch up to 5000 arXiv results, sorted by submission date, and store one row
# per paper in the `papers` table of the local SQLite file "spider_arxiv.db".
#
# NOTE: row ids are a simple 1-based counter over this run's results, so
# running the script again appends rows whose ids start from 1 again.
conn = sqlite3.connect("spider_arxiv.db")
cursor = conn.cursor()
try:
    cursor.execute(
        "CREATE TABLE IF NOT EXISTS papers "
        "(id, dateline, title, authors, abstract, "
        "comments, subjects, cite_as, journal_reference)"
    )

    arxiv_search = arxiv.Search(
        # NOTE(review): query=None is kept from the original; confirm the
        # arxiv client really treats it as "no filter" and not as a literal
        # search term.
        query=None,
        max_results=5000,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )

    # `with conn` commits the inserts when the loop finishes and rolls them
    # back if fetching or inserting raises. Without an explicit commit the
    # rows are discarded when the connection is closed.
    with conn:
        for paper_id, result in enumerate(arxiv_search.results(), start=1):
            print(paper_id)  # progress indicator

            # Store the date as ISO-8601 text explicitly instead of relying
            # on sqlite3's default date adapter (deprecated in Python 3.12).
            publish_date = result.published.date().isoformat()
            authors_joined = ", ".join(author.name for author in result.authors)
            subjects_joined = ", ".join(result.categories)

            cursor.execute(
                "INSERT INTO papers VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    paper_id,
                    publish_date,
                    result.title,
                    authors_joined,
                    result.summary,
                    result.comment,
                    subjects_joined,
                    result.entry_id,
                    result.journal_ref,
                ),
            )
finally:
    # Release the database handles even when the download fails part-way.
    cursor.close()
    conn.close()
# 二. 打开数据库 (Part 2: open the database)
import sqlite3
# Read every row of the `papers` table from "spider_arxiv.db" and print the
# first three columns of each row, one value per line, followed by a dashed
# separator line.
conn = sqlite3.connect("spider_arxiv.db")
cursor = conn.cursor()
result = cursor.execute("SELECT * FROM papers").fetchall()

for row in result:
    # The empty first argument makes print() emit a leading space before
    # each value.
    for position in (0, 1, 2):
        print("", row[position])
    print("--------------------")

conn.close()