# Fetch the title and abstract text from a PubMed HTML page.
import urllib.request
import re
pmid = '31226949'  # PubMed article identifier (PMID)
url = 'https://www.ncbi.nlm.nih.gov/pubmed?term={}'.format(pmid)

# NOTE(review): these patterns depend on the exact markup of the page;
# confirm them against the HTML that the URL above currently returns.
# re.DOTALL lets "." cross line breaks inside an element, and the lazy "?"
# quantifiers stop at the first closing tag instead of the last one that
# still fits inside the length limit.
title_regexp = re.compile(rb'<h1>.{5,400}?</h1>', re.DOTALL)
abstract_regexp = re.compile(
    rb'<h3>Abstract</h3><div class="">.{20,3000}?</p></div></div>',
    re.DOTALL,
)


def extract_fragment(pattern, html):
    """Return the first match of *pattern* in *html* as text, or None.

    Args:
        pattern: Compiled bytes regular expression to search with.
        html: Raw page content as bytes.

    Returns:
        The matched HTML fragment (tags included) decoded to ``str``, or
        ``None`` when the pattern does not occur in *html*.
    """
    match = pattern.search(html)
    if match is None:
        return None
    # Assumes the page is UTF-8 encoded (TODO confirm); undecodable bytes are
    # replaced rather than raising so one bad byte cannot abort the script.
    return match.group().decode('utf-8', errors='replace')


def fetch_html(page_url, timeout=30):
    """Download *page_url* and return the response body as bytes.

    Args:
        page_url: Address of the page to download.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The raw response body.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(page_url, timeout=timeout) as response:
        return response.read()


def main():
    """Fetch the PubMed page for ``pmid`` and print its title and abstract."""
    html = fetch_html(url)
    title_text = extract_fragment(title_regexp, html)
    abstract_text = extract_fragment(abstract_regexp, html)
    # Report a missing section instead of crashing on a None match.
    print('title:', title_text if title_text is not None else '(not found)')
    print(
        'Abstract:',
        abstract_text if abstract_text is not None else '(not found)',
    )


if __name__ == '__main__':
    main()