# coding: utf-8
import requests
import json
from lxml import etree
import sys
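"""
Look up drug names on www.yaofangwang.com by national drug approval number.

Some drugs are not listed on this commercial site and have to be searched
for manually on www.nmpa.gov.cn; for imported drugs, search the
imported-drug catalogue there.

Usage:
    python3 file.py <one or more approval numbers, separated by ASCII commas>
    python3 file.py Z10910055,Z45021680,H20010217,Z20010131,Z10920002

Terminal output:
    Z10910055 枫蓼肠胃康颗粒
    ...
    Z10920002 活血止痛胶囊
"""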
def _clean_drug_name(raw_text):
    """Reduce a search-result link text to the bare drug name.

    Everything from the first '-' onward is dropped, the remainder is split
    on single spaces, and the longest piece is returned (the earliest piece
    wins when several share the maximum length).
    """
    head = raw_text.partition('-')[0]
    # str.split always yields at least one element, so max() cannot fail.
    return max(head.split(' '), key=len)


def get_drug_name(drug_number=None):
    """Look up a drug's name on www.yaofangwang.com by its approval number.

    Requests a search page such as
    https://www.yaofangwang.com/search.html?keyword=H20013003&price=1
    and reads the text of the second link inside the first result item.
    The approval number and the name found (or a blank placeholder when
    nothing is found) are printed to stdout.

    Args:
        drug_number: National drug approval number, e.g. "H20013003".

    Returns:
        The drug name, or '' when no number was given or nothing matched.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    if not drug_number:
        # Nothing to search for; avoid concatenating None into the URL.
        return ''

    url = 'https://www.yaofangwang.com/search.html'
    # `params` URL-encodes the keyword instead of pasting it in verbatim.
    query = {'keyword': drug_number, 'price': 1}
    # The User-Agent belongs in the request headers, not in a form body.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
    }
    # A timeout keeps an unresponsive server from hanging the script.
    response = requests.get(url, params=query, headers=headers, timeout=10)

    html = etree.HTML(response.text)
    # Second <a> of the first <li> under the element with id="wrap".
    links = html.xpath('//*[@id="wrap"]/div[1]/ul/li[1]/div/a[2]')
    raw_text = links[0].text if links else None
    if raw_text is None:
        # No result item, or the link carries no leading text of its own.
        print(drug_number, ' ')
        return ''

    drug_name = _clean_drug_name(raw_text)
    print(drug_number, drug_name)
    return drug_name
if __name__ == '__main__':
    # Only the first command-line argument is read: approval numbers joined
    # by ASCII commas, e.g. "Z10910055,Z45021680".
    if len(sys.argv) < 2:
        sys.exit('usage: python3 file.py NUMBER[,NUMBER...]')
    for number in sys.argv[1].split(','):
        number = number.strip()
        if not number:
            continue  # skip empty entries such as a trailing comma
        get_drug_name(number)