# 药房网爬虫 — crawler for the yaofangwang.com (药房网) drug catalogue

 
import requests,csv
from lxml import etree
from urllib.parse import urljoin

def download(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up, so a single
            stalled connection cannot hang the whole crawl.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
    }
    # ``headers`` has to be passed by keyword: the second positional parameter
    # of ``requests.get`` is ``params``, which would turn the User-Agent into a
    # query-string argument instead of sending it as a request header.
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text



def _first(nodes, default=""):
    """Return the first item of an XPath result list, or *default* if it is empty."""
    return nodes[0] if nodes else default


def _after_colon(text):
    """Return the part of a "label:value" string that follows the first colon.

    Both the ASCII colon ":" and the full-width colon "\uff1a" are accepted as
    the separator (NOTE(review): confirm which one the site actually emits).
    Everything after the first separator is kept, so values that themselves
    contain a colon are not truncated. Text without a colon is returned as is.
    """
    text = str(text)
    hits = [pos for pos in (text.find(":"), text.find("\uff1a")) if pos != -1]
    return text[min(hits) + 1:] if hits else text


def parse(url):
    """Scrape one catalogue page and append its products to the CSV file.

    Downloads *url*, extracts one row per product ``<li>`` (image address,
    price, name, specification, approval number, manufacturer) and hands the
    rows to ``data_writing``. A field whose node is missing is written as an
    empty string instead of aborting the crawl; an ``<li>`` that yields no
    data at all is skipped.

    Args:
        url: Address of the catalogue page; also the base for resolving
            relative image addresses.
    """
    print(f"正在解析{url}网址")
    html = download(url)
    tree = etree.HTML(html)
    # Guard against a missing tree (nothing parseable came back) so the page
    # is reported as failed rather than raising AttributeError.
    li_list = tree.xpath("//div[@id='wrap']/div[2]/ul/li") if tree is not None else []
    data = []
    for li in li_list:
        img_src = _first(li.xpath("./div/a/img/@src"))
        name = _first(li.xpath("./div/a/img/@alt"))
        price = _first(li.xpath("./div/a/@data-commodity_price"))
        specifications = _after_colon(_first(li.xpath(".//p[2]/text()")))
        approval_number = _first(li.xpath(".//p[4]/span/text()"))
        manufacturer = _after_colon(_first(li.xpath(".//p[5]/text()")))
        # Only resolve a real address; urljoin(url, "") would return the page URL.
        img_src = urljoin(url, img_src) if img_src else ""
        row = [img_src, price, name, specifications, approval_number, manufacturer]
        if any(row):
            data.append(row)
    if data:
        data_writing(data)
        print("保存成功")
    else:
        print("保存失败")

def data_table():
    """Create (or truncate) 药房.csv and write the column header row."""
    header = ["药品图地址", "价格", "药品名", "规格", "批准文号", "生产厂家"]
    with open("药房.csv", "w", encoding="utf-8", newline="") as csv_file:
        csv.writer(csv_file).writerow(header)
def data_writing(data):
    """Append rows to 药房.csv.

    Args:
        data: Iterable of rows, each row being a sequence of field values.
    """
    with open("药房.csv", "a+", encoding="utf-8", newline="") as csv_file:
        csv.writer(csv_file).writerows(data)




def run(first_page=1, last_page=1080):
    """Crawl a range of catalogue pages into 药房.csv.

    Writes the CSV header once, then parses every page from *first_page* to
    *last_page* inclusive. The defaults reproduce the previously hard-coded
    range of pages 1 to 1080.

    Args:
        first_page: Number of the first catalogue page to crawl.
        last_page: Number of the last catalogue page to crawl (inclusive).
    """
    data_table()
    for page in range(first_page, last_page + 1):
        parse(url=f"https://www.yaofangwang.com/catalog-1/p{page}/")



# Start the crawl only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    run()

                
# (Blog-page residue removed here: like/favourite/comment counters and a red-packet
# payment dialog that were copied along with the script; none of it is part of the program.)