Python 爬猫信息

最新推荐文章于 2025-02-27 19:00:08 发布

laocooon523857886

最新推荐文章于 2025-02-27 19:00:08 发布

阅读量576

点赞数

分类专栏： Python

本文链接：https://blog.csdn.net/laocooon/article/details/117238521

版权

Python 专栏收录该内容

129 篇文章

订阅专栏

import requests  # 导入模块
import parsel
import pprint  # 格式化输出模块
import re
import os
import csv

# Crawl the cat listings on maomijiaoyi.com (listing pages 1-9), open every
# listing's detail page and store the scraped fields in data.csv.

# Columns of data.csv.  This must name every key of the row dict built below:
# csv.DictWriter raises ValueError on keys missing from fieldnames (the
# original only declared '标题', so the very first writerow() call failed).
fieldnames = ['标题', '价格', '数量', '年龄', '品种', '预防']

# The request headers never change, so build them once outside the loops.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36'
}

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the crawl forever.
timeout = 10


def _first_text(sel, query):
    """Return the stripped text of the first node matching *query*.

    Args:
        sel: a parsel.Selector for the page being scraped.
        query: CSS query (normally ending in ``::text``).

    Returns:
        The stripped text, or ``''`` when nothing matches, so one missing
        field on a detail page does not abort the whole crawl.
    """
    value = sel.css(query).get()
    return value.strip() if value is not None else ''


# utf-8-sig writes a BOM at the start of the file; the `with` block
# guarantees the file is flushed and closed even if a request fails midway.
with open('data.csv', mode='w', encoding='utf-8-sig', newline='') as f:
    csv_writer = csv.DictWriter(f, fieldnames=fieldnames)
    csv_writer.writeheader()  # write the header row
    for page in range(1, 10):
        print(f'===========================第{page}页======================================')
        url = f'http://www.maomijiaoyi.com/index.php?/chanpinliebiao_c_2_{page}--24.html'

        request = requests.get(url=url, headers=headers, timeout=timeout)
        selector = parsel.Selector(request.text)
        href = selector.css('div.content:nth-child(1) a::attr(href)').getall()
        areas = selector.css('div.content:nth-child(1) .area .color_333::text').getall()
        # The area text is not written to the CSV, but pairing links with it
        # is kept so the same listings are visited as before (zip stops at
        # the shorter of the two lists).
        for link, _area in zip(href, areas):
            index_url = 'http://www.maomijiaoyi.com' + link
            r_1 = requests.get(url=index_url, headers=headers, timeout=timeout)
            s_1 = parsel.Selector(r_1.text)

            dit = {
                '标题': _first_text(s_1, '.detail_text div.title::text'),
                '价格': _first_text(s_1, '.detail_text span.red.size_24::text'),
                '数量': _first_text(s_1, 'div.info2 div:nth-child(1) div.red::text'),
                '年龄': _first_text(s_1, 'div.info2 div:nth-child(2) div.red::text'),
                '品种': _first_text(s_1, 'div.info2 div:nth-child(3) div.red::text'),
                '预防': _first_text(s_1, 'div.info2 div:nth-child(4) div.red::text'),
            }
            csv_writer.writerow(dit)
            print(dit)