使用 Requests 和 BeautifulSoup 爬取 LightUp 商品数据

import requests
from bs4 import BeautifulSoup
import json
import csv

# Scrape the product cards of the LightUp "standard household lighting"
# listing pages into `results`, one dict per product card.
base_url = "https://www.lightup.com/standard-household-lighting.html?p="
# NOTE: range(1, page_number) stops before page_number, so pages
# 1 .. page_number - 1 are fetched (pages 1-4 with the value 5).
page_number = 5
results = []


def _spec_value(spec_lines, label):
    """Return the joined values of every spec line whose text contains *label*.

    Each line is expected to look like "Label: value".  Everything after the
    first colon is kept, so a value that itself contains a colon is not
    truncated, and a matching line without any colon contributes an empty
    string instead of raising IndexError.  An empty string is returned when
    no line matches.
    """
    return ''.join(
        line.partition(':')[2].strip() for line in spec_lines if label in line
    )


def _strip_label(text, label):
    """Return *text* without a leading *label*, trimmed of whitespace.

    str.strip(label) is deliberately avoided: it removes any of the label's
    characters from both ends, which corrupts values that start or end with
    one of those characters.
    """
    text = text.strip()
    if text.startswith(label):
        text = text[len(label):]
    return text.strip()


for i in range(1, page_number):
    url = base_url + str(i)
    print('Get Response from : %s' % url, end="")
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    print('| Status Code : %s' % response.status_code)
    content = BeautifulSoup(response.text, 'lxml')
    cards = content.find_all('article', {'class': 'card'})
    for card in cards:
        # The price is looked up in a <span> first and in a <div> otherwise.
        price_tag = card.find('span', {'class': 'price--withoutTax'})
        if price_tag is not None:
            price = price_tag.text
        else:
            price = card.find('div', {'class': 'price--withoutTax'}).text.replace('\r\n', '').strip()

        # Look up the shared elements once instead of once per field.
        title_link = card.find('h4', {'class': 'card-title'}).find('a')
        spec_lines = [
            spec.text
            for spec in card.find_all('li', {'class': 'ols-card-text__list--item'})
        ]
        mpn_text = card.find_all('span', {'class': 'productView-info-name'})[1].text
        # Normalise the comma-separated feature list: drop line breaks and
        # double spaces, then put exactly ", " between the entries.
        features = (
            _spec_value(spec_lines, 'Features')
            .replace('\r\n', '')
            .replace('  ', '')
            .strip(',')
            .replace(',', ', ')
        )

        item = {
            'title': title_link.text,
            'link': title_link['href'],
            'SKU': card.find('span', {'class': 'productView-info-value'}).text,
            'MPN': _strip_label(mpn_text, 'MPN:'),
            'Brand': _spec_value(spec_lines, 'Brand'),
            'Base Type': _spec_value(spec_lines, 'Base Type'),
            'Life Hours': _spec_value(spec_lines, 'Life Hours'),
            'Watt Equivalent': _spec_value(spec_lines, 'Watt Equivalent'),
            'Warranty': _spec_value(spec_lines, 'Warranty:'),
            'Wattage': _spec_value(spec_lines, 'Wattage'),
            'Features': features,
            # Disabled field, kept for reference:
            # 'Avaliable colors':card.find('div',{'class','swatch-titles'}).text
            'Price': price,
        }
        results.append(item)
        print(json.dumps(item, indent=2))

# Export the collected product dicts in `results` to a CSV file, one row per
# product.  The header is taken from the keys of the first row, so the list is
# checked first: results[0] would raise IndexError on an empty list, and only
# after open(..., 'w') had already truncated any existing output file.
if results:
    with open('LightUpResult.csv', 'w', encoding='utf-8', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=list(results[0]))
        writer.writeheader()
        writer.writerows(results)

    print('Exported results to "LightUpResult.csv" file')
else:
    print('No results were collected; "LightUpResult.csv" was not written')

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值