import requests
from bs4 import BeautifulSoup
import json
import csv
# Scrape the LightUp "standard household lighting" listing pages with
# requests + BeautifulSoup, print every product as JSON, and export all
# products to LightUpResult.csv.
base_url = "https://www.lightup.com/standard-household-lighting.html?p="
# Exclusive upper bound of the page loop below: pages 1 .. page_number - 1
# are fetched (four pages for the value 5).
page_number = 5
results = []

# CSV columns in output order. Declared up front so the export still writes a
# header (instead of failing on results[0]) when no product card was found.
FIELD_NAMES = [
    'title', 'link', 'SKU', 'MPN', 'Brand', 'Base Type', 'Life Hours',
    'Watt Equivalent', 'Warranty', 'Wattage', 'Features', 'Price',
]

# Seconds to wait for the server; requests applies no timeout by default.
REQUEST_TIMEOUT = 30


def _remove_prefix(text, prefix):
    """Return *text* without a leading *prefix*; unchanged if it is absent.

    Used instead of ``str.strip('MPN:')``, which removes any of the
    characters M, P, N and ':' from BOTH ends and therefore also eats
    matching characters that belong to the value itself.
    """
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


def _spec_value(spec_texts, label):
    """Return the value(s) of the "<name>: <value>" lines named *label*.

    Args:
        spec_texts: text of every spec list item of one product card.
        label: spec name to look for, e.g. ``'Brand'``.

    Returns:
        The stripped values of all matching lines concatenated, or ``''``
        when no line matches.
    """
    values = []
    for text in spec_texts:
        # Split on the first colon only so values that contain ':' survive,
        # and match the label against the name part only so a value that
        # merely mentions another spec's name is not picked up.
        name, colon, value = text.partition(':')
        if colon and label in name:
            values.append(value.strip())
    return ''.join(values)


def _parse_price(card):
    """Return the price text of *card*.

    The price is read from a <span class="price--withoutTax"> when present,
    otherwise from a <div> with the same class (whose text is cleaned of
    CR/LF and surrounding whitespace).
    """
    span = card.find('span', {'class': 'price--withoutTax'})
    if span is not None:
        return span.text
    div = card.find('div', {'class': 'price--withoutTax'})
    return div.text.replace('\r\n', '').strip()


def _parse_card(card):
    """Build the result row (a dict keyed by FIELD_NAMES) for one card.

    Raises:
        AttributeError, IndexError: if the card lacks one of the expected
            elements (title link, SKU span, second info-name span, price).
    """
    title_link = card.find('h4', {'class': 'card-title'}).find('a')
    info_names = card.find_all('span', {'class': 'productView-info-name'})
    mpn = _remove_prefix(info_names[1].text.strip(), 'MPN:').strip()

    # Collect the spec lines once instead of re-scanning the card per field.
    spec_items = card.find_all('li', {'class': 'ols-card-text__list--item'})
    spec_texts = [spec.text for spec in spec_items]

    # Features is a comma-separated list spread over several lines: drop the
    # line breaks and spaces, then re-join the entries with ", ".
    features = _spec_value(spec_texts, 'Features')
    features = features.replace('\r\n', '').replace(' ', '')
    features = features.strip(',').replace(',', ', ')

    return {
        'title': title_link.text,
        'link': title_link['href'],
        'SKU': card.find('span', {'class': 'productView-info-value'}).text,
        'MPN': mpn,
        'Brand': _spec_value(spec_texts, 'Brand'),
        'Base Type': _spec_value(spec_texts, 'Base Type'),
        'Life Hours': _spec_value(spec_texts, 'Life Hours'),
        'Watt Equivalent': _spec_value(spec_texts, 'Watt Equivalent'),
        'Warranty': _spec_value(spec_texts, 'Warranty'),
        'Wattage': _spec_value(spec_texts, 'Wattage'),
        'Features': features,
        'Price': _parse_price(card),
    }


for i in range(1, page_number):
    url = base_url + str(i)
    print('Get Response from : %s' % url, end="")
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    print('| Status Code : %s' % response.status_code)
    content = BeautifulSoup(response.text, 'lxml')
    for card in content.find_all('article', {'class': 'card'}):
        item = _parse_card(card)
        results.append(item)
        print(json.dumps(item, indent=2))

with open('LightUpResult.csv', 'w', encoding='utf-8', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, FIELD_NAMES)
    writer.writeheader()
    writer.writerows(results)
print('Exported results to "LightUpResult.csv" file')
# 使用 Requests 和 BeautifulSoup 爬取 LightUp
# 最新推荐文章于 2023-12-21 00:00:00 发布