# Cattery listing scraper (maomijiaoyi.com)
# File: main.py
# Setup: pip install requests parsel -i http://pypi.douban.com/simple --trusted-host=pypi.douban.com
import requests
import parsel
import csv
# Output file and its column order; the keys must match the row dicts built
# in scrape_detail().
CSV_PATH = 'data.csv'
FIELDNAMES = ["标题", "地区", "店名", "价格"]

# Detail-page links found on a listing page are appended to this prefix.
BASE_URL = 'http://www.maomijiaoyi.com'

# Browser-like User-Agent sent with every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
}

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 10


def listing_url(page):
    """Return the URL of listing page number ``page``."""
    return f'http://maomijiaoyi.com/index.php?/chanpinliebiao_c_2_{page}--5.html'


def clean_text(value):
    """Return ``value`` without surrounding whitespace, or ``''`` if it is None.

    ``Selector.get()`` returns None when nothing matches, so calling
    ``.strip()`` on it directly would abort the whole run on one odd page.
    """
    return '' if value is None else value.strip()


def make_writer(csv_file):
    """Return a ``csv.DictWriter`` for ``csv_file``, adding the header if needed.

    The output is opened in append mode, so the header row is written only
    when the stream is still at position 0 (a new or empty file). Otherwise
    every run would insert another header row in the middle of the data.
    """
    writer = csv.DictWriter(csv_file, fieldnames=FIELDNAMES)
    if csv_file.tell() == 0:
        writer.writeheader()
    return writer


def fetch_selector(url):
    """Download ``url`` and return a ``parsel.Selector`` over the response text.

    Raises:
        requests.RequestException: on connection errors, timeouts, or an
            HTTP error status.
    """
    response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return parsel.Selector(response.text)


def scrape_detail(index_url, area):
    """Fetch one detail page and return its CSV row as a dict.

    ``area`` comes from the listing page; title, shop name and price are
    read from the detail page. Fields whose selector matches nothing are
    stored as empty strings.
    """
    selector = fetch_selector(index_url)
    return {
        "标题": clean_text(selector.css('.detail_text div.title::text').get()),  # title
        "地区": area,  # area
        "店名": clean_text(selector.css('.dinming::text').get()),  # shop name
        "价格": clean_text(selector.css('span.red.size_24::text').get()),  # price
    }


def main(pages=range(1, 2)):
    """Scrape the given listing pages and append one CSV row per listing.

    Args:
        pages: iterable of listing page numbers; defaults to page 1 only.

    A page or listing whose request fails is reported and skipped so that a
    single bad response does not discard the rest of the run.
    """
    # 'utf-8-sig' is available on every platform ('ANSI' is a Windows-only
    # alias) and its BOM lets Excel detect the encoding.
    # NOTE(review): a data.csv produced by an earlier run in the old encoding
    # should be removed or converted before appending to it.
    with open(CSV_PATH, mode='a', encoding='utf-8-sig', newline='') as f:
        csv_writer = make_writer(f)
        for page in pages:
            print(f'=================正在爬取第{page}页数据内容=================')
            try:
                selector = fetch_selector(listing_url(page))
            except requests.RequestException as exc:
                print(f'Skipping listing page {page}: {exc}')
                continue

            href = selector.css('div.content:nth-child(1) a::attr(href)').getall()
            areas = selector.css('div.content:nth-child(1) .area .color_333::text').getall()
            areas = [text.strip() for text in areas]

            # NOTE(review): links and areas are paired purely by position; if
            # the two selectors ever return different counts, zip() truncates
            # silently and rows may be mislabeled — confirm against the HTML.
            for path, area in zip(href, areas):
                index_url = BASE_URL + path
                try:
                    row = scrape_detail(index_url, area)
                except requests.RequestException as exc:
                    print(f'Skipping {index_url}: {exc}')
                    continue
                csv_writer.writerow(row)
                print(*(row[name] for name in FIELDNAMES), sep=' | ')


if __name__ == '__main__':
    main()