基于Django的电商购物项目通过网络爬虫技术以及ORM框架获取项目商品数据
这里先提供源码,具体思路下一期再进行解释。
import json
import os
import random
import re

import django
import requests
from pypinyin import pinyin, Style

if __name__ == '__main__':
    # When run as a standalone script, point Django at the project settings
    # and initialise it before the model module below is imported.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'JiXuShopSystem.settings')
    django.setup()

# Kept after the setup step on purpose: the models are imported only once
# Django has been configured.
from goods.models import *
class Spider():
    """Crawl vip.com search results and load them into the shop's Django models.

    ``main`` walks the search-rank endpoint page by page and hands every
    product id it finds to ``spider_data``.  ``spider_data`` downloads that
    product's detail payload and writes Brand, SPU, SKU, SKUImage and
    specification rows.  The remaining public methods only read fields out
    of the decoded detail payload (a nested ``dict``) and return plain
    Python values.
    """

    # Seconds to wait on any single HTTP call; without a timeout a stalled
    # connection would block the crawl indefinitely.
    REQUEST_TIMEOUT = 10

    # Query/form parameters sent unchanged to every endpoint used here.
    _COMMON_PARAMS = {
        "app_name": "shop_pc",
        "app_version": "4.0",
        "warehouse": "VIP_HZ",
        "fdc_area_id": "103104105",
        "client": "pc",
        "mobile_platform": "1",
        "province_id": "103104",
        "api_key": "70f71280d5d547b2a7bb370a529aeea1",
        "user_id": "",
        "mars_cid": "1703946163504_4eebec221de3364e0da3bbe4a2182454",
        "wap_consumer": "a",
    }

    _SEARCH_URL = "https://mapi.vip.com/vips-mobile/rest/shopping/pc/search/product/rank"
    _VENDOR_QA_URL = "https://mapi.vip.com/vips-mobile/rest/shopping/pc/detail/vendorqa/v1"

    # Product id used for the vendor Q&A lookup when ``spu`` is called
    # without an explicit ``pid`` (keeps the one-argument call working).
    _DEFAULT_QA_PRODUCT_ID = "6920549355391848472"

    # Stored image keys are the URL with this prefix and ".jpg" removed.
    _IMAGE_PREFIX = 'http://h2.appsimg.com/a.appsimg.com/upload/merchandise/'

    # Primary keys of the three GoodsCategory levels every crawled item is
    # filed under.
    _CATEGORY_IDS = (1, 38, 115)

    # One question/answer entry of the "pack" and "service" HTML fragments.
    _QA_ITEM_TEMPLATE = (
        '\n'
        '<dl class="q-tit-item">\n'
        '<dt class="qti-title">\n'
        '<i class="qti-question"></i>\n'
        '<p class="qti-txt" style="color: red">{title}</p>\n'
        '</dt>\n'
        '<dd class="qti-content">\n'
        '<i class="qti-answer"></i>\n'
        '<p class="qti-txt">\n'
        '{body}\n'
        '</p>\n'
        '</dd>\n'
        '</dl>\n'
    )

    def __init__(self):
        # NOTE(review): the authorization value and the cookies below look
        # like values captured from a browser session; they are hard-coded
        # and presumably expire -- confirm before relying on them.
        self.headers = {
            "accept": "application/json, text/javascript, */*; q=0.01",
            "accept-language": "zh-CN,zh;q=0.9",
            "authorization": "OAuth api_sign=d463028caef4688f248d1d05cac958d0153afe12",
            "cache-control": "no-cache",
            "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
            "origin": "https://detail.vip.com",
            "pragma": "no-cache",
            "priority": "u=1, i",
            "referer": "https://detail.vip.com/",
            "sec-ch-ua": "\"Chromium\";v=\"124\", \"Google Chrome\";v=\"124\", \"Not-A.Brand\";v=\"99\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
            "x-requested-with": "XMLHttpRequest"
        }
        self.cookies = {
            "vip_cps_cid": "1703946155095_f8aebf721aa4d69f55487762e3ca4c21",
            "PAPVisitorId": "58460c7a22e31f6b4acb2a1ed741f921",
            "vip_new_old_user": "1",
            "mars_cid": "1703946163504_4eebec221de3364e0da3bbe4a2182454",
            "mars_pid": "0",
            "vip_cps_cuid": "CU1715695090775e67fbb574e7ca3b54",
            "vip_city_name": "%E5%B9%BF%E5%B7%9E%E5%B8%82",
            "VipUINFO": "luc%3Aa%7Csuc%3Aa%7Cbct%3Ac_new%7Chct%3Ac_new%7Cbdts%3A0%7Cbcts%3A0%7Ckfts%3A0%7Cc10%3A0%7Crcabt%3A0%7Cp2%3A0%7Cp3%3A1%7Cp4%3A0%7Cp5%3A0%7Cul%3A3105",
            "vip_address": "%257B%2522pname%2522%253A%2522%255Cu5b89%255Cu5fbd%255Cu7701%2522%252C%2522pid%2522%253A%2522103104%2522%252C%2522cname%2522%253A%2522%255Cu5e7f%255Cu5dde%255Cu5e02%2522%252C%2522cid%2522%253A%2522103104105%2522%257D",
            "vip_province": "103104",
            "vip_province_name": "%E5%AE%89%E5%BE%BD%E7%9C%81",
            "vip_city_code": "103104105",
            "vip_wh": "VIP_HZ",
            "vip_ipver": "31",
            "user_class": "a",
            "visit_id": "DBEBA17A04F9893A4CBF22BF2C37ADB4",
            "mars_sid": "f9ea9bcbdad7fcd940397690e20183a7",
            "VIP_QR_FIRST": "1",
            "vip_tracker_source_from": "",
            "vipshop_passport_src": "https%3A%2F%2Fdetail.vip.com%2Fdetail-1711573035-6920320379434192651.html",
            "pg_session_no": "11",
            "VipDFT": "-1",
            "vip_access_times": "%7B%22list%22%3A3%2C%22detail%22%3A0%7D"
        }
        self.url = "https://mapi.vip.com/vips-mobile/rest/shopping/pc/detail/main/v6"

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _strip_jsonp(text):
        """Return the JSON document inside a ``callback(...)`` JSONP reply.

        Only the outer wrapper is removed, so parentheses that occur inside
        the payload are preserved.  Text that is not wrapped in a callback
        is returned unchanged.
        """
        wrapped = re.match(r'^\s*[\w$.]+\s*\((.*)\)\s*;?\s*$', text, re.S)
        return wrapped.group(1) if wrapped else text

    @classmethod
    def _image_key(cls, url):
        """Reduce an image URL to the key stored in the database."""
        return url.replace(cls._IMAGE_PREFIX, '').replace('.jpg', '')

    @classmethod
    def _qa_item(cls, title, body):
        """Render one title/body pair with the shared Q&A HTML template."""
        # NOTE(review): title and body come from the remote API and are
        # inserted without HTML escaping -- confirm this is intended.
        return cls._QA_ITEM_TEMPLATE.format(title=title, body=body)

    @staticmethod
    def _first_image_group(response):
        """Return the first entry of ``data.images.groups`` or ``None``."""
        groups = response['data']['images']['groups'].values()
        return next(iter(groups), None)

    @classmethod
    def _detail_html(cls, response):
        """Build the description HTML from the first group's detail images."""
        group = cls._first_image_group(response)
        if group is None:
            return ''
        # Index 0 is skipped; only the remaining detail images are embedded.
        return ''.join(
            f'<p><img src="{image["imageUrl"]}"></p>'
            for image in group['detailImages'][1:]
        )

    @staticmethod
    def _spec_groups(response):
        """Return ``[(spec_name, [option_name, ...]), ...]`` from ``saleProps``."""
        return [
            (prop['name'], [value['name'] for value in prop['values']])
            for prop in response['data']['saleProps']
        ]

    def _fetch_vendor_qa(self, pid):
        """Download the vendor Q&A list for product *pid*."""
        params = {
            "callback": "getVendorQaCb",
            **self._COMMON_PARAMS,
            "productId": f"{pid}",
        }
        reply = requests.get(
            self._VENDOR_QA_URL,
            headers=self.headers,
            cookies=self.cookies,
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        ).text
        return json.loads(self._strip_jsonp(reply))['data']['list']

    # ------------------------------------------------------------------
    # Payload extractors
    # ------------------------------------------------------------------
    def brand(self, response):
        """Extract the brand name, logo and initial letter.

        Returns a dict with keys ``brandStoreInfo`` (name), ``brandStoreLogo``
        and ``first_letter``.  ``first_letter`` is the upper-cased pinyin
        initial of the name's first character, or ``None`` when the name is
        empty.
        """
        store = response['data']['brandStoreInfo']
        name = store['brandStoreName']
        first_letter = None
        if name:
            initials = pinyin(name[0], style=Style.FIRST_LETTER, strict=False)
            first_letter = initials[0][0].upper()
        return {
            'brandStoreInfo': name,
            'brandStoreLogo': store['brandStoreLogo'],
            'first_letter': first_letter,
        }

    def spu(self, response, pid=None):
        """Assemble the SPU fields for one product.

        Args:
            response: decoded detail payload.
            pid: product id used for the vendor Q&A request.  When omitted,
                a fixed fallback id is queried.

        Returns:
            dict with ``name``, ``sales``, ``comments`` and the three HTML
            fragments ``desc_detail``, ``desc_pack`` and ``desc_service``.

        Performs one HTTP GET against the vendor Q&A endpoint.
        """
        if pid is None:
            pid = self._DEFAULT_QA_PRODUCT_ID

        # Pack section: one entry per vendor question/answer pair.
        desc_pack = ''.join(
            self._qa_item(entry['question'], entry['answer'])
            for entry in self._fetch_vendor_qa(pid)
        )
        # After-sale service section: one entry per listed service.
        desc_service = ''.join(
            self._qa_item(service['name'], service['value'])
            for service in response['data']['saleServiceList']
        )
        return {
            'name': response['data']['base']['title'],
            'sales': 0,
            'comments': 0,
            'desc_detail': self._detail_html(response),
            'desc_pack': desc_pack,
            'desc_service': desc_service,
        }

    def sku(self, response):
        """Return one dict of SKU fields per entry in ``data.products``.

        ``cost_price`` is the market price minus the sale price, computed
        with ``Decimal`` so fractional prices such as ``"99.50"`` work.
        ``stock`` is fixed at 1000 and ``sales`` is a random placeholder in
        ``[1, 800]``.
        """
        from decimal import Decimal

        title = response['data']['base']['title']
        sku_rows = []
        for product in response['data']['products'].values():
            prices = product['priceView']['salePrice']
            market_price = prices['saleMarketPrice']
            sale_price = prices['salePrice']
            sku_rows.append({
                'name': product['merchandiseSn'],
                'price': sale_price,
                'caption': title,
                'cost_price': Decimal(str(market_price)) - Decimal(str(sale_price)),
                'market_price': market_price,
                'stock': 1000,
                'sales': random.randint(1, 800),
                'comments': 0,
                'is_launched': True,
                'default_image': self._image_key(product['smallImage']),
            })
        return sku_rows

    def sku_image(self, response):
        """Return the image keys of the first group's preview images."""
        group = self._first_image_group(response)
        if group is None:
            return []
        return [self._image_key(image['imageUrl']) for image in group['previewImages']]

    def spu_pecification(self, response):
        """Return the specification names listed in ``data.saleProps``."""
        return [prop['name'] for prop in response['data']['saleProps']]

    def spu_optiuons(self, response):
        """Return every option name of every specification as one flat list."""
        return [
            value['name']
            for prop in response['data']['saleProps']
            for value in prop['values']
        ]

    # ------------------------------------------------------------------
    # Crawl drivers
    # ------------------------------------------------------------------
    def main(self):
        """Page through the search results and import every product found."""
        # pageOffset advances in steps equal to batchSize (120).
        for page in range(0, 2400, 120):
            params = {
                "callback": "getMerchandiseIds",
                **self._COMMON_PARAMS,
                "standby_id": "nature",
                "keyword": "手机",
                "lv3CatIds": "",
                "lv2CatIds": "",
                "lv1CatIds": "",
                "brandStoreSns": "",
                "props": "",
                "priceMin": "",
                "priceMax": "",
                "vipService": "",
                "sort": "0",
                "pageOffset": f"{page}",
                "channelId": "1",
                "gPlatform": "PC",
                "batchSize": "120",
                "_": "1715836699310"
            }
            listing = requests.get(
                self._SEARCH_URL,
                headers=self.headers,
                cookies=self.cookies,
                params=params,
                timeout=self.REQUEST_TIMEOUT,
            ).text
            # Product ids appear in the reply as {"pid":"..."} objects.
            for pid in re.findall(r'\{"pid":"(.*?)"\}', listing):
                self.spider_data(pid)

    def spider_data(self, pid):
        """Fetch one product's detail payload and persist it.

        All values are extracted before anything is written, and the writes
        run inside a single database transaction, so a malformed payload or
        a failed insert does not leave partial rows behind.
        """
        # Local import: only this method needs the transaction helper.
        from django.db import transaction

        self.data = {
            **self._COMMON_PARAMS,
            "scene": "detail",
            "productId": f"{pid}",
            "opts": "priceView:13;quotaInfo:1;restrictTips:1;panelView:3;foreShowActive:1;invisible:1;floatingView:1;announcement:1;svipView:2;showSingleColor:1;svipPriceMode:1;promotionTips:6;foldTips:3;formula:2;extraDetailImages:1;shortVideo:1;countryFlagStyle:1;saleServiceList:1;storeInfo:2;brandCountry:1;freightTips:3;priceBannerView:1;bannerTagsView:1;buyMoreFormula:1;mergeGiftTips:0;kf:1;priceIcon:1;tuv:3;promotionTags:7;mergeGiftTips:3;topDetailImage:2;deliveryInfo:1;relatedProdSpu:1"
        }
        response = requests.post(
            self.url,
            headers=self.headers,
            cookies=self.cookies,
            data=self.data,
            timeout=self.REQUEST_TIMEOUT,
        ).json()

        brand_data = self.brand(response)
        print(brand_data)
        spu_data = self.spu(response, pid)
        print(spu_data)
        sku_data_list = self.sku(response)
        sku_image_list = self.sku_image(response)
        spec_groups = self._spec_groups(response)

        with transaction.atomic():
            # Reuse an existing brand of the same name instead of inserting
            # a duplicate row for every product.
            brand_object = Brand.objects.filter(name=brand_data['brandStoreInfo']).first()
            if brand_object is None:
                brand_object = Brand.objects.create(
                    name=brand_data['brandStoreInfo'],
                    logo=brand_data['brandStoreLogo'],
                    first_letter=brand_data['first_letter'],
                )

            category1, category2, category3 = (
                GoodsCategory.objects.get(id=category_id)
                for category_id in self._CATEGORY_IDS
            )

            spu_object = SPU.objects.create(
                name=spu_data['name'],
                sales=spu_data['sales'],
                comments=spu_data['comments'],
                desc_detail=spu_data['desc_detail'],
                desc_pack=spu_data['desc_pack'],
                desc_service=spu_data['desc_service'],
                brand=brand_object,
                category1=category1,
                category2=category2,
                category3=category3,
            )

            sku_object = None
            for sku_data in sku_data_list:
                sku_object = SKU.objects.create(
                    name=sku_data['name'],
                    price=sku_data['price'],
                    caption=sku_data['caption'],
                    cost_price=sku_data['cost_price'],
                    market_price=sku_data['market_price'],
                    stock=sku_data['stock'],
                    sales=sku_data['sales'],
                    comments=sku_data['comments'],
                    is_launched=sku_data['is_launched'],
                    default_image=sku_data['default_image'],
                    category=category3,
                    spu=spu_object,
                )
                # Every SKU receives the same set of preview images.
                for image_key in sku_image_list:
                    SKUImage.objects.create(image=image_key, sku=sku_object)

            for spec_name, option_names in spec_groups:
                spec_object = SPUSpecification.objects.create(
                    name=spec_name,
                    spu=spu_object,
                )
                for option_name in option_names:
                    # Each option belongs to the specification it was
                    # listed under.
                    option_object = SpecificationOption.objects.create(
                        value=option_name,
                        spec=spec_object,
                    )
                    # NOTE(review): options are linked to the last SKU
                    # created, because the payload fields that map a SKU to
                    # its own options are not read here -- confirm the
                    # intended mapping.
                    if sku_object is not None:
                        SKUSpecification.objects.create(
                            option=option_object,
                            spec=spec_object,
                            sku=sku_object,
                        )
if __name__ == '__main__':
    # Start the crawl only when the file is executed as a script, so that
    # importing the module does not kick off network and database work.
    spider = Spider()
    spider.main()