Python爬虫系列之多多买菜小程序数据爬取

直接上代码

# -*- coding:utf-8 -*-
import requests
import json
import time
from general import getAntiContent
import random
import configparser
import MySQLdb
import os

# Value placed in the "accesstoken" request header below; blank in this listing.
accesstoken = ""
# HTTP headers defined for the mini-program API requests.
# NOTE(review): presumably consumed by postHtml, which is not visible in this section — confirm.
headers = {
	"content-type": "application/json;charset=UTF-8",
	"accesstoken": accesstoken,
	"referer": "https://servicewechat.com/wxd9813e0a0d4d4156/49/page-frame.html",
	"user-agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/7.0.17(0x17001124) NetType/WIFI Language/zh_CN",
	"code-version": "0.0.43",
	"verifyauthtoken": "",
	"p-appname": "mobile-xcx-vegetable",
}
# NOTE(review): retry and timeout are not referenced anywhere in the visible code;
# presumably request settings used by postHtml — confirm.
retry = 3
timeout = 20
# Filled by iniProvinceMap(): maps each region_name to its region record.
provinceMap = {}
# Load conf.ini from the current working directory.
# ConfigParser.read() does NOT raise for a missing file: it skips it and returns
# the list of files it actually parsed. The original try/except therefore never
# reported a missing conf.ini, so the returned list is checked explicitly.
cf = configparser.ConfigParser()
try:
	_loadedConfFiles = cf.read(os.path.join(os.getcwd(), "conf.ini"), encoding="utf-8-sig")
except Exception:
	# Unparsable or undecodable file: handled the same way as before.
	_loadedConfFiles = []
if not _loadedConfFiles:
	print("程序目录下不存在conf.ini配置文件~")
	exit(0)

# Comma-separated keyword groups from [app-sys] keywords; the program exits
# when the option cannot be read or split.
try:
	keywords = getConf("app-sys", "keywords").split(",")
except Exception:
	keywords = ""
	print("keywords参数错误!")
	exit(0)

# Start time points from [app-sys] start, comma-separated.
# An empty option, or a value that cannot be split, yields an empty list.
startTime = getConf("app-sys", "start")
try:
	startTimes = startTime.split(",")
except Exception:
	startTimes = []
else:
	if startTimes == [""]:
		startTimes = []
# MySQL connection settings, read from the [Mysql-Database] section in this
# order: account, password, database name, host address, port.
(
	mysql_user,
	mysql_password,
	mysql_database,
	mysql_host,
	mysql_port,
) = (
	getConf("Mysql-Database", option)
	for option in ("user", "password", "database", "host", "port")
)

def querySQL(sql):
	"""Run a SQL statement on a fresh connection and return every fetched row.

	Args:
		sql: SQL text handed unchanged to ``cursor.execute``; it must not be
			built from untrusted input.

	Returns:
		The result of ``cursor.fetchall()``, or ``False`` when connecting or
		executing fails.
	"""
	# NOTE(review): mysql_port is read from the config at module level but is not
	# passed to connect() here — confirm whether that is intended.
	conn = None
	try:
		conn = MySQLdb.connect(user=mysql_user, password=mysql_password, host=mysql_host, database=mysql_database, charset='utf8')
		cursor = conn.cursor()
		try:
			cursor.execute(sql)
			return cursor.fetchall()
		finally:
			cursor.close()
	except Exception:
		# Best-effort contract kept from the original: any failure yields False.
		return False
	finally:
		# The original never closed the connection, leaking one per call.
		if conn is not None:
			try:
				conn.close()
			except Exception:
				# Closing is best-effort; the result above is already decided.
				pass

def getCurrDate():
	"""Return today's local date formatted as ``YYYY年MM月DD日``."""
	# strftime fills in the numbers and leaves the {y}/{m}/{d} placeholders,
	# which format() then replaces with the unit characters.
	stamped = time.strftime('%Y{y}%m{m}%d{d}')
	units = {'y': '年', 'm': '月', 'd': '日'}
	return str(stamped.format(**units))

def tsToDate(ts):
	"""Convert a Unix timestamp to a local ``YYYY-MM-DD HH:MM:SS`` string.

	Args:
		ts: Seconds since the epoch, as a number or a numeric string.

	Returns:
		The formatted local time, or ``""`` for a falsy ``ts``
		(``None``, ``0``, ``""``).
	"""
	if not ts:
		return ""
	seconds = int(ts)
	return str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds)))

def getCurrentTime():
	"""Return the current local time as ``YYYY-MM-DD HH:MM:SS``."""
	now = time.localtime(time.time())
	stamp = time.strftime('%Y-%m-%d %H:%M:%S', now)
	return str(stamp)

def getCityMaps():
	"""Build the lookup of configured keyword groups.

	Each entry of the module-level ``keywords`` list is split on ``-``; its
	first four parts become the map key and the ``city``, ``scity`` and ``key``
	values. Entries that cannot be split into at least four parts are skipped.

	Returns:
		A dict keyed by the first part of each entry; empty when ``keywords``
		is not a non-empty list.
	"""
	cityMaps = {}
	if not (keywords and isinstance(keywords, list)):
		return cityMaps
	for entry in keywords:
		try:
			province, city, scity, key = entry.split("-")[:4]
			cityMaps[province] = {"city": city, "scity": scity, "key": key}
		except Exception:
			# Malformed entry: skip it and keep the rest.
			continue
	return cityMaps

def iniProvinceMap():
	"""Fill the module-level ``provinceMap`` from the regions endpoint.

	Posts a regions request for ``region_id`` 1 and stores every returned
	region under its ``region_name``. Regions without a usable name are
	skipped.

	Returns:
		``True`` when the response carried an iterable ``regions`` entry,
		``False`` otherwise.
	"""
	global provinceMap
	payload = {
		"open_app_source": 1089,
		"anti_content": getAntiContent(),
		"region_id": 1,
		"xcx_version": "0.0.64"
	}
	res = postHtml("https://api.pinduoduo.com/api/mc/v1/user/regions", json.dumps(payload))
	try:
		for region in res['regions']:
			try:
				provinceMap[region['region_name']] = region
			except Exception:
				continue
	except Exception:
		return False
	return True

def searchCity(region_id, cityName):
	"""Find the first sub-region of ``region_id`` whose name contains ``cityName``.

	Args:
		region_id: Parent region id; converted with ``int()`` for the request.
		cityName: Substring looked for in each ``region_name``.

	Returns:
		The matching region record, or ``None`` when nothing matches or the
		response has no usable ``regions`` entry.
	"""
	payload = {
		"open_app_source": 1089,
		"anti_content": getAntiContent(),
		"region_id": int(region_id),
		"xcx_version": "0.0.64"
	}
	res = postHtml("https://api.pinduoduo.com/api/mc/v1/user/regions", json.dumps(payload))
	try:
		for candidate in res['regions']:
			try:
				if cityName in candidate['region_name']:
					return candidate
			except Exception:
				# Region without a comparable name: try the next one.
				continue
	except Exception:
		pass
	return None

def searchPoi(provinceId, cityId, districtId, key):
	"""Query the search_poi endpoint for ``key`` inside one district.

	Args:
		provinceId, cityId, districtId: Region ids; each is converted with
			``int()`` for the request.
		key: Search text; sent as ``str(key)``.

	Returns:
		The ``poi_list`` entry of the response, or ``None`` when the response
		does not provide one.
	"""
	payload = {
		"open_app_source": 1089,
		"anti_content": getAntiContent(),
		"provinceId": int(provinceId),
		"query": str(key),
		"cityId": int(cityId),
		"districtId": int(districtId),
		"xcx_version": "0.0.64"
	}
	res = postHtml("https://api.pinduoduo.com/api/mc/v1/search_poi", json.dumps(payload))
	try:
		return res['poi_list']
	except Exception:
		return None

def getStore(provinceId, cityId, key):
	"""Return the first store found for ``key`` in any district of a city.

	Lists the sub-regions of ``cityId``, searches POIs for ``key`` in each one
	via ``searchPoi``, and passes each ``poi_id`` to ``searchStore`` until a
	truthy result comes back.

	Returns:
		The first truthy ``searchStore`` result, or ``None`` when no district
		yields one or the regions response is unusable.
	"""
	payload = {
		"open_app_source": 1089,
		"anti_content": getAntiContent(),
		"region_id": int(cityId),
		"xcx_version": "0.0.64"
	}
	res = postHtml("https://api.pinduoduo.com/api/mc/v1/user/regions", json.dumps(payload))
	try:
		for district in res['regions']:
			try:
				poiList = searchPoi(provinceId, cityId, district['region_id'], key)
				if not (poiList and isinstance(poiList, list)):
					continue
				for poi in poiList:
					try:
						store = searchStore(poi['poi_id'])
						if store:
							return store
					except Exception:
						# A failing POI must not stop the remaining ones.
						continue
			except Exception:
				# A failing district must not stop the remaining ones.
				continue
	except Exception:
		pass
	return None

def getGoodsDetail(store_id, goods_id, city):
	"""Fetch one goods detail record and flatten it into a row dict.

	Args:
		store_id: Store id; sent as ``str(store_id)``.
		goods_id: Goods id; sent as ``str(goods_id)``.
		city: City label stored in ``area`` and used as the prefix of
			``qy_address``.

	Returns:
		A dict with the keys ``goods_id``, ``area``, ``goods_name``,
		``sc_price``, ``ysj_price``, ``xg_num``, ``xs_nums``, ``start_time``,
		``end_time``, ``qy_address``, ``imageb_url`` and ``sy_image``, or
		``None`` when the response has no usable ``goods_id``. Every other
		field falls back to its default (``""``, ``0`` or ``0.00``) when it is
		missing or malformed.
	"""
	# NOTE(review): appflag, detailPre and sellNum are not defined in this function
	# or in the visible part of the file; they are assumed to be module-level
	# names. If sellNum is not defined, xs_nums is always 0 — confirm.
	url = "https://api.pinduoduo.com/api/mc/v0/goods_detail"
	data = {
		"open_app_source": 1089,
		"anti_content": getAntiContent(),
		"store_id": str(store_id),
		"goods_id": str(goods_id),
		"xcx_version": "0.0.64"
	}
	res = postHtml(url, json.dumps(data))

	def fieldOr(default, compute):
		# Best-effort extraction: a missing or malformed field yields its default.
		try:
			return compute()
		except Exception:
			return default

	def imageWithoutQuery():
		# Drop everything from the first "?" onward.
		image = res['image_url']
		return image[:image.find("?")] if "?" in image else image

	# Without an id the record cannot be stored, so give up early.
	try:
		datas = {'goods_id': int(appflag + str(res['goods_id']))}
	except Exception:
		return None

	datas['area'] = city
	datas['goods_name'] = fieldOr("", lambda: _bracketGoodsName(str(res['goods_name'])))
	# Prices are divided by 100 and rounded to two decimals
	# (presumably cents to yuan — confirm against the API).
	datas['sc_price'] = fieldOr(0.00, lambda: float("%.2f" % (float(res['market_price']) / 100)))
	datas['ysj_price'] = fieldOr(0.00, lambda: float("%.2f" % (float(res['price']) / 100)))
	datas['xg_num'] = fieldOr(0, lambda: res['regular_limit'])
	datas['xs_nums'] = fieldOr(0, lambda: sellNum)
	datas['start_time'] = fieldOr(0, lambda: int(res['pre_sale_time']))
	datas['end_time'] = fieldOr(0, lambda: int(res['end_sale_time']))
	datas['qy_address'] = fieldOr("", lambda: city + "多多买菜")
	datas['imageb_url'] = fieldOr("", lambda: detailPre + str(datas['goods_id']))
	datas['sy_image'] = fieldOr("", imageWithoutQuery)
	return datas

def _bracketGoodsName(goods_name):
	"""Wrap the leading space-separated token of a goods name in 【】.

	Names that already contain 【 or 】, names without a space, and names that
	start with a space are returned unchanged. Only the leading token is
	wrapped: the original used ``str.replace`` on the token, which also wrapped
	every later occurrence of it and, for an empty leading token, inserted
	"【】" between all characters.
	"""
	if "【" in goods_name or "】" in goods_name:
		return goods_name
	head, sep, tail = goods_name.partition(" ")
	if not sep or not head:
		return goods_name
	return "【" + head + "】" + sep + tail

def checkGoodsExists(pid):
	"""Report whether ``goods_list`` already holds a row with this goods id.

	Args:
		pid: Goods id; converted with ``int()`` before it is sent.

	Returns:
		``True`` when at least one row matches. ``False`` when none matches or
		when anything fails (non-numeric ``pid``, connection or query error) —
		callers cannot tell "absent" from "lookup failed".
	"""
	conn = None
	try:
		conn = MySQLdb.connect(user=mysql_user, password=mysql_password, database=mysql_database, charset='utf8',
							   host=mysql_host)
		cursor = conn.cursor()
		try:
			# Let the driver bind the value instead of formatting it into the SQL text.
			cursor.execute("select * from goods_list where goods_id = %s", (int(pid),))
			return len(cursor.fetchall()) > 0
		finally:
			cursor.close()
	except Exception:
		return False
	finally:
		# The original never closed the connection, leaking one per lookup.
		if conn is not None:
			try:
				conn.close()
			except Exception:
				# Closing is best-effort; the result above is already decided.
				pass

def add(data):
	"""Print a goods record and run the insert statement for it.

	Args:
		data: The row dict to store; it is printed before the database call.

	Returns:
		None. Failures are reported on stdout instead of being raised.
	"""
	# TODO(review): sql is an empty string in this listing, so nothing is actually
	# inserted; the real INSERT statement has to be supplied here.
	print("insert ----------------------------------------------------")
	print(data)
	conn = None
	try:
		conn = MySQLdb.connect(user=mysql_user, host=mysql_host, password=mysql_password, database=mysql_database,
							   charset='utf8')
		cursor = conn.cursor()
		sql = ""
		cursor.execute(sql)
		conn.commit()
	except Exception as e:
		# Was silently swallowed; report it so a failed write is visible.
		print("insert failed: %s" % e)
	finally:
		# The original never closed the connection, leaking one per record.
		if conn is not None:
			try:
				conn.close()
			except Exception:
				pass

def update(data):
	"""Print a goods record and run the update statement for it.

	Args:
		data: The row dict to store; it is printed before the database call.

	Returns:
		None. Failures are reported on stdout instead of being raised.
	"""
	# TODO(review): sql is an empty string in this listing, so nothing is actually
	# updated; the real UPDATE statement has to be supplied here.
	print("update ----------------------------------------------------")
	print(data)
	conn = None
	try:
		conn = MySQLdb.connect(user=mysql_user, host=mysql_host, password=mysql_password, database=mysql_database,
							   charset='utf8')
		cursor = conn.cursor()
		sql = ""
		cursor.execute(sql)
		conn.commit()
	except Exception as e:
		# Was silently swallowed; report it so a failed write is visible.
		print("update failed: %s" % e)
	finally:
		# The original never closed the connection, leaking one per record.
		if conn is not None:
			try:
				conn.close()
			except Exception:
				pass

def parser(storeId, city):
	"""Walk a store's paginated goods list and store every item.

	Requests pages of 10 goods, fetches each item's detail with
	``getGoodsDetail`` and hands it to ``update`` when ``checkGoodsExists``
	reports the id as present, otherwise to ``add``. Paging stops when the
	response reports no more data or when a page cannot be processed.

	Args:
		storeId: Store id; converted with ``int()`` for the list request.
		city: City label forwarded to ``getGoodsDetail``.
	"""
	# NOTE(review): list_id is a hard-coded value in this listing — confirm whether
	# the API accepts a fixed id.
	page = 0
	url = "https://api.pinduoduo.com/api/mc/v0/goods_list"
	while True:
		try:
			data = {
				"open_app_source": 1089,
				"anti_content": getAntiContent(),
				"store_id": int(storeId),
				"list_id": "0d95f10a-620f-4d29-a087-894ff90239a4",
				"offset": page * 10,
				"count": 10,
				"xcx_version": "0.0.64"
			}
			res = postHtml(url, json.dumps(data))
			has_more = res['has_more']
			goods_list = res['goods_list']
			for goods in goods_list:
				try:
					datas = getGoodsDetail(storeId, goods['goods_id'], city)
					if not datas:
						# getGoodsDetail returns None when the detail has no usable id.
						continue
					if checkGoodsExists(datas['goods_id']):
						update(datas)
					else:
						# The original line was missing its closing parenthesis,
						# a syntax error that kept the whole file from loading.
						add(datas)
				except Exception:
					# One bad item must not abort the rest of the page.
					continue
			if not has_more:
				break
			page += 1
			time.sleep(getSleepTime())
		except Exception:
			# Unusable response or request failure: stop paging, as before.
			break

def main():
	"""Crawl the goods of one store for every configured keyword group.

	Loads the province table, resolves each keyword group to a province, a
	city and a store, and passes the store to ``parser``. Problems with one
	keyword group are reported and do not stop the remaining groups.
	"""
	global provinceMap
	# provinceMap is only filled by iniProvinceMap(), which the original listing
	# never called. The listing also ended with a second, dangling `else`
	# (a syntax error) printing the login-expired message; that branch is
	# restored here as the failure path of the province load.
	if not iniProvinceMap():
		print("登录过期!")
		return
	cityMaps = getCityMaps()
	if not cityMaps:
		print("获取城市列表失败!")
		return
	for cityMap, bcity in cityMaps.items():
		try:
			provinceId = provinceMap[cityMap]['region_id']
			cityName = bcity['city']
			key = bcity['key']
			scity = bcity['scity']
			acity = searchCity(provinceId, cityName)
			cityId = acity['region_id']
			store = getStore(provinceId, cityId, key)
			if store:
				parser(store['store_id'], scity)
			else:
				print("关键词组:%s 未搜索到任何店铺!" % (cityMap + " - " + cityName + " - " + key))
		except Exception as e:
			# Unknown province, city not found, malformed record, ...: report it
			# (the original swallowed it silently) and go on with the next group.
			print("关键词组:%s 处理失败: %s" % (cityMap, e))

if __name__ == '__main__':
	main()

关于Python技术储备

学好 Python 不论是就业还是做副业赚钱都不错,但要学会 Python 还是要有一个学习规划。最后给大家分享一份全套的 Python 学习资料,给那些想学习 Python 的小伙伴们一点帮助!

一、Python所有方向的学习路线

这里对 Python 所有方向的技术点做了整理,形成各个领域的知识点汇总。它的用处在于,你可以按照上面的知识点去找对应的学习资源,保证自己学得较为全面。
在这里插入图片描述

二、Python必备开发工具

在这里插入图片描述

三、精品Python学习书籍

当我学到一定基础,有自己的理解能力的时候,会去阅读一些前辈整理的书籍或者手写的笔记资料,这些笔记详细记载了他们对一些技术点的理解,这些理解是比较独到,可以学到不一样的思路。
在这里插入图片描述

四、Python视频合集

观看零基础学习视频,看视频学习是最快捷也是最有效果的方式,跟着视频中老师的思路,从基础到深入,还是很容易入门的。
在这里插入图片描述
在这里插入图片描述

五、实战案例

光学理论是没用的,要学会跟着一起敲,要动手实操,才能将自己的所学运用到实际当中去,这时候可以搞点实战案例来学习。
在这里插入图片描述

六、Python练习题

检查学习结果。
在这里插入图片描述

七、面试资料

我们学习Python必然是为了找到高薪的工作,下面这些面试题是来自阿里、腾讯、字节等一线互联网大厂最新的面试资料,并且有阿里大佬给出了权威的解答,刷完这一套面试资料相信大家都能找到满意的工作。
在这里插入图片描述
在这里插入图片描述

这份完整版的 Python 全套学习资料已经上传 CSDN,朋友们如果需要,可以微信扫描下方 CSDN 官方认证二维码免费领取【保证100%免费】。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值