前端页面使用的是开源layuimini框架(和这个博客所说的内容联系不大),后端采用flask框架。
效果图
爬虫获取数据的主要代码,是在这位博主的文章基础上稍加改进而来的:https://blog.csdn.net/gqv2009/article/details/86526194
主要流程:
1.前端输入商品url(京东淘宝小米华为等等商城)点击搜索(点击后前端页面会等待返回的数据并重新加载相关的div)
2.后端接收商品url,由爬虫获取商品数据,清洗后得到近30天的商品价格、对应日期以及商品名称,并保存为json文件。
3.前端页面通过ajax获取该json数据,并用它重新渲染价格图表。
贴一下主要代码
run.py:
# Flask application object; the keyword arguments spell out where static
# assets and templates are looked up.
app = Flask(
    import_name=__name__,
    static_url_path='/static',    # URL prefix under which static files are served
    static_folder='static',       # directory that holds the static files
    template_folder='templates',  # directory that holds the template files
)
@app.route("/history_price")
def history_price():
    """Render and return the ``/page/history_price.html`` template."""
    page = render_template('/page/history_price.html')
    return page
@app.route("/get_goodsid", methods=['POST'])
def get_goodsid():
    """Fetch the price history for the submitted product URL and cache it.

    Reads ``goods_url`` from the POSTed form, asks ``trend_price.get_json``
    for the price data, writes the result to
    ``./static/api/history_price.json`` and returns it to the client.

    Returns:
        The dict produced by ``trend_price.get_json`` on success. When the
        URL is missing or ``get_json`` yields ``None``, the cached file is
        left untouched and a ``{"code": "1", "msg": ...}`` dict is returned
        instead.
    """
    goods_url = request.form.get('goods_url')
    print("goods_url为:" + str(goods_url))
    if not goods_url:
        return {"code": "1", "msg": "请输入商品url"}

    # Scrape the history data; None signals a fetch or parse failure.
    json_data = trend_price.get_json(goods_url)
    if json_data is None:
        # Returning None from a view is rejected by Flask, and dumping it
        # would overwrite the cache with ``null``.
        return {"code": "1", "msg": "对不起,该商品未收录或加载异常!"}

    # Persist the data in the JSON file that the chart endpoint reads.
    with open('./static/api/history_price.json', 'w', encoding='utf8') as fp:
        json.dump(json_data, fp, ensure_ascii=False)
    return json_data
@app.route("/history_data")
def history_data():
    """Return the chart payload built by ``trend_price.json_trend``."""
    return trend_price.json_trend()
trend_price.py
from flask import Flask,jsonify
import json,random,time,datetime,re
from urllib.parse import quote_plus
import requests
import warnings
warnings.filterwarnings('ignore')
# Data served to the front end.
def json_trend(path='./static/api/history_price.json'):
    """Load the cached price history and reshape it for the chart.

    Args:
        path: JSON file to read. Defaults to the cache file location.

    Returns:
        A dict with ``price_list`` (the values of
        ``thirty_days_price_dict``), ``date_list`` (its keys, in file
        order) and ``title``. If the file does not contain a JSON object
        (for example ``null``), both lists are empty and the title is
        ``""``.
    """
    with open(path, 'r', encoding='utf8') as fp:
        json_data = json.load(fp)

    if not isinstance(json_data, dict):
        return {"price_list": [], "date_list": [], "title": ""}

    price_by_date = json_data["thirty_days_price_dict"]
    return {
        "price_list": list(price_by_date.values()),
        "date_list": list(price_by_date),
        "title": json_data["title"],
    }
# 爬取数据
import warnings
warnings.filterwarnings('ignore')
def random_web_ua():
    """Return one of the built-in browser User-Agent strings, chosen at random."""
    candidates = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3493.3 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
    )
    chosen = random.choice(candidates)
    return chosen
def days_ago(days=30):
    """Return the ``days`` calendar dates preceding today, oldest first.

    Args:
        days: How many days to look back. Defaults to 30.

    Returns:
        A list of ``'YYYY-MM-DD'`` strings running from ``days`` days ago
        up to yesterday (today itself is not included), based on the
        local date.
    """
    today = datetime.date.today()
    return [
        (today - datetime.timedelta(days=offset)).isoformat()
        for offset in range(days, 0, -1)
    ]
def get_timestamp_str(timestamp):
    """Format a Unix timestamp as a local-time ``YYYY-MM-DD`` string."""
    date_format = '%Y-%m-%d'
    local_time = time.localtime(timestamp)
    return time.strftime(date_format, local_time)
def get_guoke_price_web(goods_url):
    """Fetch the raw price-history payload for ``goods_url`` from tool168.cn.

    Three requests are made in sequence:

    1. GET the history page and read the ``checkCodeId`` value embedded in
       the returned HTML.
    2. POST that check code together with the product URL to
       ``/dm/ptinfo.php``; the JSON reply carries a ``code``.
    3. POST to ``/dm/history.php`` with that ``code``; the response text
       is the payload.

    Args:
        goods_url: Product page URL to look up.

    Returns:
        The stripped text of the third response, or ``None`` when it
        contains the site's "not found" message.

    Raises:
        ValueError: If the check code cannot be found in the first response.
        Exception: Anything raised by ``requests`` or ``json.loads``
            propagates unchanged.
    """
    # No proxy is configured; pass ``proxies=...`` to the requests below if
    # a proxy pool is required.
    ua = random_web_ua()
    k = quote_plus(goods_url)
    btnSearch = quote_plus('搜索')

    # --- Step 1: load the history page to obtain the check code ---------
    url_01 = 'http://www.tool168.cn/?'
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Host': 'www.tool168.cn',
        'Referer': 'http://www.tool168.cn/history/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': ua,
    }
    # NOTE(review): ``k`` and ``btnSearch`` are already percent-encoded and
    # requests encodes ``params`` again; kept as-is because what the site
    # expects cannot be confirmed from this file.
    params = {
        'm': 'history',
        'a': 'view',
        'k': k,
        'btnSearch': btnSearch,
    }
    response_html_01 = requests.get(url=url_01, headers=header, params=params,
                                    verify=False, timeout=20)
    check_match = re.search('id="checkCodeId" value="(.*?)"', response_html_01.text)
    if check_match is None:
        raise ValueError('checkCodeId not found in the tool168.cn history page')
    checkCode = check_match.group(1)

    # --- Step 2: exchange check code + product URL for a history code ---
    url_02 = "http://www.tool168.cn/dm/ptinfo.php"
    # Content-Length is left for requests to compute from the actual body.
    header = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'www.tool168.cn',
        'Origin': 'http://www.tool168.cn',
        'Referer': 'http://www.tool168.cn/?m=history&a=view&k={}&btnSearch={}'.format(k, btnSearch),
        'User-Agent': ua,
        'X-Requested-With': 'XMLHttpRequest',
    }
    data = {
        'checkCode': checkCode,
        'con': goods_url,
    }
    response_html_02 = requests.post(url=url_02, headers=header, data=data,
                                     verify=False, timeout=20)
    code = json.loads(response_html_02.text).get("code")

    # --- Step 3: download the history payload ---------------------------
    url_03 = "http://www.tool168.cn/dm/history.php?"
    header = {
        'Accept': 'text/plain, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        # NOTE(review): hard-coded session cookie carried over unchanged;
        # confirm whether the site still needs or accepts it.
        'Cookie': 'PHPSESSID=l31o4o91itpmeh7m38ol196t47; Hm_lvt_61e842dc51946642fa309fd4e1c752aa=1547202812; Hm_lpvt_61e842dc51946642fa309fd4e1c752aa=1547203682',
        'Host': 'www.tool168.cn',
        'Origin': 'http://www.tool168.cn',
        # A second, hard-coded 'Referer' entry used to override this one;
        # the referer now always reflects the requested product URL.
        'Referer': 'http://www.tool168.cn/?m=history&a=view&k={}'.format(k),
        'User-Agent': ua,
        'X-Requested-With': 'XMLHttpRequest',
    }
    params = {
        "code": code,
        't': '',
    }
    response_html_03 = requests.post(url=url_03, headers=header, params=params,
                                     verify=False, timeout=20)
    response_html_03.encoding = "utf-8"
    result_response = response_html_03.text.strip()

    if "对不起,没有找到。" in result_response:
        # The site has no record for this product.
        return None
    return result_response
def parse(result_history_price, thirty_date=None):
    """Turn raw history entries into a ``{date: price}`` dict for the window.

    Each entry is expected to look like ``[(2020,0,10),179.50]``: a
    parenthesised ``year,month,day`` followed by the price. The month is
    incremented by one (the upstream data appears to use JavaScript
    ``Date.UTC`` 0-based months).

    Args:
        result_history_price: Sequence of entry strings. The carry-forward
            step compares neighbouring entries, so ascending date order is
            assumed -- TODO confirm against the upstream payload.
        thirty_date: ``'YYYY-MM-DD'`` start of the window. Defaults to the
            first element of ``days_ago()``.

    Returns:
        A dict mapping ``'YYYY-MM-DD'`` to a float price for every entry on
        or after ``thirty_date``. If no entry falls on ``thirty_date``
        itself, the price in force on that day is carried forward and
        stored under ``thirty_date``. An empty input yields ``{}``.

    Raises:
        ValueError: If an entry does not match the expected shape or the
            price is not numeric.
    """
    if thirty_date is None:
        thirty_date = days_ago()[0]

    # Full history as [date, price-string] pairs.
    history = []
    for raw in result_history_price:
        date_match = re.search(r'\((.*?)\)', raw)
        price_match = re.search(r'\),(.*?)]', raw)
        if date_match is None or price_match is None:
            raise ValueError(f'malformed price-history entry: {raw!r}')
        year, month, day = (int(part) for part in date_match.group(1).split(',')[:3])
        history.append([f'{year:04d}-{month + 1:02d}-{day:02d}', price_match.group(1)])

    if not history:
        return {}

    # Dates are compared as YYYYMMDD integers.
    cutoff_key = int(thirty_date.replace('-', ''))
    keys = [int(date.replace('-', '')) for date, _ in history]
    recent = [entry for entry, key in zip(history, keys) if key >= cutoff_key]

    if not recent:
        # Nothing recorded inside the window: the last known price still
        # applies on the first day of the window.
        recent = [[thirty_date, history[-1][1]]]
    elif len(history) > len(recent) and cutoff_key not in keys:
        # Older entries exist but none falls exactly on the window start:
        # carry forward the price of the entry just before it.
        for entry, older_key, newer_key in zip(history, keys, keys[1:]):
            if older_key < cutoff_key < newer_key:
                recent.insert(0, [thirty_date, entry[1]])
                break

    return {date: float(price) for date, price in recent}
def get_json(goods_url):
    """Fetch and parse the recent price history for a product URL.

    Args:
        goods_url: Product page URL passed on to ``get_guoke_price_web``.

    Returns:
        ``{'thirty_days_price_dict': {...}, 'title': str}`` on success, or
        ``None`` when the fetch fails, the product is not found, or the
        payload cannot be parsed.
    """
    try:
        result = get_guoke_price_web(goods_url)
    except Exception:
        print(goods_url, '--response_erro')
        return None

    if result is None:
        # Product not found upstream; reported as a parse error, matching
        # what the generic handler below prints.
        print(goods_url, '--parse_erro')
        return None

    try:
        title = re.findall(">(.*?)<", result)[0]
        series_text = re.search(r"\[.*\]", result, re.S).group()
        series_text = series_text.replace("Date.UTC", "")
        # Pull out one "[(y,m,d),price]" item per entry. Splitting on ","
        # would also cut through the date tuple inside each entry.
        entries = re.findall(r"\[\(.*?\),.*?\]", series_text)
        if not entries:
            raise ValueError('no price entries found in payload')
        thirty_days_price_dict = parse(entries)
    except Exception:
        print(goods_url, '--parse_erro')
        return None

    return {
        'thirty_days_price_dict': thirty_days_price_dict,
        'title': title,
    }
history_price.html (里面用到了layui前端技术)
<script src="static/js/lay-module/echarts/echarts.js"></script>
<script src="../static/js/jquery-1.11.1.min.js"></script>
<!-- 注意:上面两个 script 的引用路径需要换成你自己项目里的实际路径 -->
<div class="layuimini-container layuimini-page-anim">
<div class="layuimini-main">
<fieldset class="table-search-fieldset">
<legend>搜索信息</legend>
<div style="margin: 10px 10px 10px 10px">
<form class="layui-form layui-form-pane" action="get_goodsid" id="currentformid">
<div class="layui-form-item">
<div class="layui-inline">
<label class="layui-form-label">商品url</label>
<div class="layui-input-inline" style="width: 500px;">
<input type="text" name="goods_url" autocomplete="off" class="layui-input">
</div>
</div>
<div class="layui-inline">
<button type="submit" class="layui-btn layui-btn-primary" lay-submit lay-filter="data-search-btn"><i class="layui-icon"></i> 搜 索</button>
<button type="reset" class="layui-btn layui-btn-primary">重置</button>
</div>
</div>
</form>
</div>
</fieldset>
<div class="layui-card">
<div class="layui-card-header"><i class="fa fa-line-chart icon"></i>历史价格</div>
<div class="layui-card-body">
<div id="echarts_history" style="width: 100%;min-height:500px"></div>
</div>
</div>
</div>
</div>
<script>
layui.use(['form', 'table', 'miniPage', 'element'], function () {
    var $ = layui.jquery,
        form = layui.form;

    // Handle the search form: post the fields to the backend and redraw
    // the chart only when price data actually came back.
    form.on('submit(data-search-btn)', function (data) {
        $.ajax({
            url: '/get_goodsid',
            method: 'post',
            data: data.field,
            dataType: 'JSON',
            success: function (res) {
                // A successful lookup carries the price dict; anything else
                // is treated as a failure and its message (if any) is shown.
                if (res && res.thirty_days_price_dict) {
                    refalsh2();
                } else {
                    alert((res && res.msg) || '查询失败,请稍后重试');
                }
            },
            error: function () {
                alert('查询失败,请稍后重试');
            }
        });
        return false; // prevent the default form navigation
    });
});
</script>
<script>
/**
 * (Re)draw the price-history chart in #echarts_history.
 *
 * Applies the static chart options, then loads the title, dates and prices
 * from the `history_data` endpoint and merges them into the chart.
 */
function refalsh2() {
    // 1. Get the chart instance: reuse the one already bound to the
    //    container instead of initialising a new one on every search.
    var container = document.getElementById("echarts_history");
    var myChart1 = echarts.getInstanceByDom(container) || echarts.init(container);

    // 2. Static options (data is filled in asynchronously below).
    myChart1.setOption({
        title: {
            text: '该商品的历史价格'
        },
        tooltip: {
            trigger: 'axis'
        },
        legend: {
            data: ['价格'],
            y: 'center'
        },
        toolbox: {
            show: true,
            feature: {
                dataZoom: {
                    yAxisIndex: 'none'
                },
                dataView: {readOnly: false},
                magicType: {type: ['line', 'bar']},
                restore: {},
                saveAsImage: {}
            }
        },
        xAxis: {
            type: 'category',
            boundaryGap: false
        },
        yAxis: {
            type: 'value',
            scale: true,
            axisLabel: {
                formatter: '¥{value}'
            }
        },
        series: [
            {
                type: 'line',
                step: 'end',
                symbolSize: 3,
                markPoint: {
                    data: [
                        {type: 'max', name: '最大值'},
                        {type: 'min', name: '最小值'}
                    ]
                },
                markLine: {
                    data: [
                        {type: 'average', name: '平均值'}
                    ]
                }
            }
        ]
    });

    // 3. Keep the chart sized to the window. Registered only once so that
    //    repeated searches do not pile up listeners.
    if (!refalsh2.resizeBound) {
        window.addEventListener("resize", function () {
            var el = document.getElementById("echarts_history");
            var chart = el && echarts.getInstanceByDom(el);
            if (chart) {
                chart.resize();
            }
        });
        refalsh2.resizeBound = true;
    }

    // 4. Load the data asynchronously and merge it into the chart.
    myChart1.showLoading();
    $.get('history_data').done(function (data) {
        myChart1.setOption({
            title: {
                text: data.title
            },
            xAxis: {
                data: data['date_list']
            },
            series: [{
                data: data['price_list']
            }]
        });
    }).always(function () {
        myChart1.hideLoading(); // hide the loading animation on success or failure
    });
}
</script>