获取电商商品历史价格,echarts呈现,附上代码

前端页面使用的是开源layuimini框架(和这个博客所说的内容联系不大),后端采用flask框架。
效果图
静态图片
动态图
爬虫获取数据的部分主要参考了这位老哥的博客,并在其基础上稍加改进: https://blog.csdn.net/gqv2009/article/details/86526194

主要流程:
1.前端输入商品url(京东淘宝小米华为等等商城)点击搜索(点击后前端页面会等待返回的数据并重新加载相关的div)
2.后端接收商品url,通过爬虫获取商品的历史价格数据,清洗后提取最近30天的价格、日期以及商品名称,保存为json文件。
3.前端页面通过ajax请求获取该json数据,并渲染到图表中。

贴一下主要代码

run.py:

# Flask application object; the route decorators below register views on it.
app = Flask(import_name=__name__,
            static_url_path='/static', # URL prefix under which static files are served
            static_folder='static',    # folder that holds the static files
            template_folder='templates') # folder that holds the template files

@app.route("/history_price")
def history_price():
    """Render the history-price search page template."""
    page_template = '/page/history_price.html'
    return render_template(page_template)

@app.route("/get_goodsid",methods=['POST'])
def get_goodsid():
    """Fetch the price history for the submitted product URL and cache it.

    Reads ``goods_url`` from the POSTed form, asks ``trend_price.get_json``
    for the price payload and, on success, writes it to
    ``./static/api/history_price.json`` before returning it as the response.

    When the URL is missing or ``trend_price.get_json`` returns ``None``, the
    cached file is left untouched and a ``{"code": "1", "msg": ...}`` payload
    is returned, because ``None`` is not a valid Flask response.
    """
    goods_url = request.form.get('goods_url')
    print("goods_url为:"+str(goods_url))
    if not goods_url:
        return {"code": "1", "msg": "商品url不能为空"}

    # Fetch the history data; it is persisted to the JSON file below.
    json_data = trend_price.get_json(goods_url)
    if json_data is None:
        # Keep the last good payload instead of overwriting it with "null".
        return {"code": "1", "msg": "未能获取该商品的历史价格"}

    with open('./static/api/history_price.json', 'w', encoding='utf8') as fp:
        json.dump(json_data, fp, ensure_ascii=False)
    return json_data

@app.route("/history_data")
def history_data():
    """Return the chart data built by ``trend_price.json_trend``."""
    return trend_price.json_trend()

trend_price.py

from flask import Flask,jsonify
import json,random,time,datetime,re
from urllib.parse import quote_plus
import requests
import warnings
warnings.filterwarnings('ignore')

# //前端访问的数据
def json_trend():
    """Build the chart payload from the cached price-history file.

    Reads ``./static/api/history_price.json`` and splits its
    ``thirty_days_price_dict`` mapping into two parallel lists.

    Returns:
        A dict with ``date_list`` (the mapping's keys), ``price_list`` (the
        mapping's values, in the same order) and ``title``. When the file is
        missing or does not hold a JSON object (for example ``null``), both
        lists are empty and the title is ``""``.
    """
    try:
        with open('./static/api/history_price.json', 'r', encoding='utf8') as fp:
            json_data = json.load(fp)
    except FileNotFoundError:
        json_data = None

    if not isinstance(json_data, dict):
        return {"price_list": [], "date_list": [], "title": ""}

    price_by_date = json_data["thirty_days_price_dict"]
    return {
        "price_list": list(price_by_date.values()),
        "date_list": list(price_by_date),
        "title": json_data["title"],
    }


# 爬取数据
import warnings
warnings.filterwarnings('ignore')
def random_web_ua():
    """Return one of the hard-coded browser User-Agent strings, chosen at random."""
    candidates = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3493.3 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
    )
    picked = random.choice(candidates)
    return picked

def days_ago(days=30):
    """Return the ISO dates of the ``days`` days preceding today.

    Args:
        days: How many days to go back. Defaults to 30.

    Returns:
        A list of ``'YYYY-MM-DD'`` strings in ascending order, running from
        ``days`` days ago up to yesterday; today itself is not included.
    """
    today = datetime.date.today()
    return [
        (today - datetime.timedelta(days=offset)).isoformat()
        for offset in range(days, 0, -1)
    ]

def get_timestamp_str(timestamp):
    """Format a Unix timestamp as a local-time ``'YYYY-MM-DD'`` string."""
    local_fields = time.localtime(timestamp)
    return time.strftime('%Y-%m-%d', local_fields)


def get_guoke_price_web(goods_url):
    """Query www.tool168.cn for the price history of ``goods_url``.

    Performs three requests in sequence: a GET of the search page to read the
    ``checkCodeId`` value, a POST to ``/dm/ptinfo.php`` that exchanges it for
    a ``code``, and a POST to ``/dm/history.php`` with that ``code``.

    Returns:
        The stripped text of the third response, or ``None`` when that text
        contains the "not found" message checked at the end.

    Raises:
        AttributeError: if the first response has no ``checkCodeId`` input
            (``re.search`` returns ``None``).
        Exceptions from ``requests`` and ``json.loads`` are not caught here.
    """

    # Proxy setup: plug in your own proxy pool or cloud proxy here.

    # The line below is optional; while it stays commented out, do not pass
    # proxies=proxies to the requests.
    # proxies = get_proxy2()
    ua = random_web_ua()
    # NOTE(review): k is percent-encoded here and then passed through `params`
    # below, which requests encodes again — confirm the site expects that.
    k = quote_plus(goods_url)
    btnSearch = quote_plus('搜索')

    # --- Step 1: load the search page and read its checkCode ---
    url_01 = 'http://www.tool168.cn/?'
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        # 'Cookie':'PHPSESSID=l31o4o91itpmeh7m38ol196t47; Hm_lvt_61e842dc51946642fa309fd4e1c752aa=1547202812; Hm_lpvt_61e842dc51946642fa309fd4e1c752aa=1547283438',
        'Host': 'www.tool168.cn',
        'Referer': 'http://www.tool168.cn/history/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': ua,
        # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
    }

    params = {
        'm': 'history',
        'a': 'view',
        'k': k,
        'btnSearch': btnSearch
    }

    response_html_01 = requests.get(url=url_01, headers=header, params=params, verify=False,
                                    timeout=20)
    result_html_01 = response_html_01.text
    # Raises AttributeError when the page does not contain the hidden input.
    checkCode = re.search('id="checkCodeId" value="(.*?)"', result_html_01).group(1)

    # --- Step 2: exchange checkCode + product URL for a history `code` ---
    url_02 = "http://www.tool168.cn/dm/ptinfo.php"
    header = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Length': '108',
        'Content-Type': 'application/x-www-form-urlencoded',
        # 'Cookie':'PHPSESSID=l31o4o91itpmeh7m38ol196t47; Hm_lvt_61e842dc51946642fa309fd4e1c752aa=1547202812; Hm_lpvt_61e842dc51946642fa309fd4e1c752aa=1547210101',
        'Host': 'www.tool168.cn',
        'Origin': 'http://www.tool168.cn',
        'Referer': 'http://www.tool168.cn/?m=history&a=view&k={}&btnSearch={}'.format(k, btnSearch),
        'User-Agent': ua,
        # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }

    data = {
        # 'checkCode': "ce5e75b10ad46b1927895e0de48b5134",
        'checkCode': checkCode,
        'con': goods_url,
        # 'con': 'https://detail.tmall.com/item.htm?id=534068049215'
    }

    response_html_02 = requests.post(url=url_02, headers=header, data=data,  verify=False, timeout=20)
    result_html_02 = response_html_02.text
    # The response body is parsed as JSON; its "code" field feeds step 3.
    code = json.loads(result_html_02).get("code")

    # --- Step 3: fetch the history payload for that code ---
    url_03 = "http://www.tool168.cn/dm/history.php?"
    header = {
        'Accept': 'text/plain, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Length': '0',
        # NOTE(review): hard-coded session cookie — confirm it is still needed/valid.
        'Cookie':'PHPSESSID=l31o4o91itpmeh7m38ol196t47; Hm_lvt_61e842dc51946642fa309fd4e1c752aa=1547202812; Hm_lpvt_61e842dc51946642fa309fd4e1c752aa=1547203682',
        'Host': 'www.tool168.cn',
        'Origin': 'http://www.tool168.cn',
        # NOTE(review): 'Referer' appears twice in this literal; the second,
        # hard-coded entry overrides the first, so the goods_url-based value is
        # never sent. Confirm which one is intended.
        'Referer': 'http://www.tool168.cn/?m=history&a=view&k={}'.format(goods_url),
        'Referer': 'http://www.tool168.cn/?m=history&a=view&k=https%3A%2F%2Fdetail.tmall.com%2Fitem.htm%3Fid%3D534068049217',
        'User-Agent': ua,
        # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }

    params = {
        "code": code,
        # 'code': "0f72c0c84e6f722de6fb57f9feb3691e26545bc2991ffc290ed35271bb85549977d831788ac687b919d2670d35df4641b9ccc7be6e917dfc",
        't': ''
    }
    response_html_03 = requests.post(url=url_03, headers=header, params=params,  verify=False,
                                     timeout=20)
    # Force UTF-8 decoding of the body before reading .text.
    response_html_03.encoding = "utf-8"
    result_response = response_html_03.text.strip()
    # result_response is a str here, so the membership test below presumably
    # cannot raise and the except branch looks unreachable.
    try:
        # NOTE(review): this is an exact substring match, punctuation included —
        # confirm it equals the message the site really returns.
        if "对不起,没有找到。" in result_response:
            # Unused: the function returns None rather than this message.
            result = "对不起,该商品未收录或加载异常!"
            return None
        else:
            return result_response
    except Exception as e:
        return None
def parse(result_history_price, thirty_date=None):
    """Reduce raw price records to the price points of the last 30 days.

    Args:
        result_history_price: Iterable of record strings of the form
            ``[(year,month,day),price]`` (extra surrounding brackets are
            tolerated). ``month`` is zero-based, which is why 1 is added
            below. Records are expected in ascending date order.
        thirty_date: First day of the window as ``'YYYY-MM-DD'``. Defaults to
            ``days_ago()[0]``.

    Returns:
        A dict mapping ``'YYYY-MM-DD'`` to the price as ``float`` for every
        record dated on or after ``thirty_date``. If older records exist and
        none falls exactly on ``thirty_date``, an entry for ``thirty_date``
        carrying the last earlier price is put first. If no record falls in
        the window, the dict holds only ``thirty_date`` with the price of the
        last record. Empty input yields an empty dict.

    Raises:
        ValueError: if a record does not match the expected form, or a date
            field or price is not numeric.
    """
    if thirty_date is None:
        thirty_date = days_ago()[0]
    cutoff = int(thirty_date.replace('-', ''))

    # One (yyyymmdd sort key, 'YYYY-MM-DD', raw price text) triple per record,
    # kept in input order.
    history = []
    for record in result_history_price:
        match = re.search(r'\(([^)]*)\),(.*?)\]', record)
        if match is None:
            raise ValueError(f"unrecognised price record: {record!r}")
        date_fields = match.group(1).split(',')
        if len(date_fields) < 3:
            raise ValueError(f"incomplete date in price record: {record!r}")
        year, month, day = (int(field) for field in date_fields[:3])
        month += 1  # the source months are zero-based
        history.append((
            year * 10000 + month * 100 + day,
            f"{year}-{month:02d}-{day:02d}",
            match.group(2),
        ))

    if not history:
        return {}

    recent = [entry for entry in history if entry[0] >= cutoff]
    if not recent:
        # No price change inside the window: the last known price still applies.
        window = [(thirty_date, history[-1][2])]
    else:
        window = [(date_str, price) for _, date_str, price in recent]
        starts_on_cutoff = any(key == cutoff for key, _, _ in recent)
        if len(recent) < len(history) and not starts_on_cutoff:
            # Carry over the price that was in effect on the first day of the
            # window: the record immediately before the cutoff.
            for (prev_key, _, prev_price), (next_key, _, _) in zip(history, history[1:]):
                if prev_key < cutoff < next_key:
                    window.insert(0, (thirty_date, prev_price))
                    break

    return {date_str: float(price) for date_str, price in window}


def get_json(goods_url):
    """Fetch and parse the 30-day price history for ``goods_url``.

    Returns:
        ``{'thirty_days_price_dict': {date: price}, 'title': str}`` on
        success. ``None`` when the request fails, no data comes back, or the
        response cannot be parsed; a short message is printed in those cases.
    """
    try:
        result = get_guoke_price_web(goods_url)
    except Exception:
        # Best-effort boundary: any request or scraping error means "no data".
        print(goods_url,'--response_erro')
        return None

    if not result:
        # get_guoke_price_web returns None when the site reports no match.
        print(goods_url, '--parse_erro')
        return None

    try:
        title = re.search(r">(.*?)<", result).group(1)
        series_text = re.search(r"\[.*\]", result, re.S).group().replace("Date.UTC", "")
        # One "[(y,m,d),price]" string per data point. Splitting the text on
        # every comma would also cut through the date tuple.
        records = re.findall(r"\[\([^)]*\),[^\]]*\]", series_text)
        if not records:
            raise ValueError("no price records found")
        thirty_days_price_dict = parse(records)
    except (AttributeError, IndexError, TypeError, ValueError):
        print(goods_url, '--parse_erro')
        return None

    return {
        'thirty_days_price_dict': thirty_days_price_dict,
        'title': title,
    }

history_price.html (里面用到了layui前端技术)

<!-- Page fragment: product-URL search form plus the container the ECharts chart is drawn into. -->
<script src="static/js/lay-module/echarts/echarts.js"></script>
<script src="../static/js/jquery-1.11.1.min.js"></script>
<!-- Note: adjust the two script paths above to match your own project layout. -->
<div class="layuimini-container layuimini-page-anim">
    <div class="layuimini-main">
        <fieldset class="table-search-fieldset">
            <legend>搜索信息</legend>
            <div style="margin: 10px 10px 10px 10px">
                <form class="layui-form layui-form-pane" action="get_goodsid" id="currentformid">
                    <div class="layui-form-item">
                        <div class="layui-inline">
                            <label class="layui-form-label">商品url</label>
                            <div class="layui-input-inline" style="width: 500px;">
                                <input type="text" name="goods_url" autocomplete="off" class="layui-input">
                            </div>
                        </div>
                        <div class="layui-inline">
                            <button type="submit" class="layui-btn layui-btn-primary"  lay-submit lay-filter="data-search-btn"><i class="layui-icon"></i> 搜 索</button>
                            <button type="reset" class="layui-btn layui-btn-primary">重置</button>
                        </div>
                    </div>
                </form>
            </div>
        </fieldset>
            <div class="layui-card">
                <div class="layui-card-header"><i class="fa fa-line-chart icon"></i>历史价格</div>
                <div class="layui-card-body">
                    <div id="echarts_history" style="width: 100%;min-height:500px"></div>
                </div>
            </div>

    </div>
</div>

<script>
    layui.use(['form', 'table','miniPage','element'], function () {
        var $ = layui.jquery,
            form = layui.form,
            table = layui.table,
            miniPage = layui.miniPage;
        // 监听搜索操作
        form.on('submit(data-search-btn)', function (data) {
            $.ajax({
                url:'/get_goodsid',
                method:'post',
                data:data.field,
                dataType:'JSON',
                success:function (res) {
                    refalsh2()
                    if(res.code='0'){
                        // parent.closeIframe(res.msg);
                    }else{
                        alert(res.msg);
                    }
                },
                error:function (data) {
                }
            });
            return false;  //阻止跳转
        });

    });
</script>
<script>
function refalsh2() {
    // 1实例化对象
    var myChart1 = echarts.init(document.getElementById("echarts_history"));
    // 2. 指定配置项和数据
    myChart1.setOption({
        title: {
        text: '该商品的历史价格',
        // subtext: '历史价格'
        },
        tooltip: {
            trigger: 'axis'
        },
        legend: {
            data: ['价格'],
            y:'center'
        },
        toolbox: {
            show: true,
            feature: {
                dataZoom: {
                    yAxisIndex: 'none'
                },
                dataView: {readOnly: false},
                magicType: {type: ['line', 'bar']},
                restore: {},
                saveAsImage: {}
            }
        },
        xAxis: {
            type: 'category',
            boundaryGap: false,
            // data: ['周一', '周二', '周三', '周四', '周五', '周六', '周日']
        },
        yAxis: {
            type: 'value',
            scale:true,
            axisLabel: {
                formatter: '¥{value}'
            }
        },
        series: [
            {
                // name: '价格',
                type: 'line',
                step:'end',
                // data: [11, 11, 15, 13, 12, 13, 10],
                symbolSize: 3,
                markPoint: {
                    data: [
                        {type: 'max', name: '最大值'},
                        {type: 'min', name: '最小值'}
                    ]
                },
                markLine: {
                    data: [
                        {type: 'average', name: '平均值'}
                    ]
                }
            }
        ]
    });
    // // 3. 把配置项给实例对象
    // myChart1.setOption(option);
    // 异步加载数据
    $.get('history_data').done(function (data) {
        myChart1.hideLoading(); // 隐藏加载动画

        // 填入数据
        myChart1.setOption({
            title:{
                text:data.title
            },
            xAxis:{
                data:data['date_list']
            },
            series: [{
                data: data['price_list'],
            }]
        });
        // 4. 让图表跟随屏幕自动的去适应
        window.addEventListener("resize", function () {
            myChart1.resize();
        });
    })}
</script>
哈夫曼树(Huffman Tree)是哈夫曼编码这一常用数据压缩算法所使用的二叉树结构,压缩正是通过构建这棵二叉树来实现的。具体来说,哈夫曼树的构建过程是:首先将每个字符出现的频率作为权值,为每个字符建立一个只含单个节点的树,这些树构成一个森林;然后从森林中选择两个权值最小的节点作为左右子节点,构建一棵新的二叉树,并将新的二叉树的根节点的权值设为左右子节点权值之和;不断重复这个过程,直到最后只剩下一棵二叉树,即为哈夫曼树。在对数据进行压缩时,对于每个字符,使用哈夫曼树中从根节点到该字符所在叶子节点的路径(向左记为0,向右记为1)作为它的编码,出现频率越高的字符编码越短,即可实现高效的数据压缩。

以下是一个简单的哈夫曼树构建的代码实现(C++):

```
#include <iostream>
#include <queue>
#include <vector>
using namespace std;

struct Node {
    char ch;     // 字符
    int freq;    // 频率
    Node* left;  // 左子节点
    Node* right; // 右子节点
    Node(char c, int f): ch(c), freq(f), left(nullptr), right(nullptr) {}
};

struct cmp {
    bool operator()(Node* a, Node* b) {
        return a->freq > b->freq;
    }
};

Node* buildHuffmanTree(vector<int>& freq, vector<char>& ch) {
    priority_queue<Node*, vector<Node*>, cmp> pq;
    for (int i = 0; i < freq.size(); i++) {
        pq.push(new Node(ch[i], freq[i]));
    }
    while (pq.size() > 1) {
        Node* left = pq.top(); pq.pop();
        Node* right = pq.top(); pq.pop();
        Node* parent = new Node('\0', left->freq + right->freq);
        parent->left = left;
        parent->right = right;
        pq.push(parent);
    }
    return pq.top();
}

void printHuffmanCode(Node* root, string code) {
    if (root == nullptr) return;
    if (root->ch != '\0') {
        cout << root->ch << ": " << code << endl;
    }
    printHuffmanCode(root->left, code + "0");
    printHuffmanCode(root->right, code + "1");
}

int main() {
    vector<char> ch = {'a', 'b', 'c', 'd', 'e', 'f'};
    vector<int> freq = {5, 9, 12, 13, 16, 45};
    Node* root = buildHuffmanTree(freq, ch);
    printHuffmanCode(root, "");
    return 0;
}
```

以上代码以字符集 {a,b,c,d,e,f} 及其给定的出现频率作为输入(频率在 `main` 中直接给出,并非由代码统计得到),并通过优先队列(小顶堆)使节点按照权值(即频率)从小到大依次出队。最后调用 `buildHuffmanTree` 函数构建哈夫曼树,并通过 `printHuffmanCode` 函数输出每个字符对应的编码。
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值