【python】matplotlib绘图

import numpy as np

def main():
    """Placeholder entry point; it performs no work and returns None."""
    return None

def draw(data, label_list, whole, picture_name):
    """Plot scatter series on a left axis and a line on a twin right axis.

    Args:
        data: Sequence of series; ``data[i]`` is plotted against x positions
            0..55, so each series is expected to hold 56 values.
        label_list: Legend label for each series, indexed like ``data``.
        whole: Values drawn as a black line on the secondary y axis, also
            against x positions 0..55.
        picture_name: Path passed to ``plt.savefig``.

    The x tick labels are read from the file ``class.56`` in the current
    working directory: the third ``::###::``-separated field of each line.
    The figure is saved first and then displayed with ``plt.show()``.
    """
    from matplotlib import pyplot as plt
    plt.rcParams['font.sans-serif'] = ['SimHei']

    # Canvas size.
    fig = plt.figure(figsize=(40, 8), dpi=200)

    # X axis: one tick per class, labelled from the class description file.
    tick_positions = range(56)
    with open("class.56", "r", encoding="utf8") as fin:
        raw_lines = fin.readlines()
    tick_names = [entry.split("::###::")[2] for entry in raw_lines]
    plt.xticks(tick_positions, tick_names, fontproperties='SimHei')

    # Per-series marker styling (indexed by series position).
    palette = np.asarray(['green', 'red', 'blue', "orange", "pink", "gray", "brown", "aqua", "blueviolet"])
    marker_sizes = np.asarray([10] * 10)

    x_values = list(range(56))

    # Left axis: one scatter series per entry in ``data``.
    ax1 = fig.add_subplot(111)
    for idx, series in enumerate(data):
        ax1.scatter(
            x_values,
            series,
            c=palette[idx],
            s=marker_sizes[idx],
            label=label_list[idx],
        )
    ax1.set_ylabel('点击比例')
    ax1.legend(loc=2)

    # Right axis: the ``whole`` values as a solid black line.
    ax2 = ax1.twinx()
    ax2.plot(x_values, whole, "-", color="black", label="人数")
    ax2.set_ylabel('访问人数')
    ax2.legend(loc=0)

    plt.savefig(picture_name)
    plt.show()

def find_age_period(age, seg_list):
    """Return the index of the first bucket whose upper bound covers ``age``.

    Args:
        age: The age to classify.
        seg_list: Ascending list of inclusive upper bounds, one per bucket
            (e.g. ``[12, 19, 25]`` for the buckets 0-12, 13-19, 20-25).

    Returns:
        The zero-based bucket index, or the string ``"error"`` when ``age``
        is larger than every bound in ``seg_list``.
    """
    for index, upper_bound in enumerate(seg_list):
        # Bounds are inclusive: the bucket labels used with this helper
        # ("0-12", "13-19", ...) place an age equal to the bound in the
        # lower bucket, so the comparison must be <= rather than <.
        if age <= upper_bound:
            return index
    return "error"

def get_date(sample_path_list, feature_name):
    """Aggregate click statistics per title class, split by one user feature.

    Each sample file is read as UTF-8 text with one tab-separated record per
    line. Within a record:

    * column 2 starting with ``"ERROR"`` marks a record that is skipped;
    * column -4 holds ``"||"``-separated user fields, with age at index 1,
      gender at 2, grade at 3 and city level at 4;
    * column -3 is the click flag (parsed as float, then truncated to int and
      used as index 0/1 into the counters);
    * column -1 is a comma-separated float vector whose argmax is taken as
      the title class index.

    Only the user field needed for ``feature_name`` is parsed, so a malformed
    value in an unrelated field does not abort the aggregation.

    Args:
        sample_path_list: Iterable of sample file paths.
        feature_name: One of ``"gender"``, ``"grade"``, ``"city_level"``,
            ``"age"`` or ``"all"``.

    Returns:
        ``None`` (after printing ``"wrong feature"``) for an unknown feature.
        Otherwise a tuple ``(click_ratio, label_list, whole)`` where
        ``click_ratio[j][i]`` is the clicked fraction of feature bucket ``j``
        in title class ``i`` (0 when the bucket has no records),
        ``label_list`` names the buckets, and ``whole[i]`` is the number of
        records counted for title class ``i``.
    """
    class_count = 56  # number of title classes; one counter container each
    age_seg_list = [12, 19, 25, 30, 40, 50, 60, 70, 100000]
    label_dict = {
        "gender": ["未知", "男", "女"],
        "grade": ["unknow", "doctor", "硕士", "本科", "高中", "初中", "小学", "大专"],
        "city_level": ["北上广深", "二线城市", "三线城市", "四线城市", "五线城市", "港澳台", "国外", "未知"],
        "age": ["0-12", "13-19", "20-25", "26-30", "31-40", "40-50", "51-60", "61-70", "70+"],
        "all": ["click-ratio"],
    }
    # Maps the "||"-separated user fields of a record to a bucket index.
    extractors = {
        "gender": lambda fields: int(fields[2]),
        "grade": lambda fields: int(fields[3]),
        # Raw value minus one; a raw 0 becomes -1 and therefore lands in the
        # last bucket through negative indexing.
        # NOTE(review): presumably intentional since the last label is
        # "未知" -- confirm against the data producer.
        "city_level": lambda fields: int(fields[4]) - 1,
        "age": lambda fields: find_age_period(int(fields[1]), age_seg_list),
        "all": lambda fields: 0,
    }

    if feature_name not in label_dict:
        print("wrong feature")
        return None

    label_list = label_dict[feature_name]
    class_num = len(label_list)
    extract_bucket = extractors[feature_name]

    # data[title_class][bucket] == [not-clicked count, clicked count]
    data = [[[0, 0] for _ in range(class_num)] for _ in range(class_count)]
    for sample_path in sample_path_list:
        with open(sample_path, "r", encoding="utf8") as fin:
            for line in fin:
                columns = line.split("\t")
                if columns[2].startswith("ERROR"):
                    continue
                clicked = int(float(columns[-3]))
                title_vec = [float(v) for v in columns[-1].split(",")]
                class_index = int(np.argmax(title_vec))
                bucket = extract_bucket(columns[-4].split("||"))
                data[class_index][bucket][clicked] += 1

    click_ratio = [[0 for _ in range(class_count)] for _ in range(class_num)]
    whole = [0 for _ in range(class_count)]
    for class_index, buckets in enumerate(data):
        for bucket, counts in enumerate(buckets):
            total = sum(counts)
            whole[class_index] += total
            if total > 0:
                click_ratio[bucket][class_index] = counts[1] / total

    return click_ratio, label_list, whole


if __name__ == "__main__":
    # Script entry: aggregate the three daily sample files by gender and
    # render the result to "<feature>.png".
    feature = "gender"
    sample_list = [
        "sample.20210115",
        "sample.20210116",
        "sample.20210117",
    ]
    picture_name = feature + ".png"
    ratios, labels, visitors = get_date(sample_list, feature)
    draw(ratios, labels, visitors, picture_name)

效果如下:

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值