描述性数据分析-Python实现

描述性数据分析-Python实现

一、集中趋势

  • 算术平均值

    # Sample values used by the arithmetic-mean examples in this section.
    data=[1,4,8,10,12]
    # Pure-Python implementation
    def arithmetic_mean(list):
        """Return the arithmetic mean of the values in ``list``.

        The mean is the sum of all values divided by the number of values.
        The parameter name shadows the built-in ``list``; it is kept so that
        callers passing it by keyword keep working.

        Raises ZeroDivisionError when ``list`` is empty.
        """
        count = len(list)
        total = 0
        for value in list:
            total = total + value
        return total / count
    
    print(arithmetic_mean(data))


    # NumPy implementation
    import numpy as np
    # Store the result under its own name. Reusing `arithmetic_mean` here would
    # replace the function with a number, and the mean-deviation and variance
    # sections further down still call arithmetic_mean(...).
    arithmetic_mean_np = np.mean(data)
    print(arithmetic_mean_np)
    
    
  • 加权平均值

    # Sample values and one weight per value for the weighted-average examples.
    data=[1,4,8,10,12]
    weight=[0.1,0.2,0.3,0.4,0.5]
    # Pure-Python implementation
    def weighted_average(data_list,weight_list):
        """Return the weighted mean of ``data_list``.

        The weighted mean is the sum of each value multiplied by its weight,
        divided by the sum of the weights.

        Args:
            data_list: sequence of numeric values.
            weight_list: sequence of weights, one per value.

        Raises:
            ValueError: if the two sequences differ in length. Previously extra
                values beyond the last weight were silently ignored.
            ZeroDivisionError: if the weights sum to zero (including the case
                where both sequences are empty).
        """
        if len(data_list) != len(weight_list):
            raise ValueError("data_list and weight_list must have the same length")
        weighted_total = sum(
            value * weight for value, weight in zip(data_list, weight_list)
        )
        return weighted_total / sum(weight_list)
    
    print(weighted_average(data,weight))

    # NumPy implementation
    import numpy as np
    # Use a separate name for the result so the weighted_average function is
    # not overwritten by a number and stays callable afterwards.
    weighted_average_np = np.average(data,weights=weight)
    print(weighted_average_np)
    
  • 几何平均值

    # Sample values used by the geometric-mean examples in this section.
    data=[1,4,8,10,12]
    # Pure-Python implementation
    def geometric_mean(data_list):
        """Return the geometric mean of ``data_list``.

        The geometric mean is the n-th root of the product of the values,
        where n is the number of values.

        Raises ZeroDivisionError when ``data_list`` is empty.
        """
        product = 1
        for value in data_list:
            product = product * value
        return product ** (1.0 / len(data_list))
    
    print(geometric_mean(data))

    # SciPy implementation
    from scipy import stats as sts
    # Keep the result under its own name so the geometric_mean function is not
    # replaced. print() already converts the value to text, so no str() call
    # is needed.
    geometric_mean_scipy = sts.gmean(data)
    print(geometric_mean_scipy)
    
  • 众数

    # Sample values used by the mode examples; 12 occurs twice.
    data=[1,4,8,10,12,12]
    # Pure-Python implementation
    def majority_element(nums):
        """Return the mode (most frequent value) of ``nums``.

        When several values share the highest count, the one whose first
        occurrence comes latest in ``nums`` is returned.

        Raises ValueError when ``nums`` is empty.
        """
        # Count how many times each value occurs.
        tally = {}
        for value in nums:
            tally[value] = tally.get(value, 0) + 1
        top_count = max(tally.values())
        # Walk the counts in insertion order; the last match wins on ties.
        winner = None
        for value, occurrences in tally.items():
            if occurrences == top_count:
                winner = value
        return winner
    print(majority_element(data))


    # NumPy implementation
    import numpy as np
    # np.bincount counts occurrences of each non-negative integer value, so
    # this approach only applies to non-negative integer data.
    counts = np.bincount(data)
    # The index with the largest count is the mode. The result gets its own
    # name so the majority_element function above is not overwritten.
    majority_element_np = np.argmax(counts)
    print(majority_element_np)
    
  • 中位数

    # Sample values used by the median examples in this section.
    data=[1,4,8,10,12,12]
    # Pure-Python implementation
    def median(data_list):
        """Return the median of ``data_list``.

        For an odd number of values this is the middle value of the sorted
        data; for an even number it is the mean of the two middle values.

        The input is not modified: a sorted copy is used instead of sorting
        the caller's list in place.

        Raises IndexError when ``data_list`` is empty.
        """
        ordered = sorted(data_list)
        length = len(ordered)
        middle = length // 2
        if length % 2 == 1:
            return ordered[middle]
        return (ordered[middle - 1] + ordered[middle]) / 2
    print(median(data))

    # NumPy implementation
    import numpy as np
    # Keep the result under its own name so the median function defined above
    # is not replaced by a number.
    median_np = np.median(data)
    print(median_np)
    

二、离散程度

  • 极差

    # Sample values used by the range examples in this section.
    data=[1,4,8,10,12,12]
    # Pure-Python implementation
    def very_poor(data_list):
        """Return the range of ``data_list``: its largest value minus its smallest.

        Raises ValueError when ``data_list`` is empty.
        """
        highest = max(data_list)
        lowest = min(data_list)
        return highest - lowest
    
    print(very_poor(data))

    # NumPy implementation
    import numpy as np
    # np.ptp ("peak to peak") is max - min. The result gets its own name so
    # the very_poor function above is not overwritten.
    very_poor_np = np.ptp(data)
    print(very_poor_np)
    
  • 平均差

    # Sample values used by the mean-absolute-deviation examples.
    data=[4,8]
    # Pure-Python implementation
    def average_difference(data):
        """Return the mean absolute deviation of ``data``.

        This is the sum of the absolute differences between each value and
        the mean, divided by the number of values.

        The mean is computed here rather than through the module-level
        ``arithmetic_mean`` name, because that name is rebound to a number by
        the NumPy example earlier in the file and is no longer callable.

        Raises ZeroDivisionError when ``data`` is empty.
        """
        count = len(data)
        mean = sum(data) / count
        return sum(abs(value - mean) for value in data) / count
    
    print(average_difference(data))

    # NumPy implementation
    import numpy as np
    a = np.array(data)
    # Mean of the absolute deviations from the mean. The result gets its own
    # name so the average_difference function above is not overwritten.
    average_difference_np = np.sum(abs(a-np.mean(a)))/len(a)
    print(average_difference_np)
    
  • 方差和标准差

    # Sample values used by the variance and standard-deviation examples.
    data=[1,4,8,10,12]
    # Pure-Python implementation
    def variance(data_list):
        """Return the population variance of ``data_list``.

        This is the mean of the squared differences between each value and
        the mean; the sum is divided by the number of values (not n - 1).

        The mean is computed here rather than through the module-level
        ``arithmetic_mean`` name, because that name is rebound to a number by
        the NumPy example earlier in the file and is no longer callable.

        Raises ZeroDivisionError when ``data_list`` is empty.
        """
        count = len(data_list)
        mean = sum(data_list) / count
        return sum((value - mean) ** 2 for value in data_list) / count
    
    
    def standard_deviation(data_list):
        """Return the standard deviation: the square root of ``variance(data_list)``."""
        spread = variance(data_list)
        return spread ** (1 / 2)
    
    
    print(variance(data))
    print(standard_deviation(data))


    # NumPy implementation
    import numpy  as np
    variance_np = np.var(data)  # variance
    std_np = np.std(data)  # standard deviation
    print(variance_np)
    print(std_np)
    
  • 四分位极差和四分位差以及离散系数

    import numpy  as np
    data=[1,4,8,10,12]
    upper_quartile = np.percentile(data,75)  # 75th percentile
    lower_quartile = np.percentile(data,25)  # 25th percentile
    print(upper_quartile)
    print(lower_quartile)
    # Interquartile range: distance between the two quartiles
    print(upper_quartile - lower_quartile)
    # Coefficient of variation: standard deviation relative to the mean
    print(np.std(data)/np.mean(data))
    
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值