简书python估算遗传相关系数_用Python 实现 POI类别与人口密度相关性

将POI的类别抽象为文档中的词

划分的区域 抽象为 文档

这样就可以用各类别的 TF-IDF 值与人口密度计算协方差,进而得到二者的相关系数:相关系数越大,则该类别对人口的影响越大。后续可利用得出的结论研究分析更多的问题。

下面给出用 Python 计算 TF-IDF 与人口密度相关系数的实现。

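image.png(此处原为公式图片,即皮尔逊相关系数):

$$
\rho_i = \frac{N\sum_{j} TL_{ij}\,PD_j - \sum_{j} TL_{ij}\,\sum_{j} PD_j}{\sqrt{N\sum_{j} TL_{ij}^{2} - \left(\sum_{j} TL_{ij}\right)^{2}}\;\sqrt{N\sum_{j} PD_j^{2} - \left(\sum_{j} PD_j\right)^{2}}}
$$

$TL_{ij}$ 为第 $i$ 类 POI 在第 $j$ 个区域的 TF-IDF 值,

$PD_j$ 为第 $j$ 个区域的人口密度值,$N$ 为包含第 $i$ 类 POI 的区域数,求和只对这些区域进行。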

用python实现该公式:

def relative(tf_idf,idf,area):
    """Return the Pearson correlation between TF-IDF and population per category.

    For every category key in ``idf``, only the regions whose ``tf_idf`` entry
    contains that category take part in the computation.

    Args:
        tf_idf: mapping ``region -> {category: tf-idf value}``.
        idf: mapping whose keys are the categories to evaluate (the values
            are not used here).
        area: mapping ``region -> population value``; it must contain every
            region in which an evaluated category occurs.

    Returns:
        dict ``category -> correlation coefficient``. A category whose
        correlation is undefined (it occurs in fewer than two regions, or
        either variable is constant across its regions) is reported as 0.0.
    """
    rou = {}
    for category in idf:
        n = 0            # number of regions that contain this category
        sum_tl = 0.0     # sum of TF-IDF values
        sum_tl_sq = 0.0  # sum of squared TF-IDF values
        sum_pd = 0.0     # sum of population values
        sum_pd_sq = 0.0  # sum of squared population values
        sum_tl_pd = 0.0  # sum of TF-IDF * population products
        for region, weights in tf_idf.items():
            if category not in weights:
                continue
            tl = weights[category]
            pd = area[region]
            n += 1
            sum_tl += tl
            sum_tl_sq += tl * tl
            sum_pd += pd
            sum_pd_sq += pd * pd
            sum_tl_pd += tl * pd

        numerator = n * sum_tl_pd - sum_tl * sum_pd
        # Clamp at zero: rounding can push a zero variance slightly negative,
        # and a negative base under ** 0.5 would yield a complex number.
        var_tl = max(n * sum_tl_sq - sum_tl * sum_tl, 0.0)
        var_pd = max(n * sum_pd_sq - sum_pd * sum_pd, 0.0)
        # Pearson's denominator is the PRODUCT of the two square roots.
        denominator = (var_tl ** 0.5) * (var_pd ** 0.5)
        rou[category] = numerator / denominator if denominator else 0.0
    return rou

完整的代码如下:

输入数据为:

data1.json

{

"id":POI的编号

"type":POI的类别

"number":POI所属区域的区域编号

}

area.json

{

"区域编号":区域人口值

}

输出为:

[

["h", 232.7468753744591],

["f",511.4907105256037],

["e",614.9901766893532],

["g",697.3614562757045],

["d",812.2365216229458],

["b",878.0307964717691],

["a",1137.981560137959],

["c",1448.4753152430358]

]

a、b、c、d、e、f、g、h 分别代表不同类别的 POI。注意:皮尔逊相关系数的取值范围应为 [-1, 1],上面示例中的数值远超该范围,因此它们只用于说明输出文件的格式,不能当作有效的相关系数来解读。

import csv

import json

import math

# Path of the POI input JSON file (the __main__ block assigns the same value again before use).
filename = "data1.json"

def data_parse(filename):
    """Load POI records and count them per region and per category.

    The file is expected to hold a JSON list of records, each with a
    ``"number"`` field (region code) and a ``"type"`` field (POI category).

    Args:
        filename: path of the UTF-8 encoded JSON file to read.

    Returns:
        dict ``region code -> {category: number of POIs of that category}``.
    """
    number_counts = {}
    with open(filename, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # The file is fully parsed, so counting happens after it is closed.
    for poi in data:
        region_counts = number_counts.setdefault(poi["number"], {})
        region_counts[poi["type"]] = region_counts.get(poi["type"], 0) + 1
    return number_counts

def tf(number_counts):
    """Convert per-region category counts into term frequencies, in place.

    TF = (POIs of the category in the region) / (all POIs in the region).

    Args:
        number_counts: mapping ``region -> {category: count}``. The inner
            dicts are overwritten with the frequencies.

    Returns:
        The same ``number_counts`` object, now holding frequencies.
    """
    for counts in number_counts.values():
        total = sum(counts.values())
        if not total:
            # Nothing to normalise; leaving the values avoids dividing by zero.
            continue
        for poi_type in counts:
            counts[poi_type] = counts[poi_type] / total
    return number_counts

def idf(number_counts, categories=("a", "b", "c", "d", "e", "f", "g", "h")):
    """Compute the inverse document frequency of each POI category.

    IDF = ln(number of regions / number of regions containing the category).

    Args:
        number_counts: mapping ``region -> {category: value}``; only the
            presence of a category key in a region is used.
        categories: categories to evaluate. Defaults to the eight category
            codes ``"a"`` to ``"h"``.

    Returns:
        dict ``category -> IDF``, in the order given by ``categories``. A
        category that occurs in no region gets 0.0 instead of a division by
        zero.
    """
    total_regions = len(number_counts)
    result = {}
    for category in categories:
        containing = sum(1 for counts in number_counts.values() if category in counts)
        if containing:
            result[category] = math.log(total_regions / containing)
        else:
            result[category] = 0.0
    return result

def TFIDF(tf,idf):
    """Multiply each term frequency by its category's IDF, in place.

    Args:
        tf: mapping ``region -> {category: term frequency}``. The inner
            dicts are overwritten with the TF-IDF values.
        idf: mapping ``category -> IDF``.

    Returns:
        The same ``tf`` object, now holding TF-IDF values. Categories that
        have no entry in ``idf`` are left unchanged.
    """
    for weights in tf.values():
        for category in weights:
            if category in idf:
                weights[category] = weights[category] * idf[category]
    return tf

def writetojson(number_counts,filename):
    """Write ``number_counts`` to ``filename`` as indented JSON.

    The file is overwritten: appending a second JSON document to an existing
    one would leave a file that is no longer valid JSON. It is written as
    UTF-8 because ``ensure_ascii=False`` emits non-ASCII characters verbatim.

    Args:
        number_counts: any JSON-serialisable object.
        filename: path of the output file.
    """
    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(number_counts, json_file, indent=2, ensure_ascii=False)

def read_area(filename):
    """Load and return the JSON content of ``filename``.

    Args:
        filename: path of a UTF-8 encoded JSON file.

    Returns:
        The parsed JSON value.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)

def relative(tf_idf,idf,area):
    """Return the Pearson correlation between TF-IDF and population per category.

    For every category key in ``idf``, only the regions whose ``tf_idf`` entry
    contains that category take part in the computation.

    Args:
        tf_idf: mapping ``region -> {category: tf-idf value}``.
        idf: mapping whose keys are the categories to evaluate (the values
            are not used here).
        area: mapping ``region -> population value``; it must contain every
            region in which an evaluated category occurs.

    Returns:
        dict ``category -> correlation coefficient``. A category whose
        correlation is undefined (it occurs in fewer than two regions, or
        either variable is constant across its regions) is reported as 0.0.
    """
    rou = {}
    for category in idf:
        n = 0            # number of regions that contain this category
        sum_tl = 0.0     # sum of TF-IDF values
        sum_tl_sq = 0.0  # sum of squared TF-IDF values
        sum_pd = 0.0     # sum of population values
        sum_pd_sq = 0.0  # sum of squared population values
        sum_tl_pd = 0.0  # sum of TF-IDF * population products
        for region, weights in tf_idf.items():
            if category not in weights:
                continue
            tl = weights[category]
            pd = area[region]
            n += 1
            sum_tl += tl
            sum_tl_sq += tl * tl
            sum_pd += pd
            sum_pd_sq += pd * pd
            sum_tl_pd += tl * pd

        numerator = n * sum_tl_pd - sum_tl * sum_pd
        # Clamp at zero: rounding can push a zero variance slightly negative,
        # and a negative base under ** 0.5 would yield a complex number.
        var_tl = max(n * sum_tl_sq - sum_tl * sum_tl, 0.0)
        var_pd = max(n * sum_pd_sq - sum_pd * sum_pd, 0.0)
        # Pearson's denominator is the PRODUCT of the two square roots.
        denominator = (var_tl ** 0.5) * (var_pd ** 0.5)
        rou[category] = numerator / denominator if denominator else 0.0
    return rou

if __name__ == '__main__':
    # Pipeline: count POIs per region -> TF -> IDF -> TF-IDF -> correlation
    # with the per-region population values -> sorted result written to disk.
    filename = "data1.json"
    number_counts = data_parse(filename)
    # Distinct local names: assigning to `tf` / `idf` would overwrite the
    # functions of the same name at module level.
    tf_values = tf(number_counts)
    idf_values = idf(number_counts)
    tf_idf = TFIDF(tf_values, idf_values)
    print(tf_idf)
    area = read_area('area.json')
    rou = relative(tf_idf, idf_values, area)
    # Ascending by coefficient; the result is a list of (category, value) pairs.
    rou = sorted(rou.items(), key=lambda x: x[1], reverse=False)
    writetojson(rou, "rou.json")

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值