linux 合并csv文件 cat,Python:将两个CSV文件合并为多级JSON

这里还有另一种方案。我尽量让它在需要添加更多文件时更易于管理:脚本可以在命令行上运行,每个要合并的文件对应一个命令行参数。基因/样本名称存储在字典中,以提高查找效率。所需 JSON 对象的格式化由每个类的 format() 方法完成。希望对你有帮助。

import csv, json, sys

class Sample(object):
    """A named sample together with the list of extras records collected for it."""

    def __init__(self, name, extras):
        """Create a sample holding one initial extras record.

        Args:
            name: The sample name.
            extras: The first extras record; it becomes the only element
                of a newly created list.
        """
        self.name = name
        self.extras = [extras]

    def format(self):
        """Return this sample as a dict with the keys 'sample' and 'extras'."""
        return {'sample': self.name, 'extras': self.extras}

    def add_extras(self, extras):
        """Append every item of the iterable ``extras`` to this sample's list.

        New records are always added; nothing is de-duplicated or merged.
        """
        self.extras.extend(extras)

class Gene(object):
    """A named gene and the samples recorded for it."""

    def __init__(self, name, samples):
        """Create a gene.

        Args:
            name: The gene name.
            samples: A dict of sample objects. It is stored as-is (not
                copied); ``create_or_add_samples`` looks samples up in it
                by their ``name`` attribute.
        """
        self.name = name
        self.samples = samples

    def format(self):
        """Return this gene as a dict with the keys 'gene' and 'samples'.

        'samples' is the list of each sample's ``format()`` result, sorted
        by the value stored under its 'sample' key.
        """
        formatted = [sample.format() for sample in self.samples.values()]
        formatted.sort(key=lambda entry: entry['sample'])
        return {'gene': self.name, 'samples': formatted}

    def create_or_add_samples(self, new_samples):
        """Merge the samples in the dict ``new_samples`` into this gene.

        A sample whose name is already present has its extras appended to
        the existing sample; any other sample is stored under its name.
        """
        for sample in new_samples.values():
            if sample.name in self.samples:
                self.samples[sample.name].add_extras(sample.extras)
            else:
                self.samples[sample.name] = sample

class Genes(object):
    """A collection of gene objects keyed by gene name."""

    def __init__(self):
        """Create an empty collection."""
        self.genes = {}

    def format(self):
        """Return the ``format()`` result of every gene as a list.

        The list is sorted by the value each result stores under its
        'gene' key.
        """
        formatted = [gene.format() for gene in self.genes.values()]
        formatted.sort(key=lambda entry: entry['gene'])
        return formatted

    def create_or_add_gene(self, gene):
        """Store ``gene``, or merge its samples into an existing gene.

        If no gene with the same name is stored yet, ``gene`` itself is
        kept. Otherwise its ``samples`` are handed to the stored gene's
        ``create_or_add_samples``.
        """
        if gene.name not in self.genes:
            self.genes[gene.name] = gene
        else:
            self.genes[gene.name].create_or_add_samples(gene.samples)

def row_to_gene(headers, row):
    """Build a Gene holding a single Sample from one data row.

    Each value in ``row`` is matched to the header at the same position.
    The "gene" column gives the gene name, the "sample" column gives the
    sample name, and every other column is stored in the sample's extras
    dict under its header. Names default to "" when their column is absent.

    Args:
        headers: Sequence of column names.
        row: Sequence of values for one record.

    Returns:
        A Gene whose samples dict has one entry, keyed by the sample name.

    Raises:
        IndexError: If ``row`` has more values than ``headers`` has names.
    """
    gene_name = ""
    sample_name = ""
    extras = {}
    for index, value in enumerate(row):
        header = headers[index]
        if header == "gene":
            gene_name = value
        elif header == "sample":
            sample_name = value
        else:
            extras[header] = value
    return Gene(gene_name, {sample_name: Sample(sample_name, extras)})

if __name__ == '__main__':
    # Merge every file named on the command line into one gene collection,
    # then print the result as JSON and also write it to json_output.txt.
    delim = "\t"  # input files are read as tab-delimited
    genes = Genes()
    files = sys.argv[1:]
    for path in files:
        print("Reading " + str(path))
        with open(path, 'r') as f1:
            reader = csv.reader(f1, delimiter=delim)
            headers = []
            for row in reader:
                if not row:
                    # csv.reader yields [] for a blank line; passing it on
                    # would create a gene and sample both named "".
                    continue
                if not headers:
                    # The first non-blank row of each file is its header.
                    headers = row
                else:
                    genes.create_or_add_gene(row_to_gene(headers, row))

    result = json.dumps(genes.format(), indent=4)
    print(result)
    with open('json_output.txt', 'w') as output:
        output.write(result)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值