from collections import Counter

# Task 1: count how many records each chromosome (first column) has and
# print one "<chromosome> <count>" line per chromosome, in first-seen order.

# list1 collects the first column of every record; it stays a module-level
# name, as in the original script.
list1 = []

# The context manager closes the file even if parsing raises part-way.
with open('gencode.gene.gtf') as f:
    # Iterate the handle directly so the file is streamed line by line
    # instead of being loaded whole with readlines().
    for line in f:
        fields = line.split()
        # A blank line has no columns (fields[0] would raise IndexError) and
        # a "#" line is a header comment, not a record: count neither.
        if not fields or fields[0].startswith('#'):
            continue
        list1.append(fields[0])

# Counter preserves first-seen order; converting back to a plain dict keeps
# dict1 an ordinary dict (missing keys still raise KeyError).
dict1 = dict(Counter(list1))

for key, values in dict1.items():
    print(key, values)
# Task 2: for every record whose 12th whitespace-separated field is the
# quoted gene type "protein_coding";, print chromosome, start, end and
# gene ID separated by spaces.

# The context manager closes the file even if a line fails to parse.
with open('gencode.gene.gtf') as f:
    for line in f:
        a = line.split()
        # Blank lines and "#" header comments are not records; without this
        # guard a short or empty line raises IndexError on a[11].
        if not a or a[0].startswith('#'):
            continue
        if a[11] == '"protein_coding";':
            # a[9] is the quoted gene ID followed by a semicolon; the
            # [1:-2] slice drops the leading quote and the trailing '";'.
            print(a[0], a[3], a[4], a[9][1:-2])
# Task 3: same report as the protein-coding filter, but each record is
# first unpacked into a dict with descriptive keys. Key meanings:
#   '所属染色体' -> chromosome      (field 0)
#   '起始位置'   -> start position  (field 3)
#   '终止位置'   -> end position    (field 4)
#   '基因ID'     -> gene ID         (field 9, quotes/semicolon stripped)
#   '基因类型'   -> gene type       (field 11, quotes/semicolon stripped)

# dict2 is overwritten for every record; after the loop it holds the last
# record parsed (or stays empty if the file has no records).
dict2 = {}

# The context manager closes the file even if a line fails to parse.
with open('gencode.gene.gtf') as f:
    for line in f:
        a = line.split()
        # Blank lines and "#" header comments are not records; without this
        # guard a short or empty line raises IndexError below.
        if not a or a[0].startswith('#'):
            continue
        dict2['所属染色体'] = a[0]
        dict2['起始位置'] = a[3]
        dict2['终止位置'] = a[4]
        # [1:-2] drops the leading double quote and the trailing '";'.
        dict2['基因ID'] = a[9][1:-2]
        dict2['基因类型'] = a[11][1:-2]
        if dict2['基因类型'] == 'protein_coding':
            print(
                dict2['所属染色体'],
                dict2['起始位置'],
                dict2['终止位置'],
                dict2['基因ID'],
            )