#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
from itertools import islice
# Default locations used when the script is run directly.
RPKM_PATH = '/Users/tangchao/Desktop/GD462.GeneQuantRPKM.50FN.samplename.resk10.txt'
SNP_OUT_PATH = '/Users/tangchao/Desktop/snp1.txt'


def extract_columns(in_path=RPKM_PATH, out_path=SNP_OUT_PATH,
                    first_col=1, stop_col=4, max_lines=20):
    """Append a slice of the columns of a text file to another file.

    Each of the first ``max_lines`` lines of ``in_path`` is split on
    whitespace; the fields ``first_col`` (inclusive, 0-based) up to
    ``stop_col`` (exclusive) are joined with tabs and appended, one line
    per input line, to ``out_path``.

    The defaults reproduce the original script: fields 1..3 of the first
    20 lines (two batches of ten), written in append mode.

    Args:
        in_path: path of the file to read.
        out_path: path of the file to append to (created if missing).
        first_col: index of the first field to keep.
        stop_col: index one past the last field to keep.
        max_lines: number of leading input lines to process; ``None``
            processes the whole file.
    """
    # Both handles are closed even if reading or writing fails.
    with open(in_path) as fin, open(out_path, 'a+') as fout:
        fout.writelines(
            "\t".join(line.split()[first_col:stop_col]) + "\n"
            for line in islice(fin, max_lines)
        )


if __name__ == "__main__":
    extract_columns()
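# Notes: format of the input ("fin") file.
# Purpose: extract a range of columns (from column N up to column M) from the file.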
###################################################
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
from itertools import islice
#############main code###############
# Default locations used when the script is run directly.
# FPKM table: tab-separated; its first line holds the column tags ("hgxxx").
FPKM_PATH = '/Users/tangchao/Desktop/GD462.GeneQuantRPKM.50FN.samplename.resk10.txt'
# snp file: a single column of tags ("hgxxx"), one per line.
# NOTE(review): this path spells the user directory "Tangchao" with a capital
# T, unlike the other two paths -- confirm on a case-sensitive filesystem.
SAMPLE_TAGS_PATH = '/Users/Tangchao/Desktop/1KG_GD462_indiv_ROW_1.txt'
FILTERED_OUT_PATH = "/Users/tangchao/Desktop/snp1.txt"


def _split_row(line):
    """Split one tab-separated line into fields, without its line ending."""
    return line.rstrip("\r\n").split("\t")


def _columns_to_drop(header, wanted):
    """Return the set of positions whose header field is not in *wanted*."""
    return set(pos for pos, tag in enumerate(header) if tag not in wanted)


def drop_unlisted_columns(fpkm_path=FPKM_PATH, tags_path=SAMPLE_TAGS_PATH,
                          out_path=FILTERED_OUT_PATH, max_lines=1000):
    """Copy the FPKM table, keeping only the columns named in the tag file.

    Step 1 reads the first line of ``fpkm_path`` as the column tags and
    collects the tags listed in ``tags_path``; every column whose tag is
    not listed is marked for removal. Step 2 re-reads ``fpkm_path`` from
    the top (so the header line is filtered too) and writes the remaining
    columns, tab-separated, to ``out_path``, overwriting it.

    Fixes over the original script:
      * an unterminated ``'''`` placed after the header split made the rest
        of the script unparsable; it is removed because step 2 needs the
        column positions computed after it;
      * the header and data lines were split without removing the line
        ending, so the last tag carried a trailing newline, never matched
        the tag list, and the last column was always dropped;
      * file handles are now closed on every path.

    Args:
        fpkm_path: tab-separated table whose first line holds the tags.
        tags_path: file listing the tags to keep, one per line.
        out_path: destination file (truncated before writing).
        max_lines: number of leading lines of ``fpkm_path`` to copy,
            header included. The default of 1000 is the original limit of
            100 batches of 10 lines; ``None`` copies the whole file.
    """
    # step 1: work out which column positions must be removed.
    with open(fpkm_path, 'r') as file1, open(tags_path) as file2:
        fstdText1 = file1.readline()
        # Normalise each listed tag the same way the original did:
        # collapse surrounding/inner whitespace before comparing.
        wanted = set("\t".join(line.split()) for line in file2)
    print(fstdText1)
    list1 = _split_row(fstdText1)
    index = _columns_to_drop(list1, wanted)
    print(list1)

    # step 2: rewrite the table without the superfluous columns.
    with open(fpkm_path, 'r') as fin, open(out_path, 'w+') as fout:
        fout.writelines(
            "\t".join(
                field
                for pos, field in enumerate(_split_row(line))
                if pos not in index
            ) + "\n"
            for line in islice(fin, max_lines)
        )


if __name__ == "__main__":
    drop_unlisted_columns()
# File format.
# Find the indices at which the two files differ, then delete those columns from the FPKM file.