纯Python:在给定pvalue的情况下比较cafe结果文件中任意两个节点的基因数目

该文描述了一个Python脚本,用于处理CAFE(Computational Analysis of gene Family Evolution,基因家族进化计算分析)软件的输出结果,比较在给定p值下任意两个节点间各基因家族的基因数目差异。脚本读取输入文件,解析树结构,找到目标节点,并统计基因数量的变化情况(扩张、收缩、无变化)。此外,文中还给出了一个示例输出,显示两个节点之间的比较结果和统计信息。
摘要由CSDN通过智能技术生成

cafe运行结果文件中,在给定p值的情况下比较任意两个节点的所有基因家族内的基因数目差异
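注意:请在代码第176行的 target 列表中以<XX>的格式指定要比较的两个节点,在第184行的 pvalue 变量中指定分支p值(Viterbi P-values)的阈值;另外,第186行还硬编码了基因家族整体p值(Family-wide P-value)的阈值0.01,大于该值的基因家族会被跳过。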

运行格式

python compare_GF_branch_geneNum.py report_run.cafe

结果形式:

<7>_vs_<19>:	Expansion:10	Contraction:4	NoChange:2

输入文件head 20

Tree:(D20A:78.0334,(((D15A:42.0569,(D19A:24.509,D1A:24.509):17.5479):4.15507,(D8A:16.7282,(D5A:8.752,(D23A:7.71805,(D14A:4.17075,D13A:4.17075):3.5473):1.03395):7.97622):29.4837):26.7067,(D17A:63.6391,(D10A:24.4991,((D2A:5.87362,D4A:5.87362):15.5526,(D3A:14.2972,((D18A:5.4519,D16A:5.4519):4.97616,(D7A:8.67218,(D9A:7.36182,(D29A:5.72085,D24A:5.72085):1.64097):1.31036):1.75588):3.86919):7.12897):3.07286):39.14):9.2796):5.11474)
Lambda:	0.00345497
# IDs of nodes:(D20A<0>,(((D15A<2>,(D19A<4>,D1A<6>)<5>)<3>,(D8A<8>,(D5A<10>,(D23A<12>,(D14A<14>,D13A<16>)<15>)<13>)<11>)<9>)<7>,(D17A<18>,(D10A<20>,((D2A<22>,D4A<24>)<23>,(D3A<26>,((D18A<28>,D16A<30>)<29>,(D7A<32>,(D9A<34>,(D29A<36>,D24A<38>)<37>)<35>)<33>)<31>)<27>)<25>)<21>)<19>)<17>)<1>
# Output format for: ' Average Expansion', 'Expansions', 'No Change', 'Contractions', and 'Branch-specific P-values' = (node ID, node ID): (0,17) (2,5) (4,6) (3,9) (8,11) (10,13) (12,15) (14,16) (7,19) (18,21) (20,25) (22,24) (23,27) (26,31) (28,30) (29,33) (32,35) (34,37) (36,38) 
# Output format for 'Branch cutting P-values' and 'Likelihood Ratio Test': (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)
Average Expansion:	(-0.275867,-0.00123638)	(-0.041496,-0.0156093)	(-0.0156866,-0.0272004)	(-0.0486825,-0.0353141)	(-0.0294413,-0.00510007)	(-0.034155,-0.00595008)	(-0.0387914,-0.0144502)	(-0.0107411,-0.0110502)	(-0.13577,-0.094274)	(-0.183989,-0.00378642)	(-0.0863921,-0.00340005)	(-0.0119002,-0.0105865)	(-0.0730237,-0.012132)	(-0.0656827,-0.00703191)	(0.00100456,-0.00610463)	(-0.0482188,-0.016382)	(-0.043737,-0.0172321)	(-0.0294413,-0.0185457)	(-0.0284368,-0.0225639)
Expansion :	(444,0)	(165,39)	(215,141)	(7,102)	(166,47)	(105,15)	(103,31)	(115,100)	(63,12)	(147,121)	(172,8)	(110,106)	(63,28)	(134,14)	(1040,140)	(54,16)	(133,19)	(128,25)	(99,67)
nRemain :	(8012,12928)	(11992,12656)	(12250,12249)	(12296,12188)	(12090,12764)	(12192,12835)	(12221,12682)	(12536,12513)	(11049,11697)	(10150,12608)	(11263,12882)	(12501,12547)	(11867,12713)	(11685,12826)	(10781,12477)	(12187,12681)	(12048,12664)	(12208,12677)	(12184,12413)
nDecrease :	(4485,13)	(784,246)	(476,551)	(638,651)	(685,130)	(644,91)	(617,228)	(290,328)	(1829,1232)	(2644,212)	(1506,51)	(330,288)	(1011,200)	(1122,101)	(1120,324)	(700,244)	(760,258)	(605,239)	(658,461)
'ID'	'Newick'	'Family-wide P-value'	'Viterbi P-values'	'cut P-value'	'Likelihood Ratio'
2	(D20A_82:78.0334,(((D15A_33:42.0569,(D19A_33:24.509,D1A_32:24.509)_32:17.5479)_31:4.15507,(D8A_26:16.7282,(D5A_26:8.752,(D23A_25:7.71805,(D14A_24:4.17075,D13A_25:4.17075)_25:3.5473)_25:1.03395)_25:7.97622)_26:29.4837)_31:26.7067,(D17A_19:63.6391,(D10A_24:24.4991,((D2A_23:5.87362,D4A_26:5.87362)_25:15.5526,(D3A_24:14.2972,((D18A_23:5.4519,D16A_26:5.4519)_26:4.97616,(D7A_27:8.67218,(D9A_26:7.36182,(D29A_27:5.72085,D24A_25:5.72085)_26:1.64097)_26:1.31036)_26:1.75588)_26:3.86919)_26:7.12897)_26:3.07286)_26:39.14)_30:9.2796)_33:5.11474)_35	0	((7.634e-16,0.0588293),(0.448258,0.502734),(0.5804,0.911131),(0.746974,0.0381908),(0.875539,0.481526),(0.284329,0.576181),(0.79107,0.681889),(0.344421,0.718658),(0.466579,0.0326812),(0.00145406,0.185851),(0.392353,0.686762),(0.033923,0.195279),(0.637721,0.795835),(0.264531,0.686762),(0.00459271,0.753182),(0.723804,0.578845),(0.292717,0.578845),(0.795835,0.578845),(0.201943,0.403979))	
4	(D20A_5:78.0334,(((D15A_14:42.0569,(D19A_7:24.509,D1A_10:24.509)_9:17.5479)_10:4.15507,(D8A_3:16.7282,(D5A_26:8.752,(D23A_11:7.71805,(D14A_6:4.17075,D13A_5:4.17075)_6:3.5473)_10:1.03395)_10:7.97622)_9:29.4837)_10:26.7067,(D17A_12:63.6391,(D10A_31:24.4991,((D2A_14:5.87362,D4A_3:5.87362)_14:15.5526,(D3A_35:14.2972,((D18A_20:5.4519,D16A_20:5.4519)_21:4.97616,(D7A_21:8.67218,(D9A_24:7.36182,(D29A_31:5.72085,D24A_28:5.72085)_27:1.64097)_24:1.31036)_23:1.75588)_23:3.86919)_23:7.12897)_21:3.07286)_21:39.14)_12:9.2796)_11:5.11474)_11	0	((0.00717389,0.645638),(0.0317765,0.463194),(0.154706,0.296526),(0.613472,0.578316),(9.76727e-07,0.104013),(4.84155e-20,0.532826),(0.114966,1.24451e-06),(0.573495,0.111272),(0.576066,0.157901),(0.910632,4.69686e-05),(1.20375e-05,0.660967),(0.673466,7.74542e-18),(3.00582e-05,0.0952332),(5.01642e-09,0.671725),(0.355076,0.355076),(0.019872,0.570774),(0.128209,0.0394314),(0.786039,0.000120057),(0.00132197,0.208507))	
7	(D20A_1:78.0334,(((D15A_5:42.0569,(D19A_0:24.509,D1A_2:24.509)_2:17.5479)_3:4.15507,(D8A_3:16.7282,(D5A_3:8.752,(D23A_2:7.71805,(D14A_0:4.17075,D13A_0:4.17075)_0:3.5473)_2:1.03395)_3:7.97622)_3:29.4837)_3:26.7067,(D17A_5:63.6391,(D10A_22:24.4991,((D2A_8:5.87362,D4A_6:5.87362)_8:15.5526,(D3A_38:14.2972,((D18A_10:5.4519,D16A_4:5.4519)_10:4.97616,(D7A_8:8.67218,(D9A_26:7.36182,(D29A_69:5.72085,D24A_25:5.72085)_26:1.64097)_24:1.31036)_16:1.75588)_16:3.86919)_16:7.12897)_14:3.07286)_14:39.14)_5:9.2796)_4:5.11474)_4	0	((0.0322795,0.562461),(0.0724884,0.208789),(0.00437968,0.631433),(0.538998,0.700995),(0.631305,0.565268),(0.573507,0.0153174),(0.545049,5.68173e-05),(0.5,0.5),(0.347482,0.060019),(0.854415,6.85475e-07),(4.09087e-05,0.618035),(0.613475,0.00400401),(2.7214e-05,0.0522527),(5.43383e-21,0.631207),(0.635435,5.68355e-09),(3.04281e-08,0.550964),(1.49741e-09,4.65899e-15),(0.114045,0.00449171),(4.45526e-59,0.403979))	
8	(D20A_0:78.0334,(((D15A_1:42.0569,(D19A_12:24.509,D1A_0:24.509)_6:17.5479)_6:4.15507,(D8A_35:16.7282,(D5A_47:8.752,(D23A_0:7.71805,(D14A_1:4.17075,D13A_40:4.17075)_21:3.5473)_21:1.03395)_24:7.97622)_24:29.4837)_6:26.7067,(D17A_1:63.6391,(D10A_13:24.4991,((D2A_8:5.87362,D4A_12:5.87362)_9:15.5526,(D3A_2:14.2972,((D18A_4:5.4519,D16A_15:5.4519)_7:4.97616,(D7A_7:8.67218,(D9A_9:7.36182,(D29A_6:5.72085,D24A_2:5.72085)_6:1.64097)_7:1.31036)_7:1.75588)_7:3.86919)_7:7.12897)_8:3.07286)_8:39.14)_5:9.2796)_5:5.11474)_5	0	((0.000528388,0.576188),(0.000189648,0.721388),(3.7598e-05,1.81019e-07),(0.573495,1.2707e-15),(2.13668e-07,0.786039),(1.00806e-24,3.92353e-05),(3.7148e-35,0.660967),(1.52139e-36,9.92373e-26),(0.19546,0.624745),(0.00340247,0.0253595),(0.000526893,0.573493),(0.190861,0.000674858),(0.185568,0.226831),(2.3145e-06,0.565258),(9.17531e-05,9.06414e-12),(0.584096,0.523331),(0.647637,0.523331),(0.0161262,0.035097),(0.589241,8.90156e-07))	
9	(D20A_12:78.0334,(((D15A_18:42.0569,(D19A_7:24.509,D1A_16:24.509)_12:17.5479)_12:4.15507,(D8A_15:16.7282,(D5A_9:8.752,(D23A_4:7.71805,(D14A_16:4.17075,D13A_18:4.17075)_16:3.5473)_11:1.03395)_11:7.97622)_12:29.4837)_12:26.7067,(D17A_6:63.6391,(D10A_7:24.4991,((D2A_18:5.87362,D4A_8:5.87362)_10:15.5526,(D3A_12:14.2972,((D18A_7:5.4519,D16A_7:5.4519)_7:4.97616,(D7A_12:8.67218,(D9A_9:7.36182,(D29A_5:5.72085,D24A_3:5.72085)_5:1.64097)_8:1.31036)_8:1.75588)_8:3.86919)_9:7.12897)_9:3.07286)_9:39.14)_10:9.2796)_11:5.11474)_11	0	((0.598705,0.645638),(0.00365378,0.809369),(0.0009555,0.0125544),(0.631209,0.862035),(0.0260629,0.305784),(0.0180608,0.535927),(7.04062e-10,1.90775e-07),(0.662806,0.0261159),(0.360005,0.340888),(0.0448611,0.634028),(0.154706,0.581483),(1.0845e-10,0.00625356),(0.205553,0.660985),(0.00987448,0.123706),(0.601659,0.601659),(0.143385,0.526528),(9.41931e-05,0.526528),(0.0928989,1.15805e-06),(0.576188,0.00149766))	
10	(D20A_5:78.0334,(((D15A_2:42.0569,(D19A_1:24.509,D1A_8:24.509)_6:17.5479)_6:4.15507,(D8A_7:16.7282,(D5A_6:8.752,(D23A_5:7.71805,(D14A_5:4.17075,D13A_6:4.17075)_6:3.5473)_6:1.03395)_6:7.97622)_6:29.4837)_6:26.7067,(D17A_16:63.6391,(D10A_9:24.4991,((D2A_21:5.87362,D4A_14:5.87362)_15:15.5526,(D3A_24:14.2972,((D18A_15:5.4519,D16A_6:5.4519)_12:4.97616,(D7A_11:8.67218,(D9A_6:7.36182,(D29A_11:5.72085,D24A_20:5.72085)_12:1.64097)_12:1.31036)_12:1.75588)_12:3.86919)_13:7.12897)_13:3.07286)_12:39.14)_10:9.2796)_8:5.11474)_8	0	((0.179734,0.613475),(0.00360542,0.721388),(1.24342e-05,0.0913331),(0.573495,0.790707),(0.151515,0.61805),(0.631228,0.5201),(0.17999,0.55677),(0.111272,0.573495),(0.147873,0.031425),(0.00786196,0.169418),(0.029978,0.0610304),(4.00934e-07,0.282793),(0.0691518,0.706397),(9.69715e-10,0.169874),(0.00150494,1.83028e-08),(0.631209,0.538996),(0.33435,0.538996),(1.41393e-07,0.538996),(0.239667,4.62591e-10))	
11	(D20A_4:78.0334,(((D15A_0:42.0569,(D19A_5:24.509,D1A_3:24.509)_4:17.5479)_4:4.15507,(D8A_3:16.7282,(D5A_6:8.752,(D23A_8:7.71805,(D14A_12:4.17075,D13A_7:4.17075)_7:3.5473)_7:1.03395)_6:7.97622)_4:29.4837)_4:26.7067,(D17A_5:63.6391,(D10A_4:24.4991,((D2A_6:5.87362,D4A_3:5.87362)_6:15.5526,(D3A_16:14.2972,((D18A_22:5.4519,D16A_19:5.4519)_19:4.97616,(D7A_18:8.67218,(D9A_22:7.36182,(D29A_3:5.72085,D24A_9:5.72085)_9:1.64097)_16:1.31036)_16:1.75588)_16:3.86919)_15:7.12897)_7:3.07286)_6:39.14)_5:9.2796)_5:5.11474)_5	0	((0.628407,0.576188),(0.000277421,0.670103),(0.149032,0.330282),(0.550967,0.738592),(0.24927,0.00562789),(0.631228,0.0103271),(0.0816348,0.565258),(1.04263e-07,0.584096),(0.398388,0.624745),(0.854415,0.269686),(0.0421178,0.0307219),(0.589241,5.42265e-05),(0.35246,1.16779e-10),(0.293169,0.0759091),(0.00509157,0.711965),(0.00181296,0.550964),(0.0781961,0.550964),(3.48735e-06,3.9836e-14),(9.90516e-10,0.624724))	
12	(D20A_13:78.0334,(((D15A_17:42.0569,(D19A_6:24.509,D1A_7:24.509)_8:17.5479)_11:4.15507,(D8A_12:16.7282,(D5A_10:8.752,(D23A_13:7.71805,(D14A_7:4.17075,D13A_6:4.17075)_7:3.5473)_10:1.03395)_10:7.97622)_11:29.4837)_11:26.7067,(D17A_24:63.6391,(D10A_5:24.4991,((D2A_5:5.87362,D4A_14:5.87362)_6:15.5526,(D3A_2:14.2972,((D18A_0:5.4519,D16A_0:5.4519)_0:4.97616,(D7A_2:8.67218,(D9A_11:7.36182,(D29A_6:5.72085,D24A_4:5.72085)_5:1.64097)_5:1.31036)_3:1.75588)_3:3.86919)_3:7.12897)_5:3.07286)_5:39.14)_11:9.2796)_11:5.11474)_11	0	((0.345268,0.645638),(0.00268311,0.0109791),(0.13445,0.489449),(0.622515,0.854719),(0.255529,0.287678),(0.689961,0.532826),(0.00224744,6.486e-05),(0.584096,0.12761),(0.844818,0.717375),(4.01828e-06,0.000412314),(0.74293,0.548019),(0.135429,3.82347e-12),(0.120557,0.00292153),(0.178783,0.529694),(0.5,0.5),(1.75285e-06,0.510206),(0.110811,7.05994e-05),(2.28054e-08,0.516836),(0.0423294,0.115291))	
13	(D20A_0:78.0334,(((D15A_4:42.0569,(D19A_7:24.509,D1A_4:24.509)_6:17.5479)_6:4.15507,(D8A_13:16.7282,(D5A_12:8.752,(D23A_12:7.71805,(D14A_15:4.17075,D13A_14:4.17075)_14:3.5473)_12:1.03395)_12:7.97622)_12:29.4837)_6:26.7067,(D17A_6:63.6391,(D10A_9:24.4991,((D2A_9:5.87362,D4A_7:5.87362)_7:15.5526,(D3A_8:14.2972,((D18A_9:5.4519,D16A_7:5.4519)_7:4.97616,(D7A_7:8.67218,(D9A_8:7.36182,(D29A_6:5.72085,D24A_7:5.72085)_7:1.64097)_7:1.31036)_7:1.75588)_7:3.86919)_7:7.12897)_7:3.07286)_7:39.14)_6:9.2796)_6:5.11474)_6	0	((0.000100917,0.589241),(0.181972,0.721388),(0.214288,0.0421178),(0.573495,9.12573e-05),(0.273483,0.696127),(0.713348,0.538996),(0.696127,0.00941183),(0.093561,0.647616),(0.777025,0.64366),(0.869192,0.310088),(0.113193,0.565258),(0.00879369,0.601659),(0.725164,0.633344),(0.155006,0.565258),(0.00879369,0.601659),(0.584096,0.523331),(0.647637,0.523331),(0.0816348,0.523331),(0.154708,0.601659))	
14	(D20A_3:78.0334,(((D15A_8:42.0569,(D19A_7:24.509,D1A_6:24.509)_7:17.5479)_7:4.15507,(D8A_5:16.7282,(D5A_11:8.752,(D23A_6:7.71805,(D14A_12:4.17075,D13A_10:4.17075)_10:3.5473)_9:1.03395)_9:7.97622)_7:29.4837)_7:26.7067,(D17A_6:63.6391,(D10A_6:24.4991,((D2A_3:5.87362,D4A_3:5.87362)_3:15.5526,(D3A_10:14.2972,((D18A_6:5.4519,D16A_6:5.4519)_6:4.97616,(D7A_14:8.67218,(D9A_8:7.36182,(D29A_14:5.72085,D24A_12:5.72085)_12:1.64097)_11:1.31036)_11:1.75588)_9:3.86919)_9:7.12897)_6:3.07286)_6:39.14)_6:9.2796)_6:5.11474)_6	0	((0.124308,0.589241),(0.362634,0.741688),(0.786269,0.458634),(0.584096,0.809019),(0.0273977,0.0161262),(0.0315194,0.529693),(0.000556168,0.0459542),(0.0113452,0.613472),(0.228632,0.64366),(0.869192,0.824997),(0.766746,0.55677),(0.548022,0.548022),(0.00141672,0.000541599),(0.193972,0.581483),(0.589241,0.589241),(0.000108085,0.000667175),(0.00409853,0.535927),(0.00102403,0.0189206),(0.0232894,0.65536))	

代码:

#!/public/home/wangwen_lab/zhangjiexiong/anaconda3/bin/python
import re,sys
class Node:
	"""Record for one parsed node of an annotated tree string.

	All values are stored exactly as given; nothing is converted or checked.
	"""

	def __init__(self,nodeNum,geneNum,nodeLength,upbranch,downbranch,s):
		# The text this record was built from.
		self.nstring = s
		# Annotations of the node itself.
		self.nodeNum, self.geneNum, self.nodeLength = nodeNum, geneNum, nodeLength
		# The two child subtree strings (NodeInfo passes "" for both on a leaf).
		self.upbranch, self.downbranch = upbranch, downbranch
class family:
	"""Bundle of the subtree strings surrounding one target node.

	``uncle`` and ``ancestor`` default to the placeholder string "NULL".
	"""

	def __init__(self,me,brother,father,uncle="NULL",ancestor="NULL"):
		relatives = (
			("me", me),
			("brother", brother),
			("father", father),
			("uncle", uncle),
			("ancestor", ancestor),
		)
		for attribute, value in relatives:
			setattr(self, attribute, value)
def findComma(s): # find the core comma of a tree string
	"""Return the index of the first comma at parenthesis depth 1 in ``s``.

	Depth is counted by scanning ``s`` left to right: "(" raises it by one and
	")" lowers it by one. The comma found this way separates the two children
	of the outermost parenthesised group.

	Raises:
		ValueError: if ``s`` contains no comma at depth 1. (Previously this
			case failed with an UnboundLocalError on the return statement.)
	"""
	depth = 0
	for place, char in enumerate(s):
		if char == "(":
			depth += 1
		elif char == ")":
			depth -= 1
		elif char == "," and depth == 1:
			return place
	raise ValueError("no top-level comma found in tree string: " + s)
def findPairedBracket(s): #find the coord of fist "(" and the last ")"
	"""Return ``[open_index, close_index]`` of the outermost bracket pair.

	``open_index`` is the position of a "(" that raises the nesting depth to 1
	and ``close_index`` the position of a ")" that brings it back to 0. If
	``s`` holds several depth-1 groups, each one overwrites the previous
	result, so the indexes of the last group are returned.

	Raises:
		ValueError: if ``s`` has no such opening or closing bracket.
			(Previously this case failed with an UnboundLocalError.)
	"""
	firstbracket = pairedbracket = None
	depth = 0
	for place, char in enumerate(s):
		if char == "(":
			depth += 1
			if depth == 1:
				firstbracket = place
		elif char == ")":
			depth -= 1
			if place != 0 and depth == 0:
				pairedbracket = place
	if firstbracket is None or pairedbracket is None:
		raise ValueError("no balanced outer parentheses in tree string: " + s)
	return [firstbracket,pairedbracket]

def NodeInfo(s): #store the information of the node to the class--Node which is definded in the first
	"""Parse one annotated subtree string into a ``Node``.

	Two shapes are handled:

	* internal node, e.g. ``(left,right)<5>_32:17.5479``. The text after the
	  outer closing bracket carries the annotations, and the text inside the
	  brackets is split at its depth-1 comma into ``upbranch`` and
	  ``downbranch``.
	* leaf, e.g. ``D20A<0>_82:78.0334``. The whole string carries the
	  annotations and both branches are "".

	The annotations are read as ``<nodeNum>``, ``_geneNum`` and ``:nodeLength``;
	all three are kept as strings.

	Raises:
		ValueError: if an annotation is missing. (Previously a missing node
			number only printed "Error..." and the function then crashed
			with an UnboundLocalError.)
	"""
	if "(" in s:
		close = findPairedBracket(s)[1]
		nodeContent = s[:close+1]
		nodeInfo = s[close+1:]
		commaplace = findComma(nodeContent)
		# Drop the enclosing "(" and ")" around the two children.
		upbranch = nodeContent[1:commaplace]
		downbranch = nodeContent[commaplace+1:-1]
	else: #D20A<0>_82:78.0334
		nodeInfo = s
		upbranch = downbranch = ""
	try:
		nodeNum = nodeInfo.split(">")[0].split("<")[1]
		# Take the text after the LAST underscore so that a leaf name which
		# itself contains "_" does not shift the gene-count field.
		geneNum = nodeInfo.rsplit("_",1)[1].split(":")[0]
		nodeLength = nodeInfo.split(":")[1]
	except IndexError as exc:
		raise ValueError("cannot parse node annotation '<id>_count:length' from: " + s) from exc
	return Node(nodeNum,geneNum,nodeLength,upbranch,downbranch,s)

def judgeNode(s): #judge whether the string is a tree
	"""Return 1 if ``s`` counts as an internal tree node, otherwise 0.

	* Anything that is not a string (``circle`` passes a ``Node`` object)
	  returns 1.
	* A string without "(" returns 0. (Previously this path fell through and
	  returned None; 0 keeps the same truthiness and makes it explicit.)
	* A string with brackets returns 1 only when the text starting at the
	  outer closing bracket looks like ``)<id>_count``.
	"""
	if not isinstance(s, str):
		return 1
	if "(" not in s:
		return 0
	bracket = findPairedBracket(s)
	tail = s[bracket[1]:]
	# Raw string: "\)" and "\d" are invalid escapes in a normal string literal.
	if re.match(r"\)<\d+>_\d+",tail):
		return 1
	return 0
def circle(node): # walk the tree string and collect every subtree into the global list "dicNode"
	"""Append a ``Node`` for ``node`` and all of its subtrees to ``dicNode``.

	``node`` is an annotated tree string. Nodes are appended in pre-order:
	the node itself, then everything under its ``upbranch``, then everything
	under its ``downbranch``. Empty strings (the branches of a leaf) are
	skipped. The caller is responsible for emptying ``dicNode`` beforehand.
	"""
	# Explicit stack instead of recursion. The down branch is pushed first so
	# that the up branch is popped, and therefore processed, before it.
	pending = [node]
	while pending:
		current = pending.pop()
		if current == "":
			continue
		parsed = NodeInfo(current)
		# judgeNode receives the Node object here, not the string.
		descend = judgeNode(parsed)
		dicNode.append(parsed)
		if descend:
			pending.append(parsed.downbranch)
			pending.append(parsed.upbranch)
def judgeCertainSpecies(node,species): # tell whether "species" makes up one whole child branch of node, and which one
	"""Report which child branch of ``node`` consists of ``species``.

	The up branch is inspected first, then the down branch. The first branch
	that contains ``species`` as a substring decides the result: if removing
	``species`` from it leaves no "(", the answer is "up" or "down"
	respectively; if a "(" remains, the answer is 0 and the other branch is
	not inspected. If neither branch contains ``species`` the answer is 0.
	"""
	for side, attribute in (("up", "upbranch"), ("down", "downbranch")):
		branch = getattr(node, attribute)
		if species not in branch:
			continue
		leftover = branch.replace(species,"")
		return 0 if "(" in leftover else side
	return 0
def findDirectFamiliesOfAimedNode(node): #node is a string type. This function can find the adjioning branch of the target node and return the adjioning nodes
	"""Find the parent of the subtree ``node`` in ``dicNode``; return a ``family``.

	Every entry of the global ``dicNode`` that has children is tested with
	``judgeCertainSpecies``. For a match, ``me`` is the child branch that
	holds ``node``, ``brother`` is the other child branch and ``father`` is
	the parent rebuilt as ``(up,down)<id>_count:length``. If several entries
	match, the last one wins.

	Raises:
		ValueError: if no entry matches. (Previously this printed "Error..."
			and then failed with an UnboundLocalError.)
	"""
	families = None
	for i in dicNode:
		if i.upbranch == "":
			continue  # leaves have no children to inspect
		# Evaluate once per candidate instead of up to three times.
		side = judgeCertainSpecies(i,node)
		if side == 0:
			continue
		father = "("+i.upbranch+","+i.downbranch+")"+"<"+i.nodeNum+">"+"_"+i.geneNum+":"+i.nodeLength
		if side == "up":
			me, brother = i.upbranch, i.downbranch
		else:
			me, brother = i.downbranch, i.upbranch
		families = family(me,brother,father)
	if families is None:
		raise ValueError("no parent node found in the tree for subtree: " + node)
	return families

dicNode = []
def getTragetGeneNum(tree,target):
	"""Return the gene count annotated on node ``target`` of ``tree``.

	``tree`` is one family's tree string already labelled with node IDs.
	``target`` is a node ID written as ``<N>``.

	Steps: ":0" is appended so the root has a length field like every other
	node; the tree is expanded into ``dicNode`` via ``circle``; the subtree
	whose label (its text with the trailing ``_count:length`` removed) ends
	in ``target`` is located; that subtree, with the ID removed, is looked up
	through ``findDirectFamiliesOfAimedNode``; the number after the last "_"
	of the returned ``me`` string is the result.

	Raises:
		ValueError: if no node of the tree carries the ID ``target``.
	"""
	# Empty the shared list in place so nodes left over from a previous tree
	# cannot be searched by mistake, even if the caller did not reset it.
	del dicNode[:]
	circle(tree+":0")
	subtree = None
	for i in dicNode:
		labelled = re.sub("_[^_]*$","",i.nstring)
		# endswith handles IDs of any width; the old check compared only the
		# last 3 or 4 characters, i.e. one- or two-digit IDs.
		if labelled.endswith(target):
			subtree = labelled.replace(target,"")
			break
	if subtree is None:
		raise ValueError("node " + target + " not found in tree: " + tree)
	allfolks = findDirectFamiliesOfAimedNode(subtree)
	myInfoNum = allfolks.me.split("_")[-1].split(":")[0]
	return int(myInfoNum)
def addnodes(tree1,treenode): #see addNode.py
	"""Copy the ``<N>`` node IDs of ``treenode`` into the family tree ``tree1``.

	``tree1`` is a tree string whose nodes are annotated ``name_count:length``
	(internal nodes ``)_count:length``). ``treenode`` is the same tree with
	every node followed by ``<N>``. Both list their nodes in the same order,
	so the k-th ID is inserted in front of the k-th ``_count`` annotation,
	giving ``name<N>_count`` and ``)<N>_count``.

	Pairing by position works for any leaf name; the previous implementation
	only labelled leaves whose name ended in "A".

	Raises:
		ValueError: if the two trees do not have the same number of nodes.
	"""
	ids = re.findall(r"<\d+>",treenode)
	# A count annotation is "_digits" directly followed by ":" "," ")" or the
	# end of the string; an underscore inside a leaf name is followed by other
	# characters and is therefore not matched.
	countPattern = r"_\d+(?=[:,)]|$)"
	if len(re.findall(countPattern,tree1)) != len(ids):
		raise ValueError("node count differs between family tree and ID tree: " + tree1)
	remaining = iter(ids)
	return re.sub(countPattern,lambda m: next(remaining)+m.group(0),tree1)

flag = 0          # 0 while reading the header, 1 once the 'ID' column line has been seen
treelist = []     # column 2 of each family row: the tree string with gene counts
tplist = []       # column 3: family-wide p-value (kept as text)
nplist = []       # column 4: Viterbi p-values (kept as text)
trenumlist = []   # column 1: family ID
# The with-block closes the report file even if parsing fails part-way.
with open(sys.argv[1],"r") as f:
	for i in f:
		if flag == 0:
			if i.startswith("'ID'"):
				# Column header line: every following line is a family row.
				flag = 1
			elif i.startswith("# IDs of nodes:"):
				# Tree in which every node is followed by its <N> ID.
				treenode = i.strip().split(":")[1]
			elif i.startswith("# Output format for:"):
				# "... = (node ID, node ID): (0,17) (2,5) ..." becomes the flat
				# list ["0","17","2","5",...], in the same order as the
				# Viterbi p-value pairs of each family row.
				idstring = i.strip().split(":")[2].strip(" ")
				idstring = idstring.replace(")","")
				idstring = idstring.replace("(","")
				idstring = idstring.replace(" ",",")
				idlist = idstring.split(",")
		else:
			wholeLine = i.strip().split()
			if not wholeLine:
				continue  # a blank line used to raise IndexError here
			trenumlist.append(wholeLine[0])
			treelist.append(wholeLine[1])
			tplist.append(wholeLine[2])
			nplist.append(wholeLine[3])
def cleanbrac(s):
	"""Delete every "(" and ")" from ``s`` and return its comma-separated pieces."""
	withoutBrackets = s.translate(str.maketrans("","","()"))
	return withoutBrackets.split(",")
# ---- settings -------------------------------------------------------------
target = ["<7>","<19>"]   # the two node IDs to compare, written as <N>
pvalue = 0.5              # upper limit for the Viterbi p-value of BOTH nodes
# ---- results --------------------------------------------------------------
Excount = 0    # families with more genes at target[1] than at target[0]
Cocount = 0    # families with fewer genes at target[1] than at target[0]
Nccount = 0    # families with equal gene counts
count = 0      # families that passed the family-wide p-value filter
explist = []   # family IDs counted in Excount
conlist = []   # family IDs counted in Cocount
nchlist = []   # family IDs counted in Nccount
# Node numbers without the angle brackets, as they appear in idlist.
# They do not change per family, so they are computed once.
tnum1 = target[0].split(">")[0].split("<")[1]
tnum2 = target[1].split(">")[0].split("<")[1]
for i in range(len(treelist)):
	tp = float(tplist[i])
	# Skip families whose family-wide p-value is above 0.01.
	if tp > 0.01:
		continue
	tree = addnodes(treelist[i],treenode)
	# getTragetGeneNum fills the global dicNode, so start empty each time.
	dicNode = []
	tagret1GnenNum = getTragetGeneNum(tree,target[0])
	dicNode = []
	target2GnenNum = getTragetGeneNum(tree,target[1])
	np = nplist[i]
	npl = cleanbrac(np)
	# Look the two p-values up by node number. The previous scan over
	# zip(idlist,npl) silently reused the values of the preceding family
	# (or hit a NameError) when a node number was not listed.
	branchp = dict(zip(idlist,npl))
	if tnum1 not in branchp or tnum2 not in branchp:
		raise ValueError("family {}: no Viterbi p-value for node {} or {}".format(trenumlist[i],target[0],target[1]))
	nodep1 = branchp[tnum1]
	nodep2 = branchp[tnum2]
	# "-" stands for a missing p-value; such families are not classified.
	if nodep1 == "-" or nodep2 == "-":
		count += 1
		continue
	nodep1 = float(nodep1)
	nodep2 = float(nodep2)
	if nodep1 <= pvalue and nodep2 <= pvalue:
		if tagret1GnenNum < target2GnenNum:
			CEtype = "Expansion"
			Excount += 1
			explist.append(trenumlist[i])
		elif tagret1GnenNum == target2GnenNum:
			CEtype = "NoChange"
			Nccount += 1
			nchlist.append(trenumlist[i])
		else:
			CEtype = "Contraction"
			Cocount += 1
			conlist.append(trenumlist[i])
	count += 1
print("{}_vs_{}:\tExpansion:{}\tContraction:{}\tNoChange:{}".format(target[0],target[1],Excount,Cocount,Nccount))

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值