import
re
import
xlrd
# Output files, one per identifier category written by message(). Each is
# opened in text write mode, so an existing file of the same name is truncated.
# NOTE(review): "a9_not_c8a3a16.txt" is the handle message() uses for the
# v9-only set; the "a9" prefix looks like a typo for "v9" -- confirm before
# renaming, since other tooling may already expect this file name.
f1, f2, f3, f4, f5 = (
    open(output_name, "w")
    for output_name in (
        "v9_c8_a3_a16.txt",
        "a9_not_c8a3a16.txt",
        "c8_not_v9a3a16.txt",
        "a3_not_v9c8a16.txt",
        "a16_not_v9c8a3.txt",
    )
)
def read(file, sheet_index=0):
    """Load every row of one worksheet from an Excel workbook.

    Args:
        file: Path of the workbook, passed straight to ``xlrd.open_workbook``.
        sheet_index: Zero-based index of the worksheet to read (default 0).

    Returns:
        A list with one entry per worksheet row; each entry is the list of
        cell values returned by ``sheet.row_values``.

    Side effects:
        Prints the sheet name, row count and column count to stdout.
    """
    # NOTE(review): xlrd 2.x only reads legacy .xls workbooks, while the
    # callers in this file pass .xlsx paths -- confirm the installed version.
    book = xlrd.open_workbook(file)
    worksheet = book.sheet_by_index(sheet_index)
    print(
        "工作表名称:", worksheet.name,
        "行数:", worksheet.nrows,
        "列数:", worksheet.ncols,
    )
    return [worksheet.row_values(row_no) for row_no in range(worksheet.nrows)]
def red(text):
    """Collect the distinct MGG identifiers that occur in a text file.

    An identifier is the literal prefix ``MGG_`` followed by exactly five
    digits; longer digit runs contribute only their first five digits.

    Args:
        text: Path of the file to scan. It is opened in text mode with the
            platform default encoding and read in full.

    Returns:
        A set containing each matching substring once.
    """
    with open(text, 'r') as handle:
        contents = handle.read()
    return set(re.findall(r'MGG_\d{5}', contents))
# Input data, loaded once when the module is executed.
# v9 / c8: every row of the first worksheet of each workbook (list of rows).
v9, c8 = read(r'zhu.xlsx'), read(r'liu.xlsx')
# a3 / a16: sets of distinct MGG identifiers scanned from each CSV file.
a3, a16 = red(r'ATG3.csv'), red(r'ATG16.csv')
def reg(data):
    """Extract the distinct MGG identifiers from an object's string form.

    Args:
        data: Any object. It is converted with ``str()`` first, so nested
            row lists or sets are searched through their printed
            representation.

    Returns:
        A set of every distinct substring made of ``MGG_`` followed by
        five digits.
    """
    # re.findall only accepts a string, hence the explicit conversion.
    haystack = str(data)
    return set(re.findall(r'MGG_\d{5}', haystack))
def vps9():
    """Return the distinct MGG identifiers found in the module-level ``v9``.

    ``v9`` holds the worksheet rows loaded from 'zhu.xlsx'; the extraction
    itself is delegated to ``reg``.
    """
    unique_ids = reg(v9)
    return unique_ids
def cdk8():
    """Return the distinct MGG identifiers found in the module-level ``c8``.

    ``c8`` holds the worksheet rows loaded from 'liu.xlsx'; the extraction
    itself is delegated to ``reg``.
    """
    unique_ids = reg(c8)
    return unique_ids
def Atg3():
    """Return the distinct MGG identifiers found in the module-level ``a3``.

    ``a3`` is already the identifier set that ``red`` produced from
    'ATG3.csv'; ``reg`` re-extracts the identifiers from its string form.
    """
    unique_ids = reg(a3)
    return unique_ids
def Atg16():
    """Return the distinct MGG identifiers found in the module-level ``a16``.

    ``a16`` is already the identifier set that ``red`` produced from
    'ATG16.csv'; ``reg`` re-extracts the identifiers from its string form.
    """
    unique_ids = reg(a16)
    return unique_ids
def Mgg1_Mgg2():
    """Split the four identifier sets into a shared group and exclusive groups.

    The sets come from ``vps9()``, ``cdk8()``, ``Atg3()`` and ``Atg16()``.

    Returns:
        A 5-tuple of sets, in this order:
        identifiers present in all four sets, then identifiers present
        only in the vps9 set, only in the cdk8 set, only in the Atg3 set,
        and only in the Atg16 set.
    """
    vps9_ids = vps9()
    cdk8_ids = cdk8()
    atg3_ids = Atg3()
    atg16_ids = Atg16()

    shared_by_all = vps9_ids.intersection(cdk8_ids, atg3_ids, atg16_ids)
    only_vps9 = vps9_ids.difference(cdk8_ids, atg3_ids, atg16_ids)
    only_cdk8 = cdk8_ids.difference(vps9_ids, atg3_ids, atg16_ids)
    only_atg3 = atg3_ids.difference(vps9_ids, cdk8_ids, atg16_ids)
    only_atg16 = atg16_ids.difference(vps9_ids, atg3_ids, cdk8_ids)

    return shared_by_all, only_vps9, only_cdk8, only_atg3, only_atg16
def message():
    """Write each matching record of 'magnaporthe.txt' to its category file.

    The input file is read in full and split on '>' into records
    (presumably FASTA entries -- the '>' separator itself is dropped).
    For every record, each identifier from the five sets returned by
    ``Mgg1_Mgg2()`` that occurs anywhere in the record is written as
    ``"<identifier> <record>"`` to the output handle paired with that set:
    ``f1`` for the shared set, then ``f2`` .. ``f5`` for the four exclusive
    sets, in the order ``Mgg1_Mgg2()`` returns them.

    The output handles are module-level objects, so they are left open;
    they are flushed before returning so the written records reach the
    files even if the handles are never closed explicitly.

    Returns:
        None.
    """
    id_sets = Mgg1_Mgg2()

    with open('magnaporthe.txt', 'r') as f:
        records = f.read().split('>')

    # Pair every identifier set with the handle it is written to; this
    # replaces five otherwise identical loops.
    targets = tuple(zip(id_sets, (f1, f2, f3, f4, f5)))

    for record in records:
        for gene_ids, out in targets:
            for gene_id in gene_ids:
                # Plain substring test: the identifier may appear anywhere
                # in the record text.
                if gene_id in record:
                    out.write(gene_id + ' ' + record)

    # Push buffered output to disk; nothing else in the visible code
    # flushes or closes these handles.
    for _, out in targets:
        out.flush()
# Run the extraction. There is no __main__ guard here, so this call also
# executes when the file is imported as a module.
message()