在 autogen.sh 中会执行下面的 Python 脚本:
# Run the Unicode import script: the three unicode/*.txt files are passed as
# arguments 1-3 and grub-core/unidata.c as argument 4 (the file it writes).
echo "Importing unicode..."
${PYTHON} util/import_unicode.py unicode/UnicodeData.txt unicode/BidiMirroring.txt unicode/ArabicShaping.txt grub-core/unidata.c
其中 util/import_unicode.py 共接收四个参数:前三个是输入文件,最后一个 grub-core/unidata.c 是输出文件;脚本按这三个输入文件分为 3 部分来解析。
第二部分:
# Part two of the script: UnicodeData.txt (argv[1]) is read line by line and
# the generated C table goes to grub-core/unidata.c (argv[4]).
infile = open (sys.argv[1], "r")   # read-only input
outfile = open (sys.argv[4], "w")  # output, truncated on open

# Emit the top of the generated C file: the include line, an empty line and
# the opening of the grub_unicode_compact[] initialiser.
outfile.writelines ((
    "#include <grub/unicode.h>\n",
    "\n",
    "struct grub_unicode_compact_range grub_unicode_compact[] = {\n",
))

# State of the code-point range currently being accumulated.  begincode == -2
# is the "no range started yet" marker tested before a range is written out.
begincode = lastcode = -2
lastbiditype, lastmirrortype, lastcombtype = "X", False, -1

# Filled while scanning the file: Arabic letter name -> dict of the code
# points of its forms (plus a 'join' entry for the base letter).
arabicsubst = {}
# General categories (field 2 of UnicodeData.txt) the script recognises;
# any other value only produces a warning.
known_categories = frozenset ((
    "Lu", "Ll", "Lt", "Lm", "Lo",
    "Me", "Mc", "Mn",
    "Nd", "Nl", "No",
    "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po",
    "Sm", "Sc", "Sk", "So",
    "Zs", "Zl", "Zp",
    "Cc", "Cf", "Cs", "Co"))

# Pseudo combining types given to marks whose combining class is 0.  The
# values 253-255 must therefore never appear in the input itself.
mark_combtypes = {"Me": 253, "Mc": 254, "Mn": 255}

# Combining types >= 37 that are accepted without a warning.
known_high_combtypes = frozenset ((
    84, 91, 103, 107, 118, 122, 129, 130, 132, 202,
    214, 216, 218, 220, 222, 224, 226, 228,
    230, 232, 233, 234, 240, 253, 254, 255))

# Name suffixes of Arabic letter forms and the form number recorded for each
# (form 0 is a name without any of these suffixes).
arabic_forms = (
    (" ISOLATED FORM", 1),
    (" FINAL FORM", 2),
    (" MEDIAL FORM", 3),
    (" INITIAL FORM", 4))

# Walk UnicodeData.txt and merge consecutive code points that share the same
# bidi type, combining type, mirroring flag and joining type into ranges; a
# finished range is written to outfile unless it is the default
# ("L", combining type 0, not mirrored).
for line in infile:
    # Each record is a ';'-separated list of fields.
    sp = line.split (";")
    curcode = int (sp[0], 16)        # code point, hexadecimal
    curcombtype = int (sp[3], 10)    # combining class, decimal
    curbiditype = sp[4]
    curmirrortype = (sp[9] == "Y")

    # 253-255 are reserved for the pseudo types assigned below.
    if 253 <= curcombtype <= 255:
        raise RuntimeError ("UnicodeData.txt uses combination type %d. Conflict."
                            % curcombtype)

    if sp[2] not in known_categories:
        print ("WARNING: Unknown type %s" % sp[2])

    # Marks with combining class 0 get a pseudo type derived from their
    # general category.
    if curcombtype == 0 and sp[2] in mark_combtypes:
        curcombtype = mark_combtypes[sp[2]]

    if 2 <= curcombtype <= 6 \
            or (curcombtype >= 37 and curcombtype not in known_high_combtypes):
        print ("WARNING: Unknown combining type %d" % curcombtype)

    # Joining type: an explicit entry in `joining` wins (NOTE(review): that
    # table is built outside this excerpt, presumably from ArabicShaping.txt
    # -- confirm); otherwise Me/Mn/Cf are transparent and the rest do not
    # join.
    if curcode in joining:
        curjoin = joining[curcode]
    elif sp[2] in ("Me", "Mn", "Cf"):
        curjoin = "TRANSPARENT"
    else:
        curjoin = "NONJOINING"

    # Record Arabic letters by base name.  A name of the shape
    # "ARABIC LETTER X ISOLATED FORM" is stored under "X" as form 1.
    if sp[1].startswith ("ARABIC LETTER "):
        arabname = sp[1][len ("ARABIC LETTER "):]
        form = 0
        # Every suffix is tested in turn (no early exit), matching the
        # original chain of independent `if` statements.
        for suffix, formno in arabic_forms:
            if arabname.endswith (suffix):
                arabname = arabname[:-len (suffix)]
                form = formno
        entry = arabicsubst.setdefault (arabname, {})
        entry[form] = curcode
        if form == 0:
            entry['join'] = curjoin

    # Start a new range when the code points stop being consecutive or any
    # property changes.  On the first record the first test already holds
    # (lastcode starts at -2), so the short-circuit keeps `lastjoin` from
    # being read before it is first assigned below.
    if lastcode + 1 != curcode or curbiditype != lastbiditype \
            or curcombtype != lastcombtype or curmirrortype != lastmirrortype \
            or curjoin != lastjoin:
        if begincode != -2 and (lastbiditype != "L" or lastcombtype != 0
                                or lastmirrortype):
            rangelen = lastcode - begincode + 1
            # Refuse ranges of 0x200 code points or more before emitting
            # anything for them.
            if rangelen >= 0x200:
                raise RuntimeError ("Too long range")
            outfile.write (("{0x%x, 0x%x, GRUB_BIDI_TYPE_%s, %d, %d, GRUB_JOIN_TYPE_%s},\n"
                            % (begincode, rangelen, lastbiditype,
                               lastcombtype, lastmirrortype, lastjoin)))
        begincode = curcode

    # Remember this record's properties for the comparison on the next line.
    lastcode = curcode
    lastjoin = curjoin
    lastbiditype = curbiditype
    lastcombtype = curcombtype
    lastmirrortype = curmirrortype
# Flush the range that was still open when the input ended.  It is skipped
# when no record was read at all (begincode is still -2, and lastjoin was
# never assigned) or when the range has the default properties.  The second
# field is the range length, the same quantity the loop writes for every
# other entry.
if begincode != -2 and (lastbiditype != "L" or lastcombtype != 0
                        or lastmirrortype):
    outfile.write (("{0x%x, 0x%x, GRUB_BIDI_TYPE_%s, %d, %d, GRUB_JOIN_TYPE_%s},\n"
                    % (begincode, lastcode - begincode + 1, lastbiditype,
                       lastcombtype, lastmirrortype, lastjoin)))

# An all-zero entry is written last (presumably the table terminator -- confirm
# against the C consumer), then the initialiser is closed.
outfile.write ("{0, 0, 0, 0, 0, 0},\n")
outfile.write ("};\n")

# Only the input is closed here; outfile stays open.
# NOTE(review): presumably later parts of the script keep writing to it -- confirm.
infile.close ()
# Run the Unicode import script: the three unicode/*.txt files are passed as
# arguments 1-3 and grub-core/unidata.c as argument 4 (the file it writes).
echo "Importing unicode..."
${PYTHON} util/import_unicode.py unicode/UnicodeData.txt unicode/BidiMirroring.txt unicode/ArabicShaping.txt grub-core/unidata.c
其中 util/import_unicode.py 共接收四个参数:前三个是输入文件,最后一个 grub-core/unidata.c 是输出文件;脚本按这三个输入文件分为 3 部分来解析。
第二部分:
# Part two of the script: UnicodeData.txt (argv[1]) is read line by line and
# the generated C table goes to grub-core/unidata.c (argv[4]).
infile = open (sys.argv[1], "r")   # read-only input
outfile = open (sys.argv[4], "w")  # output, truncated on open

# Emit the top of the generated C file: the include line, an empty line and
# the opening of the grub_unicode_compact[] initialiser.
outfile.writelines ((
    "#include <grub/unicode.h>\n",
    "\n",
    "struct grub_unicode_compact_range grub_unicode_compact[] = {\n",
))

# State of the code-point range currently being accumulated.  begincode == -2
# is the "no range started yet" marker tested before a range is written out.
begincode = lastcode = -2
lastbiditype, lastmirrortype, lastcombtype = "X", False, -1

# Filled while scanning the file: Arabic letter name -> dict of the code
# points of its forms (plus a 'join' entry for the base letter).
arabicsubst = {}
# General categories (field 2 of UnicodeData.txt) the script recognises;
# any other value only produces a warning.
known_categories = frozenset ((
    "Lu", "Ll", "Lt", "Lm", "Lo",
    "Me", "Mc", "Mn",
    "Nd", "Nl", "No",
    "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po",
    "Sm", "Sc", "Sk", "So",
    "Zs", "Zl", "Zp",
    "Cc", "Cf", "Cs", "Co"))

# Pseudo combining types given to marks whose combining class is 0.  The
# values 253-255 must therefore never appear in the input itself.
mark_combtypes = {"Me": 253, "Mc": 254, "Mn": 255}

# Combining types >= 37 that are accepted without a warning.
known_high_combtypes = frozenset ((
    84, 91, 103, 107, 118, 122, 129, 130, 132, 202,
    214, 216, 218, 220, 222, 224, 226, 228,
    230, 232, 233, 234, 240, 253, 254, 255))

# Name suffixes of Arabic letter forms and the form number recorded for each
# (form 0 is a name without any of these suffixes).
arabic_forms = (
    (" ISOLATED FORM", 1),
    (" FINAL FORM", 2),
    (" MEDIAL FORM", 3),
    (" INITIAL FORM", 4))

# Walk UnicodeData.txt and merge consecutive code points that share the same
# bidi type, combining type, mirroring flag and joining type into ranges; a
# finished range is written to outfile unless it is the default
# ("L", combining type 0, not mirrored).
for line in infile:
    # Each record is a ';'-separated list of fields.
    sp = line.split (";")
    curcode = int (sp[0], 16)        # code point, hexadecimal
    curcombtype = int (sp[3], 10)    # combining class, decimal
    curbiditype = sp[4]
    curmirrortype = (sp[9] == "Y")

    # 253-255 are reserved for the pseudo types assigned below.
    if 253 <= curcombtype <= 255:
        raise RuntimeError ("UnicodeData.txt uses combination type %d. Conflict."
                            % curcombtype)

    if sp[2] not in known_categories:
        print ("WARNING: Unknown type %s" % sp[2])

    # Marks with combining class 0 get a pseudo type derived from their
    # general category.
    if curcombtype == 0 and sp[2] in mark_combtypes:
        curcombtype = mark_combtypes[sp[2]]

    if 2 <= curcombtype <= 6 \
            or (curcombtype >= 37 and curcombtype not in known_high_combtypes):
        print ("WARNING: Unknown combining type %d" % curcombtype)

    # Joining type: an explicit entry in `joining` wins (NOTE(review): that
    # table is built outside this excerpt, presumably from ArabicShaping.txt
    # -- confirm); otherwise Me/Mn/Cf are transparent and the rest do not
    # join.
    if curcode in joining:
        curjoin = joining[curcode]
    elif sp[2] in ("Me", "Mn", "Cf"):
        curjoin = "TRANSPARENT"
    else:
        curjoin = "NONJOINING"

    # Record Arabic letters by base name.  A name of the shape
    # "ARABIC LETTER X ISOLATED FORM" is stored under "X" as form 1.
    if sp[1].startswith ("ARABIC LETTER "):
        arabname = sp[1][len ("ARABIC LETTER "):]
        form = 0
        # Every suffix is tested in turn (no early exit), matching the
        # original chain of independent `if` statements.
        for suffix, formno in arabic_forms:
            if arabname.endswith (suffix):
                arabname = arabname[:-len (suffix)]
                form = formno
        entry = arabicsubst.setdefault (arabname, {})
        entry[form] = curcode
        if form == 0:
            entry['join'] = curjoin

    # Start a new range when the code points stop being consecutive or any
    # property changes.  On the first record the first test already holds
    # (lastcode starts at -2), so the short-circuit keeps `lastjoin` from
    # being read before it is first assigned below.
    if lastcode + 1 != curcode or curbiditype != lastbiditype \
            or curcombtype != lastcombtype or curmirrortype != lastmirrortype \
            or curjoin != lastjoin:
        if begincode != -2 and (lastbiditype != "L" or lastcombtype != 0
                                or lastmirrortype):
            rangelen = lastcode - begincode + 1
            # Refuse ranges of 0x200 code points or more before emitting
            # anything for them.
            if rangelen >= 0x200:
                raise RuntimeError ("Too long range")
            outfile.write (("{0x%x, 0x%x, GRUB_BIDI_TYPE_%s, %d, %d, GRUB_JOIN_TYPE_%s},\n"
                            % (begincode, rangelen, lastbiditype,
                               lastcombtype, lastmirrortype, lastjoin)))
        begincode = curcode

    # Remember this record's properties for the comparison on the next line.
    lastcode = curcode
    lastjoin = curjoin
    lastbiditype = curbiditype
    lastcombtype = curcombtype
    lastmirrortype = curmirrortype
# Flush the range that was still open when the input ended.  It is skipped
# when no record was read at all (begincode is still -2, and lastjoin was
# never assigned) or when the range has the default properties.  The second
# field is the range length, the same quantity the loop writes for every
# other entry.
if begincode != -2 and (lastbiditype != "L" or lastcombtype != 0
                        or lastmirrortype):
    outfile.write (("{0x%x, 0x%x, GRUB_BIDI_TYPE_%s, %d, %d, GRUB_JOIN_TYPE_%s},\n"
                    % (begincode, lastcode - begincode + 1, lastbiditype,
                       lastcombtype, lastmirrortype, lastjoin)))

# An all-zero entry is written last (presumably the table terminator -- confirm
# against the C consumer), then the initialiser is closed.
outfile.write ("{0, 0, 0, 0, 0, 0},\n")
outfile.write ("};\n")

# Only the input is closed here; outfile stays open.
# NOTE(review): presumably later parts of the script keep writing to it -- confirm.
infile.close ()