使用SQLServer的bcp工具生成了格式文件,现在要修改格式文件符合要导入的文本数据。数据文件内容如下(因为太长,已省略部分行)。
xmlns="http://schemas.microsoft.com/sqlserver/2004/bulkload/format"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="24"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="24"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR=","
MAX_LENGTH="30"/>
xsi:type="CharTerm" TERMINATOR="\r\n"
MAX_LENGTH="30"/>
NAME="起点日期" xsi:type="SQLDATETIME"/>
NAME="终点日期" xsi:type="SQLDATETIME"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
NAME="MAX(D)" xsi:type="SQLFLT8"/>
NAME="MIN(D)" xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
xsi:type="SQLFLT8"/>
NAME="M156" xsi:type="SQLFLT8"/>
NAME="M157" xsi:type="SQLFLT8"/>
NAME="M158" xsi:type="SQLFLT8"/>
NAME="M159" xsi:type="SQLFLT8"/>
NAME="M160" xsi:type="SQLFLT8"/>
现在要做的就是把每行的前面一个数字变成它本身减1的值,因为字段数太多,无法手工完成,虽然有很多工具可以完成这个任务,不过为了重新练下手,以下使用Python来对它作修改。
因为生成的文件是Unicode编码,所以在读取时,必须注意编码解码。对数据的搜索主要通过正则表达式来完成。
# encoding=utf-8
import re
import codecs
import os
def getformat(filename):
r = codecs.open(filename,
'w', 'utf-16-le');
with
codecs.open('datafmt.xml', 'r', 'utf-16-le') as f:
for item in f:
m =
re.search('[
if m
is None:
r.write(item)
continue
num =
int(m.group(1)) - 1
s =
re.sub(m.group(1), str(num), item) + os.linesep
r.write(s)
r.close()
if __name__ == '__main__':
getformat('fmt.xml')