PyPb系列-pb和python交互支持xp(一)-数据库连接
PyPb系列-pb和python交互支持xp(二)-数据库增删查改CRUD
数据窗口导出成psreport格式文件,psr文件包含报表的格式以及数据
"saveastype","psreport"
ds.saveas("100000.psr", psreport, True)
导出得到的psr文件,可以直接用下面的代码导入
ds.dataobject = "100000.psr"
# Load a previously exported .psr file into a datastore and print its row count.
import pb
ds = pb.Obj("datastore")
# The .psr file carries both the report format and its data, so assigning it
# to dataobject is all that is needed to import the rows.
ds.dataobject = "100000.psr"
print( ds.rowcount() )# output noted by the author: 10,00000
方式一:传统的数据窗口数据导出成一个json代码 # 耗时 25.122791051864624
# Method 1: read every cell through describe()/Evaluate and dump all rows to a
# JSON file. The import order is kept as in the original so that the measured
# time still includes importing pb and json.
import time
starttime = time.time()
import pb

ds = pb.Obj("datastore")
ds.dataobject = "10000.psr"
rowcount = ds.rowcount()  # 10,000 rows in the sample report
colcount = int(ds.describe("datawindow.column.count"))  # 128 columns in the sample report

# Names of all columns in the report; describe() addresses columns as #1..#N.
colnamelist = [ds.describe("#%d.name" % (i + 1)) for i in range(colcount)]

# One dict per row, filled cell by cell. Rows are 1-based in Evaluate().
alldata = []
for row in range(rowcount):
    rowdata = {}
    for name in colnamelist:
        rowdata[name] = ds.describe(
            "Evaluate('Lookupdisplay(%s)',%d)" % (name, row + 1))
    alldata.append(rowdata)

# Write the JSON file locally. ensure_ascii=False emits non-ASCII text
# verbatim, so the encoding is pinned to UTF-8 instead of depending on the
# locale's default code page.
import json
with open("传统方式.json", "w", encoding="utf-8") as f:
    json.dump(alldata, f, ensure_ascii=False)
print("耗时", time.time() - starttime)  # elapsed: 25.122791051864624 s in the author's run
方式二:getfullstate获取报表数据然后用python解析 耗时 6.983316898345947
# Method 2 setup: start the timer first so the measurement includes the imports.
import time
starttime = time.time()
import pb,struct
# Shared pb object; the helper functions below call gl.blob() and gl.dec() on it.
gl = pb.Obj()
def getData(ds):
    """Return the bytes that ds.getfullstate() writes into a fresh blob.

    An empty blob is created through the shared ``gl`` object, a reference to
    it is handed to ``ds.getfullstate`` to be filled, and the blob's content
    is returned via ``blob_bytes()``.
    """
    state_blob = gl.blob("")
    ds.getfullstate(state_blob.blob_ref())
    return state_blob.blob_bytes()
def dwdata(type, bytes, byteslen):
    """Decode one raw column value taken from a getfullstate blob.

    The parameter names shadow builtins; they are kept unchanged so existing
    callers keep working.

    Args:
        type: Column type prefix. The caller passes the first five characters
            of describe("#n.coltype"), i.e. one of 'char(', 'date', 'datet',
            'decim', 'int', 'long', 'numbe', 'real', 'time', 'times', 'ulong'.
            The untruncated spelling 'number' is accepted as well.
        bytes: Raw value bytes; None or empty means NULL.
        byteslen: Stored length of ``bytes``.

    Returns:
        None for NULL, a str for char and date/time columns, the result of
        gl.dec() for non-4-byte decimal/real values, an int for integer
        columns, or the placeholder string 'ulong type' for ulong columns.

    Raises:
        Exception: if ``type`` is not one of the recognised prefixes.
    """
    if not bytes:
        return None

    if type == 'char(':
        # The last stored byte is dropped (presumably a terminator - TODO
        # confirm); the remaining bytes are GBK-encoded text.
        return struct.unpack_from("%ds" % (byteslen - 1), bytes)[0].decode("GBK")

    if type in ('date', 'datet', 'time', 'times'):
        # Layout: int msec, short year, then six signed bytes. The year is
        # stored relative to 1900 and the month is zero-based.
        (tm_msec, tm_year, tm_mon, tm_mday,
         tm_hour, tm_min, tm_sec, tm_filler) = struct.unpack_from("ih6b", bytes)
        return "{:0>4}-{:0>2}-{:0>2} {:0>2}:{:0>2}:{:0>2}".format(
            tm_year + 1900, tm_mon + 1, tm_mday, tm_hour, tm_min, tm_sec)

    if type in ('decim', 'real'):
        if byteslen != 4:
            # Wider values are handed to the pb helper as eight 16-bit words.
            return gl.dec(struct.unpack_from("8H", bytes))
        # NOTE(review): 4-byte values are read as a signed int, as in the
        # original; confirm this is right for 'real' columns.
        return struct.unpack_from("i", bytes)[0]

    # 'numbe' is what the caller's five-character truncation makes of
    # 'number'; without it such columns fell through to the error below.
    # NOTE(review): the value is read as a 4-byte signed int, as the original
    # intended; confirm the stored width of 'number' columns.
    if type in ('int', 'long', 'number', 'numbe'):
        return struct.unpack_from("i", bytes)[0]

    if type == 'ulong':
        # Decoding is not implemented yet: dump the raw bytes for inspection
        # and return a placeholder.
        print(bytes)
        return 'ulong type'

    # Report the offending type (the original formatted the raw bytes here).
    raise Exception("无法识别的类型:%s" % type)
def getDAT(b, rowcount, colcount):
    """Yield every cell of the DAT08 data section of a getfullstate blob.

    Within a row the column values are stored back to back. A NULL value is
    the four bytes FF FF FF FF; any other value is a 4-byte length followed
    by that many data bytes.

    Args:
        b: The blob bytes returned by getfullstate.
        rowcount: Number of rows to read.
        colcount: Number of columns per row.

    Yields:
        (row, col, bytedata, datalen) with zero-based row/col. For NULL cells
        bytedata is None and datalen is 0.

    Raises:
        ValueError: if the DAT08 section or a row start marker is missing.
        struct.error: if the blob ends in the middle of a value.
    """
    if rowcount <= 0:
        return  # nothing to read; keeps the original "yield nothing" result

    null_marker = b"\xff\xff\xff\xff"

    section = b.find(b'DAT08')
    if section < 0:
        # find() returns -1 here; using it in the arithmetic below would
        # silently parse from a meaningless offset.
        raise ValueError("DAT08 section not found in blob")

    # Skip the section header: 0x224 bytes plus 0x10c per additional column
    # (offsets found empirically by the author - presumably per-column
    # descriptors, TODO confirm), plus 4.
    offset = 0x10c * (colcount - 1)
    startrow = section + 0x224 + offset + 4

    for row in range(rowcount):
        # Each row starts right after the next FF FF FF FF marker.
        marker = b.find(null_marker, startrow)
        if marker < 0:
            raise ValueError("start marker for row %d not found in blob" % row)
        startrow = marker + 4

        for col in range(colcount):
            if b[startrow:startrow + 4] == null_marker:
                # NULL cell: only the marker is stored.
                startrow += 4
                yield (row, col, None, 0)
                continue
            datalen = struct.unpack_from("i", b, startrow)[0]
            startrow += 4
            bytedata = struct.unpack_from("%ss" % datalen, b, startrow)[0]
            startrow += datalen
            yield (row, col, bytedata, datalen)
# Method 2 driver: fetch the whole report with getfullstate, decode it in
# Python and dump all rows to a JSON file.
ds = pb.Obj("datastore")
ds.dataobject = "10000.psr"
rowcount = ds.rowcount()  # 10,000 rows in the sample report
colcount = int(ds.describe("datawindow.column.count"))  # 128 columns in the sample report

# (name, type prefix) for every column of the report. Only the first five
# characters of the coltype are kept, e.g. "char(" or "decim", which is the
# form dwdata() compares against.
colnamelist = [
    (ds.describe("#%d.name" % (i + 1)),
     ds.describe("#%d.coltype" % (i + 1))[0:5])
    for i in range(colcount)
]
alldata = [{} for _ in range(rowcount)]  # one dict per row

# Decode the blob cell by cell; getDAT yields the cells in row/column order.
dwobjdata = getData(ds)
for row, col, bytedata, datalen in getDAT(dwobjdata, rowcount, colcount):
    # "coltype" rather than "type", so the builtin is not shadowed at module level.
    name, coltype = colnamelist[col]
    alldata[row][name] = dwdata(coltype, bytedata, datalen)

# Write the JSON file locally. ensure_ascii=False emits non-ASCII text
# verbatim, so the encoding is pinned to UTF-8 instead of depending on the
# locale's default code page.
import json
with open("传统方式fullstate.json", "w", encoding="utf-8") as f:
    json.dump(alldata, f, ensure_ascii=False)
print("耗时", time.time() - starttime)  # elapsed: 6.983316898345947 s in the author's run
如果用pb代码编写,文件写入以及json转换部分还是比较麻烦的,这里就不用pb代码来编写了。
关于对psreport文件解析
getfullstate返回的数据行内列数据都是连续的,NULL数据为FFFFFFFF,其他类型数据均是 len,bytes结构,也就是SH_BINARY结构体数据。
/* Length-prefixed binary value: a byte count followed by the data bytes.
   Every non-NULL column value in the getfullstate data uses this layout. */
typedef struct shBinary
{
ULONG len; // number of data bytes that follow
BYTE data[1]; // Pointed to by PSH_BINARY_DATA
} SH_BINARY, HUGEPTR* PSH_BINARY;
但是如果是saveas保存的psr文件,则中间会插入很多0x200大小的数据块,这部分还没有搞清楚,在以后的文档中再进行补充。