读取LMDB文件
# Peek into an existing LMDB database: print the first three records whose
# key does not contain 'image', both as raw bytes and decoded as UTF-8.
import lmdb  # imported here so this snippet runs on its own

env = lmdb.open('/usr/local/project/dataset/lmdb', readonly=True)
try:
    # The context-managed transaction is released even if a value turns out
    # not to be valid UTF-8 and decode() raises.
    with env.begin() as txn:
        count = 1
        for key, value in txn.cursor():
            k = key.decode('utf-8')
            if 'image' in k:
                # Presumably binary image payloads that would not decode as
                # text -- skip them and only show the textual records.
                continue
            print(key)
            print(value)
            print(value.decode('utf-8'))
            count += 1
            if count > 3:
                # Three records have been shown; that is enough for a peek.
                break
finally:
    env.close()
创建 LMDB文件
import lmdb
# Create (or open) the LMDB environment 'mydatabase.lmdb' and store a single
# key/value pair in it.
# map_size is the upper bound on the database size: 1099511627776 bytes
# is 2**40, i.e. 1 TiB.
env = lmdb.open('mydatabase.lmdb', map_size=1099511627776)
try:
    # The write transaction is used as a context manager so it is finished
    # automatically when the block ends.
    with env.begin(write=True) as txn:
        txn.put(key='my_key'.encode(), value=b'my_value')
finally:
    # Close the environment even when the write above fails.
    env.close()
获取图片流
def get_image_io(url) -> bytes:
    """Return the entire content of the file at ``url`` as bytes.

    Despite its name, ``url`` is handed straight to ``open``, so it must be
    a local filesystem path rather than a network URL.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # Binary mode: the bytes are returned untouched, with no text decoding.
    with open(url, 'rb') as f:
        return f.read()
创建lmdb数据集 CSV -> lmdb
import lmdb
import pandas as pd
# Load the dataset manifest and pull out the two columns used below:
# 'ImagePath' (passed to get_image_io later on) and 'Label' (the text that
# is stored next to each image).
data = pd.read_csv('ICDAR13_HCTR_Dataset.csv')
imagePathArr, labelArr = data['ImagePath'], data['Label']
def get_image_io(url) -> bytes:
    """Read the file at ``url`` in binary mode and return all of its bytes.

    ``url`` is passed directly to ``open``, so in practice it is a local
    file path (here: an image path taken from the CSV), not a network URL.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(url, 'rb') as f:
        return f.read()
# Convert the CSV manifest into an LMDB dataset.
# For the n-th row (1-based, zero-padded to 9 digits) two records are written:
#   label-<n> -> the label text, UTF-8 encoded
#   image-<n> -> the raw bytes of the image file
# A final 'num-samples' record stores the number of rows as a decimal string.
# map_size caps the database at 314572800 bytes (300 MiB).
env = lmdb.open('mydatabase.lmdb', map_size=314572800)
try:
    # Everything is written in one transaction; the context manager finishes
    # it when the block ends.
    with env.begin(write=True) as txn:
        # Walk both columns in lockstep instead of keeping a manual counter
        # and looking rows up by `i - 1`.
        for i, (imagePath, text) in enumerate(zip(imagePathArr, labelArr), start=1):
            indexStr = str(i).zfill(9)
            imgKey = f'image-{indexStr}'.encode(encoding='UTF-8', errors='strict')
            labelKey = f'label-{indexStr}'.encode(encoding='UTF-8', errors='strict')
            imgIo = get_image_io(imagePath)
            text = text.encode(encoding='UTF-8', errors='strict')
            txn.put(key=labelKey, value=text)
            txn.put(key=imgKey, value=imgIo)
        # Written once, after the loop, but still inside the transaction.
        txn.put(
            key="num-samples".encode(encoding='UTF-8', errors='strict'),
            value=str(len(imagePathArr)).encode(encoding='UTF-8', errors='strict'),
        )
finally:
    # Close the environment even if an image is missing or the map fills up.
    env.close()