import pyndri
import sys
# Path of the index that is opened through pyndri (relative to the current
# working directory).
index_path = '../../Dataset/Robust2004/robust2004_idx'
# Threshold for the scan below: only documents whose reported length is
# strictly smaller than this value are printed.
max_doc_length = 5

# The index is used as a context manager, so its handle is scoped to this
# `with` block.
with pyndri.open(index_path) as index:
    # Walk the document ids from document_base() up to, but not including,
    # maximum_document().
    for document_id in range(index.document_base(), index.maximum_document()):
        if index.document_length(document_id) < max_doc_length:
            # Only the first element of the document record is printed
            # (presumably the external document identifier -- TODO confirm
            # against the pyndri documentation).
            print(index.document(document_id)[0])
>>> index.document_base()
1
>>> index.maximum_document()
528156
>>> document_id=5
>>> index.document_length(document_id)
329