由于 Windows 下无法安装 jq,需要自己编写 JSON Loader。这里提供我的代码~
使用的时候需要根据自己的需要自行修改哦,重点是:需要学习的部分要放在 page_content 里,并且类型必须为 str。
import json
from pathlib import Path
from typing import List, Optional, Union
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
class JSONLoader(BaseLoader):
    """Load a JSON file into ``Document`` objects without depending on ``jq``.

    Every record of the parsed file becomes one ``Document`` whose
    ``page_content`` is that record re-serialized as a JSON string and whose
    ``metadata`` is an empty dict.
    """

    def __init__(
        self,
        file_path: Union[str, Path]
    ):
        """Remember which file to load.

        Args:
            file_path: Path of the JSON file. It is resolved to an absolute
                path immediately, so a later change of working directory
                does not affect which file is read.
        """
        self.file_path = Path(file_path).resolve()

    def create_documents(self, data) -> List[Document]:
        """Convert parsed JSON data into a list of documents.

        Args:
            data: The parsed JSON value. A list (or any other iterable of
                records) yields one document per element. A single JSON
                object, string, number, boolean or ``None`` is treated as
                one record.

        Returns:
            One ``Document`` per record, in input order. ``page_content`` is
            the record serialized with ``ensure_ascii=False`` so non-ASCII
            text stays readable instead of being escaped.
        """
        # A non-array JSON value is a single record. Iterating it directly
        # would yield only the keys of an object, the individual characters
        # of a string, or raise TypeError for a number/bool/null.
        if data is None or isinstance(data, (dict, str, int, float)):
            data = [data]
        return [
            Document(
                page_content=json.dumps(item, ensure_ascii=False),
                metadata={},
            )
            for item in data
        ]

    def load(self) -> List[Document]:
        """Load and return documents from the JSON file.

        Returns:
            The documents built from the file, or an empty list when the
            file does not contain valid JSON (an error line is printed in
            that case).

        Raises:
            OSError: If the file cannot be opened (for example, it does not
                exist); this is not caught here.
        """
        with open(self.file_path, mode="r", encoding="utf-8") as json_file:
            # Keep the try body to the one call that can raise
            # JSONDecodeError.
            try:
                data = json.load(json_file)
            except json.JSONDecodeError:
                print("Error: Invalid JSON format in the file.")
                return []
        return self.create_documents(data)
# Example usage: load a local JSON file and print the resulting documents.
# NOTE(review): these statements appear to sit at module level with no
# `if __name__ == "__main__":` guard, so they would also run on import - confirm.
# Relative path; JSONLoader resolves it to an absolute path in its constructor.
file_path = 'csvjson.json'
json_loader = JSONLoader(file_path)
# load() returns an empty list (after printing an error) if the JSON is invalid.
docs = json_loader.load()
print(docs)