Loading of NLU training data (nlu_data)
# NOTE(review): this is an abridged excerpt — the body of `__init__` is elided
# here (the line after the signature jumps straight to `get_nlu_data`), and the
# original file's indentation has been flattened. Do not run this snippet as-is.
class RasaFileImporter(TrainingDataImporter):
"""Default `TrainingFileImporter` implementation."""
def __init__(
self,
config_file: Optional[Text] = None,
domain_path: Optional[Text] = None,
training_data_paths: Optional[Union[List[Text], Text]] = None,
training_type: Optional[TrainingType] = TrainingType.BOTH,
):
# Loads NLU training data from the importer's configured file list.
# Delegates to `utils.training_data_from_paths`; presumably `self._nlu_files`
# is populated in the elided `__init__` body — TODO confirm against the full source.
async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData:
return utils.training_data_from_paths(self._nlu_files, language)
rasa/shared/importers/utils.py
def training_data_from_paths(paths: Iterable[Text], language: Text) -> TrainingData:
    """Loads every NLU file in `paths` and merges them into one `TrainingData`.

    Args:
        paths: iterable of file paths to load NLU training data from.
        language: language code forwarded to the per-file loader.

    Returns:
        A single `TrainingData` combining all loaded data sets.
    """
    from rasa.shared.nlu.training_data import loading

    loaded = (loading.load_data(path, language) for path in paths)
    # Start from an empty TrainingData and fold every loaded set into it.
    return TrainingData().merge(*loaded)
rasa/shared/nlu/training_data/loading.py
def load_data(resource_name: Text, language: Optional[Text] = "en") -> "TrainingData":
    """Load training data from disk.

    Merges them if loaded from disk and multiple files are found.

    Args:
        resource_name: path to a training data file, or to a directory whose
            files will all be loaded and merged.
        language: language code forwarded to the per-file loader.

    Returns:
        A single `TrainingData` object; empty if no file yielded any data.

    Raises:
        ValueError: if `resource_name` does not exist on disk.
    """
    if not os.path.exists(resource_name):
        raise ValueError(f"File '{resource_name}' does not exist.")

    if os.path.isfile(resource_name):
        files = [resource_name]
    else:
        files = rasa.shared.utils.io.list_files(resource_name)

    # Load each file and drop falsy (empty) results in a single pass, instead
    # of building a full list and filtering it afterwards.
    data_sets = [ds for ds in (_load(f, language) for f in files) if ds]

    # Guard-style early returns replace the original len()==0/==1/else chain.
    if not data_sets:
        return TrainingData()
    if len(data_sets) == 1:
        return data_sets[0]
    # Multiple files: merge the remaining sets into the first one.
    return data_sets[0].merge(*data_sets[1:])
rasa/shared/nlu/training_data/formats/readerwriter.py
class TrainingDataReader:
    """Base class for readers that turn a file or string into `TrainingData`.

    Subclasses implement `reads`; `read` is a thin convenience wrapper that
    remembers the file name and feeds the file's contents to `reads`.
    """

    def __init__(self):
        # Name of the most recently read file; empty until `read` is called.
        self.filename: Text = ""

    def read(self, filename: Union[Text, Path], **kwargs: Any) -> "TrainingData":
        """Reads TrainingData from a file."""
        self.filename = filename
        content = rasa.shared.utils.io.read_file(filename)
        return self.reads(content, **kwargs)

    def reads(self, s: Text, **kwargs: Any) -> "TrainingData":
        """Reads TrainingData from a string."""
        raise NotImplementedError
rasa/shared/nlu/training_data/formats/rasa_yaml.py
The `reads` method below returns a `TrainingData` object.
def reads(self, string: Text, **kwargs: Any) -> "TrainingData":
    """Parses a YAML training-data string into a `TrainingData` object.

    Validates the raw string first, then bails out with an empty
    `TrainingData` if the declared format version is unsupported.
    """
    self.validate(string)

    parsed = rasa.shared.utils.io.read_yaml(string)

    # Unsupported/invalid format version: return an empty result rather
    # than attempting to parse content we cannot interpret.
    if not validation.validate_training_data_format_version(parsed, self.filename):
        return TrainingData()

    for section, content in parsed.items():
        if section == KEY_NLU:
            self._parse_nlu(content)
        elif section == KEY_RESPONSES:
            self.responses = content

    # Everything parsed so far has accumulated on `self`; bundle it up.
    return TrainingData(
        self.training_examples,
        self.entity_synonyms,
        self.regex_features,
        self.lookup_tables,
        self.responses,
    )
rasa/shared/nlu/training_data/formats/rasa_yaml.py
def _parse_nlu(self, nlu_data: Optional[List[Dict[Text, Any]]]) -> None:
    """Dispatches each item under the NLU key to its section-specific parser.

    Non-dict items and items with no recognized section key are skipped
    with a warning; nothing is raised.
    """
    if not nlu_data:
        return

    for nlu_item in nlu_data:
        # Only YAML mappings are valid NLU sections.
        if not isinstance(nlu_item, dict):
            rasa.shared.utils.io.raise_warning(
                f"Unexpected block found in '{self.filename}':\n"
                f"{nlu_item}\n"
                f"Items under the '{KEY_NLU}' key must be YAML dictionaries. "
                f"This block will be skipped.",
                docs=DOCS_URL_TRAINING_DATA,
            )
            continue

        # First matching section key wins; `for/else` runs the warning
        # only when no key matched.
        for section_key, parse in (
            (KEY_INTENT, self._parse_intent),
            (KEY_SYNONYM, self._parse_synonym),
            (KEY_REGEX, self._parse_regex),
            (KEY_LOOKUP, self._parse_lookup),
        ):
            if section_key in nlu_item:
                parse(nlu_item)
                break
        else:
            rasa.shared.utils.io.raise_warning(
                f"Issue found while processing '{self.filename}': "
                f"Could not find supported key in the section:\n"
                f"{nlu_item}\n"
                f"Supported keys are: '{KEY_INTENT}', '{KEY_SYNONYM}', "
                f"'{KEY_REGEX}', '{KEY_LOOKUP}'. "
                f"This section will be skipped.",
                docs=DOCS_URL_TRAINING_DATA,
            )