# Required import: from spacy.tokens import Span [as alias]
# Or: from spacy.tokens.Span import set_extension [as alias]
def __init__(
    self,
    nlp,
    language="en_clinical",
    ent_types=None,
    pseudo_negations=None,
    preceding_negations=None,
    following_negations=None,
    termination=None,
    chunk_prefix=None,
):
    """Build the negation phrase matcher and store the component settings.

    Args:
        nlp: Object exposing ``tokenizer.pipe`` and ``vocab`` (presumably a
            spaCy ``Language`` pipeline -- confirm against callers). The
            tokenizer turns each term into a matcher pattern and the vocab
            backs the ``PhraseMatcher``.
        language: Key into ``LANGUAGES`` selecting the default termsets.
        ent_types: Stored unchanged on ``self.ent_types``. When omitted, a
            fresh empty list is created for this instance.
        pseudo_negations: Terms registered under the ``"pseudo"`` matcher
            key. When empty or omitted, the language termset is used.
        preceding_negations: Terms registered under the ``"Preceding"``
            matcher key. Same fallback as above.
        following_negations: Terms registered under the ``"Following"``
            matcher key. Same fallback as above.
        termination: Terms registered under the ``"Termination"`` matcher
            key. Same fallback as above.
        chunk_prefix: Terms tokenized and stored on ``self.chunk_prefix``.
            No termset fallback; omitted means an empty list.

    Raises:
        KeyError: If ``language`` is not a key of ``LANGUAGES``, or if a
            term list was not supplied and the selected termset has no
            entry for it.
    """
    # NOTE: the defaults are ``None`` rather than ``list()``. A ``list()``
    # default is evaluated once, so the very same list object would end up
    # stored on ``self.ent_types`` of every instance built with defaults.
    if language not in LANGUAGES:
        raise KeyError(
            f"{language} not found in languages termset. "
            "Ensure this is a supported language or specify "
            "your own termsets when initializing Negex."
        )
    termsets = LANGUAGES[language]

    # Register the custom ``negex`` attribute on Span once per process.
    if not Span.has_extension("negex"):
        Span.set_extension("negex", default=False, force=True)

    def _resolve_terms(name, supplied):
        """Return ``supplied`` if non-empty, else the termset entry ``name``."""
        if supplied:
            return supplied
        if name not in termsets:
            raise KeyError(f"{name} not specified for this language.")
        return termsets[name]

    # Resolved in the same order as the checks were originally written, so
    # the first missing termset reported is unchanged.
    pseudo_negations = _resolve_terms("pseudo_negations", pseudo_negations)
    preceding_negations = _resolve_terms(
        "preceding_negations", preceding_negations
    )
    following_negations = _resolve_terms(
        "following_negations", following_negations
    )
    termination = _resolve_terms("termination", termination)

    # efficiently build spaCy matcher patterns
    self.pseudo_patterns = list(nlp.tokenizer.pipe(pseudo_negations))
    self.preceding_patterns = list(nlp.tokenizer.pipe(preceding_negations))
    self.following_patterns = list(nlp.tokenizer.pipe(following_negations))
    self.termination_patterns = list(nlp.tokenizer.pipe(termination))

    # Patterns are compared on the LOWER token attribute.
    self.matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
    # The key strings below are runtime identifiers; keep their exact case.
    self.matcher.add("pseudo", None, *self.pseudo_patterns)
    self.matcher.add("Preceding", None, *self.preceding_patterns)
    self.matcher.add("Following", None, *self.following_patterns)
    self.matcher.add("Termination", None, *self.termination_patterns)

    self.nlp = nlp
    # A caller-supplied list is stored as-is; only the omitted case gets a
    # new per-instance list.
    self.ent_types = [] if ent_types is None else ent_types
    self.chunk_prefix = list(nlp.tokenizer.pipe(chunk_prefix or []))