Source code for nvm.aux_spacy.set_container_extensions

#!/usr/bin/env python3

import logging
from spacy.tokens import Doc, Span, Token
from typing import Callable, Optional, Dict, Union

# TODOs:
# from typing import Union
# from sklearn.utils import Bunch


def set_container_extensions_from_dict(
    container: "Union[Doc, Span, Token]",
    fn_dict: Dict[str, Callable],
    log0: Optional[logging.Logger] = logging.getLogger("dummy"),
):
    """Register getter-based custom extensions on a spaCy container.

    Every ``(name, function)`` pair in ``fn_dict`` is registered on
    ``container`` as a custom extension called ``name`` with ``function``
    as its getter. If an extension with that name already exists, a warning
    is logged and the old extension is removed before the new one is set.

    Parameters
    ----------
    container : Union[Doc, Span, Token]
        SpaCy container to set extensions to (``Doc``, ``Span`` or
        ``Token``). The examples below pass the class itself.
    fn_dict : Dict[str, Callable]
        Dictionary of functions. Extensions will be named after keys in the
        dictionary; each value is passed to ``set_extension`` as ``getter``.
    log0 : Optional[logging.Logger]
        Logger (optional). ``None`` falls back to the ``"dummy"`` logger
        that is also used as the default.

    Examples
    --------
    For ``Token``

    >>> import spacy
    >>> from spacy.tokens import Token
    >>> from nvm.aux_spacy import set_container_extensions_from_dict
    >>>
    >>> tok_fn_dict = dict(
    ...     is_good_color=lambda token: token.text in ("black", "blue", "orange"),
    ...     is_good_fruit=lambda token: token.text in ("apple", "pear", "orange"),
    ... )
    >>> set_container_extensions_from_dict(Token, fn_dict=tok_fn_dict)
    >>>
    >>> nlp = spacy.blank("en")
    >>> doc = nlp("I have an orange orange and a pink apple in the black box")
    >>> assert doc[3]._.is_good_fruit
    >>> assert doc[3]._.is_good_color
    >>> assert doc[4]._.is_good_fruit
    >>> assert doc[4]._.is_good_color
    >>> assert not doc[7]._.is_good_color
    >>> assert doc[8]._.is_good_fruit
    >>> assert doc[11]._.is_good_color
    >>> assert not doc[12]._.is_good_fruit
    >>> assert not doc[12]._.is_good_color

    For ``Doc``

    >>> import spacy
    >>> from spacy.tokens import Doc
    >>> from nvm.aux_spacy import set_container_extensions_from_dict
    >>>
    >>> doc_fn_dict = dict(
    ...     has_good_fruit=lambda doc: any(
    ...         fruit in doc.text for fruit in ("apple", "pear", "orange")
    ...     ),
    ...     has_good_color=lambda doc: any(
    ...         color in doc.text for color in ("black", "blue", "orange")
    ...     ),
    ... )
    >>> set_container_extensions_from_dict(Doc, fn_dict=doc_fn_dict)
    >>>
    >>> nlp = spacy.blank("en")
    >>> doc = nlp("I have an orange orange and a pink apple in the black box")
    >>> assert doc._.has_good_fruit
    >>> assert doc._.has_good_color
    >>> doc = nlp("No fruits and no colors")
    >>> assert not doc._.has_good_fruit
    >>> assert not doc._.has_good_color

    For ``Span``

    >>> import spacy
    >>> from spacy.tokens import Span
    >>> from nvm.aux_spacy import set_container_extensions_from_dict
    >>>
    >>> spn_fn_dict = dict(
    ...     has_good_fruit=lambda span: any(
    ...         fruit in span.text for fruit in ("apple", "pear", "orange")
    ...     ),
    ...     has_good_color=lambda span: any(
    ...         color in span.text for color in ("black", "blue", "orange")
    ...     ),
    ... )
    >>> set_container_extensions_from_dict(Span, fn_dict=spn_fn_dict)
    >>>
    >>> nlp = spacy.blank("en")
    >>> doc = nlp("I have an orange orange and a pink apple in the black box")
    >>> assert doc[0:4]._.has_good_fruit
    >>> assert doc[0:4]._.has_good_color
    >>> assert not doc[0:2]._.has_good_fruit
    >>> assert not doc[0:2]._.has_good_color
    """
    # The annotation allows ``None``; without this fallback the first
    # ``log0.debug`` call would raise AttributeError.
    if log0 is None:
        log0 = logging.getLogger("dummy")

    for key1, val1 in fn_dict.items():
        log0.debug(f"Adding {container!r} extension {key1!r}")
        if container.has_extension(key1):
            # Drop the existing extension first so the new getter replaces
            # it, and tell the caller that an overwrite happened.
            log0.warning(f"{container!r} extension {key1!r} was replaced.")
            container.remove_extension(key1)
        container.set_extension(key1, getter=val1)