nvm.aux_spacy.factories package
Submodules
nvm.aux_spacy.factories.get_doc_basic_metrics module
- nvm.aux_spacy.factories.get_doc_basic_metrics.get_doc_basic_metrics_component(nlp, name, log0)[source]
Get Doc basic metrics.
Examples
>>> import spacy
>>> from dframcy import DframCy
>>>
>>> from nvm import disp_df
>>> from nvm.aux_spacy import get_doc_basic_metrics_component
>>>
>>> nlp = spacy.load("en_core_web_sm")
>>> nlp.add_pipe("get_doc_basic_metrics", "BASIC")
>>>
>>> dframcy = DframCy(nlp)
>>>
>>> doc = dframcy.nlp(
>>>     "This sentence contains two verbs and this is how many verbs should be found."
>>> )
>>>
>>> df0 = dframcy.to_dataframe(
>>>     doc,
>>>     columns=["text", "lemma_", "is_alpha", "pos_", "tag_", "is_sent_start"],
>>>     custom_attributes=tok_exts[:12],
>>> )
>>> disp_df(df0)
nvm.aux_spacy.factories.get_doc_count_of_dict_items module
- nvm.aux_spacy.factories.get_doc_count_of_dict_items.get_doc_count_of_dict_items_component(nlp, name, dict0, prefix, suffix, exclude, pos, tag, log0)[source]
- class nvm.aux_spacy.factories.get_doc_count_of_dict_items.CountDictItemsComponent(nlp, dict0, name, prefix=None, suffix=None, exclude=None, pos=None, tag=None, log0=<Logger dummy (WARNING)>)[source]
Bases: object
Get counts of items from an arbitrary LIWC-like dictionary.
Examples
>>> from nvm import disp_df
>>> from nvm import Log0
>>> logZ = Log0()
>>> log0 = logZ.logger
>>>
>>> import textwrap
>>> import srsly
>>> import spacy
>>> from spacy.tokens.underscore import Underscore
>>>
>>> from dframcy import DframCy
>>>
>>> from nvm import jsonable
>>> from nvm.aux_spacy import get_doc_count_of_dict_items_component
>>> from nvm.aux_spacy import get_doc_summary_dict_component
>>>
>>> dict0 = {"pos": ["good", "marvel*"], "neg": ["bad", "awful*"]}
>>>
>>> config0 = dict(
>>>     dict0=dict0,
>>> )
>>> config1 = dict(
>>>     dict0=dict0,
>>>     pos = ["PROPN"],
>>> )
>>> nlp = spacy.load("en_core_web_sm")
>>>
>>> nlp.add_pipe("get_doc_count_of_dict_items", "LEX0", config=config0)
>>> nlp.add_pipe("get_doc_count_of_dict_items", "LEX1", config=config1)
>>> nlp.add_pipe("get_doc_summary_dict", "SUMMARY")
>>>
>>> dframcy = DframCy(nlp)
>>>
>>> doc = dframcy.nlp(
>>>     "GoOd. Bad Good WhatEver Awful Marvelous."
>>>     "toobad not-marvelous unmarvel goodyear badZ bAD."
>>>     "Bad Bad WhatEver Awful Marvelous."
>>> )
>>>
>>> tok_exts = list(Underscore.token_extensions.keys())
>>> doc_exts = list(Underscore.doc_extensions.keys())
>>>
>>> df0 = dframcy.to_dataframe(
>>>     doc,
>>>     columns=["text", "lemma_", "pos_", "tag_"],
>>>     custom_attributes=tok_exts[:12],
>>> )
>>> disp_df(df0)
>>>
>>> print(nlp.pipe_names)
>>> print(tok_exts)
>>> print(doc_exts)
>>>
>>> print(textwrap.indent(srsly.yaml_dumps(jsonable(dict(doc._.SUMMARY))), ' '))
nvm.aux_spacy.factories.get_doc_sentences module
- nvm.aux_spacy.factories.get_doc_sentences.get_doc_sentences_as_list_component(nlp, name, log0)[source]
Get document sentences as a list.
Examples
>>> import spacy
>>> from nvm.aux_spacy import get_doc_sentences_as_list_component
>>> # nlp = spacy.blank("en")
>>> nlp = spacy.load("en_core_web_sm")
>>> nlp.add_pipe("get_doc_sentences_as_list", "SENTS")
>>> doc = nlp("This is the first sentence. This is the second sentence.")
>>> assert len(doc._.sents) == 2
>>> doc._.sents
['This is the first sentence.', 'This is the second sentence.']
nvm.aux_spacy.factories.get_doc_summary_dict module
- nvm.aux_spacy.factories.get_doc_summary_dict.get_doc_summary_dict_component(nlp, name, exclude, add_text, log0)[source]
Get underscore attributes as dictionary.
Important
CAUTION: Add this to nlp.pipe after elements that need to be in the summary dictionary.
Examples
>>> import textwrap
>>> import srsly
>>> import spacy
>>> from nvm import jsonable
>>> from nvm.aux_spacy import get_doc_summary_dict_component
>>> from nvm.aux_spacy import get_doc_basic_metrics_component
>>>
>>> nlp = spacy.load("en_core_web_sm")
>>> nlp.add_pipe("get_doc_basic_metrics", "BASIC")
>>> nlp.add_pipe("get_doc_summary_dict", "SUMMARY", last=True) # Add AFTER other elements
>>>
>>> doc = nlp("This is the first sentence. This is the second sentence.")
>>>
>>> print(textwrap.indent(srsly.yaml_dumps(jsonable(dict(doc._.SUMMARY))), ' '))
nvm.aux_spacy.factories.get_doc_word_count module
- nvm.aux_spacy.factories.get_doc_word_count.get_doc_word_count_component(nlp, name, log0)[source]
Get Doc word count.
Examples
>>> import spacy
>>> from nvm.aux_spacy import get_doc_word_count_component
>>> nlp = spacy.load("en_core_web_sm")
>>> nlp.add_pipe("get_doc_word_count", "WC")
>>> doc = nlp("One two three four five.")
>>> assert doc._.word_count == 5
>>> doc._.word_count