import re
from functools import partial
from typing import Any
from typing import Callable
from typing import Mapping
from typing import Optional
from typing import Sequence
def preset(*,
           intent_map: Optional[Mapping[str, str]] = None,
           entities: Optional[Sequence[str]] = None
           ) -> Callable:
    """Configures the Pipeline for LUIS test format.

    Adheres to: https://docs.microsoft.com/en-us/azure/cognitive-services/luis/luis-tutorial-batch-testing.

    This function should be used as the 'preset' argument of putput.Pipeline instead of
    the 'LUIS' str to specify intents and entities.

    Examples:
        >>> import json
        >>> from pathlib import Path
        >>> from putput.pipeline import Pipeline
        >>> from pprint import pprint
        >>> import random
        >>> random.seed(0)
        >>> pattern_folder = Path(__file__).parent.parent.parent / 'tests' / 'doc'
        >>> pattern_def_path = pattern_folder / 'example_pattern_definition_with_intents.yml'
        >>> dynamic_token_patterns_map = {'ITEM': ('fries',)}
        >>> p = Pipeline.from_preset('LUIS',
        ...                          pattern_def_path,
        ...                          dynamic_token_patterns_map=dynamic_token_patterns_map)
        >>> for luis_result in p.flow(disable_progress_bar=True):
        ...     print(json.dumps(luis_result, sort_keys=True))
        ...     break
        {"entities": [{"endPos": 16, "entity": "ITEM", "startPos": 12},
                      {"endPos": 34, "entity": "ITEM", "startPos": 30},
                      {"endPos": 44, "entity": "ITEM", "startPos": 40}],
         "intent": "ADD_INTENT",
         "text": "can she get fries can she get fries and fries"}

    Args:
        intent_map: A mapping from an utterance pattern string to a single intent.
            The value '__DISCARD' is reserved.
        entities: A sequence of tokens that are considered entities. To make all tokens
            entities, give a list with only the value '__ALL'. E.g. entities=['__ALL']

    Returns:
        A Callable that when called returns parameters for instantiating a Pipeline.
        This Callable can be passed into putput.Pipeline as the 'preset' argument.

    Raises:
        ValueError: If '__DISCARD' is used as an intent value in intent_map.
    """
    # '__DISCARD' is used internally as the intent of the 'DEFAULT' hook, so a
    # caller-supplied intent with that value must be rejected up front.
    if intent_map and '__DISCARD' in intent_map.values():
        raise ValueError('__DISCARD is a reserved value.')
    return partial(_preset, intent_map=intent_map, entities=entities)
def _preset(intent_map: Optional[Mapping[str, str]], # pylint: disable=W0613
            entities: Optional[Sequence[str]],
            __intent_map_from_pipeline: Optional[Mapping[str, str]],
            __entities_from_pipeline: Optional[Sequence[str]],
            **kwargs: Any
            ) -> Mapping:
    """Builds the 'combo_hooks_map' parameter for the LUIS format.

    Args:
        intent_map: Caller-supplied mapping from utterance pattern to intent, or None.
        entities: Caller-supplied sequence of entity tokens, or None.
        __intent_map_from_pipeline: Fallback intent map, used only when the caller
            gave no (or an empty) intent_map and this value is not None.
        __entities_from_pipeline: Fallback entities, used only when the caller
            gave no (or empty) entities and this value is not None.
        **kwargs: Accepted and ignored.

    Returns:
        A mapping with the single key 'combo_hooks_map'. Its value maps every
        utterance pattern of the effective intent map, plus the key 'DEFAULT',
        to a one-element tuple holding the hook for that pattern.
    """
    # only override if caller does not specify intent_map/entities
    if not intent_map and __intent_map_from_pipeline is not None:
        intent_map = __intent_map_from_pipeline
    if not entities and __entities_from_pipeline is not None:
        entities = __entities_from_pipeline

    # Combo hook per intent. intent_map can still be None here (nothing supplied
    # by the caller or the pipeline), which must behave like an empty mapping
    # rather than crash on `.items()`.
    combo_hooks_map = {
        pattern: (partial(_handle_intents_and_entities, intent=intent, entities=entities),)
        for pattern, intent in (intent_map or {}).items()
    }

    if entities and not intent_map:
        # Handle entities and no intent: keep the utterances, without an intent.
        default_intent = None
    else:
        # Default case: the hook receives '__DISCARD' as its intent.
        default_intent = '__DISCARD'
    combo_hooks_map['DEFAULT'] = (
        partial(_handle_intents_and_entities, intent=default_intent, entities=entities),
    )
    return {
        'combo_hooks_map': combo_hooks_map
    }
def _convert_to_luis_entities(utterance: str,
entities: Sequence[str],
handled_items: Sequence[str]
) -> Sequence[Mapping]:
ents = []
offset = 0
for handled_item in handled_items:
label = _token_extractor(handled_item)
phrase = ' '.join(re.findall(r'\(([^()]+)\)', handled_item))
start = offset + utterance[offset:].index(phrase)
end = start + len(phrase) - 1
if label in entities:
ent = {
'entity': label,
'startPos': start,
'endPos': end
}
ents.append(ent)
offset = end
return ents
def _token_extractor(handled_item: str) -> str:
return handled_item[handled_item.index('[') + 1: handled_item.index('(')]
def _handle_intents_and_entities(utterance: str,
handled_tokens: Sequence[str],
_: Sequence[str],
*,
intent: Optional[str] = None,
entities: Sequence[str]
) -> Optional[Mapping]:
if intent == '__DISCARD':
return None
if intent is None:
intent = 'None'
if len(entities) == 1 and entities[0] == '__ALL':
entities = list(map(_token_extractor, handled_tokens))
luis_entities = _convert_to_luis_entities(utterance, entities, handled_tokens)
return {
'text': utterance,
'intent': intent,
'entities': luis_entities
}