# -*- coding: utf-8 -*-
"""A module holding the :class:`ConceptParser`."""
import logging
from collections import defaultdict
from typing import Mapping, Optional, Pattern, Set
from pyparsing import ParseResults, Suppress
from .baseparser import BaseParser
from .constants import NamespaceTermEncodingMapping
from .exc import (
MissingDefaultNameWarning, MissingNamespaceNameWarning, MissingNamespaceRegexWarning, NakedNameWarning,
UndefinedNamespaceWarning,
)
from .utils import quote, word
from ..constants import DIRTY, IDENTIFIER, NAME, NAMESPACE
# Public API of this module: only the parser class is exported.
__all__ = ['ConceptParser']

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class ConceptParser(BaseParser):
    """A parser for concepts in the form of ``namespace:name`` or ``namespace:identifier!name``.

    Can be made more lenient when given a default namespace or enabling the use of naked names.
    """

    def __init__(
        self,
        namespace_to_term_to_encoding: Optional[NamespaceTermEncodingMapping] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        default_namespace: Optional[Set[str]] = None,
        allow_naked_names: bool = False,
    ) -> None:
        """Initialize the concept parser.

        Three alternative grammars are built and tried in this order: the fully qualified form
        ``namespace:identifier!name``, the qualified form ``namespace:name``, and a bare ``name``.

        :param namespace_to_term_to_encoding: A dictionary of {namespace: {(identifier, name): encoding}}
        :param namespace_to_pattern: A dictionary of {namespace: compiled regular expression}. The values
         are stored as given and their ``match`` method is called directly; they are not compiled here.
        :param default_namespace: A set of strings that can be used without a namespace
        :param allow_naked_names: If true, turn off naked namespace failures
        """
        self.identifier_fqualified = (
            word(NAMESPACE)
            + Suppress(':')
            + (word | quote)(IDENTIFIER)
            + Suppress('!')
            + (word | quote)(NAME)
        )
        self.identifier_qualified = word(NAMESPACE) + Suppress(':') + (word | quote)(NAME)

        if namespace_to_term_to_encoding is not None:
            # Split the {(identifier, name): encoding} mapping into two per-namespace indexes.
            # A namespace only appears in the indexes once it contributes at least one term.
            name_index = defaultdict(dict)
            identifier_index = defaultdict(dict)
            for namespace, term_mapping in namespace_to_term_to_encoding.items():
                for (identifier, name), encoding in term_mapping.items():
                    name_index[namespace][name] = encoding
                    identifier_index[namespace][identifier] = encoding

            # Store plain dicts so later lookups of unknown namespaces do not create entries.
            self.namespace_to_name_to_encoding = dict(name_index)
            self.namespace_to_identifier_to_encoding = dict(identifier_index)

            # NOTE(review): qualified identifiers are only validated when an enumeration mapping is
            # supplied; a parser configured with ``namespace_to_pattern`` alone accepts them
            # unchecked -- confirm this is intended.
            self.identifier_fqualified.setParseAction(self.handle_identifier_qualified)
            self.identifier_qualified.setParseAction(self.handle_identifier_qualified)
        else:
            self.namespace_to_name_to_encoding = {}
            self.namespace_to_identifier_to_encoding = {}

        self.namespace_to_pattern = namespace_to_pattern or {}
        self.default_namespace = set(default_namespace) if default_namespace is not None else None
        self.allow_naked_names = allow_naked_names

        self.identifier_bare = (word | quote)(NAME)

        # Choose how bare names are treated: a non-empty default namespace takes priority over
        # lenient handling, and without either a bare name is an error.
        if self.default_namespace:
            bare_handler = self.handle_namespace_default
        elif self.allow_naked_names:
            bare_handler = self.handle_namespace_lenient
        else:
            bare_handler = self.handle_namespace_invalid
        self.identifier_bare.setParseAction(bare_handler)

        super().__init__(
            self.identifier_fqualified | self.identifier_qualified | self.identifier_bare,
        )

    def has_enumerated_namespace(self, namespace: str) -> bool:
        """Check that the namespace has been defined by an enumeration."""
        return namespace in self.namespace_to_name_to_encoding

    def has_regex_namespace(self, namespace: str) -> bool:
        """Check that the namespace has been defined by a regular expression."""
        return namespace in self.namespace_to_pattern

    def has_namespace(self, namespace: str) -> bool:
        """Check that the namespace has either been defined by an enumeration or a regular expression."""
        return self.has_enumerated_namespace(namespace) or self.has_regex_namespace(namespace)

    def has_enumerated_namespace_name(self, namespace: str, name: str) -> bool:
        """Check that the namespace is defined by an enumeration and that the name is a member."""
        return self.has_enumerated_namespace(namespace) and name in self.namespace_to_name_to_encoding[namespace]

    def has_regex_namespace_name(self, namespace: str, name: str) -> bool:
        """Check that the namespace is defined as a regular expression and the name matches it.

        :return: ``True`` only if the namespace has a pattern and ``pattern.match(name)`` succeeds
        """
        if not self.has_regex_namespace(namespace):
            return False
        # ``match`` returns a match object or None; compare explicitly so a real bool is returned.
        return self.namespace_to_pattern[namespace].match(name) is not None

    def has_namespace_name(self, line: str, position: int, namespace: str, name: str) -> bool:
        """Check that the namespace is defined and has the given name.

        :raises UndefinedNamespaceWarning: if the namespace is neither enumerated nor regex-defined
        """
        self.raise_for_missing_namespace(line, position, namespace, name)
        return self.has_enumerated_namespace_name(namespace, name) or self.has_regex_namespace_name(namespace, name)

    def raise_for_missing_namespace(self, line: str, position: int, namespace: str, name: str) -> None:
        """Raise an exception if the namespace is not defined.

        :raises UndefinedNamespaceWarning: if the namespace is neither enumerated nor regex-defined
        """
        if not self.has_namespace(namespace):
            raise UndefinedNamespaceWarning(self.get_line_number(), line, position, namespace, name)

    def raise_for_missing_name(self, line: str, position: int, namespace: str, name: str) -> None:
        """Raise an exception if the namespace is not defined or if it does not validate the given name.

        :raises UndefinedNamespaceWarning: if the namespace is not defined
        :raises MissingNamespaceNameWarning: if the namespace is enumerated and the name is not a member
        :raises MissingNamespaceRegexWarning: if the namespace has a pattern and the name does not match it
        """
        self.raise_for_missing_namespace(line, position, namespace, name)

        if self.has_enumerated_namespace(namespace) and not self.has_enumerated_namespace_name(namespace, name):
            raise MissingNamespaceNameWarning(self.get_line_number(), line, position, namespace, name)

        if self.has_regex_namespace(namespace) and not self.has_regex_namespace_name(namespace, name):
            raise MissingNamespaceRegexWarning(self.get_line_number(), line, position, namespace, name)

    def raise_for_missing_default(self, line: str, position: int, name: str) -> None:
        """Raise an exception if the name does not belong to the default namespace.

        :raises ValueError: if no (non-empty) default namespace is set
        :raises MissingDefaultNameWarning: if the name is not in the default namespace
        """
        if not self.default_namespace:
            raise ValueError('Default namespace is not set')

        if name not in self.default_namespace:
            raise MissingDefaultNameWarning(self.get_line_number(), line, position, name)

    def handle_identifier_qualified(self, line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Handle parsing a qualified identifier.

        Validates the namespace and name tokens and returns the tokens unchanged.
        """
        namespace, name = tokens[NAMESPACE], tokens[NAME]
        self.raise_for_missing_namespace(line, position, namespace, name)
        self.raise_for_missing_name(line, position, namespace, name)
        return tokens

    def handle_namespace_default(self, line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Handle parsing an identifier for the default namespace.

        Returns the tokens unchanged if the name belongs to the default namespace.
        """
        name = tokens[NAME]
        self.raise_for_missing_default(line, position, name)
        return tokens

    @staticmethod
    def handle_namespace_lenient(line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Handle parsing an identifier for names missing a namespace that are outside the default namespace.

        Sets the namespace token to ``DIRTY``, logs the occurrence at debug level, and returns the tokens.
        """
        tokens[NAMESPACE] = DIRTY
        logger.debug('Naked namespace: [%d] %s', position, line)
        return tokens

    def handle_namespace_invalid(self, line: str, position: int, tokens: ParseResults) -> None:
        """Raise an exception when parsing a name missing a namespace.

        :raises NakedNameWarning: always
        """
        name = tokens[NAME]
        raise NakedNameWarning(self.get_line_number(), line, position, name)