# Source code for pybel.language

# -*- coding: utf-8 -*-

"""Language constants for BEL.

This module contains mappings between PyBEL's internal constants and BEL language keywords.
"""

from typing import Optional

from .constants import (
    ABUNDANCE, BEL_DEFAULT_NAMESPACE, BIOPROCESS, CELL_SURFACE, COMPLEX, COMPOSITE, EXTRACELLULAR, GENE, IDENTIFIER,
    INTRACELLULAR, MIRNA, NAME, NAMESPACE, PATHOLOGY, PROTEIN, RNA, TRANSCRIBED_TO, TRANSLATED_TO,
)
from .utils import ensure_quotes


class Entity(dict):
    """Represents a named entity with a namespace and name/identifier.

    The entity is stored as a dictionary keyed by the ``NAMESPACE``, ``NAME``, and ``IDENTIFIER`` constants.
    The namespace is always present; the name and identifier are only stored when they are given.
    """

    def __init__(
        self,
        *,
        namespace: str,
        name: Optional[str] = None,
        identifier: Optional[str] = None
    ) -> None:
        """Create a dictionary representing a reference to an entity.

        :param namespace: The namespace to which the entity belongs
        :param name: The name of the entity
        :param identifier: The identifier of the entity in the namespace
        :raises ValueError: if neither a name nor an identifier is given, or if any given value is empty
        :raises TypeError: if the namespace, name, or identifier is given but is not a string
        """
        if name is None and identifier is None:
            raise ValueError('cannot create an entity with neither a name nor identifier')

        if not isinstance(namespace, str):
            raise TypeError('namespace should be a string: {}'.format(namespace))
        if not namespace:
            raise ValueError('namespace should be non-empty')

        if name is not None:
            if not isinstance(name, str):
                raise TypeError('name should be a string: {}'.format(name))
            if not name:
                raise ValueError('name should be non-empty')

        if identifier is not None:
            if not isinstance(identifier, str):
                raise TypeError('identifier should be a string: {}'.format(identifier))
            if not identifier:
                raise ValueError('identifier should be non-empty')

        # Everything is validated before the dictionary is populated, so a rejected
        # call never leaves partially-filled content behind.
        super().__init__({
            NAMESPACE: namespace,
        })
        if name is not None:
            self[NAME] = name
        if identifier is not None:
            self[IDENTIFIER] = identifier

    @property
    def namespace(self) -> str:  # noqa: D401
        """The entity's namespace."""
        return self[NAMESPACE]

    @property
    def name(self) -> Optional[str]:  # noqa: D401
        """The entity's name or label, or None if it was created without one."""
        return self.get(NAME)

    @property
    def identifier(self) -> Optional[str]:  # noqa: D401
        """The entity's identifier, or None if it was created without one."""
        return self.get(IDENTIFIER)

    @property
    def curie(self) -> str:
        """Return this entity as a CURIE.

        Entities in the BEL default namespace are rendered as their bare name. All others are
        rendered as ``namespace:value`` where the value is the identifier if it is set (and non-empty),
        otherwise the name, passed through :func:`ensure_quotes`.
        """
        if self[NAMESPACE] == BEL_DEFAULT_NAMESPACE:
            return self[NAME]
        return '{}:{}'.format(
            self.namespace,
            ensure_quotes(self.identifier if self.identifier else self.name),
        )

    @property
    def obo(self) -> str:
        """Return this entity as an OBO-style CURIE of the form ``namespace:identifier ! name``."""
        return '{}:{} ! {}'.format(
            self.namespace,
            ensure_quotes(self.identifier),
            ensure_quotes(self.name),
        )

    def __str__(self) -> str:  # noqa: D105
        return self.curie
#: A dictionary of activity labels used in the ma() function in activity(p(X), ma(Y))
activity_labels = {
    'catalyticActivity': 'cat',
    'cat': 'cat',
    'chaperoneActivity': 'chap',
    'chap': 'chap',
    'gtpBoundActivity': 'gtp',
    'gtp': 'gtp',
    'kinaseActivity': 'kin',
    'kin': 'kin',
    'peptidaseActivity': 'pep',
    'pep': 'pep',
    'phosphataseActivity': 'phos',
    'phos': 'phos',
    'ribosylationActivity': 'ribo',
    'ribo': 'ribo',
    'transcriptionalActivity': 'tscript',
    'tscript': 'tscript',
    'transportActivity': 'tport',
    'tport': 'tport',
    'molecularActivity': 'molecularActivity',

    # Added by PyBEL
    'guanineNucleotideExchangeFactorActivity': 'gef',
    'gef': 'gef',
    'gtpaseActivatingProteinActivity': 'gap',
    'gap': 'gap',
}

#: Maps the default BEL molecular activities to Gene Ontology Molecular Functions
activity_mapping = {
    'cat': Entity(namespace='go', name='catalytic activity', identifier='0003824'),
    'chap': Entity(namespace='go', name='protein binding involved in protein folding', identifier='0044183'),
    'gtp': Entity(namespace='go', name='GTP binding', identifier='0005525'),
    'kin': Entity(namespace='go', name='kinase activity', identifier='0016301'),
    'pep': Entity(namespace='go', name='peptidase activity', identifier='0008233'),
    'phos': Entity(namespace='go', name='phosphatase activity', identifier='0016791'),
    'ribo': Entity(
        namespace='go',
        name='NAD(P)+-protein-arginine ADP-ribosyltransferase activity',
        identifier='0003956',
    ),
    'tscript': Entity(
        namespace='go',
        name='nucleic acid binding transcription factor activity',
        identifier='0001071',
    ),
    'tport': Entity(namespace='go', name='transporter activity', identifier='0005215'),
    'molecularActivity': Entity(namespace='go', name='molecular_function', identifier='0003674'),
    'gef': Entity(namespace='go', name='guanyl-nucleotide exchange factor activity', identifier='0005085'),
    'gap': Entity(namespace='go', name='GTPase activating protein binding', identifier='0032794'),
}

#: All keys of :data:`activity_labels` (both long and short forms)
activities = list(activity_labels)

cytoplasm = Entity(name='cytoplasm', namespace='go', identifier='0005737')
nucleus = Entity(name='nucleus', namespace='go', identifier='0005634')

#: Maps the default BEL cellular components to Gene Ontology Cellular Components
compartment_mapping = {
    INTRACELLULAR: Entity(name='intracellular', namespace='go', identifier='0005622'),
    EXTRACELLULAR: Entity(name='extracellular space', namespace='go', identifier='0005615'),
    CELL_SURFACE: Entity(name='cell surface', namespace='go', identifier='0009986'),
    'cytoplasm': cytoplasm,
    'nucleus': nucleus,
}

#: All keys of :data:`compartment_mapping`
compartments = list(compartment_mapping)

#: Provides a mapping from BEL terms to PyBEL internal constants
abundance_labels = {
    'abundance': ABUNDANCE,
    'a': ABUNDANCE,
    'geneAbundance': GENE,
    'g': GENE,
    'microRNAAbundance': MIRNA,
    'm': MIRNA,
    'proteinAbundance': PROTEIN,
    'p': PROTEIN,
    'rnaAbundance': RNA,
    'r': RNA,
    'biologicalProcess': BIOPROCESS,
    'bp': BIOPROCESS,
    'pathology': PATHOLOGY,
    'path': PATHOLOGY,
    'composite': COMPOSITE,
    'compositeAbundance': COMPOSITE,
    'complex': COMPLEX,
    'complexAbundance': COMPLEX,
}

#: Maps the BEL abundance types to the Systems Biology Ontology
abundance_sbo_mapping = {
    MIRNA: Entity(namespace='sbo', name='microRNA', identifier='0000316'),
    BIOPROCESS: Entity(namespace='sbo', name='process', identifier='0000375'),
    GENE: Entity(namespace='sbo', name='gene', identifier='0000243'),
    RNA: Entity(namespace='sbo', name='messenger RNA', identifier='0000278'),
    COMPLEX: Entity(namespace='sbo', name='protein complex', identifier='0000297'),
    PATHOLOGY: Entity(namespace='sbo', name='phenotype', identifier='0000358'),
}

#: Maps the translation and transcription relations to entities in the ``sbo`` namespace
relation_sbo_mapping = {
    TRANSLATED_TO: Entity(namespace='sbo', name='translation', identifier='0000184'),
    TRANSCRIBED_TO: Entity(namespace='sbo', name='transcription', identifier='0000183'),
}

#: Maps one-letter amino acid codes to their three-letter codes
amino_acid_dict = {
    'A': 'Ala',
    'R': 'Arg',
    'N': 'Asn',
    'D': 'Asp',
    'C': 'Cys',
    'E': 'Glu',
    'Q': 'Gln',
    'G': 'Gly',
    'H': 'His',
    'I': 'Ile',
    'L': 'Leu',
    'K': 'Lys',
    'M': 'Met',
    'F': 'Phe',
    'P': 'Pro',
    'S': 'Ser',
    'T': 'Thr',
    'W': 'Trp',
    'Y': 'Tyr',
    'V': 'Val',
}

#: Maps upper-case one-letter DNA nucleotide codes to their names
dna_nucleotide_labels = {
    'A': 'Adenine',
    'T': 'Thymine',
    'C': 'Cytosine',
    'G': 'Guanine',
}

#: Maps lower-case one-letter RNA nucleotide codes to their names
rna_nucleotide_labels = {
    'a': 'adenine',
    'u': 'uracil',
    'c': 'cytosine',
    'g': 'guanine',
}

#: A dictionary of default protein modifications to their preferred value
pmod_namespace = {
    'Ac': 'Ac',
    'acetylation': 'Ac',
    'ADPRib': 'ADPRib',
    'ADP-ribosylation': 'ADPRib',
    'adenosine diphosphoribosyl': 'ADPRib',
    'Farn': 'Farn',
    'farnesylation': 'Farn',
    'Gerger': 'Gerger',
    'geranylgeranylation': 'Gerger',
    'Glyco': 'Glyco',
    'glycosylation': 'Glyco',
    'Hy': 'Hy',
    'hydroxylation': 'Hy',
    'ISG': 'ISG',
    'ISGylation': 'ISG',
    'ISG15-protein conjugation': 'ISG',
    'Me': 'Me',
    'methylation': 'Me',
    'Me1': 'Me1',
    'monomethylation': 'Me1',
    'mono-methylation': 'Me1',
    'Me2': 'Me2',
    'dimethylation': 'Me2',
    'di-methylation': 'Me2',
    'Me3': 'Me3',
    'trimethylation': 'Me3',
    'tri-methylation': 'Me3',
    'Myr': 'Myr',
    'myristoylation': 'Myr',
    'Nedd': 'Nedd',
    'neddylation': 'Nedd',
    'NGlyco': 'NGlyco',
    'N-linked glycosylation': 'NGlyco',
    'NO': 'NO',
    'Nitrosylation': 'NO',
    'OGlyco': 'OGlyco',
    'O-linked glycosylation': 'OGlyco',
    'Palm': 'Palm',
    'palmitoylation': 'Palm',
    'Ph': 'Ph',
    'phosphorylation': 'Ph',
    'Sulf': 'Sulf',
    'sulfation': 'Sulf',
    'sulphation': 'Sulf',
    'sulfur addition': 'Sulf',
    'sulphur addition': 'Sulf',
    'sulfonation': 'sulfonation',
    'sulphonation': 'sulfonation',
    'Sumo': 'Sumo',
    'SUMOylation': 'Sumo',
    'Ub': 'Ub',
    'ubiquitination': 'Ub',
    'ubiquitinylation': 'Ub',
    'ubiquitylation': 'Ub',
    'UbK48': 'UbK48',
    'Lysine 48-linked polyubiquitination': 'UbK48',
    'UbK63': 'UbK63',
    'Lysine 63-linked polyubiquitination': 'UbK63',
    'UbMono': 'UbMono',
    'monoubiquitination': 'UbMono',
    'UbPoly': 'UbPoly',
    'polyubiquitination': 'UbPoly',

    # PyBEL Variants
    'Ox': 'Ox',
    'oxidation': 'Ox',
}

#: Use Gene Ontology children of go_0006464: "cellular protein modification process"
pmod_mappings = {
    'Ac': {
        'synonyms': ['Ac', 'acetylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006473', name='protein acetylation'),
            Entity(namespace='mod', identifier='00394', name='acetylated residue'),
            Entity(namespace='mop', identifier='0000030', name='acetylation'),
            Entity(namespace='sbo', identifier='0000215', name='acetylation'),
        ],
    },
    'ADPRib': {
        # NOTE(review): 'ADPRib' appears twice and 'ADP-rybosylation' looks like a misspelling;
        # both are kept as-is in case the misspelling is a deliberately accepted synonym - confirm.
        'synonyms': ['ADPRib', 'ADP-ribosylation', 'ADPRib', 'ADP-rybosylation', 'adenosine diphosphoribosyl'],
        'xrefs': [
            Entity(namespace='go', identifier='0006471', name='protein ADP-ribosylation'),
            Entity(
                namespace='mod',
                identifier='00752',
                name='adenosine diphosphoribosyl (ADP-ribosyl) modified residue',
            ),
            Entity(namespace='mop', identifier='0000220', name='adenosinediphosphoribosylation'),
        ],
    },
    'Farn': {
        'synonyms': ['Farn', 'farnesylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0018343', name='protein farnesylation'),
            Entity(namespace='mod', identifier='00437', name='farnesylated residue'),
            Entity(namespace='mop', identifier='0000429', name='farnesylation'),
        ],
    },
    'Gerger': {
        'synonyms': ['Gerger', 'geranylgeranylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0018344', name='protein geranylgeranylation'),
            Entity(namespace='mod', identifier='00441', name='geranylgeranylated residue'),
            Entity(namespace='mop', identifier='0000431', name='geranylgeranylation'),
        ],
    },
    'Glyco': {
        'synonyms': ['Glyco', 'glycosylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006486', name='protein glycosylation'),
            Entity(namespace='mod', identifier='00693', name='glycosylated residue'),
            Entity(namespace='mop', identifier='0000162', name='glycosylation'),
        ],
    },
    'Hy': {
        # Two separate synonyms: without the comma Python concatenates the adjacent
        # literals into the single string 'Hyhydroxylation'.
        'synonyms': ['Hy', 'hydroxylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0018126', name='protein hydroxylation'),
            Entity(namespace='mod', identifier='00677', name='hydroxylated residue'),
            Entity(namespace='mop', identifier='0000673', name='hydroxylation'),
        ],
    },
    'ISG': {
        'synonyms': ['ISG', 'ISGylation', 'ISG15-protein conjugation'],
        'xrefs': [
            Entity(namespace='go', identifier='0032020', name='ISG15-protein conjugation'),
        ],
        'activities': [
            Entity(namespace='go', identifier='0042296', name='ISG15 transferase activity'),
        ],
    },
    'Me': {
        'synonyms': ['Me', 'methylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006479', name='protein methylation'),
            Entity(namespace='mod', identifier='00427', name='methylated residue'),
        ],
    },
    'Me1': {
        'synonyms': ['Me1', 'monomethylation', 'mono-methylation'],
        'xrefs': [
            Entity(namespace='mod', identifier='00599', name='monomethylated residue'),
        ],
        'is_a': ['Me'],
    },
    'Me2': {
        'synonyms': ['Me2', 'dimethylation', 'di-methylation'],
        'xrefs': [
            Entity(namespace='mod', identifier='00429', name='dimethylated residue'),
        ],
        'is_a': ['Me'],
    },
    'Me3': {
        'synonyms': ['Me3', 'trimethylation', 'tri-methylation'],
        'xrefs': [
            Entity(namespace='mod', identifier='00430', name='trimethylated residue'),
        ],
        'is_a': ['Me'],
    },
    'Myr': {
        'synonyms': ['Myr', 'myristoylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0018377', name='protein myristoylation'),
            Entity(namespace='mod', identifier='00438', name='myristoylated residue'),
        ],
    },
    'Nedd': {
        'synonyms': ['Nedd', 'neddylation', 'RUB1-protein conjugation'],
        'xrefs': [
            Entity(namespace='go', identifier='0045116', name='protein neddylation'),
            Entity(namespace='mod', identifier='01150', name='neddylated lysine'),
        ],
    },
    'NGlyco': {
        'synonyms': ['NGlyco', 'N-linked glycosylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006487', name='protein N-linked glycosylation'),
            Entity(namespace='mod', identifier='00006', name='N-glycosylated residue'),
            Entity(namespace='mop', identifier='0002162', name='N-glycosylation'),
        ],
        'is_a': ['Glyco'],
    },
    'NO': {
        'synonyms': ['NO', 'Nitrosylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0017014', name='protein nitrosylation'),
        ],
    },
    'Ox': {
        'synonyms': ['Ox', 'oxidation'],
        'xrefs': [
            Entity(namespace='go', identifier='0018158', name='protein oxidation'),
        ],
    },
    'OGlyco': {
        'synonyms': ['OGlyco', 'O-linked glycosylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006493', name='protein O-linked glycosylation'),
            Entity(namespace='mod', identifier='00396', name='O-glycosylated residue'),
            Entity(namespace='mop', identifier='0003162', name='O-glycosylation'),
        ],
        'is_a': ['Glyco'],
    },
    'Palm': {
        'synonyms': ['Palm', 'palmitoylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0018345', name='protein palmitoylation'),
            Entity(namespace='mod', identifier='00440', name='palmitoylated residue'),
        ],
    },
    'Ph': {
        'synonyms': ['Ph', 'phosphorylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006468', name='protein phosphorylation'),
            Entity(namespace='mod', identifier='00696'),
        ],
    },
    'Sulf': {
        'synonyms': [
            'Sulf',
            'sulfation',
            'sulphation',
            'sulfur addition',
            'sulphur addition',
            'sulfonation',
            'sulphonation',
        ],
        'xrefs': [
            Entity(namespace='go', identifier='0006477', name='protein sulfation'),
            Entity(namespace='mod', identifier='00695', name='sulfated residue'),
            Entity(namespace='mop', identifier='0000559', name='sulfonation'),
        ],
        'target': [
            Entity(namespace='chebi', identifier='29922', name='sulfo group'),
        ],
    },
    'Sumo': {
        'synonyms': ['Sumo', 'SUMOylation', 'Sumoylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0016925', name='protein sumoylation'),
            Entity(namespace='mod', identifier='01149', name='sumoylated lysine'),
        ],
        'activities': [
            Entity(namespace='go', identifier='0019789', name='SUMO transferase activity'),
        ],
    },
    'Ub': {
        'synonyms': ['Ub', 'ubiquitination', 'ubiquitinylation', 'ubiquitylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0016567', name='protein ubiquitination'),
            Entity(namespace='mod', identifier='01148', name='ubiquitinylated lysine'),
            Entity(namespace='sbo', identifier='0000224', name='ubiquitination'),
        ],
    },
    'UbK48': {
        'synonyms': ['UbK48', 'Lysine 48-linked polyubiquitination'],
        'xrefs': [
            Entity(namespace='go', identifier='0070936', name='protein K48-linked ubiquitination'),
        ],
    },
    'UbK63': {
        'synonyms': ['UbK63', 'Lysine 63-linked polyubiquitination'],
        'xrefs': [
            Entity(namespace='go', identifier='0070534', name='protein K63-linked ubiquitination'),
        ],
    },
    'UbMono': {
        'synonyms': ['UbMono', 'monoubiquitination'],
        'xrefs': [
            Entity(namespace='go', identifier='0006513', name='protein monoubiquitination'),
        ],
    },
    'UbPoly': {
        'synonyms': ['UbPoly', 'polyubiquitination'],
        'xrefs': [
            Entity(namespace='go', identifier='0000209', name='protein polyubiquitination'),
        ],
    },
}

#: A dictionary of legacy (BEL 1.0) default namespace protein modifications to their BEL 2.0 preferred value
pmod_legacy_labels = {
    'P': 'Ph',
    'A': 'Ac',
    'F': 'Farn',
    'G': 'Glyco',
    'H': 'Hy',
    'M': 'Me',
    'R': 'ADPRib',
    'S': 'Sumo',
    'U': 'Ub',
    'O': 'Ox',
}

#: A dictionary of default gene modifications. This is a PyBEL variant to the BEL specification.
gmod_namespace = {
    'methylation': 'Me',
    'Me': 'Me',
    'M': 'Me',
    'ADPRib': 'ADPRib',
}

#: Use Gene Ontology children of go:0006304 ! "DNA modification"
gmod_mappings = {
    'Me': {
        'synonyms': ['Me', 'M', 'methylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006306', name='DNA methylation'),
        ],
    },
    'ADPRib': {
        'synonyms': ['ADPRib'],
        'xrefs': [
            Entity(namespace='go', identifier='0030592', name='DNA ADP-ribosylation'),
        ],
    },
}

BEL_DEFAULT_NAMESPACE_VERSION = '2.1.1'
BEL_DEFAULT_NAMESPACE_URL = 'http://openbel.org/2.1.1.belns'  # just needs something unique... will change later