# -*- coding: utf-8 -*-
"""Language constants for BEL.
This module contains mappings between PyBEL's internal constants and BEL language keywords.
"""
from typing import Optional
from .constants import (
ABUNDANCE, BEL_DEFAULT_NAMESPACE, BIOPROCESS, CELL_SURFACE, COMPLEX, COMPOSITE, EXTRACELLULAR, GENE, IDENTIFIER,
INTRACELLULAR, MIRNA, NAME, NAMESPACE, PATHOLOGY, PROTEIN, RNA, TRANSCRIBED_TO, TRANSLATED_TO,
)
from .utils import ensure_quotes
class Entity(dict):
    """Represent a named entity with a namespace and a name and/or identifier.

    The entity is stored as a plain dictionary: the namespace lives under the
    ``NAMESPACE`` key, and the name and identifier (when given) live under the
    ``NAME`` and ``IDENTIFIER`` keys, respectively.
    """

    def __init__(
        self,
        *,
        namespace: str,
        name: Optional[str] = None,
        identifier: Optional[str] = None
    ) -> None:
        """Create a dictionary representing a reference to an entity.

        :param namespace: The namespace to which the entity belongs
        :param name: The name of the entity
        :param identifier: The identifier of the entity in the namespace
        :raises ValueError: if neither a name nor an identifier is given, or if a given value is empty
        :raises TypeError: if the namespace, name, or identifier is given but is not a string
        """
        if name is None and identifier is None:
            raise ValueError('cannot create an entity with neither a name nor identifier')

        if not isinstance(namespace, str):
            raise TypeError('namespace should be a string: {}'.format(namespace))
        if not namespace:
            raise ValueError('namespace should be non-empty')

        super().__init__({
            NAMESPACE: namespace,
        })

        # The name and identifier keys are only added when a value was supplied
        if name is not None:
            if not isinstance(name, str):
                raise TypeError('name should be a string: {}'.format(name))
            if not name:
                raise ValueError('name should be non-empty')
            self[NAME] = name

        if identifier is not None:
            if not isinstance(identifier, str):
                raise TypeError('identifier should be a string: {}'.format(identifier))
            if not identifier:
                raise ValueError('identifier should be non-empty')
            self[IDENTIFIER] = identifier

    @property
    def namespace(self) -> str:  # noqa: D401
        """The entity's namespace."""
        return self[NAMESPACE]

    @property
    def name(self) -> Optional[str]:  # noqa: D401
        """The entity's name or label, or None if it was created without one."""
        return self.get(NAME)

    @property
    def identifier(self) -> Optional[str]:  # noqa: D401
        """The entity's identifier, or None if it was created without one."""
        return self.get(IDENTIFIER)

    @property
    def curie(self) -> str:
        """Return this entity as a CURIE.

        Entities in the BEL default namespace are rendered as their bare name.
        All other entities are rendered as ``namespace:reference``, where the
        reference is the identifier if there is one and the name otherwise,
        passed through :func:`ensure_quotes`.
        """
        if self[NAMESPACE] == BEL_DEFAULT_NAMESPACE:
            if NAME in self:
                return self[NAME]
            # An entity may have been created with only an identifier; fall back
            # to it instead of raising a KeyError from ``str(entity)``
            return self[IDENTIFIER]

        return '{}:{}'.format(
            self.namespace,
            ensure_quotes(self.identifier or self.name),
        )

    @property
    def obo(self) -> str:
        """Return this entity as an OBO-style CURIE (``namespace:identifier ! name``)."""
        # NOTE(review): either the identifier or the name may be None here; how
        # ensure_quotes() treats None is not visible from this module - confirm.
        return '{}:{} ! {}'.format(
            self.namespace,
            ensure_quotes(self.identifier),
            ensure_quotes(self.name),
        )

    def __str__(self) -> str:  # noqa: D105
        return self.curie
#: Maps every label accepted by the ma() function in activity(p(X), ma(Y)) to its short form
activity_labels = {
    label: group[-1]
    # Each group lists the long form (if there is one) followed by the short form,
    # which is also the value every label in the group maps to
    for group in (
        ('catalyticActivity', 'cat'),
        ('chaperoneActivity', 'chap'),
        ('gtpBoundActivity', 'gtp'),
        ('kinaseActivity', 'kin'),
        ('peptidaseActivity', 'pep'),
        ('phosphataseActivity', 'phos'),
        ('ribosylationActivity', 'ribo'),
        ('transcriptionalActivity', 'tscript'),
        ('transportActivity', 'tport'),
        ('molecularActivity',),
        # Added by PyBEL
        ('guanineNucleotideExchangeFactorActivity', 'gef'),
        ('gtpaseActivatingProteinActivity', 'gap'),
    )
    for label in group
}
#: Gene Ontology Molecular Function terms standing in for the default BEL molecular activities
activity_mapping = {
    abbreviation: Entity(namespace='go', identifier=go_id, name=go_name)
    for abbreviation, go_id, go_name in (
        ('cat', '0003824', 'catalytic activity'),
        ('chap', '0044183', 'protein binding involved in protein folding'),
        ('gtp', '0005525', 'GTP binding'),
        ('kin', '0016301', 'kinase activity'),
        ('pep', '0008233', 'peptidase activity'),
        ('phos', '0016791', 'phosphatase activity'),
        ('ribo', '0003956', 'NAD(P)+-protein-arginine ADP-ribosyltransferase activity'),
        ('tscript', '0001071', 'nucleic acid binding transcription factor activity'),
        ('tport', '0005215', 'transporter activity'),
        ('molecularActivity', '0003674', 'molecular_function'),
        ('gef', '0005085', 'guanyl-nucleotide exchange factor activity'),
        # NOTE(review): this is a *binding* term, unlike the activity terms above;
        # a GTPase activator activity term may have been intended - confirm.
        ('gap', '0032794', 'GTPase activating protein binding'),
    )
}

# Every accepted activity label (long and short forms), in declaration order
activities = list(activity_labels)
# Gene Ontology terms exposed under their own names and reused in the mapping below
cytoplasm = Entity(namespace='go', identifier='0005737', name='cytoplasm')
nucleus = Entity(namespace='go', identifier='0005634', name='nucleus')

#: Gene Ontology Cellular Component terms standing in for the default BEL cellular components
compartment_mapping = dict([
    (INTRACELLULAR, Entity(namespace='go', identifier='0005622', name='intracellular')),
    (EXTRACELLULAR, Entity(namespace='go', identifier='0005615', name='extracellular space')),
    (CELL_SURFACE, Entity(namespace='go', identifier='0009986', name='cell surface')),
    ('cytoplasm', cytoplasm),
    ('nucleus', nucleus),
])

# The keys of the compartment mapping, in declaration order
compartments = list(compartment_mapping)
#: Maps each BEL term (both spellings of every function) to its PyBEL internal constant
abundance_labels = {
    keyword: function
    # Each row is the internal constant followed by the BEL keywords that denote it
    for function, *keywords in (
        (ABUNDANCE, 'abundance', 'a'),
        (GENE, 'geneAbundance', 'g'),
        (MIRNA, 'microRNAAbundance', 'm'),
        (PROTEIN, 'proteinAbundance', 'p'),
        (RNA, 'rnaAbundance', 'r'),
        (BIOPROCESS, 'biologicalProcess', 'bp'),
        (PATHOLOGY, 'pathology', 'path'),
        (COMPOSITE, 'composite', 'compositeAbundance'),
        (COMPLEX, 'complex', 'complexAbundance'),
    )
    for keyword in keywords
}
#: Systems Biology Ontology terms standing in for the BEL abundance types
abundance_sbo_mapping = {
    function: Entity(namespace='sbo', identifier=sbo_id, name=sbo_name)
    for function, sbo_id, sbo_name in (
        (MIRNA, '0000316', 'microRNA'),
        (BIOPROCESS, '0000375', 'process'),
        (GENE, '0000243', 'gene'),
        (RNA, '0000278', 'messenger RNA'),
        (COMPLEX, '0000297', 'protein complex'),
        (PATHOLOGY, '0000358', 'phenotype'),
    )
}

#: Systems Biology Ontology terms standing in for the translation and transcription relations
relation_sbo_mapping = {
    relation: Entity(namespace='sbo', identifier=sbo_id, name=sbo_name)
    for relation, sbo_id, sbo_name in (
        (TRANSLATED_TO, '0000184', 'translation'),
        (TRANSCRIBED_TO, '0000183', 'transcription'),
    )
}
#: Maps one-letter amino acid codes to their three-letter codes
amino_acid_dict = dict([
    ('A', 'Ala'),
    ('R', 'Arg'),
    ('N', 'Asn'),
    ('D', 'Asp'),
    ('C', 'Cys'),
    ('E', 'Glu'),
    ('Q', 'Gln'),
    ('G', 'Gly'),
    ('H', 'His'),
    ('I', 'Ile'),
    ('L', 'Leu'),
    ('K', 'Lys'),
    ('M', 'Met'),
    ('F', 'Phe'),
    ('P', 'Pro'),
    ('S', 'Ser'),
    ('T', 'Thr'),
    ('W', 'Trp'),
    ('Y', 'Tyr'),
    ('V', 'Val'),
])
#: Maps upper-case DNA nucleotide letters to their capitalized base names
dna_nucleotide_labels = dict([
    ('A', 'Adenine'),
    ('T', 'Thymine'),
    ('C', 'Cytosine'),
    ('G', 'Guanine'),
])

#: Maps lower-case RNA nucleotide letters to their lower-case base names
rna_nucleotide_labels = dict([
    ('a', 'adenine'),
    ('u', 'uracil'),
    ('c', 'cytosine'),
    ('g', 'guanine'),
])
#: Maps every accepted default protein modification label to its preferred value
pmod_namespace = {
    label: group[0]
    # Each group starts with the preferred value, followed by the other labels that map to it
    for group in (
        ('Ac', 'acetylation'),
        ('ADPRib', 'ADP-ribosylation', 'adenosine diphosphoribosyl'),
        ('Farn', 'farnesylation'),
        ('Gerger', 'geranylgeranylation'),
        ('Glyco', 'glycosylation'),
        ('Hy', 'hydroxylation'),
        ('ISG', 'ISGylation', 'ISG15-protein conjugation'),
        ('Me', 'methylation'),
        ('Me1', 'monomethylation', 'mono-methylation'),
        ('Me2', 'dimethylation', 'di-methylation'),
        ('Me3', 'trimethylation', 'tri-methylation'),
        ('Myr', 'myristoylation'),
        ('Nedd', 'neddylation'),
        ('NGlyco', 'N-linked glycosylation'),
        ('NO', 'Nitrosylation'),
        ('OGlyco', 'O-linked glycosylation'),
        ('Palm', 'palmitoylation'),
        ('Ph', 'phosphorylation'),
        ('Sulf', 'sulfation', 'sulphation', 'sulfur addition', 'sulphur addition'),
        # NOTE(review): 'sulfonation' is its own preferred value here, although the
        # 'Sulf' entry of pmod_mappings lists it as a synonym - confirm which is intended.
        ('sulfonation', 'sulphonation'),
        ('Sumo', 'SUMOylation'),
        ('Ub', 'ubiquitination', 'ubiquitinylation', 'ubiquitylation'),
        ('UbK48', 'Lysine 48-linked polyubiquitination'),
        ('UbK63', 'Lysine 63-linked polyubiquitination'),
        ('UbMono', 'monoubiquitination'),
        ('UbPoly', 'polyubiquitination'),
        # PyBEL Variants
        ('Ox', 'oxidation'),
    )
    for label in group
}
#: Use Gene Ontology children of go_0006464: "cellular protein modification process"
#:
#: Each key is a preferred protein modification label. Each value is a dictionary with
#: a ``synonyms`` list of labels and an ``xrefs`` list of :class:`Entity` cross-references;
#: some entries additionally carry ``is_a`` (parent labels), ``activities``, or ``target``.
pmod_mappings = {
    'Ac': {
        'synonyms': ['Ac', 'acetylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006473', name='protein acetylation'),
            Entity(namespace='mod', identifier='00394', name='acetylated residue'),
            Entity(namespace='mop', identifier='0000030', name='acetylation'),
            Entity(namespace='sbo', identifier='0000215', name='acetylation'),
        ],
    },
    'ADPRib': {
        # NOTE(review): 'ADP-rybosylation' looks like a misspelling; it is kept in case
        # it is listed deliberately as a known variant - confirm.
        'synonyms': ['ADPRib', 'ADP-ribosylation', 'ADP-rybosylation', 'adenosine diphosphoribosyl'],
        'xrefs': [
            Entity(namespace='go', identifier='0006471', name='protein ADP-ribosylation'),
            Entity(
                namespace='mod', identifier='00752',
                name='adenosine diphosphoribosyl (ADP-ribosyl) modified residue',
            ),
            Entity(namespace='mop', identifier='0000220', name='adenosinediphosphoribosylation'),
        ],
    },
    'Farn': {
        'synonyms': ['Farn', 'farnesylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0018343', name='protein farnesylation'),
            Entity(namespace='mod', identifier='00437', name='farnesylated residue'),
            Entity(namespace='mop', identifier='0000429', name='farnesylation'),
        ],
    },
    'Gerger': {
        'synonyms': ['Gerger', 'geranylgeranylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0018344', name='protein geranylgeranylation'),
            Entity(namespace='mod', identifier='00441', name='geranylgeranylated residue'),
            Entity(namespace='mop', identifier='0000431', name='geranylgeranylation'),
        ],
    },
    'Glyco': {
        'synonyms': ['Glyco', 'glycosylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006486', name='protein glycosylation'),
            Entity(namespace='mod', identifier='00693', name='glycosylated residue'),
            Entity(namespace='mop', identifier='0000162', name='glycosylation'),
        ],
    },
    'Hy': {
        # The comma matters: adjacent string literals are concatenated implicitly,
        # which would collapse these two synonyms into 'Hyhydroxylation'
        'synonyms': ['Hy', 'hydroxylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0018126', name='protein hydroxylation'),
            Entity(namespace='mod', identifier='00677', name='hydroxylated residue'),
            Entity(namespace='mop', identifier='0000673', name='hydroxylation'),
        ],
    },
    'ISG': {
        'synonyms': ['ISG', 'ISGylation', 'ISG15-protein conjugation'],
        'xrefs': [
            Entity(namespace='go', identifier='0032020', name='ISG15-protein conjugation'),
        ],
        'activities': [
            Entity(namespace='go', identifier='0042296', name='ISG15 transferase activity'),
        ],
    },
    'Me': {
        'synonyms': ['Me', 'methylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006479', name='protein methylation'),
            Entity(namespace='mod', identifier='00427', name='methylated residue'),
        ],
    },
    'Me1': {
        'synonyms': ['Me1', 'monomethylation', 'mono-methylation'],
        'xrefs': [
            Entity(namespace='mod', identifier='00599', name='monomethylated residue'),
        ],
        'is_a': ['Me'],
    },
    'Me2': {
        'synonyms': ['Me2', 'dimethylation', 'di-methylation'],
        'xrefs': [
            Entity(namespace='mod', identifier='00429', name='dimethylated residue'),
        ],
        'is_a': ['Me'],
    },
    'Me3': {
        'synonyms': ['Me3', 'trimethylation', 'tri-methylation'],
        'xrefs': [
            Entity(namespace='mod', identifier='00430', name='trimethylated residue'),
        ],
        'is_a': ['Me'],
    },
    'Myr': {
        'synonyms': ['Myr', 'myristoylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0018377', name='protein myristoylation'),
            Entity(namespace='mod', identifier='00438', name='myristoylated residue'),
        ],
    },
    'Nedd': {
        'synonyms': ['Nedd', 'neddylation', 'RUB1-protein conjugation'],
        'xrefs': [
            Entity(namespace='go', identifier='0045116', name='protein neddylation'),
            Entity(namespace='mod', identifier='01150', name='neddylated lysine'),
        ],
    },
    'NGlyco': {
        'synonyms': ['NGlyco', 'N-linked glycosylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006487', name='protein N-linked glycosylation'),
            Entity(namespace='mod', identifier='00006', name='N-glycosylated residue'),
            Entity(namespace='mop', identifier='0002162', name='N-glycosylation'),
        ],
        'is_a': ['Glyco'],
    },
    'NO': {
        'synonyms': ['NO', 'Nitrosylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0017014', name='protein nitrosylation'),
        ],
    },
    'Ox': {
        'synonyms': ['Ox', 'oxidation'],
        'xrefs': [
            Entity(namespace='go', identifier='0018158', name='protein oxidation'),
        ],
    },
    'OGlyco': {
        'synonyms': ['OGlyco', 'O-linked glycosylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006493', name='protein O-linked glycosylation'),
            Entity(namespace='mod', identifier='00396', name='O-glycosylated residue'),
            Entity(namespace='mop', identifier='0003162', name='O-glycosylation'),
        ],
        'is_a': ['Glyco'],
    },
    'Palm': {
        'synonyms': ['Palm', 'palmitoylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0018345', name='protein palmitoylation'),
            Entity(namespace='mod', identifier='00440', name='palmitoylated residue'),
        ],
    },
    'Ph': {
        'synonyms': ['Ph', 'phosphorylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0006468', name='protein phosphorylation'),
            # NOTE(review): the only cross-reference in this table without a name -
            # presumably 'phosphorylated residue'; confirm before adding it.
            Entity(namespace='mod', identifier='00696'),
        ],
    },
    'Sulf': {
        'synonyms': [
            'Sulf', 'sulfation', 'sulphation', 'sulfur addition', 'sulphur addition', 'sulfonation',
            'sulphonation',
        ],
        'xrefs': [
            Entity(namespace='go', identifier='0006477', name='protein sulfation'),
            Entity(namespace='mod', identifier='00695', name='sulfated residue'),
            Entity(namespace='mop', identifier='0000559', name='sulfonation'),
        ],
        'target': [
            Entity(namespace='chebi', identifier='29922', name='sulfo group'),
        ],
    },
    'Sumo': {
        'synonyms': ['Sumo', 'SUMOylation', 'Sumoylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0016925', name='protein sumoylation'),
            Entity(namespace='mod', identifier='01149', name='sumoylated lysine'),
        ],
        'activities': [
            Entity(namespace='go', identifier='0019789', name='SUMO transferase activity'),
        ],
    },
    'Ub': {
        'synonyms': ['Ub', 'ubiquitination', 'ubiquitinylation', 'ubiquitylation'],
        'xrefs': [
            Entity(namespace='go', identifier='0016567', name='protein ubiquitination'),
            Entity(namespace='mod', identifier='01148', name='ubiquitinylated lysine'),
            Entity(namespace='sbo', identifier='0000224', name='ubiquitination'),
        ],
    },
    'UbK48': {
        'synonyms': ['UbK48', 'Lysine 48-linked polyubiquitination'],
        'xrefs': [
            Entity(namespace='go', identifier='0070936', name='protein K48-linked ubiquitination'),
        ],
    },
    'UbK63': {
        'synonyms': ['UbK63', 'Lysine 63-linked polyubiquitination'],
        'xrefs': [
            Entity(namespace='go', identifier='0070534', name='protein K63-linked ubiquitination'),
        ],
    },
    'UbMono': {
        'synonyms': ['UbMono', 'monoubiquitination'],
        'xrefs': [
            Entity(namespace='go', identifier='0006513', name='protein monoubiquitination'),
        ],
    },
    'UbPoly': {
        'synonyms': ['UbPoly', 'polyubiquitination'],
        'xrefs': [
            Entity(namespace='go', identifier='0000209', name='protein polyubiquitination'),
        ],
    },
}
#: Maps the legacy (BEL 1.0) single-letter default namespace protein modifications
#: to their BEL 2.0 preferred values
pmod_legacy_labels = dict([
    ('P', 'Ph'),
    ('A', 'Ac'),
    ('F', 'Farn'),
    ('G', 'Glyco'),
    ('H', 'Hy'),
    ('M', 'Me'),
    ('R', 'ADPRib'),
    ('S', 'Sumo'),
    ('U', 'Ub'),
    ('O', 'Ox'),
])
#: Maps every accepted default gene modification label to its preferred value.
#: This is a PyBEL variant to the BEL specification.
gmod_namespace = {
    label: preferred
    for preferred, labels in (
        ('Me', ('methylation', 'Me', 'M')),
        ('ADPRib', ('ADPRib',)),
    )
    for label in labels
}
#: Use Gene Ontology children of go:0006304 ! "DNA modification"
#:
#: Each preferred gene modification label maps to its ``synonyms`` and a single-entry
#: ``xrefs`` list holding the corresponding Gene Ontology term.
gmod_mappings = {
    label: {
        'synonyms': synonyms,
        'xrefs': [
            Entity(namespace='go', identifier=go_id, name=go_name),
        ],
    }
    for label, synonyms, go_id, go_name in (
        ('Me', ['Me', 'M', 'methylation'], '0006306', 'DNA methylation'),
        ('ADPRib', ['ADPRib'], '0030592', 'DNA ADP-ribosylation'),
    )
}
# Version string and URL recorded for the BEL default namespace
BEL_DEFAULT_NAMESPACE_VERSION = '2.1.1'
BEL_DEFAULT_NAMESPACE_URL = 'http://openbel.org/2.1.1.belns'  # placeholder: only needs to be unique for now; expected to change later