Source code for pybel.language

# -*- coding: utf-8 -*-

"""Language constants for BEL.

This module contains mappings between PyBEL's internal constants and BEL language keywords.
"""

import warnings
from typing import Optional

from .constants import (
    ABUNDANCE,
    BIOPROCESS,
    CELL_SURFACE,
    COMPLEX,
    COMPOSITE,
    EXTRACELLULAR,
    GENE,
    IDENTIFIER,
    INTRACELLULAR,
    MIRNA,
    NAME,
    NAMESPACE,
    PATHOLOGY,
    PROTEIN,
    RNA,
    TRANSCRIBED_TO,
    TRANSLATED_TO,
)
from .utils import ensure_quotes


class Entity(dict):
    """Represents a named entity with a namespace and name/identifier.

    The entity is stored as a plain dictionary: the ``NAMESPACE`` key is always
    present, while the ``NAME`` and ``IDENTIFIER`` keys are only set when the
    corresponding argument was given to the constructor.
    """

    def __init__(
        self,
        *,
        namespace: str,
        name: Optional[str] = None,
        identifier: Optional[str] = None,
    ) -> None:
        """Create a dictionary representing a reference to an entity.

        :param namespace: The namespace to which the entity belongs
        :param name: The name of the entity
        :param identifier: The identifier of the entity in the namespace
        :raises ValueError: If neither a name nor an identifier is given, or if
            the namespace, name, or identifier is an empty string
        :raises TypeError: If the namespace, name, or identifier is given but is
            not a string
        """
        if name is None and identifier is None:
            raise ValueError("cannot create an entity with neither a name nor identifier")
        if not isinstance(namespace, str):
            raise TypeError("namespace should be a string: {}".format(namespace))
        if not namespace:
            raise ValueError("namespace should be non-empty")

        super().__init__({NAMESPACE: namespace})

        if name is not None:
            if not isinstance(name, str):
                raise TypeError("name should be a string: {}".format(name))
            if not name:
                raise ValueError("name should be non-empty")
            self[NAME] = name

        if identifier is not None:
            if not isinstance(identifier, str):
                raise TypeError(f"identifier should be a string. Got {type(identifier)} {identifier}")
            if not identifier:
                raise ValueError("identifier should be non-empty")
            self[IDENTIFIER] = identifier

    @property
    def namespace(self) -> str:  # noqa: D401
        """The entity's namespace."""
        return self[NAMESPACE]

    @property
    def name(self) -> Optional[str]:  # noqa: D401
        """The entity's name or label, or ``None`` if no name is stored."""
        return self.get(NAME)

    @property
    def identifier(self) -> Optional[str]:  # noqa: D401
        """The entity's identifier, or ``None`` if no identifier is stored."""
        return self.get(IDENTIFIER)

    @property
    def curie(self) -> str:
        """Return this entity as a CURIE.

        The identifier is used when one is stored; otherwise the name is used.
        """
        return "{}:{}".format(
            self.namespace,
            ensure_quotes(self.identifier if self.identifier else self.name),
        )

    @property
    def obo(self) -> str:
        """Return this entity as an OBO-style CURIE.

        Both the identifier and the name are passed to ``ensure_quotes``, so
        this is only meaningful when both are stored (see :meth:`__str__`).
        """
        return "{}:{} ! {}".format(
            self.namespace,
            ensure_quotes(self.identifier),
            ensure_quotes(self.name),
        )

    def __str__(self) -> str:  # noqa: D105
        # Use the richer OBO form only when both parts are available.
        return self.obo if self.identifier and self.name else self.curie

    def __hash__(self) -> int:
        # dict is unhashable by default; hash on the three reference fields.
        # The hash reflects the current values, so an entity should not be
        # mutated while it is used as a set member or dictionary key.
        return hash((self.namespace, self.identifier, self.name))
#: Maps text location labels to entities in the ``iao`` and ``sio`` namespaces
text_location_labels = {
    "Abstract": Entity(namespace="iao", identifier="0000315", name="abstract"),
    # NOTE: "Review" is mapped to the generic "publication" entity.
    "Review": Entity(namespace="iao", identifier="0000311", name="publication"),
    "Results": Entity(namespace="iao", identifier="0000318", name="results section"),
    "Legend": Entity(namespace="sio", identifier="000468", name="legend"),
}

#: A dictionary of activity labels used in the ma() function in activity(p(X), ma(Y))
activity_labels = {
    "catalyticActivity": "cat",
    "cat": "cat",
    "chaperoneActivity": "chap",
    "chap": "chap",
    "gtpBoundActivity": "gtp",
    "gtp": "gtp",
    "kinaseActivity": "kin",
    "kin": "kin",
    "peptidaseActivity": "pep",
    "pep": "pep",
    "phosphataseActivity": "phos",
    "phos": "phos",
    "ribosylationActivity": "ribo",
    "ribo": "ribo",
    "transcriptionalActivity": "tscript",
    "tscript": "tscript",
    "transportActivity": "tport",
    "tport": "tport",
    "molecularActivity": "molecularActivity",
    # Added by PyBEL
    "guanineNucleotideExchangeFactorActivity": "gef",
    "gef": "gef",
    "gtpaseActivatingProteinActivity": "gap",
    "gap": "gap",
}

#: Maps the default BEL molecular activities to Gene Ontology Molecular Functions
activity_mapping = {
    # NOTE(review): "act" and "molecularActivity" both point at go:0003674 but
    # spell the name differently ("molecular function" vs "molecular_function").
    "act": Entity(namespace="go", name="molecular function", identifier="0003674"),
    "cat": Entity(namespace="go", name="catalytic activity", identifier="0003824"),
    "chap": Entity(
        namespace="go",
        name="protein binding involved in protein folding",
        identifier="0044183",
    ),
    "gtp": Entity(namespace="go", name="GTP binding", identifier="0005525"),
    "kin": Entity(namespace="go", name="kinase activity", identifier="0016301"),
    "pep": Entity(namespace="go", name="peptidase activity", identifier="0008233"),
    "phos": Entity(namespace="go", name="phosphatase activity", identifier="0016791"),
    "ribo": Entity(
        namespace="go",
        name="NAD(P)+-protein-arginine ADP-ribosyltransferase activity",
        identifier="0003956",
    ),
    "tscript": Entity(
        namespace="go",
        name="nucleic acid binding transcription factor activity",
        identifier="0001071",
    ),
    "tport": Entity(namespace="go", name="transporter activity", identifier="0005215"),
    "molecularActivity": Entity(namespace="go", name="molecular_function", identifier="0003674"),
    "gef": Entity(
        namespace="go",
        name="guanyl-nucleotide exchange factor activity",
        identifier="0005085",
    ),
    "gap": Entity(namespace="go", name="GTPase activating protein binding", identifier="0032794"),
}

#: All activity labels (long and short forms) accepted in ``activity_labels``
activities = list(activity_labels)

cytoplasm = Entity(name="cytoplasm", namespace="go", identifier="0005737")
nucleus = Entity(name="nucleus", namespace="go", identifier="0005634")
intracellular = Entity(name="intracellular", namespace="go", identifier="0005622")
extracellular = Entity(name="extracellular space", namespace="go", identifier="0005615")
cell_surface = Entity(name="cell surface", namespace="go", identifier="0009986")

#: Maps the default BEL cellular components to Gene Ontology Cellular Components
compartment_mapping = {
    INTRACELLULAR: intracellular,
    EXTRACELLULAR: extracellular,
    CELL_SURFACE: cell_surface,
    "cytoplasm": cytoplasm,
    "nucleus": nucleus,
}

#: The keys of ``compartment_mapping``
compartments = list(compartment_mapping)

#: Provides a mapping from BEL terms to PyBEL internal constants
abundance_labels = {
    "abundance": ABUNDANCE,
    "a": ABUNDANCE,
    "geneAbundance": GENE,
    "g": GENE,
    "microRNAAbundance": MIRNA,
    "m": MIRNA,
    "proteinAbundance": PROTEIN,
    "p": PROTEIN,
    "rnaAbundance": RNA,
    "r": RNA,
    "biologicalProcess": BIOPROCESS,
    "bp": BIOPROCESS,
    "pathology": PATHOLOGY,
    "path": PATHOLOGY,
    "composite": COMPOSITE,
    "compositeAbundance": COMPOSITE,
    "complex": COMPLEX,
    "complexAbundance": COMPLEX,
}

#: Maps the BEL abundance types to the Systems Biology Ontology
abundance_sbo_mapping = {
    MIRNA: Entity(namespace="sbo", name="microRNA", identifier="0000316"),
    BIOPROCESS: Entity(namespace="sbo", name="process", identifier="0000375"),
    GENE: Entity(namespace="sbo", name="gene", identifier="0000243"),
    RNA: Entity(namespace="sbo", name="messenger RNA", identifier="0000278"),
    COMPLEX: Entity(namespace="sbo", name="protein complex", identifier="0000297"),
    PATHOLOGY: Entity(namespace="sbo", name="phenotype", identifier="0000358"),
}

#: Maps the translation and transcription relations to entities in the ``sbo`` namespace
relation_sbo_mapping = {
    TRANSLATED_TO: Entity(namespace="sbo", name="translation", identifier="0000184"),
    TRANSCRIBED_TO: Entity(namespace="sbo", name="transcription", identifier="0000183"),
}

#: Maps one-letter amino acid codes to their three-letter codes
amino_acid_dict = {
    "A": "Ala",
    "R": "Arg",
    "N": "Asn",
    "D": "Asp",
    "C": "Cys",
    "E": "Glu",
    "Q": "Gln",
    "G": "Gly",
    "H": "His",
    "I": "Ile",
    "L": "Leu",
    "K": "Lys",
    "M": "Met",
    "F": "Phe",
    "P": "Pro",
    "S": "Ser",
    "T": "Thr",
    "W": "Trp",
    "Y": "Tyr",
    "V": "Val",
}

#: Maps upper-case one-letter DNA nucleotide codes to their names
dna_nucleotide_labels = {
    "A": "Adenine",
    "T": "Thymine",
    "C": "Cytosine",
    "G": "Guanine",
}

#: Maps lower-case one-letter RNA nucleotide codes to their names
rna_nucleotide_labels = {
    "a": "adenine",
    "u": "uracil",
    "c": "cytosine",
    "g": "guanine",
}

#: A dictionary of default protein modifications to their preferred value
pmod_namespace = {
    "Ac": "Ac",
    "acetylation": "Ac",
    "ADPRib": "ADPRib",
    "ADP-ribosylation": "ADPRib",
    "adenosine diphosphoribosyl": "ADPRib",
    "Farn": "Farn",
    "farnesylation": "Farn",
    "Gerger": "Gerger",
    "geranylgeranylation": "Gerger",
    "Glyco": "Glyco",
    "glycosylation": "Glyco",
    "Hy": "Hy",
    "hydroxylation": "Hy",
    "ISG": "ISG",
    "ISGylation": "ISG",
    "ISG15-protein conjugation": "ISG",
    "Me": "Me",
    "methylation": "Me",
    "Me1": "Me1",
    "monomethylation": "Me1",
    "mono-methylation": "Me1",
    "Me2": "Me2",
    "dimethylation": "Me2",
    "di-methylation": "Me2",
    "Me3": "Me3",
    "trimethylation": "Me3",
    "tri-methylation": "Me3",
    "Myr": "Myr",
    "myristoylation": "Myr",
    "Nedd": "Nedd",
    "neddylation": "Nedd",
    "NGlyco": "NGlyco",
    "N-linked glycosylation": "NGlyco",
    "NO": "NO",
    "Nitrosylation": "NO",
    "OGlyco": "OGlyco",
    "O-linked glycosylation": "OGlyco",
    "Palm": "Palm",
    "palmitoylation": "Palm",
    "Ph": "Ph",
    "phosphorylation": "Ph",
    "Sulf": "Sulf",
    "sulfation": "Sulf",
    "sulphation": "Sulf",
    "sulfur addition": "Sulf",
    "sulphur addition": "Sulf",
    # NOTE(review): these two map to "sulfonation" rather than "Sulf", although
    # pmod_mappings lists both spellings as synonyms of "Sulf" - confirm intent.
    "sulfonation": "sulfonation",
    "sulphonation": "sulfonation",
    "Sumo": "Sumo",
    "SUMOylation": "Sumo",
    "Ub": "Ub",
    "ubiquitination": "Ub",
    "ubiquitinylation": "Ub",
    "ubiquitylation": "Ub",
    "UbK48": "UbK48",
    "Lysine 48-linked polyubiquitination": "UbK48",
    "UbK63": "UbK63",
    "Lysine 63-linked polyubiquitination": "UbK63",
    "UbMono": "UbMono",
    "monoubiquitination": "UbMono",
    "UbPoly": "UbPoly",
    "polyubiquitination": "UbPoly",
    # PyBEL Variants
    "Ox": "Ox",
    "oxidation": "Ox",
}

#: Use Gene Ontology children of go_0006464: "cellular protein modification process"
pmod_mappings = {
    "Ac": {
        "synonyms": ["Ac", "acetylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0006473", name="protein acetylation"),
            Entity(namespace="mod", identifier="00394", name="acetylated residue"),
            Entity(namespace="mop", identifier="0000030", name="acetylation"),
            Entity(namespace="sbo", identifier="0000215", name="acetylation"),
        ],
    },
    "ADPRib": {
        "synonyms": [
            "ADPRib",
            "ADP-ribosylation",
            # NOTE(review): spelling variant kept as-is; presumably a tolerated misspelling.
            "ADP-rybosylation",
            "adenosine diphosphoribosyl",
        ],
        "xrefs": [
            Entity(namespace="go", identifier="0006471", name="protein ADP-ribosylation"),
            Entity(
                namespace="mod",
                identifier="00752",
                name="adenosine diphosphoribosyl (ADP-ribosyl) modified residue",
            ),
            Entity(
                namespace="mop",
                identifier="0000220",
                name="adenosinediphosphoribosylation",
            ),
        ],
    },
    "Farn": {
        "synonyms": ["Farn", "farnesylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0018343", name="protein farnesylation"),
            Entity(namespace="mod", identifier="00437", name="farnesylated residue"),
            Entity(namespace="mop", identifier="0000429", name="farnesylation"),
        ],
    },
    "Gerger": {
        "synonyms": ["Gerger", "geranylgeranylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0018344", name="protein geranylgeranylation"),
            Entity(namespace="mod", identifier="00441", name="geranylgeranylated residue"),
            Entity(namespace="mop", identifier="0000431", name="geranylgeranylation"),
        ],
    },
    "Glyco": {
        "synonyms": ["Glyco", "glycosylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0006486", name="protein glycosylation"),
            Entity(namespace="mod", identifier="00693", name="glycosylated residue"),
            Entity(namespace="mop", identifier="0000162", name="glycosylation"),
        ],
    },
    "Hy": {
        # The comma matters: adjacent string literals would silently concatenate.
        "synonyms": ["Hy", "hydroxylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0018126", name="protein hydroxylation"),
            Entity(namespace="mod", identifier="00677", name="hydroxylated residue"),
            Entity(namespace="mop", identifier="0000673", name="hydroxylation"),
        ],
    },
    "ISG": {
        "synonyms": ["ISG", "ISGylation", "ISG15-protein conjugation"],
        "xrefs": [
            Entity(namespace="go", identifier="0032020", name="ISG15-protein conjugation"),
        ],
        "activities": [
            Entity(namespace="go", identifier="0042296", name="ISG15 transferase activity"),
        ],
    },
    "Me": {
        "synonyms": ["Me", "methylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0006479", name="protein methylation"),
            Entity(namespace="mod", identifier="00427", name="methylated residue"),
        ],
    },
    "Me1": {
        "synonyms": ["Me1", "monomethylation", "mono-methylation"],
        "xrefs": [
            Entity(namespace="mod", identifier="00599", name="monomethylated residue"),
        ],
        "is_a": ["Me"],
    },
    "Me2": {
        "synonyms": ["Me2", "dimethylation", "di-methylation"],
        "xrefs": [
            Entity(namespace="mod", identifier="00429", name="dimethylated residue"),
        ],
        "is_a": ["Me"],
    },
    "Me3": {
        "synonyms": ["Me3", "trimethylation", "tri-methylation"],
        "xrefs": [
            Entity(namespace="mod", identifier="00430", name="trimethylated residue"),
        ],
        "is_a": ["Me"],
    },
    "Myr": {
        "synonyms": ["Myr", "myristoylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0018377", name="protein myristoylation"),
            Entity(namespace="mod", identifier="00438", name="myristoylated residue"),
        ],
    },
    "Nedd": {
        "synonyms": ["Nedd", "neddylation", "RUB1-protein conjugation"],
        "xrefs": [
            Entity(namespace="go", identifier="0045116", name="protein neddylation"),
            Entity(namespace="mod", identifier="01150", name="neddylated lysine"),
        ],
    },
    "NGlyco": {
        "synonyms": ["NGlyco", "N-linked glycosylation"],
        "xrefs": [
            Entity(
                namespace="go",
                identifier="0006487",
                name="protein N-linked glycosylation",
            ),
            Entity(namespace="mod", identifier="00006", name="N-glycosylated residue"),
            Entity(namespace="mop", identifier="0002162", name="N-glycosylation"),
        ],
        "is_a": ["Glyco"],
    },
    "NO": {
        "synonyms": ["NO", "Nitrosylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0017014", name="protein nitrosylation"),
        ],
    },
    "Ox": {
        "synonyms": ["Ox", "oxidation"],
        "xrefs": [
            Entity(namespace="go", identifier="0018158", name="protein oxidation"),
        ],
    },
    "OGlyco": {
        "synonyms": ["OGlyco", "O-linked glycosylation"],
        "xrefs": [
            Entity(
                namespace="go",
                identifier="0006493",
                name="protein O-linked glycosylation",
            ),
            Entity(namespace="mod", identifier="00396", name="O-glycosylated residue"),
            Entity(namespace="mop", identifier="0003162", name="O-glycosylation"),
        ],
        "is_a": ["Glyco"],
    },
    "Palm": {
        "synonyms": ["Palm", "palmitoylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0018345", name="protein palmitoylation"),
            Entity(namespace="mod", identifier="00440", name="palmitoylated residue"),
        ],
    },
    "Ph": {
        "synonyms": ["Ph", "phosphorylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0006468", name="protein phosphorylation"),
            Entity(namespace="mod", identifier="00696"),
        ],
    },
    "Sulf": {
        "synonyms": [
            "Sulf",
            "sulfation",
            "sulphation",
            "sulfur addition",
            "sulphur addition",
            "sulfonation",
            "sulphonation",
        ],
        "xrefs": [
            Entity(namespace="go", identifier="0006477", name="protein sulfation"),
            Entity(namespace="mod", identifier="00695", name="sulfated residue"),
            Entity(namespace="mop", identifier="0000559", name="sulfonation"),
        ],
        "target": [
            Entity(namespace="chebi", identifier="29922", name="sulfo group"),
        ],
    },
    "Sumo": {
        "synonyms": ["Sumo", "SUMOylation", "Sumoylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0016925", name="protein sumoylation"),
            Entity(namespace="mod", identifier="01149", name="sumoylated lysine"),
        ],
        "activities": [
            Entity(namespace="go", identifier="0019789", name="SUMO transferase activity"),
        ],
    },
    "Ub": {
        "synonyms": ["Ub", "ubiquitination", "ubiquitinylation", "ubiquitylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0016567", name="protein ubiquitination"),
            Entity(namespace="mod", identifier="01148", name="ubiquitinylated lysine"),
            Entity(namespace="sbo", identifier="0000224", name="ubiquitination"),
        ],
    },
    "UbK48": {
        "synonyms": ["UbK48", "Lysine 48-linked polyubiquitination"],
        "xrefs": [
            Entity(
                namespace="go",
                identifier="0070936",
                name="protein K48-linked ubiquitination",
            ),
        ],
    },
    "UbK63": {
        "synonyms": ["UbK63", "Lysine 63-linked polyubiquitination"],
        "xrefs": [
            Entity(
                namespace="go",
                identifier="0070534",
                name="protein K63-linked ubiquitination",
            ),
        ],
    },
    "UbMono": {
        "synonyms": ["UbMono", "monoubiquitination"],
        "xrefs": [
            Entity(namespace="go", identifier="0006513", name="protein monoubiquitination"),
        ],
    },
    "UbPoly": {
        "synonyms": ["UbPoly", "polyubiquitination"],
        "xrefs": [
            Entity(namespace="go", identifier="0000209", name="protein polyubiquitination"),
        ],
    },
}

#: A dictionary of legacy (BEL 1.0) default namespace protein modifications to their BEL 2.0 preferred value
pmod_legacy_labels = {
    "P": "Ph",
    "A": "Ac",
    "F": "Farn",
    "G": "Glyco",
    "H": "Hy",
    "M": "Me",
    "R": "ADPRib",
    "S": "Sumo",
    "U": "Ub",
    "O": "Ox",
}

#: A dictionary of default gene modifications. This is a PyBEL variant to the BEL specification.
gmod_namespace = {
    "methylation": "Me",
    "Me": "Me",
    "M": "Me",
    "ADPRib": "ADPRib",
}

#: Use Gene Ontology children of go:0006304 ! "DNA modification"
gmod_mappings = {
    "Me": {
        "synonyms": ["Me", "M", "methylation"],
        "xrefs": [
            Entity(namespace="go", identifier="0006306", name="DNA methylation"),
        ],
    },
    "ADPRib": {
        "synonyms": ["ADPRib"],
        "xrefs": [
            Entity(namespace="go", identifier="0030592", name="DNA ADP-ribosylation"),
        ],
    },
}
class CitationDict(Entity):
    """A dictionary describing a citation.

    A citation is an entity reference (namespace, identifier, optional name)
    that may carry arbitrary additional key/value pairs.
    """

    def __init__(
        self,
        namespace: str,
        identifier: str,
        *,
        name: Optional[str] = None,
        **kwargs,
    ):
        """Create a citation dictionary.

        :param namespace: The namespace of the citation
        :param identifier: The identifier of the citation in the namespace
        :param name: An optional name for the citation
        :param kwargs: Extra entries merged into the dictionary after the
            entity keys have been set
        """
        # Validation of namespace/identifier/name is delegated to Entity.
        super().__init__(namespace=namespace, identifier=identifier, name=name)
        self.update(**kwargs)
def citation_dict(
    *,
    namespace: Optional[str] = None,
    db: Optional[str] = None,
    identifier: Optional[str] = None,
    db_id: Optional[str] = None,
    name: Optional[str] = None,
    **kwargs,
) -> CitationDict:
    """Make a citation dictionary.

    :param namespace: The namespace of the citation
    :param db: Deprecated alias of ``namespace``
    :param identifier: The identifier of the citation in the namespace
    :param db_id: Deprecated alias of ``identifier``
    :param name: An optional name for the citation
    :param kwargs: Extra entries passed through to :class:`CitationDict`
    :return: The citation dictionary built from the resolved arguments
    :raises ValueError: If both ``namespace`` and ``db``, or both
        ``identifier`` and ``db_id``, are given
    """
    if namespace and db:
        raise ValueError("can not specify both namespace and db")
    if identifier and db_id:
        raise ValueError("can not specify both identifier and db_id")

    if db:
        # stacklevel=2 attributes the warning to the caller that still uses
        # the deprecated keyword, not to this helper.
        warnings.warn(
            "usage of keyword argument `db` in citation_dict() should be replaced with `namespace`. "
            "Will be removed in PyBEL 16.",
            DeprecationWarning,
            stacklevel=2,
        )
        namespace = db

    if db_id:
        warnings.warn(
            "usage of keyword argument `db_id` in citation_dict() should be replaced with `identifier`. "
            "Will be removed in PyBEL 16.",
            DeprecationWarning,
            stacklevel=2,
        )
        identifier = db_id

    return CitationDict(namespace=namespace, identifier=identifier, name=name, **kwargs)