Source code for pybel.io.pynpa

# -*- coding: utf-8 -*-

"""Exporter for PyNPA.

.. seealso:: https://github.com/pynpa
"""

import logging
import os
from typing import List, Mapping, Optional, Tuple

import pandas as pd

from ..constants import CAUSAL_DECREASE_RELATIONS, CAUSAL_INCREASE_RELATIONS, RELATION
from ..dsl import Gene, MicroRna, Protein, Rna
from ..struct import BELGraph
from ..struct.getters import get_tf_pairs
from ..struct.node_utils import (
    list_abundance_cartesian_expansion,
    reaction_cartesian_expansion,
)

__all__ = [
    "to_npa_directory",
    "to_npa_dfs",
    "to_npa_layers",
]

logger = logging.getLogger(__name__)

Layer = Mapping[Tuple[Gene, Gene], int]

#: Code to distinguish between iNodes when nodes have been debelized
DEBELIZED_CODE_FOR_INODES = "*"


def to_npa_directory(graph: BELGraph, directory: str, **kwargs) -> None:
    """Write the BEL graph as two TSV files in the directory for :mod:`pynpa`.

    The PPI layer is written to ``ppi_layer.tsv`` and the transcription layer is
    written to ``transcriptional_layer.tsv``. Both files are tab-separated and
    written without the data frame index.

    :param graph: A BEL graph
    :param directory: The directory in which the two files are written. It is
        created (including missing parents) if it does not exist yet.
    :param kwargs: Keyword arguments forwarded to :func:`to_npa_dfs`
    """
    ppi_df, transcription_df = to_npa_dfs(graph, **kwargs)

    # Create the output directory only after the export succeeded so a failing
    # export does not leave an empty directory behind. An empty string means
    # "current working directory", which os.makedirs() would reject.
    if directory:
        os.makedirs(directory, exist_ok=True)

    ppi_df.to_csv(os.path.join(directory, "ppi_layer.tsv"), sep="\t", index=False)
    transcription_df.to_csv(
        os.path.join(directory, "transcriptional_layer.tsv"),
        sep="\t",
        index=False,
    )
def to_npa_dfs(
    graph: BELGraph,
    cartesian_expansion: bool = False,
    nomenclature_method_first_layer: Optional[str] = None,
    nomenclature_method_second_layer: Optional[str] = None,
    direct_tf_only: bool = False,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Export the BEL graph as two data frames of triples for :mod:`pynpa`.

    The export proceeds in two steps:

    1. Pick out all transcription factor relationships. Protein X is a transcription
       factor for gene Y IFF ``complex(p(X), g(Y)) -> r(Y)``.
    2. Get all other interactions between any gene/rna/protein that are directed
       causal for the PPI layer.

    :param graph: A BEL graph
    :param cartesian_expansion: If true, applies cartesian expansion on both reactions
        (reactants x products) as well as list abundances using
        :func:`list_abundance_cartesian_expansion` and :func:`reaction_cartesian_expansion`
    :param nomenclature_method_first_layer: How nodes are labelled in the PPI layer.
        Either "curie", "name" or "inodes". Defaults to "curie".
    :param nomenclature_method_second_layer: How nodes are labelled in the transcription
        layer. Either "curie", "name" or "inodes". Defaults to "curie".
    :param direct_tf_only: Forwarded to :func:`to_npa_layers`. If true, only direct
        relations are used for the transcription factor relationships.
    :returns: A pair of data frames, the PPI layer followed by the transcription layer,
        each with the columns ``source``, ``target`` and ``relation``.
    """
    layers = to_npa_layers(
        graph,
        cartesian_expansion=cartesian_expansion,
        direct_tf_only=direct_tf_only,
    )
    ppi_layer, transcription_layer = layers

    ppi_df = _get_df(ppi_layer, method=nomenclature_method_first_layer)
    transcription_df = _get_df(transcription_layer, method=nomenclature_method_second_layer)
    return ppi_df, transcription_df
def _get_df(layer: Layer, method: Optional[str] = None) -> pd.DataFrame:
    """Convert a layer into a data frame of (source, target, relation) rows.

    :param layer: A mapping from (source, target) node pairs to a direction
    :param method: The labelling method passed on to :func:`_normalize_layer`
    :returns: A data frame sorted by its ``source`` and ``target`` columns
    """
    triples = _normalize_layer(layer, method=method)
    frame = pd.DataFrame(triples, columns=["source", "target", "relation"])
    return frame.sort_values(["source", "target"])


def _normalize_layer(layer: Layer, method: Optional[str] = None) -> List[Tuple[str, str, int]]:
    """Turn a layer into a list of (source label, target label, direction) triples.

    :param layer: A mapping from (source, target) node pairs to a direction
    :param method: Either "curie", "name" or "inodes". ``None`` is treated as "curie".
    :raises ValueError: If the method is not one of the supported ones
    """
    # Pick the labelling function first so an invalid method is rejected
    # even when the layer is empty.
    if method is None or method == "curie":

        def _label(node):
            return node.curie

    elif method == "name":

        def _label(node):
            return node.name

    elif method == "inodes":

        def _label(node):
            return "{}{}".format(DEBELIZED_CODE_FOR_INODES, node.name)

    else:
        raise ValueError("Invalid export method: {method}".format(method=method))

    triples = []
    for (source, target), direction in layer.items():
        triples.append((_label(source), _label(target), direction))
    return triples


def to_npa_layers(
    graph: BELGraph,
    cartesian_expansion: bool = False,
    direct_tf_only: bool = False,
) -> Tuple[Layer, Layer]:
    """Get the two layers for the network.

    :param graph: A BEL graph
    :param cartesian_expansion: If true, applies cartesian expansion on both reactions
        (reactants x products) as well as list abundances using
        :func:`list_abundance_cartesian_expansion` and :func:`reaction_cartesian_expansion`
    :param direct_tf_only: If true, only uses directlyIncreases and directlyDecreases
        relations for TF relations ``complex(p(X), g(Y)) =>/=| r(Y)``. If false, also
        allows indirect relations ``complex(p(X), g(Y)) ->/-| r(Y)``.
    :returns: A pair of layers, the PPI layer followed by the transcription layer.
        Each maps a (source, target) pair to ``+1`` or, for the PPI layer, ``-1``
        depending on the causal relation; the transcription layer carries whatever
        value :func:`get_tf_pairs` yields.
    """
    if cartesian_expansion:
        # NOTE(review): the return values are discarded, so both helpers
        # presumably modify ``graph`` in place - confirm against their docs.
        list_abundance_cartesian_expansion(graph)
        reaction_cartesian_expansion(graph)

    transcription_layer = {}
    for tf, target_rna, sign in get_tf_pairs(graph, direct_only=direct_tf_only):
        transcription_layer[tf.get_rna().get_gene(), target_rna.get_gene()] = sign
    logger.info("extracted %d pairs for the transcription layer", len(transcription_layer))

    ppi_layer = {}
    for source, target, data in graph.edges(data=True):
        source_gene = _normalize(source)
        target_gene = _normalize(target)
        if source_gene is None or target_gene is None:
            # At least one end is not a gene/RNA/protein
            continue

        pair = source_gene, target_gene
        if pair in transcription_layer:
            # Already covered by the transcription layer
            continue

        relation = data[RELATION]
        if relation in CAUSAL_INCREASE_RELATIONS:
            ppi_layer[pair] = +1
        elif relation in CAUSAL_DECREASE_RELATIONS:
            ppi_layer[pair] = -1
        # TODO what about contradictions
        # (currently the last causal edge seen for a pair wins)
    logger.info("extracted %d pairs for the ppi layer", len(ppi_layer))

    return ppi_layer, transcription_layer


def _drop_variants(node):
    """Return the parent of a node that has variants, otherwise the node itself."""
    return node.get_parent() if node.variants else node


def _normalize(n):
    """Collapse a protein, RNA, miRNA or gene node to its variant-free gene.

    Proteins are mapped to their RNA and RNAs/miRNAs to their gene, stripping
    variants at every step.

    :returns: The gene node, or ``None`` if the node is none of the types above.
        :func:`to_npa_layers` relies on ``None`` to skip such nodes.
    """
    node = n
    if isinstance(node, Protein):
        node = _drop_variants(node).get_rna()
    if isinstance(node, (Rna, MicroRna)):
        node = _drop_variants(node).get_gene()
    if not isinstance(node, Gene):
        return None
    return _drop_variants(node)