# Source code for pybel.io.pynpa

# -*- coding: utf-8 -*-

"""Exporter for PyNPA.

.. seealso:: https://github.com/pynpa
"""

import logging
import os
from typing import List, Mapping, Optional, Tuple

import pandas as pd

from ..constants import CAUSAL_DECREASE_RELATIONS, CAUSAL_INCREASE_RELATIONS, RELATION
from ..dsl import Gene, MicroRna, Protein, Rna
from ..struct import BELGraph
from ..struct.getters import get_tf_pairs
from ..struct.node_utils import (
    list_abundance_cartesian_expansion,
    reaction_cartesian_expansion,
)

# The public functions exported by this module.
__all__ = [
    "to_npa_directory",
    "to_npa_dfs",
    "to_npa_layers",
]

logger = logging.getLogger(__name__)

#: A layer maps a (source gene, target gene) pair to an integer relation
#: (the PPI layer built in this module stores +1 or -1).
Layer = Mapping[Tuple[Gene, Gene], int]

#: Code to distinguish between iNodes when nodes have been debelized.
#: It is prepended to the node names by the "inodes" export method.
DEBELIZED_CODE_FOR_INODES = "*"


def to_npa_directory(graph: BELGraph, directory: str, **kwargs) -> None:
    """Write the BEL graph as two TSV files in the directory for :mod:`pynpa`.

    The PPI layer is written to ``ppi_layer.tsv`` and the transcriptional layer
    to ``transcriptional_layer.tsv``. Both files are tab-separated and are
    written without the data frame index.

    :param graph: A BEL graph
    :param directory: The directory in which the two files are written. It is
     created (including missing parent directories) if it does not exist yet.
    :param kwargs: Keyword arguments passed through to :func:`to_npa_dfs`
    """
    # Build both data frames first so that a failure during export does not
    # leave an empty directory behind.
    ppi_df, transcription_df = to_npa_dfs(graph, **kwargs)

    # An empty directory string means "current working directory" for
    # os.path.join, and os.makedirs("") would raise, so only create real paths.
    if directory:
        os.makedirs(directory, exist_ok=True)

    ppi_df.to_csv(os.path.join(directory, "ppi_layer.tsv"), sep="\t", index=False)
    transcription_df.to_csv(os.path.join(directory, "transcriptional_layer.tsv"), sep="\t", index=False)


def to_npa_dfs(
    graph: BELGraph,
    cartesian_expansion: bool = False,
    nomenclature_method_first_layer: Optional[str] = None,
    nomenclature_method_second_layer: Optional[str] = None,
    direct_tf_only: bool = False,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Export the BEL graph as two data frames of triples for :mod:`pynpa`.

    The layers are built by :func:`to_npa_layers`:

    1. Pick out all transcription factor relationships. Protein X is a transcription
       factor for gene Y IFF ``complex(p(X), g(Y)) -> r(Y)``
    2. Get all other interactions between any gene/rna/protein that are directed causal
       for the PPI layer

    :param graph: A BEL graph
    :param cartesian_expansion: If true, applies cartesian expansion on both reactions (reactants x products)
     as well as list abundances using :func:`list_abundance_cartesian_expansion` and
     :func:`reaction_cartesian_expansion`
    :param nomenclature_method_first_layer: How nodes of the PPI layer are written.
     Either "curie", "name" or "inodes". Defaults to "curie".
    :param nomenclature_method_second_layer: How nodes of the transcription layer are written.
     Either "curie", "name" or "inodes". Defaults to "curie".
    :param direct_tf_only: Passed through to :func:`to_npa_layers` to control which
     relations are accepted for the transcription layer.
    :return: A pair of data frames (PPI layer, transcription layer), each with the
     columns ``source``, ``target`` and ``relation``.
    :raises ValueError: If one of the nomenclature methods is not recognized.
    """
    first_layer, second_layer = to_npa_layers(
        graph,
        cartesian_expansion=cartesian_expansion,
        direct_tf_only=direct_tf_only,
    )
    # Each layer gets its own nomenclature; the PPI layer is converted first.
    ppi_df = _get_df(first_layer, method=nomenclature_method_first_layer)
    transcription_df = _get_df(second_layer, method=nomenclature_method_second_layer)
    return ppi_df, transcription_df


def _get_df(layer: Layer, method: Optional[str] = None) -> pd.DataFrame:
    """Convert a layer to a data frame sorted by source, then target.

    :param layer: A mapping from (source, target) pairs to their relation
    :param method: The nomenclature method handed to :func:`_normalize_layer`
    :return: A data frame with the columns ``source``, ``target`` and ``relation``
    """
    columns = ["source", "target", "relation"]
    triples = _normalize_layer(layer, method=method)
    df = pd.DataFrame(triples, columns=columns)
    # Sort on the first two columns only: source, then target.
    return df.sort_values(columns[:2])


def _normalize_layer(layer: Layer, method: Optional[str] = None) -> List[Tuple[str, str, int]]:
    """Turn a layer into a list of ``(source, target, relation)`` triples.

    :param layer: A mapping from (source, target) pairs to their relation
    :param method: How each node is written: "curie" (also used when ``None``)
     for the node's CURIE, "name" for its name, or "inodes" for its name
     prefixed with :data:`DEBELIZED_CODE_FOR_INODES`.
    :return: One triple per entry of the layer, in the layer's iteration order
    :raises ValueError: If the method is none of the above
    """
    if method is None:
        method = "curie"

    # Pick the node labelling up front so an invalid method fails before
    # any node of the layer is touched.
    if method == "curie":

        def _label(node):
            return node.curie

    elif method == "name":

        def _label(node):
            return node.name

    elif method == "inodes":

        def _label(node):
            return "{}{}".format(DEBELIZED_CODE_FOR_INODES, node.name)

    else:
        raise ValueError("Invalid export method: {method}".format(method=method))

    rows = []
    for (source, target), direction in layer.items():
        rows.append((_label(source), _label(target), direction))
    return rows


def to_npa_layers(
    graph: BELGraph,
    cartesian_expansion: bool = False,
    direct_tf_only: bool = False,
) -> Tuple[Layer, Layer]:
    """Get the two layers for the network.

    The transcription layer is built from the triples given by :func:`get_tf_pairs`.
    The PPI layer holds every other edge whose endpoints can both be normalized to
    a gene and whose relation is causal: +1 for increasing relations and -1 for
    decreasing ones. Pairs already in the transcription layer are left out of the
    PPI layer.

    :param graph: A BEL graph
    :param cartesian_expansion: If true, applies cartesian expansion on both reactions (reactants x products)
     as well as list abundances using :func:`list_abundance_cartesian_expansion` and
     :func:`reaction_cartesian_expansion`. Both are called for their effect on ``graph``;
     their return values are not used.
    :param direct_tf_only: If true, only uses directlyIncreases and directlyDecreases relations for TF relations
     ``complex(p(X), g(Y)) =>/=| r(Y)``. If false, also allows indirect relations ``complex(p(X), g(Y)) ->/-| r(Y)``.
    :return: A pair of (PPI layer, transcription layer)
    """
    if cartesian_expansion:
        list_abundance_cartesian_expansion(graph)
        reaction_cartesian_expansion(graph)

    transcription_layer = {}
    for regulator, regulated, sign in get_tf_pairs(graph, direct_only=direct_tf_only):
        # Both ends of a transcription factor pair are keyed by their gene.
        transcription_layer[regulator.get_rna().get_gene(), regulated.get_gene()] = sign
    logger.info("extracted %d pairs for the transcription layer", len(transcription_layer))

    ppi_layer = {}
    for source, target, data in graph.edges(data=True):
        source_gene = _normalize(source)
        target_gene = _normalize(target)
        if source_gene is None or target_gene is None:
            continue  # at least one endpoint has no gene-level counterpart

        pair = (source_gene, target_gene)
        if pair in transcription_layer:
            continue  # already accounted for in the transcription layer

        relation = data[RELATION]
        if relation in CAUSAL_INCREASE_RELATIONS:
            sign = +1
        elif relation in CAUSAL_DECREASE_RELATIONS:
            sign = -1
        else:
            continue  # relations that are not causal are ignored

        # When several edges map to the same pair, the last one iterated wins.
        # TODO what about contradictions
        ppi_layer[pair] = sign

    logger.info("extracted %d pairs for the ppi layer", len(ppi_layer))
    return ppi_layer, transcription_layer


def _normalize(n):
    """Map a protein, RNA, microRNA or gene node to its gene without variants.

    Variants are dropped at every step by switching to the parent node before
    moving on: protein -> RNA -> gene.

    :param n: A node of the graph
    :return: The corresponding gene, or ``None`` if the node is none of the
     types listed above
    """
    if isinstance(n, Protein):
        protein = n.get_parent() if n.variants else n
        n = protein.get_rna()

    # A protein falls through to here as its RNA.
    if isinstance(n, (Rna, MicroRna)):
        transcript = n.get_parent() if n.variants else n
        n = transcript.get_gene()

    if not isinstance(n, Gene):
        return None

    return n.get_parent() if n.variants else n