Source code for pybel.io.extras

# -*- coding: utf-8 -*-

"""This module contains IO functions for outputting BEL graphs to lossy formats, such as GraphML and CSV."""

import json
from typing import Optional, TextIO, Union

from networkx.utils import open_file

from ..dsl import CentralDogma
from ..struct import BELGraph

# Public API of this module: the exporter functions defined below.
__all__ = [
    "to_csv",
    "to_sif",
    "to_gsea",
]


@open_file(1, mode="w")
def to_csv(
    graph: BELGraph,
    path: Union[str, TextIO],
    sep: Optional[str] = None,
) -> None:
    """Write every edge of the graph as one delimited line, including its data.

    Each output line holds, separated by ``sep``:

    1. Source BEL term
    2. Relation
    3. Target BEL term
    4. Edge data dictionary, serialized as JSON

    The first three columns come from :meth:`BELGraph.edge_to_bel`; the last
    is ``json.dumps`` of the edge data. See the Data Models section of the
    documentation for what the edge data dictionary stores, such as queryable
    information about transforms on the subject and object and their
    associated metadata.

    :param graph: The BEL graph whose edges are written.
    :param path: A file path or an open text file; the ``open_file``
        decorator is applied to this argument with mode ``"w"``.
    :param sep: The column separator. Defaults to a tab when ``None``.
    """
    delimiter = "\t" if sep is None else sep

    for source, target, edge_data in graph.edges(data=True):
        bel_statement = graph.edge_to_bel(source, target, edge_data=edge_data, sep=delimiter)
        serialized_data = json.dumps(edge_data)
        print(bel_statement, serialized_data, sep=delimiter, file=path)
@open_file(1, mode="w")
def to_sif(
    graph: BELGraph,
    path: Union[str, TextIO],
    sep: Optional[str] = None,
) -> None:
    """Write every edge of the graph as one line of a SIF file.

    Each output line holds, separated by ``sep``:

    1. Source BEL term
    2. Relation
    3. Target BEL term

    The format is simple and readily consumed by many applications, but it is
    lossy: relation metadata (the edge data dictionary) is not written.

    :param graph: The BEL graph whose edges are written.
    :param path: A file path or an open text file; the ``open_file``
        decorator is applied to this argument with mode ``"w"``.
    :param sep: The column separator. Defaults to a tab when ``None``.
    """
    delimiter = "\t" if sep is None else sep

    for source, target, edge_data in graph.edges(data=True):
        # The edge data is only used to render the BEL statement; it is not emitted.
        sif_line = graph.edge_to_bel(source, target, edge_data=edge_data, sep=delimiter)
        print(sif_line, file=path)
@open_file(1, mode="w")
def to_gsea(graph: BELGraph, path: Union[str, TextIO]) -> None:
    """Write the graph's HGNC genes/gene products to a GRP file for GSEA.

    The first line is a ``#`` comment carrying the graph's name. It is followed
    by one line per distinct name of the :class:`CentralDogma` nodes whose
    namespace is ``hgnc`` (compared case-insensitively), in sorted order.

    :param graph: The BEL graph whose nodes are scanned.
    :param path: A file path or an open text file; the ``open_file``
        decorator is applied to this argument with mode ``"w"``.

    .. seealso::

        - GRP `format specification <http://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats#GRP:_Gene_set_file_format_.28.2A.grp.29>`_
        - GSEA `publication <https://doi.org/10.1073/pnas.0506580102>`_
    """
    print("# {}".format(graph.name), file=path)

    # Collect into a set so each symbol is written only once.
    symbols = set()
    for node in graph:
        if not isinstance(node, CentralDogma):
            continue
        if node.namespace.lower() != "hgnc":
            continue
        symbols.add(node.name)

    for symbol in sorted(symbols):
        print(symbol, file=path)