Source code for pybel.io.tsv.api

# -*- coding: utf-8 -*-

"""TSV conversion."""

import json
import logging
from typing import List, Optional, TextIO, Tuple, Union

from networkx.utils import open_file
from tqdm import tqdm

from .converters import (
    AssociationConverter, CorrelationConverter, DecreasesActivityConverter, DecreasesAmountConverter,
    DecreasesDegradationConverter, DrugIndicationConverter, DrugSideEffectConverter, EquivalenceConverter,
    HasVariantConverter, IncreasesActivityConverter, IncreasesAmountConverter, IncreasesDegradationConverter,
    IsAConverter, ListComplexHasComponentConverter, MiRNADecreasesExpressionConverter,
    MiRNADirectlyDecreasesExpressionConverter, NoChangeActivityConverter, NoChangeAmountConverter,
    NoChangeDegradationConverter, PartOfNamedComplexConverter, ProteinPartOfBiologicalProcess,
    ReactionHasCatalystConverter, ReactionHasProductConverter, ReactionHasReactantConverter, RegulatesActivityConverter,
    RegulatesAmountConverter, RegulatesDegradationConverter, SubprocessPartOfBiologicalProcess,
)
from ...dsl import BaseEntity
from ...struct import BELGraph

__all__ = [
    'to_tsv',
    'to_edgelist',
    'get_triples',
    'get_triple',
]

logger = logging.getLogger(__name__)


@open_file(1, mode='w')
def to_tsv(graph: BELGraph, path: Union[str, TextIO], *, use_tqdm: bool = False, sep='\t') -> None:
    """Write the graph as a TSV.

    One line is printed per triple produced by :func:`get_triples`, with the
    three elements of the triple joined by ``sep``.

    :param graph: A BEL graph
    :param path: A path or file-like. The ``open_file`` decorator is applied to
     this argument (position 1) with mode ``'w'``, and the body then uses it as
     the ``file`` target of :func:`print`.
    :param use_tqdm: Should a progress bar be shown? Forwarded to :func:`get_triples`.
    :param sep: The separator to use between the columns of each line
    """
    rows = get_triples(graph, use_tqdm=use_tqdm)
    for head, relation, tail in rows:
        print(head, relation, tail, sep=sep, file=path)
@open_file(1, mode='w')
def to_edgelist(graph: BELGraph, path: Union[str, TextIO], *, use_tqdm: bool = False, sep='\t') -> None:
    """Write the graph as an edgelist.

    One line is printed per triple produced by :func:`get_triples`: the first
    element, the last element, and a JSON object ``{"relation": ...}`` holding
    the middle element, joined by ``sep``.

    :param graph: A BEL graph
    :param path: A path or file-like. The ``open_file`` decorator is applied to
     this argument (position 1) with mode ``'w'``, and the body then uses it as
     the ``file`` target of :func:`print`.
    :param use_tqdm: Should a progress bar be shown? Forwarded to :func:`get_triples`.
    :param sep: The separator to use between the columns of each line
    """
    for h, r, t in get_triples(graph, use_tqdm=use_tqdm):
        print(h, t, json.dumps({'relation': r}), sep=sep, file=path)


def get_triples(graph: BELGraph, use_tqdm: bool = False) -> List[Tuple[str, str, str]]:
    """Get a non-redundant list of triples representing the graph.

    Every edge of the graph is passed through :func:`get_triple`. Edges for
    which it returns ``None`` are dropped, duplicates are removed, and the
    remaining triples are returned in sorted order.

    :param graph: A BEL graph
    :param use_tqdm: Should a progress bar be shown?
    :return: A sorted list of unique triples
    """
    it = graph.edges(keys=True)
    if use_tqdm:
        it = tqdm(it, total=graph.number_of_edges(), desc='Preparing TSV')

    triples = (
        get_triple(graph, u, v, key)
        for u, v, key in it
    )

    # The set removes duplicates and the filter removes the Nones;
    # sorted() already returns a new list, so no extra list() is needed.
    return sorted({
        triple
        for triple in triples
        if triple is not None
    })


# Converters tried by get_triple(), first match wins - order is important.
# Built once at import time instead of on every call (i.e. once per edge).
_CONVERTERS = (
    ListComplexHasComponentConverter,
    PartOfNamedComplexConverter,
    SubprocessPartOfBiologicalProcess,
    ProteinPartOfBiologicalProcess,
    RegulatesActivityConverter,
    MiRNADecreasesExpressionConverter,
    MiRNADirectlyDecreasesExpressionConverter,
    IsAConverter,
    EquivalenceConverter,
    CorrelationConverter,
    AssociationConverter,
    DrugIndicationConverter,
    DrugSideEffectConverter,
    RegulatesAmountConverter,
    IncreasesAmountConverter,
    DecreasesAmountConverter,
    NoChangeAmountConverter,
    IncreasesActivityConverter,
    DecreasesActivityConverter,
    NoChangeActivityConverter,
    ReactionHasProductConverter,
    ReactionHasReactantConverter,
    ReactionHasCatalystConverter,
    HasVariantConverter,
    IncreasesDegradationConverter,
    DecreasesDegradationConverter,
    RegulatesDegradationConverter,
    NoChangeDegradationConverter,
)


def get_triple(
    graph: BELGraph,
    u: BaseEntity,
    v: BaseEntity,
    key: str,
) -> Optional[Tuple[str, str, str]]:  # noqa: C901
    """Get the triples' strings that should be written to the file.

    The edge data is looked up as ``graph[u][v][key]`` and offered to each
    converter in turn; the first converter whose ``predicate`` accepts the
    edge produces the result via its ``convert`` method.

    :param graph: A BEL graph
    :param u: The source node of the edge
    :param v: The target node of the edge
    :param key: The key of the edge between ``u`` and ``v``
    :return: The converted triple, or ``None`` (after logging a warning) if no
     converter handles the edge
    """
    data = graph[u][v][key]

    for converter in _CONVERTERS:
        if converter.predicate(u, v, key, data):
            return converter.convert(u, v, key, data)

    # Lazy %-style arguments: the message is only rendered if the record is emitted.
    logger.warning('unhandled: %s', graph.edge_to_bel(u, v, data))
    return None