# Source code for pybel.struct.query.selection

# -*- coding: utf-8 -*-

"""A wrapper around selection methods."""

import logging
from typing import Any, List, Optional

from .constants import (
    SEED_TYPE_ANNOTATION, SEED_TYPE_AUTHOR, SEED_TYPE_DOUBLE_NEIGHBORS, SEED_TYPE_DOWNSTREAM,
    SEED_TYPE_INDUCTION, SEED_TYPE_NEIGHBORS, SEED_TYPE_PATHS, SEED_TYPE_PUBMED, SEED_TYPE_SAMPLE, SEED_TYPE_UPSTREAM,
)
from ..mutation import (
    expand_nodes_neighborhoods, get_multi_causal_downstream, get_multi_causal_upstream,
    get_random_subgraph, get_subgraph_by_all_shortest_paths, get_subgraph_by_annotations, get_subgraph_by_authors,
    get_subgraph_by_induction, get_subgraph_by_neighborhood, get_subgraph_by_pubmed, get_subgraph_by_second_neighbors,
)
from ...dsl import BaseEntity

__all__ = [
    'get_subgraph',
]

logger = logging.getLogger(__name__)


def get_subgraph(
    graph,
    seed_method: Optional[str] = None,
    seed_data: Optional[Any] = None,
    expand_nodes: Optional[List[BaseEntity]] = None,
    remove_nodes: Optional[List[BaseEntity]] = None,
):
    """Run a pipeline query on graph with multiple sub-graph filters and expanders.

    Order of Operations:

    1. Seeding by given function name and data
    2. Add nodes
    3. Remove nodes

    :param pybel.BELGraph graph: A BEL graph
    :param seed_method: The name of the get_subgraph_by_* function to use. When it is falsy
     (``None`` or empty), no seeding happens and a copy of the full graph is used.
    :param seed_data: The argument to pass to the get_subgraph function. For annotation seeding
     it is indexed with the key ``'annotations'`` and optionally read for the key ``'or'``. For
     sample seeding it is optionally read for the keys ``'number_edges'`` and ``'seed'``, and may
     be omitted entirely.
    :param expand_nodes: Add the neighborhoods around all of these nodes
    :param remove_nodes: Remove these nodes and all of their in/out edges
    :raises ValueError: If ``seed_method`` is truthy but is not one of the known seed types
    :return: The resulting sub-graph, or ``None`` if the seeding function returned ``None``
    :rtype: Optional[pybel.BELGraph]
    """
    # Seed by the given function
    if seed_method == SEED_TYPE_INDUCTION:
        result = get_subgraph_by_induction(graph, seed_data)

    elif seed_method == SEED_TYPE_PATHS:
        result = get_subgraph_by_all_shortest_paths(graph, seed_data)

    elif seed_method == SEED_TYPE_NEIGHBORS:
        result = get_subgraph_by_neighborhood(graph, seed_data)

    elif seed_method == SEED_TYPE_DOUBLE_NEIGHBORS:
        result = get_subgraph_by_second_neighbors(graph, seed_data)

    elif seed_method == SEED_TYPE_UPSTREAM:
        result = get_multi_causal_upstream(graph, seed_data)

    elif seed_method == SEED_TYPE_DOWNSTREAM:
        result = get_multi_causal_downstream(graph, seed_data)

    elif seed_method == SEED_TYPE_PUBMED:
        result = get_subgraph_by_pubmed(graph, seed_data)

    elif seed_method == SEED_TYPE_AUTHOR:
        result = get_subgraph_by_authors(graph, seed_data)

    elif seed_method == SEED_TYPE_ANNOTATION:
        # The 'annotations' key is mandatory here; 'or' is optional and passed as None if absent
        result = get_subgraph_by_annotations(graph, seed_data['annotations'], or_=seed_data.get('or'))

    elif seed_method == SEED_TYPE_SAMPLE:
        # Both sampling options are optional, so a missing ``seed_data`` is handled exactly
        # like an empty mapping instead of failing on ``None.get``
        sample_options = seed_data if seed_data is not None else {}
        result = get_random_subgraph(
            graph,
            number_edges=sample_options.get('number_edges'),
            seed=sample_options.get('seed'),
        )

    elif not seed_method:  # Otherwise, don't seed a sub-graph
        result = graph.copy()
        logger.debug('no seed function - using full network: %s', result.name)

    else:
        raise ValueError('Invalid seed method: {}'.format(seed_method))

    if result is None:
        logger.debug('query returned no results')
        return None

    logger.debug('original graph has (%s nodes / %s edges)', result.number_of_nodes(), result.number_of_edges())

    # Expand around the given nodes
    if expand_nodes:
        expand_nodes_neighborhoods(graph, result, expand_nodes)
        logger.debug('graph expanded to (%s nodes / %s edges)', result.number_of_nodes(), result.number_of_edges())

    # Delete the given nodes
    if remove_nodes:
        for node in remove_nodes:
            if node not in result:
                # NOTE(review): membership is tested against the result, while the message
                # names the source graph - confirm this is the intended wording
                logger.debug('%s is not in graph %s', node, graph.name)
                continue
            result.remove_node(node)
        logger.debug('graph contracted to (%s nodes / %s edges)', result.number_of_nodes(), result.number_of_edges())

    logger.debug(
        'Subgraph coming from %s (seed type) %s (data) contains %d nodes and %d edges',
        seed_method,
        seed_data,
        result.number_of_nodes(),
        result.number_of_edges(),
    )

    return result