Source code for pybel.struct.summary.provenance

# -*- coding: utf-8 -*-

"""Summary functions for citation and provenance information in BEL graphs."""

from typing import Iterable, Set

from ..filters.edge_predicates import CITATION_PREDICATES
from ..graph import BELGraph
from ...constants import CITATION, IDENTIFIER

# Names exported by ``from <module> import *``. Only the PubMed- and
# PMC-specific helpers are listed; the prefix-generic functions
# ``iterate_citation_identifiers`` and ``get_citation_identifiers`` are not.
__all__ = [
    "iterate_pubmed_identifiers",
    "iterate_pmc_identifiers",
    "get_pubmed_identifiers",
    "get_pmc_identifiers",
]


def iterate_citation_identifiers(graph, prefix: str):
    """Lazily yield the citation identifier of every edge matching a prefix.

    The edge predicate registered for ``prefix`` in ``CITATION_PREDICATES``
    decides which edges are kept. One identifier is produced per matching
    edge, so the same identifier can appear more than once.

    :param graph: A BEL graph
    :param prefix: The citation prefix to keep; must be a key of ``CITATION_PREDICATES``
    :return: A generator over the whitespace-stripped citation identifiers
     of the matching edges
    :raises ValueError: If no predicate is registered for ``prefix``. This is
     raised when the function is called, not when the result is first iterated.
    """
    has_matching_citation = CITATION_PREDICATES.get(prefix)
    if has_matching_citation is None:
        raise ValueError(f"Invalid citation prefix: {prefix}")

    # Only the edge data dictionaries matter; the endpoints are discarded.
    edge_attributes = (attributes for _, _, attributes in graph.edges(data=True))
    return (
        attributes[CITATION][IDENTIFIER].strip()
        for attributes in edge_attributes
        if has_matching_citation(attributes)
    )


def iterate_pubmed_identifiers(graph: BELGraph) -> Iterable[str]:
    """Yield the citation identifier of each PubMed-cited edge in a graph.

    Delegates to :func:`iterate_citation_identifiers` with the ``"pubmed"``
    prefix.

    :param graph: A BEL graph
    :return: An iterator over the PubMed identifiers in the graph
    """
    pubmed_identifiers = iterate_citation_identifiers(graph, "pubmed")
    return pubmed_identifiers
def iterate_pmc_identifiers(graph: BELGraph) -> Iterable[str]:
    """Yield the citation identifier of each PMC-cited edge in a graph.

    Delegates to :func:`iterate_citation_identifiers` with the ``"pmc"``
    prefix.

    :param graph: A BEL graph
    :return: An iterator over the PMC identifiers in the graph
    """
    pmc_identifiers = iterate_citation_identifiers(graph, "pmc")
    return pmc_identifiers
def get_citation_identifiers(graph: BELGraph, prefix: str) -> Set[str]:
    """Collect the distinct citation identifiers with the given prefix in a graph.

    Consumes :func:`iterate_citation_identifiers` into a set, so repeated
    identifiers are collapsed.

    :param graph: A BEL graph
    :param prefix: The citation prefix to keep
    :return: The set of all identifiers with the given prefix cited in the
     construction of this graph
    """
    identifiers: Set[str] = set()
    identifiers.update(iterate_citation_identifiers(graph, prefix))
    return identifiers
def get_pubmed_identifiers(graph: BELGraph) -> Set[str]:
    """Collect the distinct PubMed identifiers cited in a graph.

    Delegates to :func:`get_citation_identifiers` with the ``"pubmed"``
    prefix.

    :param graph: A BEL graph
    :return: A set of all PubMed identifiers cited in the construction of this graph
    """
    pubmed_identifiers = get_citation_identifiers(graph, "pubmed")
    return pubmed_identifiers
def get_pmc_identifiers(graph: BELGraph) -> Set[str]:
    """Collect the distinct PMC identifiers cited in a graph.

    Delegates to :func:`get_citation_identifiers` with the ``"pmc"`` prefix.

    :param graph: A BEL graph
    :return: A set of all PMC identifiers cited in the construction of this graph
    """
    pmc_identifiers = get_citation_identifiers(graph, "pmc")
    return pmc_identifiers