nbl

`navis.nbl.compress_scores` #

Compress scores.

This will not necessarily reduce the in-memory footprint but will lead to much smaller file sizes when saved to disk.

PARAMETER	DESCRIPTION
`scores`	TYPE: `pandas.DataFrame`
`threshold`	Scores lower than this will be capped at `threshold`. TYPE: `float` DEFAULT: `None`
`digits`	`Round scores to the Nth digit.` TYPE: `int` DEFAULT: `None`

RETURNS	DESCRIPTION
`scores_comp`	Copy of the original dataframe with the data cast to 32bit floats and the optional filters (see `threshold` and `digits`) applied. TYPE: `pandas.DataFrame`

Source code in navis/nbl/utils.py

def compress_scores(scores, threshold=None, digits=None):
    """Return a compressed copy of a score matrix.

    The data is cast to 32bit floats and, optionally, rounded and clipped.
    This will not necessarily reduce the in-memory footprint but will lead to
    much smaller file sizes when saved to disk.

    Parameters
    ----------
    scores :        pandas.DataFrame
                    Scores to compress. The input itself is left untouched.
    threshold :     float, optional
                    Scores lower than this will be capped at `threshold`.
                    Applied after rounding.
    digits :        int, optional
                    Round scores to the Nth digit.

    Returns
    -------
    scores_comp :   pandas.DataFrame
                    Copy of the original dataframe with the data cast to 32bit
                    floats and the optional filters (see `threshold` and
                    `digits`) applied.

    """
    # `astype` hands back a new frame, so everything below works on a copy
    compressed = scores.astype(np.float32)

    if digits is not None:
        compressed = compressed.round(digits)

    if threshold is None:
        return compressed

    # Raise everything below `threshold` up to `threshold`
    return compressed.clip(lower=threshold)

`navis.nbl.dendrogram` #

Plot dendrogram.

This is just a convenient thin wrapper around scipy's dendrogram function that lets you feed NBLAST scores directly. Note that this causes some overhead for very large NBLASTs.

PARAMETER	DESCRIPTION
`x`	`Pandas DataFrame is assumed to be NBLAST scores. Array is assumed to be a linkage.` TYPE: `DataFrame \| array`
`method`	Method for `linkage`. Ignored if `x` is already a linkage. TYPE: `str` DEFAULT: `'ward'`
`**kwargs`	Keyword arguments passed to scipy's `dendrogram`. DEFAULT: `{}`

RETURNS	DESCRIPTION
`dendrogram`

Source code in navis/nbl/utils.py

def dendrogram(x, method='ward', **kwargs):
    """Plot a dendrogram from NBLAST scores or a linkage.

    Convenience wrapper around scipy's dendrogram function that lets you feed
    NBLAST scores directly. Note that this causes some overhead for very
    large NBLASTs.

    Parameters
    ----------
    x :             DataFrame | array
                    Pandas DataFrame is assumed to be NBLAST scores. Array is
                    assumed to be a linkage.
    method :        str
                    Method for `linkage`. Ignored if `x` is already a linkage.
    **kwargs
                    Keyword arguments passed to scipy's `dendrogram`. These
                    override the defaults set here (`no_labels=True` and
                    `labels` taken from the DataFrame index).

    Returns
    -------
    dendrogram
                    Whatever scipy's `dendrogram` returns.

    """
    # For DataFrames, the (stringified) index provides the leaf labels
    if isinstance(x, pd.DataFrame):
        leaf_labels = x.index.values.astype(str)
    else:
        leaf_labels = None

    # Sensible defaults for large dendrograms; user-supplied keyword
    # arguments take precedence
    plot_kwargs = {'no_labels': True, 'labels': leaf_labels, **kwargs}

    linkage = make_linkage(x, method=method)

    return sch.dendrogram(linkage, **plot_kwargs)

`navis.nbl.extract_matches` #

Extract top matches from score matrix.

See N, threshold or percentage for the criterion.

PARAMETER	DESCRIPTION
`scores`	Score matrix (e.g. from [`navis.nblast`][]). TYPE: `pd.DataFrame`
`N`	`Number of matches to extract.` TYPE: `int` DEFAULT: `None`
`threshold`	`Extract all matches above a given threshold.` TYPE: `float` DEFAULT: `None`
`percentage`	Extract all matches within a given range of the top match. E.g. `percentage=0.05` will return all matches within 5% of the top match. TYPE: `float [0-1]` DEFAULT: `None`
`single_cols`	If True will return single columns with comma-separated strings for match ID and match score, respectively. Note: this parameter is documented here but is not part of the function signature shown below. TYPE: `bool`
`axis`	`For which axis to produce matches.` TYPE: `0 \| 1` DEFAULT: `0`
`distances`	Whether `scores` is distances or similarities (i.e. whether we need to look for the lowest instead of the highest values). "auto" (default) will infer based on the diagonal of the `scores` matrix. Use boolean to override. TYPE: `"auto" \| bool` DEFAULT: `'auto'`

RETURNS	DESCRIPTION
`pd.DataFrame`	Note that the format is slightly different depending on the criterion.

Source code in navis/nbl/utils.py

def extract_matches(scores, N=None, threshold=None, percentage=None,
                    axis=0, distances='auto'):
    """Extract top matches from score matrix.

    Exactly one of `N`, `threshold` or `percentage` must be provided; it
    determines the criterion used to select matches.

    Parameters
    ----------
    scores :        pd.DataFrame
                    Score matrix (e.g. from [`navis.nblast`][]).
    N :             int
                    Number of matches to extract.
    threshold :     float
                    Extract all matches above a given threshold.
    percentage :    float [0-1]
                    Extract all matches within a given range of the top match.
                    E.g. `percentage=0.05` will return all matches within
                    5% of the top match.
    axis :          0 | 1
                    For which axis to produce matches. For `axis=1` the score
                    matrix is transposed before matches are extracted.
    distances :     "auto" | bool
                    Whether `scores` is distances or similarities (i.e. whether
                    we need to look for the lowest instead of the highest values).
                    "auto" (default) will infer based on the diagonal of the
                    `scores` matrix. Use boolean to override.

    Returns
    -------
    pd.DataFrame
                    Note that the format is slightly different depending on
                    the criterion.

    Raises
    ------
    ValueError
                    If none, or more than one, of `N`, `threshold` and
                    `percentage` is provided.

    """
    assert axis in (0, 1), '`axis` must be 0 or 1'

    # Count the criteria explicitly. Collapsing them into a set would merge
    # values that compare equal (e.g. `N=1` and `threshold=1.0`) and let
    # conflicting criteria slip through unnoticed.
    n_criteria = sum(c is not None for c in (N, threshold, percentage))
    if n_criteria == 0:
        raise ValueError('Must provide either `N` or `threshold` or '
                         '`percentage` as criterion for match extraction.')
    if n_criteria > 1:
        raise ValueError('Please provide either `N`, `threshold` or '
                         '`percentage` as criterion for match extraction.')

    if distances == 'auto':
        # Infer from the diagonal (rounded to 2 decimals): zeros there are
        # taken to indicate distances. `most` is a helper defined outside of
        # this function.
        distances = bool(most(np.diag(scores.values).round(2) == 0))

    # Transposing is easier than dealing with the different axes further down
    if axis == 1:
        scores = scores.T

    if N is not None:
        return _extract_matches_n(scores,
                                  N=N,
                                  distances=distances)
    if threshold is not None:
        return _extract_matches_threshold(scores,
                                          threshold=threshold,
                                          distances=distances)
    # Exactly one criterion is set, so it must be `percentage`
    return _extract_matches_perc(scores,
                                 perc=percentage,
                                 distances=distances)

`navis.nbl.make_clusters` #

Form flat clusters.

This is a thin wrapper around scipy.cluster.hierarchy.cut_tree and scipy.cluster.hierarchy.fcluster functions.

PARAMETER	DESCRIPTION
`x`	`Pandas DataFrame is assumed to be NBLAST scores. Array is assumed to be a linkage.` TYPE: `DataFrame \| array`
`t`	See `criterion`. TYPE: `scalar`
`criterion`	Method to use for creating clusters: - `n_clusters` uses `cut_tree` to create `t` clusters - `height` uses `cut_tree` to cut the dendrogram at height `t` - `inconsistent`, `distance`, `maxclust`, etc are passed through to `fcluster` TYPE: `str` DEFAULT: `'n_clusters'`
`method`	Method for `linkage`. Ignored if `x` is already a linkage. TYPE: `str` DEFAULT: `'ward'`
`**kwargs`	Additional keyword arguments are passed through to the cluster functions `cut_tree` and `fcluster`. DEFAULT: `{}`

RETURNS	DESCRIPTION
`clusters`	TYPE: `np.ndarray`

Source code in navis/nbl/utils.py

def make_clusters(x, t, criterion='n_clusters', method='ward', **kwargs):
    """Form flat clusters.

    Thin wrapper around the `scipy.cluster.hierarchy.cut_tree` and
    `scipy.cluster.hierarchy.fcluster` functions.

    Parameters
    ----------
    x :             DataFrame | array
                    Pandas DataFrame is assumed to be NBLAST scores. Array is
                    assumed to be a linkage.
    t :             scalar
                    Value for the chosen `criterion` (see below).
    criterion :     str
                    Method to use for creating clusters:
                     - `n_clusters` uses `cut_tree` to create `t` clusters
                     - `height` uses `cut_tree` to cut the dendrogram at
                       height `t`
                     - `inconsistent`, `distance`, `maxclust`, etc are passed
                       through to `fcluster`
    method :        str
                    Method for `linkage`. Ignored if `x` is already a linkage.
    **kwargs
                    Additional keyword arguments are passed through to the
                    cluster functions `cut_tree` and `fcluster`.

    Returns
    -------
    clusters :      np.ndarray

    """
    linkage = make_linkage(x, method=method)

    # These two criteria map directly onto `cut_tree` keyword arguments of
    # the same name; its output is flattened before returning
    if criterion in ('n_clusters', 'height'):
        return sch.cut_tree(linkage, **{criterion: t}, **kwargs).flatten()

    # Everything else is handed to `fcluster` as-is
    return sch.fcluster(linkage, t=t, criterion=criterion, **kwargs)

`navis.nbl.update_scores` #

Update score matrix by running only new query->target pairs.

PARAMETER	DESCRIPTION
`queries`	TYPE: `Dotprops`
`targets`	TYPE: `Dotprops`
`scores_ex`	`DataFrame with existing scores.` TYPE: `pandas.DataFrame`
`nblast_func`	The NBLAST to use. For example: `navis.nblast`. TYPE: `callable`
`**kwargs`	Keyword arguments passed to `nblast_func`. DEFAULT: `{}`

RETURNS	DESCRIPTION
`pandas.DataFrame`	Updated scores.

Examples:

Mostly for testing but also illustrates the principle:

>>> import navis
>>> import numpy as np
>>> nl = navis.example_neurons(n=5)
>>> dp = navis.make_dotprops(nl, k=5) / 125
>>> # Full NBLAST
>>> scores = navis.nblast(dp, dp, n_cores=1)
>>> # Subset and fill in
>>> scores2 = navis.nbl.update_scores(dp, dp,
...                                   scores_ex=scores.iloc[:3, 2:],
...                                   nblast_func=navis.nblast,
...                                   n_cores=1)
>>> np.all(scores == scores2)
True

Source code in navis/nbl/utils.py

def update_scores(queries, targets, scores_ex, nblast_func, **kwargs):
    """Update score matrix by running only new query->target pairs.

    Existing scores are reindexed to `queries` x `targets`. New queries are
    then run against all targets, and previously known queries are run
    against new targets only.

    Parameters
    ----------
    queries :       Dotprops
    targets :       Dotprops
    scores_ex :     pandas.DataFrame
                    DataFrame with existing scores.
    nblast_func :   callable
                    The NBLAST to use. For example: `navis.nblast`.
    **kwargs
                    Keyword arguments passed to `nblast_func`. If `precision`
                    is not provided, it is set to the dtype of the existing
                    (reindexed) scores.

    Returns
    -------
    pandas.DataFrame
                    Updated scores.

    Raises
    ------
    TypeError
                    If `nblast_func` is not callable.

    Examples
    --------

    Mostly for testing but also illustrates the principle:

    >>> import navis
    >>> import numpy as np
    >>> nl = navis.example_neurons(n=5)
    >>> dp = navis.make_dotprops(nl, k=5) / 125
    >>> # Full NBLAST
    >>> scores = navis.nblast(dp, dp, n_cores=1)
    >>> # Subset and fill in
    >>> scores2 = navis.nbl.update_scores(dp, dp,
    ...                                   scores_ex=scores.iloc[:3, 2:],
    ...                                   nblast_func=navis.nblast,
    ...                                   n_cores=1)
    >>> np.all(scores == scores2)
    True

    """
    if not callable(nblast_func):
        raise TypeError('`nblast_func` must be callable.')

    # The np.isin query is much faster if we force any strings to <U18 by
    # converting to arrays
    is_new_q = ~np.isin(queries.id, np.array(scores_ex.index))
    is_new_t = ~np.isin(targets.id, np.array(scores_ex.columns))

    logger.info(f'Found {is_new_q.sum()} new queries and '
                f'{is_new_t.sum()} new targets.')

    # Reindex old scores; rows/columns without existing scores are filled in
    # below
    scores = scores_ex.reindex(index=queries.id, columns=targets.id).copy()

    # Unless told otherwise, keep new scores in the dtype of the old ones
    if 'precision' not in kwargs:
        kwargs['precision'] = scores.values.dtype

    # NBLAST new queries against all targets
    if is_new_q.any():
        logger.info('Updating new queries -> targets scores')
        qt = nblast_func(queries[is_new_q], targets, **kwargs)
        scores.loc[qt.index, qt.columns] = qt.values

    # NBLAST all old queries against new targets. If every query is new, the
    # step above already covered all targets and there are no old queries
    # left to run - skip instead of calling the NBLAST with an empty query.
    if is_new_t.any() and not is_new_q.all():
        logger.info('Updating old queries -> new targets scores')
        tq = nblast_func(queries[~is_new_q], targets[is_new_t], **kwargs)
        scores.loc[tq.index, tq.columns] = tq.values

    return scores

nbl

navis.nbl.compress_scores #

navis.nbl.dendrogram #

navis.nbl.extract_matches #

navis.nbl.make_clusters #

navis.nbl.update_scores #

`navis.nbl.compress_scores` #

`navis.nbl.dendrogram` #

`navis.nbl.extract_matches` #

`navis.nbl.make_clusters` #

`navis.nbl.update_scores` #