Skip to content

utils

Make linkage from input. If input looks like linkage it is passed through.

Source code in navis/nbl/utils.py
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
def make_linkage(x, method='single', optimal_ordering=False):
    """Make linkage from input. If input looks like linkage it is passed through.

    Parameters
    ----------
    x :                 pandas.DataFrame | numpy.ndarray
                        Either a square DataFrame of similarity scores, or
                        an array. Arrays are assumed to already be a linkage
                        and are returned unchanged (no validation is done).
    method :            str
                        Passed through to `sch.linkage`.
    optimal_ordering :  bool
                        Passed through to `sch.linkage`.

    Returns
    -------
    Z :                 numpy.ndarray
                        The linkage.

    Raises
    ------
    ValueError
                        If `x` is a DataFrame that is not square.
    TypeError
                        If `x` is neither a DataFrame nor an array.

    """
    if isinstance(x, pd.DataFrame):
        # Make sure it is symmetric
        if x.shape[0] != x.shape[1]:
            raise ValueError(f'Scores must be symmetric, got shape {x.shape}')
        # A cheap check for whether these are mean scores: only the first row
        # is compared against the first column (rounded to 5 decimals)
        if any(x.values[0].round(5) != x.values[:, 0].round(5)):
            logger.warning('Symmetrizing scores because they do not look like mean scores!')
            x = (x + x.values.T) / 2

        # Scores are similarities -> invert to distances and condense.
        # `checks=False` skips squareform's own symmetry/zero-diagonal checks.
        dists = squareform(1 - x.values, checks=False)
        Z = sch.linkage(dists, method=method, optimal_ordering=optimal_ordering)
    elif isinstance(x, np.ndarray):
        Z = x
    else:
        raise TypeError(f'Expected scores (DataFrame) or linkage (array), got {type(x)}')

    return Z

Check if most (as opposed to all) entries are True.

Source code in navis/nbl/utils.py
427
428
429
430
431
def most(x, f=.9):
    """Tell whether at least a fraction `f` of the entries in `x` are True.

    The number of True entries (`x.sum()`) is compared against the length
    of the first axis of `x` multiplied by `f`.
    """
    n_true = x.sum()
    n_required = x.shape[0] * f
    return bool(n_true >= n_required)

Generate a smoothed version of the NBLAST scores.

In brief: 1. Run PCA on the NBLAST scores and extract the first N components. 2. From that calculate a new similarity matrix.

Requires scikit-learn.

PARAMETER DESCRIPTION
scores
    The all-by-all NBLAST scores.

TYPE: pandas.DataFrame

n_dim
    The number of dimensions to use. If float (0 < n_dim < 1) will
    use `scores.shape[0] * n_dim`.

TYPE: float | int DEFAULT: 0.2

metric
    Which distance metric to use. Directly passed through to the
    `scipy.spatial.distance.pdist` function.

TYPE: str DEFAULT: 'euclidean'

RETURNS DESCRIPTION
scores_new
Source code in navis/nbl/utils.py
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
def nblast_prime(scores, n_dim=.2, metric='euclidean'):
    """Generate a smoothed version of the NBLAST scores.

    In brief:
     1. Run PCA on the NBLAST scores and extract the first N components.
     2. From that calculate a new similarity matrix.

    Requires scikit-learn.

    Parameters
    ----------
    scores :    pandas.DataFrame
                The all-by-all NBLAST scores.
    n_dim :     float | int
                The number of dimensions to use. If float (0 < n_dim < 1) will
                use `scores.shape[1] * n_dim` (rounded down, but at least 1).
    metric :    str
                Which distance metric to use. Directly passed through to the
                `scipy.spatial.distance.pdist` function.

    Returns
    -------
    scores_new : pandas.DataFrame
                `1 - distance` between the PCA-transformed rows, using the
                same index and columns as `scores`.

    Raises
    ------
    TypeError
                If `scores` is not a pandas DataFrame.
    ValueError
                If `n_dim` is not greater than 0.
    ModuleNotFoundError
                If scikit-learn is not installed.

    """
    # Validate inputs first so that bad arguments are reported even when the
    # optional scikit-learn dependency is missing
    if not isinstance(scores, pd.DataFrame):
        raise TypeError(f'`scores` must be pandas DataFrame, got "{type(scores)}"')

    if n_dim <= 0:
        raise ValueError(f'`n_dim` must be greater than 0, got {n_dim}')

    try:
        from sklearn.decomposition import PCA
    except ModuleNotFoundError as e:
        raise ModuleNotFoundError(
            'Please install scikit-learn to use `nblast_prime`:\n'
            '  pip3 install scikit-learn -U'
            ) from e

    if (scores.shape[0] != scores.shape[1]) or not np.all(scores.columns == scores.index):
        logger.warning('NBLAST matrix is not symmetric - are you sure this is '
                       'an all-by-all matrix?')

    if n_dim < 1:
        # A fraction of a small matrix can truncate to 0 components, which
        # would leave nothing to compute distances on -> keep at least one
        n_dim = max(1, int(scores.shape[1] * n_dim))

    pca = PCA(n_components=n_dim)
    X_new = pca.fit_transform(scores.values)

    dist = pdist(X_new, metric=metric)

    return pd.DataFrame(1 - squareform(dist), index=scores.index, columns=scores.columns)