Skip to content

ablast_funcs

Base class for blasting.

Source code in navis/nbl/base.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
class Blaster(ABC):
    """Base class for blasting."""

    def __init__(self, dtype=np.float64, progress=True):
        """Initialize class."""
        self.dtype = dtype
        self.progress = progress
        self.desc = "Blasting"
        self.self_hits = []
        self.neurons = []
        self.ids = []

    def __len__(self):
        return len(self.neurons)

    @abstractmethod
    def append(self, neurons) -> NestedIndices:
        """Append neurons."""
        pass

    @abstractmethod
    def calc_self_hit(self, neurons):
        """Non-normalized value for self hit."""
        pass

    @abstractmethod
    def single_query_target(self, q_idx, t_idx, scores='forward'):
        """Query single target against single target."""
        pass

    @property
    def dtype(self):
        """Data type used for scores."""
        return self._dtype

    @dtype.setter
    def dtype(self, dtype):
        try:
            self._dtype = np.dtype(dtype)
        except TypeError:
            try:
                self._dtype = FLOAT_DTYPES[dtype]
            except KeyError:
                raise ValueError(
                    f'Unknown precision/dtype {dtype}. Expected on of the following: 16, 32 or 64 (default)'
                )

    def pair_query_target(self, pairs, scores='forward'):
        """BLAST multiple pairs.

        Parameters
        ----------
        pairs :             tuples
                            Tuples of (query_ix, target_ix) to query.
        scores :            "forward" | "mean" | "min" | "max" | "both"
                            Which scores to return.

        """
        # See `multi_query_target` for explanation on progress bars
        scr = []
        for p in config.tqdm_classic(pairs,
                             desc=f'{self.desc} pairs',
                             leave=False,
                             position=getattr(self,
                                              'pbar_position',
                                              None),
                             disable=not self.progress):
            scr.append(self.single_query_target(p[0], p[1], scores=scores))

        return scr

    def multi_query_target(self, q_idx, t_idx, scores='forward'):
        """BLAST multiple queries against multiple targets.

        Parameters
        ----------
        q_idx,t_idx :       iterable
                            Iterable of query/target neuron indices to BLAST.
        scores :            "forward" | "mean" | "min" | "max" | "both"
                            Which scores to return.

        """
        shape = (len(q_idx), len(t_idx)) if scores != 'both' else (len(q_idx), len(t_idx), 2)
        res = np.empty(shape, dtype=self.dtype)
        for i, q in enumerate(config.tqdm(q_idx,
                                          desc=self.desc,
                                          leave=False,
                                          position=getattr(self,
                                                           'pbar_position',
                                                           None),
                                          disable=not self.progress)):
            for k, t in enumerate(t_idx):
                res[i, k] = self.single_query_target(q, t, scores=scores)

        # Generate results
        if res.ndim == 2:
            res = pd.DataFrame(res)
            res.columns = [self.ids[t] for t in t_idx]
            res.index = [self.ids[q] for q in q_idx]
            res.index.name = 'query'
            res.columns.name = 'target'
        else:
            # For scores='both' we will create a DataFrame with multi-index
            ix = pd.MultiIndex.from_product([[self.ids[q] for q in q_idx],
                                             ['forward', 'reverse']],
                                            names=["query", "score"])
            res = pd.DataFrame(np.hstack((res[:, :, 0],
                                          res[:, :, 1])).reshape(len(q_idx) * 2,
                                                                 len(t_idx)),
                               index=ix,
                               columns=[self.ids[t] for t in t_idx])
            res.columns.name = 'target'

        return res

    def all_by_all(self, scores='forward'):
        """BLAST all-by-all neurons."""
        res = self.multi_query_target(range(len(self.neurons)),
                                      range(len(self.neurons)),
                                      scores='forward')

        # For all-by-all BLAST we can get the mean score by
        # transposing the scores
        if scores == 'mean':
            res = (res + res.T) / 2
        elif scores == 'min':
            res.loc[:, :] = np.dstack((res, res.T)).min(axis=2)
        elif scores == 'max':
            res.loc[:, :] = np.dstack((res, res.T)).max(axis=2)
        elif scores == 'both':
            ix = pd.MultiIndex.from_product([res.index, ['forward', 'reverse']],
                                            names=["query", "score"])
            res = pd.DataFrame(np.hstack((res[:, :, 0],
                                          res[:, :, 1])).reshape(res.shape[0] * 2,
                                                                 res.shape[1]),
                               index=ix,
                               columns=res.columns)

        return res

    def __len__(self):
        return len(self.neurons)

Data type used for scores.

Initialize class.

Source code in navis/nbl/base.py
37
38
39
40
41
42
43
44
def __init__(self, dtype=np.float64, progress=True):
    """Initialize class."""
    self.dtype = dtype
    self.progress = progress
    self.desc = "Blasting"
    self.self_hits = []
    self.neurons = []
    self.ids = []

BLAST all-by-all neurons.

Source code in navis/nbl/base.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def all_by_all(self, scores='forward'):
    """BLAST all-by-all neurons."""
    res = self.multi_query_target(range(len(self.neurons)),
                                  range(len(self.neurons)),
                                  scores='forward')

    # For all-by-all BLAST we can get the mean score by
    # transposing the scores
    if scores == 'mean':
        res = (res + res.T) / 2
    elif scores == 'min':
        res.loc[:, :] = np.dstack((res, res.T)).min(axis=2)
    elif scores == 'max':
        res.loc[:, :] = np.dstack((res, res.T)).max(axis=2)
    elif scores == 'both':
        ix = pd.MultiIndex.from_product([res.index, ['forward', 'reverse']],
                                        names=["query", "score"])
        res = pd.DataFrame(np.hstack((res[:, :, 0],
                                      res[:, :, 1])).reshape(res.shape[0] * 2,
                                                             res.shape[1]),
                           index=ix,
                           columns=res.columns)

    return res

Append neurons.

Source code in navis/nbl/base.py
49
50
51
52
@abstractmethod
def append(self, neurons) -> NestedIndices:
    """Append neurons."""
    pass

Non-normalized value for self hit.

Source code in navis/nbl/base.py
54
55
56
57
@abstractmethod
def calc_self_hit(self, neurons):
    """Non-normalized value for self hit."""
    pass

BLAST multiple queries against multiple targets.

PARAMETER DESCRIPTION
q_idx
            Iterable of query/target neuron indices to BLAST.

scores
            Which scores to return.

TYPE: "forward" | "mean" | "min" | "max" | "both" DEFAULT: 'forward'

Source code in navis/nbl/base.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def multi_query_target(self, q_idx, t_idx, scores='forward'):
    """BLAST multiple queries against multiple targets.

    Parameters
    ----------
    q_idx,t_idx :       iterable
                        Iterable of query/target neuron indices to BLAST.
    scores :            "forward" | "mean" | "min" | "max" | "both"
                        Which scores to return.

    """
    shape = (len(q_idx), len(t_idx)) if scores != 'both' else (len(q_idx), len(t_idx), 2)
    res = np.empty(shape, dtype=self.dtype)
    for i, q in enumerate(config.tqdm(q_idx,
                                      desc=self.desc,
                                      leave=False,
                                      position=getattr(self,
                                                       'pbar_position',
                                                       None),
                                      disable=not self.progress)):
        for k, t in enumerate(t_idx):
            res[i, k] = self.single_query_target(q, t, scores=scores)

    # Generate results
    if res.ndim == 2:
        res = pd.DataFrame(res)
        res.columns = [self.ids[t] for t in t_idx]
        res.index = [self.ids[q] for q in q_idx]
        res.index.name = 'query'
        res.columns.name = 'target'
    else:
        # For scores='both' we will create a DataFrame with multi-index
        ix = pd.MultiIndex.from_product([[self.ids[q] for q in q_idx],
                                         ['forward', 'reverse']],
                                        names=["query", "score"])
        res = pd.DataFrame(np.hstack((res[:, :, 0],
                                      res[:, :, 1])).reshape(len(q_idx) * 2,
                                                             len(t_idx)),
                           index=ix,
                           columns=[self.ids[t] for t in t_idx])
        res.columns.name = 'target'

    return res

BLAST multiple pairs.

PARAMETER DESCRIPTION
pairs
            Tuples of (query_ix, target_ix) to query.

TYPE: tuples

scores
            Which scores to return.

TYPE: "forward" | "mean" | "min" | "max" | "both" DEFAULT: 'forward'

Source code in navis/nbl/base.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def pair_query_target(self, pairs, scores='forward'):
    """BLAST multiple pairs.

    Parameters
    ----------
    pairs :             tuples
                        Tuples of (query_ix, target_ix) to query.
    scores :            "forward" | "mean" | "min" | "max" | "both"
                        Which scores to return.

    """
    # See `multi_query_target` for explanation on progress bars
    scr = []
    for p in config.tqdm_classic(pairs,
                         desc=f'{self.desc} pairs',
                         leave=False,
                         position=getattr(self,
                                          'pbar_position',
                                          None),
                         disable=not self.progress):
        scr.append(self.single_query_target(p[0], p[1], scores=scores))

    return scr

Query single target against single target.

Source code in navis/nbl/base.py
59
60
61
62
@abstractmethod
def single_query_target(self, q_idx, t_idx, scores='forward'):
    """Query single target against single target."""
    pass

Convenience class inheriting from LookupNd for the common 2D float case. Provides IO with pandas DataFrames.

Source code in navis/nbl/smat.py
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
class Lookup2d(LookupNd):
    """Convenience class inheriting from LookupNd for the common 2D float case.
    Provides IO with pandas DataFrames.
    """

    def __init__(self, axis0: Digitizer, axis1: Digitizer, cells: np.ndarray):
        """2D lookup table for convert NBLAST matches to scores.

        Commonly read from a `pandas.DataFrame`
        or trained on data using a `LookupDistDotBuilder`.

        Parameters
        ----------
        axis0 : Digitizer
            How to convert continuous values into an index for the first axis.
        axis1 : Digitizer
            How to convert continuous values into an index for the second axis.
        cells : np.ndarray
            Values to look up in the table.
        """
        super().__init__([axis0, axis1], cells)

    def to_dataframe(self) -> pd.DataFrame:
        """Convert the lookup table into a `pandas.DataFrame`.

        From there, it can be shared, saved, and so on.

        The index and column labels describe the intervals represented by that axis.

        Returns
        -------
        pd.DataFrame
        """
        return pd.DataFrame(
            self.cells,
            self.axes[0].to_strings(),
            self.axes[1].to_strings(),
        )

    @classmethod
    def from_dataframe(cls, df: pd.DataFrame):
        """Parse score matrix from a dataframe with string index and column labels.

        Expects the index and column labels to specify an interval
        like `f"[{{lower}},{{upper}})"`.
        Will replace the lowermost and uppermost bound with -inf and inf
        if they are not already.
        """
        return cls(
            Digitizer.from_strings(df.index),
            Digitizer.from_strings(df.columns),
            df.to_numpy(),
        )

2D lookup table for convert NBLAST matches to scores.

Commonly read from a pandas.DataFrame or trained on data using a LookupDistDotBuilder.

PARAMETER DESCRIPTION
axis0

How to convert continuous values into an index for the first axis.

TYPE: Digitizer

axis1

How to convert continuous values into an index for the second axis.

TYPE: Digitizer

cells

Values to look up in the table.

TYPE: np.ndarray

Source code in navis/nbl/smat.py
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
def __init__(self, axis0: Digitizer, axis1: Digitizer, cells: np.ndarray):
    """2D lookup table for convert NBLAST matches to scores.

    Commonly read from a `pandas.DataFrame`
    or trained on data using a `LookupDistDotBuilder`.

    Parameters
    ----------
    axis0 : Digitizer
        How to convert continuous values into an index for the first axis.
    axis1 : Digitizer
        How to convert continuous values into an index for the second axis.
    cells : np.ndarray
        Values to look up in the table.
    """
    super().__init__([axis0, axis1], cells)

Parse score matrix from a dataframe with string index and column labels.

Expects the index and column labels to specify an interval like f"[{{lower}},{{upper}})". Will replace the lowermost and uppermost bound with -inf and inf if they are not already.

Source code in navis/nbl/smat.py
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
@classmethod
def from_dataframe(cls, df: pd.DataFrame):
    """Parse score matrix from a dataframe with string index and column labels.

    Expects the index and column labels to specify an interval
    like `f"[{{lower}},{{upper}})"`.
    Will replace the lowermost and uppermost bound with -inf and inf
    if they are not already.
    """
    return cls(
        Digitizer.from_strings(df.index),
        Digitizer.from_strings(df.columns),
        df.to_numpy(),
    )

Convert the lookup table into a pandas.DataFrame.

From there, it can be shared, saved, and so on.

The index and column labels describe the intervals represented by that axis.

RETURNS DESCRIPTION
pd.DataFrame
Source code in navis/nbl/smat.py
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
def to_dataframe(self) -> pd.DataFrame:
    """Convert the lookup table into a `pandas.DataFrame`.

    From there, it can be shared, saved, and so on.

    The index and column labels describe the intervals represented by that axis.

    Returns
    -------
    pd.DataFrame
    """
    return pd.DataFrame(
        self.cells,
        self.axes[0].to_strings(),
        self.axes[1].to_strings(),
    )

Implements a version of NBLAST were neurons are first aligned.

Please note that some properties are computed on initialization and changing parameters (e.g. use_alpha) at a later stage will mess things up!

PARAMETER DESCRIPTION
use_alpha
        Whether or not to use alpha values for the scoring.
        If True, the dotproduct of nearest neighbor vectors will
        be scaled by `sqrt(alpha1 * alpha2)`.

TYPE: bool DEFAULT: False

normalized
        If True, will normalize scores by the best possible score
        (i.e. self-self) of the query neuron.

TYPE: bool DEFAULT: True

smat
        How to convert the point match pairs into an NBLAST score,
        usually by a lookup table:
         - if 'auto' (default), will use the "official" NBLAST scoring
           matrices based on FCWB data. Same behaviour as in R's
           nat.nblast implementation.
         - if `smat='v1'` it uses the analytic formulation of the
           NBLAST scoring from Kohl et. al (2013). You can adjust parameter
           `sigma_scaling` (default to 10) using `smat_kwargs`.
         - DataFrames will be used to build a `Lookup2d`
         - if `Callable` given, it passes distance and dot products as
           first and second argument respectively
         - if `smat=None` the scores will be generated as the
           product of the distances and the dotproduct of the vectors
           of nearest-neighbor pairs

TYPE: navis.nbl.smat.Lookup2d | pd.DataFrame | str | Callable DEFAULT: 'auto'

smat_kwargs
        functions. For example: `smat_kwargs["sigma_scoring"] = 10`.

DEFAULT: dict()

limit_dist
        Sets the max distance for the nearest neighbor search
        (`distance_upper_bound`). Typically this should be the
        highest distance considered by the scoring function. If
        "auto", will extract that value from the first axis of the
        scoring matrix.

TYPE: float | "auto" | None DEFAULT: None

progress
        If True, will show a progress bar.

TYPE: bool DEFAULT: True

Source code in navis/nbl/ablast_funcs.py
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
class NBlasterAlign(Blaster):
    """Implements a version of NBLAST were neurons are first aligned.

    Please note that some properties are computed on initialization and
    changing parameters (e.g. `use_alpha`) at a later stage will mess things
    up!

    Parameters
    ----------
    use_alpha :     bool
                    Whether or not to use alpha values for the scoring.
                    If True, the dotproduct of nearest neighbor vectors will
                    be scaled by `sqrt(alpha1 * alpha2)`.
    normalized :    bool
                    If True, will normalize scores by the best possible score
                    (i.e. self-self) of the query neuron.
    smat :          navis.nbl.smat.Lookup2d | pd.DataFrame | str | Callable
                    How to convert the point match pairs into an NBLAST score,
                    usually by a lookup table:
                     - if 'auto' (default), will use the "official" NBLAST scoring
                       matrices based on FCWB data. Same behaviour as in R's
                       nat.nblast implementation.
                     - if `smat='v1'` it uses the analytic formulation of the
                       NBLAST scoring from Kohl et. al (2013). You can adjust parameter
                       `sigma_scaling` (default to 10) using `smat_kwargs`.
                     - DataFrames will be used to build a `Lookup2d`
                     - if `Callable` given, it passes distance and dot products as
                       first and second argument respectively
                     - if `smat=None` the scores will be generated as the
                       product of the distances and the dotproduct of the vectors
                       of nearest-neighbor pairs
    smat_kwargs:    Dictionary with additional parameters passed to scoring
                    functions. For example: `smat_kwargs["sigma_scoring"] = 10`.
    limit_dist :    float | "auto" | None
                    Sets the max distance for the nearest neighbor search
                    (`distance_upper_bound`). Typically this should be the
                    highest distance considered by the scoring function. If
                    "auto", will extract that value from the first axis of the
                    scoring matrix.
    progress :      bool
                    If True, will show a progress bar.

    """

    def __init__(self,
                 align_func, two_way_align=True, sample_align=None,
                 use_alpha=False, normalized=True, smat='auto',
                 limit_dist=None, approx_nn=False, dtype=np.float64,
                 progress=True,
                 smat_kwargs=dict(),
                 align_kwargs=dict(),
                 dotprop_kwargs=dict(),
                 ):
        """Initialize class."""
        super().__init__(progress=progress, dtype=dtype)
        self.align_func = align_func
        self.two_way_align = two_way_align
        self.sample_align = sample_align
        self.use_alpha = use_alpha
        self.normalized = normalized
        self.approx_nn = approx_nn
        self.dotprop_kwargs = dotprop_kwargs
        self.align_kwargs = align_kwargs
        self.desc = "NBlasting"
        self.self_hits = {}
        self.dotprops = {}
        self.neurons = []

        if smat is None:
            self.score_fn = operator.mul
        elif smat == 'auto':
            from ..nbl.smat import smat_fcwb
            self.score_fn = smat_fcwb(self.use_alpha)
        elif smat == 'v1':
            self.score_fn = partial(
                _nblast_v1_scoring, sigma_scoring = smat_kwargs.get('sigma_scoring', 10)
            )
        elif isinstance(smat, pd.DataFrame):
            self.score_fn = Lookup2d.from_dataframe(smat)
        else:
            self.score_fn = smat

        if limit_dist == "auto":
            try:
                if self.score_fn.axes[0].boundaries[-1] != np.inf:
                    self.distance_upper_bound = self.score_fn.axes[0].boundaries[-1]
                else:
                    # If the right boundary is open (i.e. infinity), we will use
                    # the second highest boundary plus a 5% offset
                    self.distance_upper_bound = self.score_fn.axes[0].boundaries[-2] * 1.05
            except AttributeError:
                logger.warning("Could not infer distance upper bound from scoring function")
                self.distance_upper_bound = None
        else:
            self.distance_upper_bound = limit_dist

    def append(self, neuron: core.BaseNeuron) -> NestedIndices:
        """Append neurons.

        Returns the numerical index appended neurons.
        If neurons is a (possibly nested) sequence of neurons,
        return a (possibly nested) list of indices.

        Note that `self_hit` is ignored (and hence calculated from scratch)
        when `neurons` is a nested list of dotprops.
        """
        if isinstance(neuron, core.BaseNeuron):
            return self._append_neuron(neuron)

        try:
            return [self.append(n) for n in neuron]
        except TypeError:  # i.e. not iterable
            raise ValueError(f"Expected Neuron or iterable thereof; got {type(neuron)}")

    def _append_neuron(self, neuron: core.BaseNeuron) -> int:
        next_id = len(self)
        self.neurons.append(neuron)
        self.ids.append(neuron.id)
        return next_id

    def get_dotprop(self, ix):
        if ix not in self.dotprops:
            if not isinstance(self.neurons[ix], core.Dotprops):
                self.dotprops[ix] = core.make_dotprops(self.neurons[ix],
                                                    **self.dotprop_kwargs)
            else:
                self.dotprops[ix] = self.neurons[ix]
        return self.dotprops[ix]

    def get_self_hit(self, ix):
        if ix not in self.self_hits:
            self.self_hits[ix] = self.calc_self_hit(self.get_dotprop(ix))
        return self.self_hits[ix]

    def calc_self_hit(self, dotprops):
        """Non-normalized value for self hit."""
        if not self.use_alpha:
            return len(dotprops.points) * self.score_fn(0, 1.0)
        else:
            dists = np.repeat(0, len(dotprops.points))
            alpha = dotprops.alpha * dotprops.alpha
            dots = np.repeat(1, len(dotprops.points)) * np.sqrt(alpha)
            return self.score_fn(dists, dots).sum()

    def score_single(self, q_dp, t_dp, q_idx):
        """Calculate score for single query/target dotprop pair."""
        # Run nearest-neighbor search for query against target
        data = q_dp.dist_dots(t_dp,
                              alpha=self.use_alpha,
                              # eps=0.1 means we accept 10% inaccuracy
                              eps=.1 if self.approx_nn else 0,
                              distance_upper_bound=self.distance_upper_bound)

        if self.use_alpha:
            dists, dots, alpha = data
            dots *= np.sqrt(alpha)
        else:
            dists, dots = data

        scr = self.score_fn(dists, dots).sum()

        # Normalize against best hit
        if self.normalized:
            scr /= self.get_self_hit(q_idx)

        return scr

    def single_query_target(self, q_idx: int, t_idx: int, scores='forward',
                            allow_rev_align=True):
        """Query single query against single target."""
        # Take a short-cut if this is a self-self comparison
        if q_idx == t_idx:
            if self.normalized:
                return 1
            return self.get_self_hit(q_idx)

        # Align the query to the target
        q_xf = self.align_func(self.neurons[q_idx],
                               target=self.neurons[t_idx],
                               sample=self.sample_align,
                               progress=False,
                               **self.align_kwargs)[0][0]

        # The query must always be made into new dotprops because it has been
        # moved around
        q_dp = core.make_dotprops(q_xf, **self.dotprop_kwargs)

        # The target dotprop has to be compute only once
        t_dp = self.get_dotprop(t_idx)

        scr = self.score_single(q_dp, t_dp, q_idx)

        # For the mean score we also have to produce the reverse score
        if scores in ('mean', 'min', 'max', 'both'):
            reverse = self.score_single(t_dp, q_dp, t_idx)
            if scores == 'mean':
                scr = (scr + reverse) / 2
            elif scores == 'min':
                scr = min(scr, reverse)
            elif scores == 'max':
                scr = max(scr, reverse)
            elif scores == 'both':
                # If both scores are requested
                scr = [scr, reverse]

        if self.two_way_align and allow_rev_align:
            rev = self.single_query_target(t_idx, q_idx, scores=scores,
                                           allow_rev_align=False)
            scr = (scr + rev) / 2

        return scr

Initialize class.

Source code in navis/nbl/ablast_funcs.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def __init__(self,
             align_func, two_way_align=True, sample_align=None,
             use_alpha=False, normalized=True, smat='auto',
             limit_dist=None, approx_nn=False, dtype=np.float64,
             progress=True,
             smat_kwargs=dict(),
             align_kwargs=dict(),
             dotprop_kwargs=dict(),
             ):
    """Initialize class."""
    super().__init__(progress=progress, dtype=dtype)
    self.align_func = align_func
    self.two_way_align = two_way_align
    self.sample_align = sample_align
    self.use_alpha = use_alpha
    self.normalized = normalized
    self.approx_nn = approx_nn
    self.dotprop_kwargs = dotprop_kwargs
    self.align_kwargs = align_kwargs
    self.desc = "NBlasting"
    self.self_hits = {}
    self.dotprops = {}
    self.neurons = []

    if smat is None:
        self.score_fn = operator.mul
    elif smat == 'auto':
        from ..nbl.smat import smat_fcwb
        self.score_fn = smat_fcwb(self.use_alpha)
    elif smat == 'v1':
        self.score_fn = partial(
            _nblast_v1_scoring, sigma_scoring = smat_kwargs.get('sigma_scoring', 10)
        )
    elif isinstance(smat, pd.DataFrame):
        self.score_fn = Lookup2d.from_dataframe(smat)
    else:
        self.score_fn = smat

    if limit_dist == "auto":
        try:
            if self.score_fn.axes[0].boundaries[-1] != np.inf:
                self.distance_upper_bound = self.score_fn.axes[0].boundaries[-1]
            else:
                # If the right boundary is open (i.e. infinity), we will use
                # the second highest boundary plus a 5% offset
                self.distance_upper_bound = self.score_fn.axes[0].boundaries[-2] * 1.05
        except AttributeError:
            logger.warning("Could not infer distance upper bound from scoring function")
            self.distance_upper_bound = None
    else:
        self.distance_upper_bound = limit_dist

Append neurons.

Returns the numerical index appended neurons. If neurons is a (possibly nested) sequence of neurons, return a (possibly nested) list of indices.

Note that self_hit is ignored (and hence calculated from scratch) when neurons is a nested list of dotprops.

Source code in navis/nbl/ablast_funcs.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def append(self, neuron: core.BaseNeuron) -> NestedIndices:
    """Append neurons.

    Returns the numerical index appended neurons.
    If neurons is a (possibly nested) sequence of neurons,
    return a (possibly nested) list of indices.

    Note that `self_hit` is ignored (and hence calculated from scratch)
    when `neurons` is a nested list of dotprops.
    """
    if isinstance(neuron, core.BaseNeuron):
        return self._append_neuron(neuron)

    try:
        return [self.append(n) for n in neuron]
    except TypeError:  # i.e. not iterable
        raise ValueError(f"Expected Neuron or iterable thereof; got {type(neuron)}")

Non-normalized value for self hit.

Source code in navis/nbl/ablast_funcs.py
180
181
182
183
184
185
186
187
188
def calc_self_hit(self, dotprops):
    """Non-normalized value for self hit."""
    if not self.use_alpha:
        return len(dotprops.points) * self.score_fn(0, 1.0)
    else:
        dists = np.repeat(0, len(dotprops.points))
        alpha = dotprops.alpha * dotprops.alpha
        dots = np.repeat(1, len(dotprops.points)) * np.sqrt(alpha)
        return self.score_fn(dists, dots).sum()

Calculate score for single query/target dotprop pair.

Source code in navis/nbl/ablast_funcs.py
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
def score_single(self, q_dp, t_dp, q_idx):
    """Calculate score for single query/target dotprop pair."""
    # Run nearest-neighbor search for query against target
    data = q_dp.dist_dots(t_dp,
                          alpha=self.use_alpha,
                          # eps=0.1 means we accept 10% inaccuracy
                          eps=.1 if self.approx_nn else 0,
                          distance_upper_bound=self.distance_upper_bound)

    if self.use_alpha:
        dists, dots, alpha = data
        dots *= np.sqrt(alpha)
    else:
        dists, dots = data

    scr = self.score_fn(dists, dots).sum()

    # Normalize against best hit
    if self.normalized:
        scr /= self.get_self_hit(q_idx)

    return scr

Query single query against single target.

Source code in navis/nbl/ablast_funcs.py
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
def single_query_target(self, q_idx: int, t_idx: int, scores='forward',
                        allow_rev_align=True):
    """Query single query against single target."""
    # Take a short-cut if this is a self-self comparison
    if q_idx == t_idx:
        if self.normalized:
            return 1
        return self.get_self_hit(q_idx)

    # Align the query to the target
    q_xf = self.align_func(self.neurons[q_idx],
                           target=self.neurons[t_idx],
                           sample=self.sample_align,
                           progress=False,
                           **self.align_kwargs)[0][0]

    # The query must always be made into new dotprops because it has been
    # moved around
    q_dp = core.make_dotprops(q_xf, **self.dotprop_kwargs)

    # The target dotprop has to be compute only once
    t_dp = self.get_dotprop(t_idx)

    scr = self.score_single(q_dp, t_dp, q_idx)

    # For the mean score we also have to produce the reverse score
    if scores in ('mean', 'min', 'max', 'both'):
        reverse = self.score_single(t_dp, q_dp, t_idx)
        if scores == 'mean':
            scr = (scr + reverse) / 2
        elif scores == 'min':
            scr = min(scr, reverse)
        elif scores == 'max':
            scr = max(scr, reverse)
        elif scores == 'both':
            # If both scores are requested
            scr = [scr, reverse]

    if self.two_way_align and allow_rev_align:
        rev = self.single_query_target(t_idx, q_idx, scores=scores,
                                       allow_rev_align=False)
        scr = (scr + rev) / 2

    return scr

Find an optimal partition for given NBLAST query.

PARAMETER DESCRIPTION
N_cores
    Number of available cores.

TYPE: int

q
    Query and targets, respectively.

RETURNS DESCRIPTION
(n_rows, n_cols)
Source code in navis/nbl/nblast_funcs.py
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
def find_optimal_partition(N_cores, q, t):
    """Find an optimal partition for given NBLAST query.

    Parameters
    ----------
    N_cores :   int
                Number of available cores.
    q,t :       NeuronList of Dotprops
                Query and targets, respectively.

    Returns
    -------
    n_rows, n_cols

    """
    neurons_per_query = []
    for n_rows in range(1, N_cores + 1):
        # Skip splits we can't make
        if N_cores % n_rows:
            continue
        if n_rows > len(q):
            continue

        n_cols = min(int(N_cores / n_rows), len(t))

        n_queries = len(q) / n_rows
        n_targets = len(t) / n_cols

        neurons_per_query.append([n_rows, n_cols, n_queries + n_targets])

    # Find the optimal partition
    neurons_per_query = np.array(neurons_per_query)
    n_rows, n_cols = neurons_per_query[np.argmin(neurons_per_query[:, 2]), :2]

    return int(n_rows), int(n_cols)

Run preflight checks for NBLAST.

Source code in navis/nbl/nblast_funcs.py
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
def nblast_preflight(query, target, n_cores, batch_size=None,
                     req_unique_ids=False, req_dotprops=True, req_points=True,
                     req_microns=True):
    """Run preflight checks for NBLAST."""
    if req_dotprops:
        if query.types != (Dotprops, ):
            raise TypeError(f'`query` must be Dotprop(s), got "{query.types}". '
                            'Use `navis.make_dotprops` to convert neurons.')

        if target.types != (Dotprops, ):
            raise TypeError(f'`target` must be Dotprop(s), got "{target.types}". '
                            'Use `navis.make_dotprops` to convert neurons.')

        if req_points:
            no_points = query.n_points == 0
            if any(no_points):
                raise ValueError('Some query dotprops appear to have no points: '
                                 f'{query.id[no_points]}')
            no_points = target.n_points == 0
            if any(no_points):
                raise ValueError('Some target dotprops appear to have no points: '
                                 f'{target.id[no_points]}')

    if req_unique_ids:
        # At the moment, neurons need to have a unique ID for things to work
        if query.is_degenerated:
            raise ValueError('Queries have non-unique IDs.')
        if target.is_degenerated:
            raise ValueError('Targets have non-unique IDs.')

    # Check if query or targets are in microns
    # Note this test can return `None` if it can't be determined
    if req_microns:
        if check_microns(query) is False:
            logger.warning('NBLAST is optimized for data in microns and it looks '
                           'like your queries are not in microns.')
        if check_microns(target) is False:
            logger.warning('NBLAST is optimized for data in microns and it looks '
                           'like your targets are not in microns.')

    if not isinstance(n_cores, numbers.Number) or n_cores < 1:
        raise TypeError('`n_cores` must be an integer > 0')

    n_cores = int(n_cores)
    if n_cores > 1 and n_cores % 2:
        logger.warning('NBLAST is most efficient if `n_cores` is an even number')
    elif n_cores < 1:
        raise ValueError('`n_cores` must not be smaller than 1')
    elif n_cores > os.cpu_count():
        logger.warning('`n_cores` should not larger than the number of '
                       'available cores')

    if batch_size is not None:
        if batch_size <= 0:
            raise ValueError('`batch_size` must be >= 1 or `None`.')

Set OMP_NUM_THREADS flag to given value.

This is to avoid pykdtree causing multiple layers of concurrency which will over-subcribe and slow down NBLAST on multi-core systems.

Use as context manager!

Source code in navis/nbl/nblast_funcs.py
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
def set_omp_flag(limits=1):
    """Set OMP_NUM_THREADS flag to given value.

    This is to avoid pykdtree causing multiple layers of concurrency which
    will over-subcribe and slow down NBLAST on multi-core systems.

    Use as context manager!
    """
    class OMPSetter:
        def __init__(self, num_threads):
            assert isinstance(num_threads, (int, type(None)))
            self.num_threads = num_threads

        def __enter__(self):
            if self.num_threads is None:
                return
            # Track old value (if there is one)
            self.old_value = os.environ.get('OMP_NUM_THREADS', None)
            # Set flag
            os.environ['OMP_NUM_THREADS'] = str(self.num_threads)

        def __exit__(self, *args, **kwargs):
            if self.num_threads is None:
                return

            # Reset flag
            if self.old_value:
                os.environ['OMP_NUM_THREADS'] = str(self.old_value)
            else:
                os.environ.pop('OMP_NUM_THREADS', None)

    return OMPSetter(limits)