Skip to content

synblast_funcs

`navis.nbl.synblast_funcs.SynBlaster` #

Implements a synapsed-based NBLAST algorithm.

Please note that some properties are computed on initialization and changing parameters at a later stage will mess things up!

TODOs

implement use_alpha as average synapse density (i.e. emphasize areas where a neuron has lots of synapses)

PARAMETER	DESCRIPTION
`normalized`	`If True, will normalize scores by the best possible score (i.e. self-self) of the query neuron.` TYPE: `bool` DEFAULT: `True`
`by_type`	`If True will only compare synapses with the same value in the "type" column.` TYPE: `bool` DEFAULT: `True`
`smat`	How to convert the point match pairs into an NBLAST score, usually by a lookup table. If 'auto' (default), will use scoring matrices from FCWB. Same behaviour as in R's nat.nblast implementation. Dataframes will be used to build a `Lookup2d`. If `smat=None` the scores will be generated as the product of the distances and the dotproduct of the vectors of nearest-neighbor pairs. TYPE: `navis.nbl.smat.Lookup2d \| pd.DataFrame \| str` DEFAULT: `'auto'`
`progress`	`If True, will show a progress bar.` TYPE: `bool` DEFAULT: `True`

Source code in navis/nbl/synblast_funcs.py

class SynBlaster(Blaster):
    """Implements a synapsed-based NBLAST algorithm.

    Please note that some properties are computed on initialization and
    changing parameters at a later stage will mess things up!

    TODOs
    -----
    - implement `use_alpha` as average synapse density (i.e. emphasize areas
      where a neuron has lots of synapses)

    Parameters
    ----------
    normalized :    bool
                    If True, will normalize scores by the best possible score
                    (i.e. self-self) of the query neuron.
    by_type :       bool
                    If True will only compare synapses with the same value in
                    the "type" column.
    smat :          navis.nbl.smat.Lookup2d | pd.DataFrame | str
                    How to convert the point match pairs into an NBLAST score,
                    usually by a lookup table.
                    If 'auto' (default), will use scoring matrices
                    from FCWB. Same behaviour as in R's nat.nblast
                    implementation.
                    Dataframes will be used to build a `Lookup2d`.
                    If `smat=None` the scores will be
                    generated as the product of the distances and the dotproduct
                    of the vectors of nearest-neighbor pairs.
    progress :      bool
                    If True, will show a progress bar.

    """

    def __init__(self, normalized=True, by_type=True,
                 smat='auto', progress=True):
        """Initialize class."""
        super().__init__(progress=progress)
        self.normalized = normalized
        self.by_type = by_type

        if smat is None:
            self.score_fn = operator.mul
        elif smat == 'auto':
            self.score_fn = smat_fcwb()
        elif isinstance(smat, pd.DataFrame):
            self.score_fn = Lookup2d.from_dataframe(smat)
        else:
            self.score_fn = smat

        self.ids = []

    def append(self, neuron, id=None, self_hit=None) -> NestedIndices:
        """Append neurons/connector tables, returning numerical indices of added objects.

        Note that `self_hit` is ignored and hence calculated from scratch
        when `neuron` is a (nested) list of neurons.
        """
        if isinstance(neuron, pd.DataFrame):
            return self._append_connectors(neuron, id, self_hit=self_hit)

        if isinstance(neuron, BaseNeuron):
            if not neuron.has_connectors:
                raise ValueError('Neuron must have synapses')
            return self._append_connectors(neuron.connectors, neuron.id, self_hit=self_hit)

        try:
            return [self.append(n) for n in neuron]
        except TypeError:
            raise ValueError(
                "Expected a dataframe, or a Neuron or sequence thereof; got "
                f"{type(neuron)}"
            )

    def _append_connectors(self, connectors: pd.DataFrame, id, self_hit=None) -> int:
        if id is None:
            raise ValueError("Explicit non-None id required for appending connectors")

        next_idx = len(self)
        self.ids.append(id)
        self.neurons.append({})
        if not self.by_type:
            data = connectors[['x', 'y', 'z']].values
            # Generate the KDTree
            self.neurons[-1]['all'] = data # KDTree(data)
        else:
            if 'type' not in connectors.columns:
                raise ValueError('Connector tables must have a "type" column '
                                 'if `by_type=True`')
            for ty in connectors['type'].unique():
                data = connectors.loc[connectors['type'] == ty, ['x', 'y', 'z']].values
                # Generate the KDTree
                self.neurons[-1][ty] = data # KDTree(data)

        # Calculate score for self hit if required
        if not self_hit:
            self_hit = self.calc_self_hit(connectors)
        self.self_hits.append(self_hit)

        return next_idx

    def _build_trees(self):
        """Build KDTree for all neurons."""
        for i, n in enumerate(self.neurons):
            for ty, data in n.items():
                n[ty] = KDTree(data)

    def calc_self_hit(self, cn):
        """Non-normalized value for self hit."""
        return cn.shape[0] * self.score_fn(0, 1)

    def single_query_target(self, q_idx: int, t_idx: int, scores='forward'):
        """Query single target against single target."""
        # Take a short-cut if this is a self-self comparison
        if q_idx == t_idx:
            if self.normalized:
                return 1
            return self.self_hits[q_idx]

        # Run nearest-neighbor search for query against target
        t_trees = self.neurons[t_idx]
        q_trees = self.neurons[q_idx]

        dists = []
        # Go over all connector types (e.g. "pre" and "post") present in the
        # query neuron. If `by_type=False`, there will be a single tree simply
        # called "all"
        for ty, qt in q_trees.items():
            # If target does not have this type of connectors
            if ty not in t_trees:
                # Note that this infinite distance will simply get the worst
                # score possible in the scoring function
                dists = np.append(dists, [self.score_fn.max_dist] * qt.data.shape[0])
            else:
                # Note: we're building the trees lazily here once we actually need them.
                # The main reason is that pykdtree is not picklable and hence
                # we can't pass it to the worker processes.
                if not isinstance(t_trees[ty], KDTree):
                    t_trees[ty] = KDTree(t_trees[ty])

                tt = t_trees[ty]
                if isinstance(qt, KDTree):
                    data = qt.data
                else:
                    data = qt

                # pykdtree tracks data as flat array
                if data.ndim == 1:
                    data = data.reshape((qt.n, qt.ndim))
                dists = np.append(dists, tt.query(data)[0])

        # We use the same scoring function as for normal NBLAST but ignore the
        # vector dotproduct component
        scr = self.score_fn(dists, 1).sum()

        # Normalize against best possible hit (self hit)
        if self.normalized:
            scr /= self.self_hits[q_idx]

        # For the mean score we also have to produce the reverse score
        if scores in ('mean', 'min', 'max'):
            reverse = self.single_query_target(t_idx, q_idx, scores='forward')
            if scores == 'mean':
                scr = (scr + reverse) / 2
            elif scores == 'min':
                scr = min(scr, reverse)
            elif scores == 'max':
                scr = max(scr, reverse)

        return scr

`init` #

Initialize class.

Source code in navis/nbl/synblast_funcs.py

def __init__(self, normalized=True, by_type=True,
             smat='auto', progress=True):
    """Initialize class."""
    super().__init__(progress=progress)
    self.normalized = normalized
    self.by_type = by_type

    if smat is None:
        self.score_fn = operator.mul
    elif smat == 'auto':
        self.score_fn = smat_fcwb()
    elif isinstance(smat, pd.DataFrame):
        self.score_fn = Lookup2d.from_dataframe(smat)
    else:
        self.score_fn = smat

    self.ids = []

`append` #

Append neurons/connector tables, returning numerical indices of added objects.

Note that self_hit is ignored and hence calculated from scratch when neuron is a (nested) list of neurons.

Source code in navis/nbl/synblast_funcs.py

def append(self, neuron, id=None, self_hit=None) -> NestedIndices:
    """Append neurons/connector tables, returning numerical indices of added objects.

    Note that `self_hit` is ignored and hence calculated from scratch
    when `neuron` is a (nested) list of neurons.
    """
    if isinstance(neuron, pd.DataFrame):
        return self._append_connectors(neuron, id, self_hit=self_hit)

    if isinstance(neuron, BaseNeuron):
        if not neuron.has_connectors:
            raise ValueError('Neuron must have synapses')
        return self._append_connectors(neuron.connectors, neuron.id, self_hit=self_hit)

    try:
        return [self.append(n) for n in neuron]
    except TypeError:
        raise ValueError(
            "Expected a dataframe, or a Neuron or sequence thereof; got "
            f"{type(neuron)}"
        )

`calc_self_hit` #

Non-normalized value for self hit.

Source code in navis/nbl/synblast_funcs.py

def calc_self_hit(self, cn):
    """Non-normalized value for self hit."""
    return cn.shape[0] * self.score_fn(0, 1)

`single_query_target` #

Query single target against single target.

Source code in navis/nbl/synblast_funcs.py

def single_query_target(self, q_idx: int, t_idx: int, scores='forward'):
    """Query single target against single target."""
    # Take a short-cut if this is a self-self comparison
    if q_idx == t_idx:
        if self.normalized:
            return 1
        return self.self_hits[q_idx]

    # Run nearest-neighbor search for query against target
    t_trees = self.neurons[t_idx]
    q_trees = self.neurons[q_idx]

    dists = []
    # Go over all connector types (e.g. "pre" and "post") present in the
    # query neuron. If `by_type=False`, there will be a single tree simply
    # called "all"
    for ty, qt in q_trees.items():
        # If target does not have this type of connectors
        if ty not in t_trees:
            # Note that this infinite distance will simply get the worst
            # score possible in the scoring function
            dists = np.append(dists, [self.score_fn.max_dist] * qt.data.shape[0])
        else:
            # Note: we're building the trees lazily here once we actually need them.
            # The main reason is that pykdtree is not picklable and hence
            # we can't pass it to the worker processes.
            if not isinstance(t_trees[ty], KDTree):
                t_trees[ty] = KDTree(t_trees[ty])

            tt = t_trees[ty]
            if isinstance(qt, KDTree):
                data = qt.data
            else:
                data = qt

            # pykdtree tracks data as flat array
            if data.ndim == 1:
                data = data.reshape((qt.n, qt.ndim))
            dists = np.append(dists, tt.query(data)[0])

    # We use the same scoring function as for normal NBLAST but ignore the
    # vector dotproduct component
    scr = self.score_fn(dists, 1).sum()

    # Normalize against best possible hit (self hit)
    if self.normalized:
        scr /= self.self_hits[q_idx]

    # For the mean score we also have to produce the reverse score
    if scores in ('mean', 'min', 'max'):
        reverse = self.single_query_target(t_idx, q_idx, scores='forward')
        if scores == 'mean':
            scr = (scr + reverse) / 2
        elif scores == 'min':
            scr = min(scr, reverse)
        elif scores == 'max':
            scr = max(scr, reverse)

    return scr

`navis.nbl.synblast_funcs.find_batch_partition` #

Find partitions such that each batch takes about T seconds.

Source code in navis/nbl/synblast_funcs.py

def find_batch_partition(q, t, T=10):
    """Find partitions such that each batch takes about `T` seconds."""
    # Get a median-sized query and target
    q_ix = np.argsort(q.n_connectors)[len(q)//2]
    t_ix = np.argsort(t.n_connectors)[len(t)//2]

    # Generate the KDTree
    tree = KDTree(q[q_ix].connectors[['x', 'y', 'z']].values)

    # Run a quick single query benchmark
    timings = []
    for i in range(10):  # Run 10 tests
        s = time.time()
        _ = tree.query(t[t_ix].connectors[['x', 'y', 'z']].values)  #  ignoring scoring / normalizing
        timings.append(time.time() - s)
    time_per_query = min(timings)  # seconds per medium sized query

    # Number of queries per job such that each job runs in `T` second
    queries_per_batch = T / time_per_query

    # Number of neurons per batch
    neurons_per_batch  = max(1, int(np.sqrt(queries_per_batch)))

    n_rows = len(q) // neurons_per_batch
    n_cols = len(t) // neurons_per_batch

    return max(1, n_rows), max(1, n_cols)