Source code for elephant.spike_train_dissimilarity

# -*- coding: utf-8 -*-
"""
In neuroscience one often wants to evaluate, how similar or dissimilar pairs
or even large sets of spiketrains are. For this purpose various different
spike train dissimilarity measures were introduced in the literature.
They differ, e.g., by the properties of having the mathematical properties of
a metric or by being time-scale dependent or not. Well known representatives
of spike train dissimilarity measures are the Victor-Purpura distance and the
Van Rossum distance implemented in this module, which both are metrics in the
mathematical sense and time-scale dependent.

:copyright: Copyright 2016 by the Elephant team, see AUTHORS.txt.
:license: Modified BSD, see LICENSE.txt for details.
"""

import quantities as pq
import numpy as np
import scipy as sp
import elephant.kernels as kernels
from neo.core import SpikeTrain

# Problem of conversion from Python 2 to Python 3:
# 'xrange' in Python 2 is 'range' in Python 3.
try:
    xrange
except NameError:
    xrange = range


def _create_matrix_from_indexed_function(
        shape, func, symmetric_2d=False, **func_params):
    mat = np.empty(shape)
    if symmetric_2d:
        for i in xrange(shape[0]):
            for j in xrange(i, shape[1]):
                mat[i, j] = mat[j, i] = func(i, j, **func_params)
    else:
        for idx in np.ndindex(*shape):
            mat[idx] = func(*idx, **func_params)
    return mat


[docs]def victor_purpura_dist( trains, q=1.0 * pq.Hz, kernel=None, sort=True, algorithm='fast'): """ Calculates the Victor-Purpura's (VP) distance. It is often denoted as :math:`D^{\\text{spike}}[q]`. It is defined as the minimal cost of transforming spike train `a` into spike train `b` by using the following operations: * Inserting or deleting a spike (cost 1.0). * Shifting a spike from :math:`t` to :math:`t'` (cost :math:`q \\cdot |t - t'|`). A detailed description can be found in *Victor, J. D., & Purpura, K. P. (1996). Nature and precision of temporal coding in visual cortex: a metric-space analysis. Journal of Neurophysiology.* Given the average number of spikes :math:`n` in a spike train and :math:`N` spike trains the run-time complexity of this function is :math:`O(N^2 n^2)` and :math:`O(N^2 + n^2)` memory will be needed. Parameters ---------- trains : Sequence of :class:`neo.core.SpikeTrain` objects of which the distance will be calculated pairwise. q: Quantity scalar Cost factor for spike shifts as inverse time scalar. Extreme values :math:`q=0` meaning no cost for any shift of spikes, or :math: `q=np.inf` meaning infinite cost for any spike shift and hence exclusion of spike shifts, are explicitly allowed. If `kernel` is not `None`, :math:`q` will be ignored. Default: 1.0 * pq.Hz kernel: :class:`.kernels.Kernel` Kernel to use in the calculation of the distance. If `kernel` is `None`, an unnormalized triangular kernel with standard deviation of :math:'2.0/(q * sqrt(6.0))' corresponding to a half width of :math:`2.0/q` will be used. Usage of the default value calculates the Victor-Purpura distance correctly with a triangular kernel of the suitable width. The choice of another kernel is enabled, but this leaves the framework of Victor-Purpura distances. Default: None sort: bool Spike trains with sorted spike times will be needed for the calculation. You can set `sort` to `False` if you know that your spike trains are already sorted to decrease calculation time. Default: True algorithm: string Allowed values are 'fast' or 'intuitive', each selecting an algorithm with which to calculate the pairwise Victor-Purpura distance. Typically 'fast' should be used, because while giving always the same result as 'intuitive', within the temporary structure of Python and add-on modules as numpy it is faster. Default: 'fast' Returns ------- 2-D array Matrix containing the VP distance of all pairs of spike trains. Example ------- import elephant.spike_train_dissimilarity_measures as stdm q = 1.0 / (10.0 * pq.ms) st_a = SpikeTrain([10, 20, 30], units='ms', t_stop= 1000.0) st_b = SpikeTrain([12, 24, 30], units='ms', t_stop= 1000.0) vp_f = stdm.victor_purpura_dist([st_a, st_b], q)[0, 1] vp_i = stdm.victor_purpura_dist( [st_a, st_b], q, algorithm='intuitive')[0, 1] """ for train in trains: if not (isinstance(train, (pq.quantity.Quantity, SpikeTrain)) and train.dimensionality.simplified == pq.Quantity(1, "s").dimensionality.simplified): raise TypeError("Spike trains must have a time unit.") if not (isinstance(q, pq.quantity.Quantity) and q.dimensionality.simplified == pq.Quantity(1, "Hz").dimensionality.simplified): raise TypeError("q must be a rate quantity.") if kernel is None: if q == 0.0: num_spikes = np.atleast_2d([st.size for st in trains]) return np.absolute(num_spikes.T - num_spikes) elif q == np.inf: num_spikes = np.atleast_2d([st.size for st in trains]) return num_spikes.T + num_spikes else: kernel = kernels.TriangularKernel(2.0 / (np.sqrt(6.0) * q)) if sort: trains = [np.sort(st.view(type=pq.Quantity)) for st in trains] def compute(i, j): if i == j: return 0.0 else: if algorithm == 'fast': return _victor_purpura_dist_for_st_pair_fast( trains[i], trains[j], kernel) elif algorithm == 'intuitive': return _victor_purpura_dist_for_st_pair_intuitive( trains[i], trains[j], q) else: raise NameError("algorithm must be either 'fast' " "or 'intuitive'.") return _create_matrix_from_indexed_function( (len(trains), len(trains)), compute, kernel.is_symmetric())
def _victor_purpura_dist_for_st_pair_fast(train_a, train_b, kernel): """ The algorithm used is based on the one given in J. D. Victor and K. P. Purpura, Nature and precision of temporal coding in visual cortex: a metric-space analysis, Journal of Neurophysiology, 1996. It constructs a matrix G[i, j] containing the minimal cost when only considering the first i and j spikes of the spike trains. However, one never needs to store more than one row and one column at the same time for calculating the VP distance. cost[0, :cost.shape[1] - i] corresponds to G[i:, i]. In the same way cost[1, :cost.shape[1] - i] corresponds to G[i, i:]. Moreover, the minimum operation on the costs of the three kind of actions (delete, insert or move spike) can be split up in two operations. One operation depends only on the already calculated costs and kernel evaluation (insertion of spike vs moving a spike). The other minimum depends on that result and the cost of deleting a spike. This operation always depends on the last calculated element in the cost array and corresponds to a recursive application of f(accumulated_min[i]) = min(f(accumulated_min[i-1]), accumulated_min[i]) + 1. That '+1' can be excluded from this function if the summed value for all recursive applications is added upfront to accumulated_min. Afterwards it has to be removed again except one for the currently processed spike to get the real costs up to the evaluation of i. All currently calculated costs will be considered -1 because this saves a number of additions as in most cases the cost would be increased by exactly one (the only exception is shifting, but in that calculation is already the addition of a constant involved, thus leaving the number of operations the same). The increase by one will be added after calculating all minima by shifting decreasing_sequence by one when removing it from accumulated_min. Parameters ---------- train_a, train_b : :class:`neo.core.SpikeTrain` objects of which the Victor-Purpura distance will be calculated pairwise. kernel: :class:`.kernels.Kernel` Kernel to use in the calculation of the distance. Returns ------- float The Victor-Purpura distance of train_a and train_b """ if train_a.size <= 0 or train_b.size <= 0: return max(train_a.size, train_b.size) if train_a.size < train_b.size: train_a, train_b = train_b, train_a min_dim, max_dim = train_b.size, train_a.size + 1 cost = np.asfortranarray(np.tile(np.arange(float(max_dim)), (2, 1))) decreasing_sequence = np.asfortranarray(cost[:, ::-1]) kern = kernel((np.atleast_2d(train_a).T.view(type=pq.Quantity) - train_b.view(type=pq.Quantity))) as_fortran = np.asfortranarray( ((np.sqrt(6.0) * kernel.sigma) * kern).simplified) k = 1 - 2 * as_fortran for i in xrange(min_dim): # determine G[i, i] == accumulated_min[:, 0] accumulated_min = cost[:, :-i - 1] + k[i:, i] accumulated_min[1, :train_b.size - i] = \ cost[1, :train_b.size - i] + k[i, i:] accumulated_min = np.minimum( accumulated_min, # shift cost[:, 1:max_dim - i]) # insert acc_dim = accumulated_min.shape[1] # delete vs min(insert, shift) accumulated_min[:, 0] = min(cost[1, 1], accumulated_min[0, 0]) # determine G[i, :] and G[:, i] by propagating minima. accumulated_min += decreasing_sequence[:, -acc_dim - 1:-1] accumulated_min = np.minimum.accumulate(accumulated_min, axis=1) cost[:, :acc_dim] = accumulated_min - decreasing_sequence[:, -acc_dim:] return cost[0, -min_dim - 1] def _victor_purpura_dist_for_st_pair_intuitive( train_a, train_b, q=1.0 * pq.Hz): """ Function to calculate the Victor-Purpura distance between two spike trains described in *J. D. Victor and K. P. Purpura, Nature and precision of temporal coding in visual cortex: a metric-space analysis, J Neurophysiol,76(2):1310-1326, 1996* This function originates from the spikes-module in the signals-folder of the software package Neurotools. It represents the 'intuitive' implementation of the Victor-Purpura distance. With respect to calculation time at the moment this code is uncompetitive with the code implemented in the function _victor_purpura_dist_for_st_pair_fast. However, it is expected that the discrepancy in calculation time of the 2 algorithms decreases drastically if the temporary valid calculation speed difference of plain Python and Numpy routines would be removed when languages like cython could take over. The decision then has to be made between an intuitive and probably slightly slower algorithm versus a correct but strange optimal solution of an optimization problem under boundary conditions, where the boundary conditions would finally have been removed. Hence also this algoritm is kept here. Parameters ---------- train_a, train_b : :class:`neo.core.SpikeTrain` objects of which the Victor-Purpura distance will be calculated pairwise. q : Quantity scalar of rate dimension The cost parameter. Default: 1.0 * pq.Hz Returns ------- float The Victor-Purpura distance of train_a and train_b """ nspk_a = len(train_a) nspk_b = len(train_b) scr = np.zeros((nspk_a+1, nspk_b+1)) scr[:, 0] = xrange(0, nspk_a+1) scr[0, :] = xrange(0, nspk_b+1) if nspk_a > 0 and nspk_b > 0: for i in xrange(1, nspk_a+1): for j in xrange(1, nspk_b+1): scr[i, j] = min(scr[i-1, j]+1, scr[i, j-1]+1) scr[i, j] = min(scr[i, j], scr[i-1, j-1] + np.float64(( q*abs(train_a[i-1]-train_b[j-1])).simplified)) return scr[nspk_a, nspk_b]
[docs]def van_rossum_dist(trains, tau=1.0 * pq.s, sort=True): """ Calculates the van Rossum distance. It is defined as Euclidean distance of the spike trains convolved with a causal decaying exponential smoothing filter. A detailed description can be found in *Rossum, M. C. W. (2001). A novel spike distance. Neural Computation, 13(4), 751-763.* This implementation is normalized to yield a distance of 1.0 for the distance between an empty spike train and a spike train with a single spike. Divide the result by sqrt(2.0) to get the normalization used in the cited paper. Given :math:`N` spike trains with :math:`n` spikes on average the run-time complexity of this function is :math:`O(N^2 n)`. Parameters ---------- trains : Sequence of :class:`neo.core.SpikeTrain` objects of which the van Rossum distance will be calculated pairwise. tau : Quantity scalar Decay rate of the exponential function as time scalar. Controls for which time scale the metric will be sensitive. This parameter will be ignored if `kernel` is not `None`. May also be :const:`scipy.inf` which will lead to only measuring differences in spike count. Default: 1.0 * pq.s sort : bool Spike trains with sorted spike times might be needed for the calculation. You can set `sort` to `False` if you know that your spike trains are already sorted to decrease calculation time. Default: True Returns ------- 2-D array Matrix containing the van Rossum distances for all pairs of spike trains. Example ------- import elephant.spike_train_dissimilarity_measures as stdm tau = 10.0 * pq.ms st_a = SpikeTrain([10, 20, 30], units='ms', t_stop= 1000.0) st_b = SpikeTrain([12, 24, 30], units='ms', t_stop= 1000.0) vr = stdm.van_rossum_dist([st_a, st_b], tau)[0, 1] """ for train in trains: if not (isinstance(train, (pq.quantity.Quantity, SpikeTrain)) and train.dimensionality.simplified == pq.Quantity(1, "s").dimensionality.simplified): raise TypeError("Spike trains must have a time unit.") if not (isinstance(tau, pq.quantity.Quantity) and tau.dimensionality.simplified == pq.Quantity(1, "s").dimensionality.simplified): raise TypeError("tau must be a time quantity.") if tau == 0: spike_counts = [st.size for st in trains] return np.sqrt(spike_counts + np.atleast_2d(spike_counts).T) elif tau == np.inf: spike_counts = [st.size for st in trains] return np.absolute(spike_counts - np.atleast_2d(spike_counts).T) k_dist = _summed_dist_matrix( [st.view(type=pq.Quantity) for st in trains], tau, not sort) vr_dist = np.empty_like(k_dist) for i, j in np.ndindex(k_dist.shape): vr_dist[i, j] = ( k_dist[i, i] + k_dist[j, j] - k_dist[i, j] - k_dist[j, i]) return sp.sqrt(vr_dist)
def _summed_dist_matrix(spiketrains, tau, presorted=False): # The algorithm underlying this implementation is described in # Houghton, C., & Kreuz, T. (2012). On the efficient calculation of van # Rossum distances. Network: Computation in Neural Systems, 23(1-2), # 48-58. We would like to remark that in this paper in formula (9) the # left side of the equation should be divided by two. # # Given N spiketrains with n entries on average the run-time complexity is # O(N^2 * n). O(N^2 + N * n) memory will be needed. if len(spiketrains) <= 0: return np.zeros((0, 0)) if not presorted: spiketrains = [v.copy() for v in spiketrains] for v in spiketrains: v.sort() sizes = np.asarray([v.size for v in spiketrains]) values = np.empty((len(spiketrains), max(1, sizes.max()))) values.fill(np.nan) for i, v in enumerate(spiketrains): if v.size > 0: values[i, :v.size] = \ (v / tau * pq.dimensionless).simplified exp_diffs = np.exp(values[:, :-1] - values[:, 1:]) markage = np.zeros(values.shape) for u in xrange(len(spiketrains)): markage[u, 0] = 0 for i in xrange(sizes[u] - 1): markage[u, i + 1] = (markage[u, i] + 1.0) * exp_diffs[u, i] # Same spiketrain terms D = np.empty((len(spiketrains), len(spiketrains))) D[np.diag_indices_from(D)] = sizes + 2.0 * np.sum(markage, axis=1) # Cross spiketrain terms for u in xrange(D.shape[0]): all_ks = np.searchsorted(values[u], values, 'left') - 1 for v in xrange(u): js = np.searchsorted(values[v], values[u], 'right') - 1 ks = all_ks[v] slice_j = np.s_[np.searchsorted(js, 0):sizes[u]] slice_k = np.s_[np.searchsorted(ks, 0):sizes[v]] D[u, v] = np.sum( np.exp(values[v][js[slice_j]] - values[u][slice_j]) * (1.0 + markage[v][js[slice_j]])) D[u, v] += np.sum( np.exp(values[u][ks[slice_k]] - values[v][slice_k]) * (1.0 + markage[u][ks[slice_k]])) D[v, u] = D[u, v] return D