# Source code for elephant.pandas_bridge

# -*- coding: utf-8 -*-
"""
Bridge to the pandas library.

:copyright: Copyright 2014-2016 by the Elephant team, see `doc/authors.rst`.
:license: Modified BSD, see LICENSE.txt for details.
"""

from __future__ import division, print_function, unicode_literals

import numpy as np
import pandas as pd
import quantities as pq

from elephant.neo_tools import (extract_neo_attrs, get_all_epochs,
                                get_all_events, get_all_spiketrains)


def _multiindex_from_dict(inds):
    """Build a one-entry `pandas.MultiIndex` from a dictionary.

    The dictionary keys become the level names and the dictionary values
    become the single entry of the index.  Levels are ordered by sorted key.

    Parameters
    ----------
    inds : dict
           A dictionary where the keys are annotation or attribute names and
           the values are the corresponding annotation or attribute values.

    Returns
    -------
    pandas MultiIndex
        An index of length one with one level per dictionary key.
    """
    sorted_items = sorted(inds.items())
    # Transpose the (name, value) pairs into a tuple of names and a tuple
    # of values.
    level_names, level_values = zip(*sorted_items)
    return pd.MultiIndex.from_tuples([level_values], names=level_names)


def _sort_inds(obj, axis=0):
    """Return a pandas object with sorted index levels and index entries.

    For each requested axis, the levels are first reordered so that the
    level names are in sorted order, and the entries are then sorted
    lexicographically starting from the first level.

    Parameters
    ----------
    obj : pandas object
          The object whose indexes should be sorted.  The axes being sorted
          are passed to `reorder_levels`, so they are expected to be
          hierarchical indexes.
    axis : int, iterable of int, or 'all', optional
           The axis whose indexes should be sorted.  Default is 0.
           If an iterable, each listed axis is sorted in turn.
           If 'all', every axis of `obj` is sorted.

    Returns
    -------
    pandas object
        An object of the same kind as `obj` with the requested axes sorted.
    """
    if axis == 'all':
        axis = range(obj.ndim)

    if hasattr(axis, '__iter__'):
        result = obj
        for single_axis in axis:
            result = _sort_inds(result, single_axis)
        return result

    level_order = sorted(obj.axes[axis].names)
    reordered = obj.reorder_levels(level_order, axis=axis)
    return reordered.sort_index(level=0, axis=axis, sort_remaining=True)


def _extract_neo_attrs_safe(obj, parents=True, child_first=True):
    """Given a neo object, return a dictionary of attributes and annotations.

    This is done in a manner that is safe for `pandas` indexes: both the
    keys and the values are passed through `_convert_value_safe`.

    Parameters
    ----------

    obj : neo object
    parents : bool, optional
              Also include attributes and annotations from parent neo
              objects (if any).
    child_first : bool, optional
                  If True (default True), values of child attributes are used
                  over parent attributes in the event of a name conflict.
                  If False, parent attributes are used.
                  This parameter does nothing if `parents` is False.

    Returns
    -------

    dict
        A new dictionary where the keys are annotation or attribute names
        and the values are the corresponding annotation or attribute values.
        Array-valued and `None`-valued entries are skipped.

    """
    res = extract_neo_attrs(obj, skip_array=True, skip_none=True,
                            parents=parents, child_first=child_first)
    # Build a fresh dictionary instead of popping and re-inserting keys while
    # iterating over `res`: changing a dict's keys during iteration can raise
    # RuntimeError or skip/revisit entries on Python 3.
    return dict((_convert_value_safe(key), _convert_value_safe(value))
                for key, value in res.items())


def _convert_value_safe(value):
    """Return a `pandas`-friendly version of a value taken from `neo`.

    Some types and dtypes used with neo are not safe to use with pandas in
    some or all situations.

    `quantities.Quantity` objects don't follow the normal python rule that
    values that are equal should have the same hash, making them
    fundamentally incompatible with `pandas`.  Anything exposing a
    `dimensionality` attribute is therefore turned into a
    ``(magnitude, units string)`` tuple.

    On python 3, `pandas` coerces `S` dtypes to bytes, which are not always
    safe to use, so byte-string arrays and byte strings are converted to
    text.  Other objects providing `tolist` are converted to plain python
    values.

    Parameters
    ----------

    value : any
            Value to convert (if it has any known issues).

    Returns
    -------

    any
        `value` or a version of value with potential problems fixed.

    """
    # The checks are ordered from most to least specific; the first match
    # decides the conversion.
    if hasattr(value, 'dimensionality'):
        magnitude = value.magnitude.tolist()
        units = str(value.dimensionality)
        return (magnitude, units)

    is_bytes_dtype = hasattr(value, 'dtype') and value.dtype.kind == 'S'
    if is_bytes_dtype:
        as_text = value.astype('U')
        return as_text.tolist()

    if hasattr(value, 'tolist'):
        return value.tolist()

    # Only objects that can be decoded but not encoded are decoded here.
    if not hasattr(value, 'decode') or hasattr(value, 'encode'):
        return value
    return value.decode('UTF8')


def spiketrain_to_dataframe(spiketrain, parents=True, child_first=True):
    """Convert a `neo.SpikeTrain` to a `pandas.DataFrame`.

    The `pandas.DataFrame` object has a single column, with each element
    being the spike time converted to a `float` value in seconds.

    The column heading is a `pandas.MultiIndex` with one index
    for each of the scalar attributes and annotations.  The `index`
    is the spike number.

    Parameters
    ----------

    spiketrain : neo SpikeTrain
                 The SpikeTrain to convert.
    parents : bool, optional
              Also include attributes and annotations from parent neo
              objects (if any).
    child_first : bool, optional
                  If True (default True), values of child attributes are used
                  over parent attributes in the event of a name conflict.
                  If False, parent attributes are used.
                  This parameter does nothing if `parents` is False.

    Returns
    -------

    pandas DataFrame
        A DataFrame containing the spike times from `spiketrain`.

    Notes
    -----

    The index name is `spike_number`.

    Attributes that contain non-scalar values are skipped.  So are
    annotations or attributes containing a value of `None`.

    `quantities.Quantity` types are incompatible with `pandas`, so attributes
    and annotations of that type are converted to a tuple where the first
    element is the scalar value and the second is the string representation of
    the units.

    """
    attrs = _extract_neo_attrs_safe(spiketrain,
                                    parents=parents, child_first=child_first)
    columns = _multiindex_from_dict(attrs)

    # Re-wrap the raw magnitudes in a plain Quantity and express them in
    # seconds before dropping the units.
    times = spiketrain.magnitude
    times = pq.Quantity(times, spiketrain.units).rescale('s').magnitude
    # Turn the spike times into a single column.
    times = times[np.newaxis].T

    index = pd.Index(np.arange(len(spiketrain)), name='spike_number')

    pdobj = pd.DataFrame(times, index=index, columns=columns)
    return _sort_inds(pdobj, axis=1)


def event_to_dataframe(event, parents=True, child_first=True):
    """Convert a `neo.core.Event` to a `pandas.DataFrame`.

    The `pandas.DataFrame` object has a single column, with each element
    being the event label from the `event.labels` attribute, converted to
    a unicode string.

    The column heading is a `pandas.MultiIndex` with one index
    for each of the scalar attributes and annotations.  The `index`
    is the time stamp from the `event.times` attribute, in seconds.

    Parameters
    ----------

    event : neo Event
            The Event to convert.
    parents : bool, optional
              Also include attributes and annotations from parent neo
              objects (if any).
    child_first : bool, optional
                  If True (default True), values of child attributes are used
                  over parent attributes in the event of a name conflict.
                  If False, parent attributes are used.
                  This parameter does nothing if `parents` is False.

    Returns
    -------

    pandas DataFrame
        A DataFrame containing the labels from `event`.

    Notes
    -----

    If the length of `event.times` and `event.labels` are not the same,
    the longer will be truncated to the length of the shorter.

    The index name is `times`.

    Attributes that contain non-scalar values are skipped.  So are
    annotations or attributes containing a value of `None`.

    `quantities.Quantity` types are incompatible with `pandas`, so attributes
    and annotations of that type are converted to a tuple where the first
    element is the scalar value and the second is the string representation of
    the units.

    """
    attrs = _extract_neo_attrs_safe(event,
                                    parents=parents, child_first=child_first)
    columns = _multiindex_from_dict(attrs)

    times = event.times.rescale('s').magnitude
    labels = event.labels.astype('U')

    # Truncate whichever array is longer so both have the same length.
    times = times[:len(labels)]
    labels = labels[:len(times)]

    index = pd.Index(times, name='times')

    # The labels are reshaped into a single column.
    pdobj = pd.DataFrame(labels[np.newaxis].T, index=index, columns=columns)
    return _sort_inds(pdobj, axis=1)


def epoch_to_dataframe(epoch, parents=True, child_first=True):
    """Convert a `neo.core.Epoch` to a `pandas.DataFrame`.

    The `pandas.DataFrame` object has a single column, with each element
    being the epoch label from the `epoch.labels` attribute, converted to
    a unicode string.

    The column heading is a `pandas.MultiIndex` with one index
    for each of the scalar attributes and annotations.  The `index`
    is a `pandas.MultiIndex` built from the time stamps in the `epoch.times`
    attribute and the durations in the `epoch.durations` attribute, both
    in seconds.

    Parameters
    ----------

    epoch : neo Epoch
            The Epoch to convert.
    parents : bool, optional
              Also include attributes and annotations from parent neo
              objects (if any).
    child_first : bool, optional
                  If True (default True), values of child attributes are used
                  over parent attributes in the event of a name conflict.
                  If False, parent attributes are used.
                  This parameter does nothing if `parents` is False.

    Returns
    -------

    pandas DataFrame
        A DataFrame containing the labels from `epoch`.

    Notes
    -----

    If the length of `epoch.times`, `epoch.durations`, and `epoch.labels` are
    not the same, the longer ones will be truncated to the length of the
    shortest.

    The index level names for `epoch.times` and `epoch.durations` are `times`
    and `durations`, respectively.  Both the row and the column indexes are
    sorted, so the index levels are ordered by name.

    Attributes that contain non-scalar values are skipped.  So are
    annotations or attributes containing a value of `None`.

    `quantities.Quantity` types are incompatible with `pandas`, so attributes
    and annotations of that type are converted to a tuple where the first
    element is the scalar value and the second is the string representation of
    the units.

    """
    attrs = _extract_neo_attrs_safe(epoch,
                                    parents=parents, child_first=child_first)
    columns = _multiindex_from_dict(attrs)

    times = epoch.times.rescale('s').magnitude
    durs = epoch.durations.rescale('s').magnitude
    labels = epoch.labels.astype('U')

    # Truncate all three arrays to the length of the shortest one.
    minlen = min([len(durs), len(times), len(labels)])
    index = pd.MultiIndex.from_arrays([times[:minlen], durs[:minlen]],
                                      names=['times', 'durations'])

    # The labels are reshaped into a single column.
    pdobj = pd.DataFrame(labels[:minlen][np.newaxis].T,
                         index=index, columns=columns)
    return _sort_inds(pdobj, axis='all')


def _multi_objs_to_dataframe(container, conv_func, get_func,
                             parents=True, child_first=True):
    """Convert every matching object in a container to one `DataFrame`.

    `get_func` is used to collect the objects from `container`, each object
    is converted to its own `pandas.DataFrame` with `conv_func`, and the
    results are concatenated column-wise.

    The column heading is a `pandas.MultiIndex` with one index
    for each of the scalar attributes and annotations of the respective
    object.

    Parameters
    ----------

    container : list, tuple, iterable, dict, neo container object
                The container for the objects to convert.
    conv_func : callable
                Called as ``conv_func(obj, parents=..., child_first=...)``
                for each object; must return a pandas object that can be
                concatenated along axis 1.
    get_func : callable
               Called as ``get_func(container)``; must return an iterable of
               the objects to convert.
    parents : bool, optional
              Also include attributes and annotations from parent neo
              objects (if any).  Passed on to `conv_func`.
    child_first : bool, optional
                  If True (default True), values of child attributes are used
                  over parent attributes in the event of a name conflict.
                  If False, parent attributes are used.
                  Passed on to `conv_func`.

    Returns
    -------

    pandas DataFrame
        A DataFrame containing the converted objects, with sorted columns.

    """
    frames = []
    for item in get_func(container):
        converted = conv_func(item, parents=parents, child_first=child_first)
        frames.append(converted)

    combined = pd.concat(frames, axis=1)
    return _sort_inds(combined, axis=1)


def multi_spiketrains_to_dataframe(container,
                                   parents=True, child_first=True):
    """Convert one or more `neo.SpikeTrain` objects to a `pandas.DataFrame`.

    The objects can be any list, dict, or other iterable or mapping containing
    spiketrains, as well as any neo object that can hold spiketrains:
    `neo.Block`, `neo.ChannelIndex`, `neo.Unit`, and `neo.Segment`.
    Objects are searched recursively, so the objects can be nested (such as a
    list of blocks).

    The `pandas.DataFrame` object has one column for each spiketrain, with each
    element being the spike time converted to a `float` value in seconds.
    Columns are padded to the same length with `NaN` values.

    The column heading is a `pandas.MultiIndex` with one index
    for each of the scalar attributes and annotations of the respective
    spiketrain.  The `index` is the spike number.

    Parameters
    ----------

    container : list, tuple, iterable, dict,
                neo Block, neo Segment, neo Unit, neo ChannelIndex
                The container for the spiketrains to convert.
    parents : bool, optional
              Also include attributes and annotations from parent neo
              objects (if any).
    child_first : bool, optional
                  If True (default True), values of child attributes are used
                  over parent attributes in the event of a name conflict.
                  If False, parent attributes are used.
                  This parameter does nothing if `parents` is False.

    Returns
    -------

    pandas DataFrame
        A DataFrame containing the spike times from `container`.

    Notes
    -----

    The index name is `spike_number`.

    Attributes that contain non-scalar values are skipped.  So are
    annotations or attributes containing a value of `None`.

    `quantities.Quantity` types are incompatible with `pandas`, so attributes
    and annotations of that type are converted to a tuple where the first
    element is the scalar value and the second is the string representation of
    the units.

    """
    # Collect every spiketrain in the container, convert each one, and join
    # the results column-wise.
    return _multi_objs_to_dataframe(container,
                                    spiketrain_to_dataframe,
                                    get_all_spiketrains,
                                    parents=parents, child_first=child_first)


def multi_events_to_dataframe(container, parents=True, child_first=True):
    """Convert one or more `neo.Event` objects to a `pandas.DataFrame`.

    The objects can be any list, dict, or other iterable or mapping containing
    events, as well as any neo object that can hold events:
    `neo.Block` and `neo.Segment`.  Objects are searched recursively, so the
    objects can be nested (such as a list of blocks).

    The `pandas.DataFrame` object has one column for each event, with each
    element being the event label.  Columns are padded to the same length with
    `NaN` values.

    The column heading is a `pandas.MultiIndex` with one index
    for each of the scalar attributes and annotations of the respective
    event.  The `index` is the time stamp from the `event.times` attribute.

    Parameters
    ----------

    container : list, tuple, iterable, dict, neo Block, neo Segment
                The container for the events to convert.
    parents : bool, optional
              Also include attributes and annotations from parent neo
              objects (if any).
    child_first : bool, optional
                  If True (default True), values of child attributes are used
                  over parent attributes in the event of a name conflict.
                  If False, parent attributes are used.
                  This parameter does nothing if `parents` is False.

    Returns
    -------

    pandas DataFrame
        A DataFrame containing the labels from `container`.

    Notes
    -----

    If the length of `event.times` and `event.labels` are not the same for
    any individual event, the longer will be truncated to the length of the
    shorter for that event.  Between events, lengths can differ.

    The index name is `times`.

    Attributes that contain non-scalar values are skipped.  So are
    annotations or attributes containing a value of `None`.

    `quantities.Quantity` types are incompatible with `pandas`, so attributes
    and annotations of that type are converted to a tuple where the first
    element is the scalar value and the second is the string representation of
    the units.

    """
    # Collect every event in the container, convert each one, and join the
    # results column-wise.
    return _multi_objs_to_dataframe(container,
                                    event_to_dataframe, get_all_events,
                                    parents=parents, child_first=child_first)


def multi_epochs_to_dataframe(container, parents=True, child_first=True):
    """Convert one or more `neo.Epoch` objects to a `pandas.DataFrame`.

    The objects can be any list, dict, or other iterable or mapping containing
    epochs, as well as any neo object that can hold epochs:
    `neo.Block` and `neo.Segment`.  Objects are searched recursively, so the
    objects can be nested (such as a list of blocks).

    The `pandas.DataFrame` object has one column for each epoch, with each
    element being the epoch label.  Columns are padded to the same length with
    `NaN` values.

    The column heading is a `pandas.MultiIndex` with one index
    for each of the scalar attributes and annotations of the respective
    epoch.  The `index` is a `pandas.MultiIndex` built from the time stamps
    in the `epoch.times` attribute and the durations in the
    `epoch.durations` attribute.

    Parameters
    ----------

    container : list, tuple, iterable, dict, neo Block, neo Segment
                The container for the epochs to convert.
    parents : bool, optional
              Also include attributes and annotations from parent neo
              objects (if any).
    child_first : bool, optional
                  If True (default True), values of child attributes are used
                  over parent attributes in the event of a name conflict.
                  If False, parent attributes are used.
                  This parameter does nothing if `parents` is False.

    Returns
    -------

    pandas DataFrame
        A DataFrame containing the labels from `container`.

    Notes
    -----

    If the length of `epoch.times`, `epoch.durations`, and `epoch.labels` are
    not the same for any individual epoch, the longer ones will be truncated
    to the length of the shortest for that epoch.  Between epochs, lengths
    can differ.

    The index level names for `epoch.times` and `epoch.durations` are
    `times` and `durations`, respectively.

    Attributes that contain non-scalar values are skipped.  So are
    annotations or attributes containing a value of `None`.

    `quantities.Quantity` types are incompatible with `pandas`, so attributes
    and annotations of that type are converted to a tuple where the first
    element is the scalar value and the second is the string representation of
    the units.

    """
    # Collect every epoch in the container, convert each one, and join the
    # results column-wise.
    return _multi_objs_to_dataframe(container,
                                    epoch_to_dataframe, get_all_epochs,
                                    parents=parents, child_first=child_first)


def slice_spiketrain(pdobj, t_start=None, t_stop=None):
    """Slice a `pandas.DataFrame`, changing indices appropriately.

    Values outside the sliced range are converted to `NaN` values.

    Slicing happens over columns.

    This sets the `t_start` and `t_stop` column indexes to be the new values.
    Otherwise it is the same as setting values outside the range to `NaN`.

    Parameters
    ----------
    pdobj : pandas DataFrame
            The DataFrame to slice.  When `t_start` (`t_stop`) is given, the
            columns must have a level named `t_start` (`t_stop`).
    t_start : float, optional.
              If specified, the returned DataFrame values less than this set
              to `NaN`.
              Default is `None` (do not use this argument).
    t_stop : float, optional.
             If specified, the returned DataFrame values greater than this set
             to `NaN`.
             Default is `None` (do not use this argument).

    Returns
    -------

    pdobj : pandas DataFrame
            The sliced data.  The object passed in is never modified.

    Notes
    -----

    The order of the index and/or column levels of the returned object may
    differ from the order of the original.

    If `t_start` or `t_stop` is specified, all columns indexes will be changed
    to the respective values, including those already within the new range.
    If `t_start` or `t_stop` is not specified, those column indexes will not
    be changed.

    Returns a copy, even if `t_start` and `t_stop` are both `None`.

    """
    if t_start is None and t_stop is None:
        return pdobj.copy()

    if t_stop is not None:
        # `mask` returns a new object, so the caller's data is left intact
        # (boolean-mask assignment would write NaN into the input).
        pdobj = pdobj.mask(pdobj > t_stop)
        pdobj = _set_column_level(pdobj, 't_stop', t_stop)

    if t_start is not None:
        pdobj = pdobj.mask(pdobj < t_start)
        pdobj = _set_column_level(pdobj, 't_start', t_start)

    return pdobj


def _set_column_level(pdobj, level, value):
    """Set every entry of the column level `level` to `value`.

    Parameters
    ----------
    pdobj : pandas DataFrame
            The DataFrame whose column index should be changed.
    level : str
            Name of the column index level to overwrite.
    value : any
            The new value for every entry of that level.

    Returns
    -------
    pandas DataFrame
        A new DataFrame with the level replaced and the columns sorted.
    """
    # Work on the transpose so the column level can be pulled out into an
    # ordinary column, overwritten, and pushed back into the index.
    pdobj = pdobj.T.reset_index(level=level)
    pdobj[level] = value
    pdobj = pdobj.set_index(level, append=True).T
    return _sort_inds(pdobj, axis=1)