[pymvpa] ERNiftiDataset with different event durations

Thu Jul 16 13:32:30 UTC 2009

Hi,

I am currently analysing an event-related design with different event 
durations (4, 6 or 8 sec.). As far as I can see, it is not possible to 
use the ERNiftiDataset for this purpose, because it applies the maximum 
boxlength to all events.

        boxlength = max(durations)
        if __debug__:
            if not max(durations) == min(durations):
                warning('Boxcar mapper will use maximum boxlength (%i) 
of all '
                        'provided Events.'% boxlength)

I was wondering if it would be 'theoretically' possible to modify the 
'BoxcarMapper', so that it could also handle different event durations?

My idea was to modify mvpa/datasets/event.py like this:

        # we need a regular array, so all events must have a common
        # boxlength
#        boxlength = max(durations)
#        if __debug__:
#            if not max(durations) == min(durations):
#                warning('Boxcar mapper will use maximum boxlength (%i) 
of all '
#                        'provided Events.'% boxlength)

        # loop over events and extract (different) event durations
        boxlength = [e['duration'] for e in events]    

and  mvpa/mappers/boxcar.py like this:

<snip>

class BoxcarMapper(Mapper):
    """Mapper to combine multiple samples into a single sample.

    .. note::

      This mapper is somewhat unconventional since it doesn't preserve 
number
      of samples (ie the size of 0-th dimension).
    """

    _COLLISION_RESOLUTIONS = ['mean']

    def __init__(self, startpoints, boxlength, offset=0,
                 collision_resolution='mean'):
        """
        :Parameters:
          startpoints: sequence
            Index values along the first axis of 'data'.
          boxlength: int or sequence of int
            The number of elements after 'startpoint' along the first axis of
            'data' to be considered for the boxcar. Either a single value
            that is used for all boxes, or one value per startpoint.
          offset: int
            The offset between the provided starting point and the actual
            start of the boxcar.
          collision_resolution : 'mean'
            if a sample belonged to multiple output samples, then on reverse,
            how to resolve the value

        :Raises:
          ValueError
            If a boxlength is lower than 1 or not an integer value, if the
            number of boxlengths does not match the number of startpoints,
            or if `collision_resolution` is not a known method.
        """
        Mapper.__init__(self)

        startpoints = N.asanyarray(startpoints)
        if N.issubdtype(startpoints.dtype, 'i'):
            self.startpoints = startpoints
        else:
            if __debug__:
                debug('MAP',
                      "Boxcar: obtained startpoints are not of int type."
                      " Rounding and changing dtype")
            self.startpoints = N.asanyarray(N.round(startpoints), dtype='i')

        # Sanity checks -- applied element-wise, so they cover both a single
        # boxlength and a sequence with one boxlength per startpoint
        boxlength = N.asanyarray(boxlength)
        if N.any(boxlength < 1):
            raise ValueError("Boxlength lower than 1 makes no sense.")
        if N.any(boxlength != N.round(boxlength)):
            raise ValueError("boxlength must be an integer value.")
        boxlength = boxlength.astype('i')

        if boxlength.ndim == 0:
            # a single value: use the same length for every box, so that the
            # rest of the mapper can always index one length per startpoint
            boxlength = N.repeat(boxlength, len(self.startpoints))
        elif len(boxlength) != len(self.startpoints):
            raise ValueError(
                "Need one boxlength per startpoint: got %i boxlengths for "
                "%i startpoints" % (len(boxlength), len(self.startpoints)))

        self.boxlength = boxlength
        self.offset = offset
        self.__selectors = None

        if collision_resolution not in self._COLLISION_RESOLUTIONS:
            raise ValueError("Unknown method to resolve the collision."
                             " Valid are %s" % self._COLLISION_RESOLUTIONS)
        self.__collision_resolution = collision_resolution

    __doc__ = enhancedDocString('BoxcarMapper', locals(), Mapper)

#    def __repr__(self):
#        s = super(BoxcarMapper, self).__repr__()
#        return s.replace("(", "(boxlength=%d, offset=%d, startpoints=%s, "
#                         "collision_resolution='%s'" %
#                         (self.boxlength, self.offset, 
str(self.startpoints),
#                          str(self.__collision_resolution)), 1)

    def __repr__(self):
        """Return the parent's representation with the boxcar settings
        inserted right after its first opening parenthesis.
        """
        settings = (str(self.boxlength),
                    self.offset,
                    str(self.startpoints),
                    str(self.__collision_resolution))
        prefix = "(boxlength=%s, offset=%d, startpoints=%s, " \
                 "collision_resolution='%s'" % settings
        base = super(BoxcarMapper, self).__repr__()
        # only the first "(" is replaced, later ones stay untouched
        return base.replace("(", prefix, 1)

    def forward(self, data):
        """Project an ND matrix into N+1D matrix

        Each startpoint selects one box of consecutive samples along the
        first axis of `data`; the boxes are stacked into the returned array.

        This method also handles the special case of forward mapping a single
        'raw' sample. Such a sample is extended (by concatenating clones of
        itself) to cover a full boxcar. This functionality is only available
        after a full data array has been forward mapped once.

        :Parameters:
          data: array
            Samples to be mapped; boxes are cut along its first axis.

        :Returns:
          array: (#startpoint, ...)

        :Raises:
          ValueError
            If the number of boxlengths does not match the number of
            startpoints, or if a box would start before the first or end
            after the last sample of `data`.
        """
        # in case the mapper is already charged
        if self.__selectors is not None:
            # if we have a single 'raw' sample (not a boxcar)
            # extend it to cover the full box -- useful if one
            # wants to forward map a mask in raw dataspace (e.g.
            # fMRI ROI or channel map) into an appropriate mask vector
            if data.shape == self._outshape[2:]:
                # the second axis of the previously mapped output is the box
                # length; repeating by `self.boxlength` would not work once
                # it holds one length per startpoint
                return N.asarray([data] * self._outshape[1])

        self._inshape = data.shape

        startpoints = self.startpoints
        offset = self.offset

        # accept a single common boxlength as well as one length per
        # startpoint
        boxlengths = N.atleast_1d(self.boxlength)
        if len(boxlengths) == 1:
            boxlengths = N.repeat(boxlengths, len(startpoints))
        elif len(boxlengths) != len(startpoints):
            raise ValueError(
                "Need one boxlength per startpoint: got %i boxlengths for "
                "%i startpoints" % (len(boxlengths), len(startpoints)))

        # check for illegal boxes
        for sp, length in zip(startpoints, boxlengths):
            if (sp + offset + length - 1 > len(data) - 1) \
               or (sp + offset < 0):
                raise ValueError(
                    'Illegal box: start: %i, offset: %i, length: %i'
                    % (sp, offset, length))

        # build a list of lists where each sublist contains the indexes of
        # the data elements belonging to one box; startpoints and boxlengths
        # are paired by position, so repeated startpoints keep their own
        # length
        self.__selectors = [N.arange(sp + offset, sp + offset + length)
                            for sp, length in zip(startpoints, boxlengths)]

        # NOTE: stacking into one regular array requires all boxes to have
        # the same length; boxes of different lengths cannot be combined by
        # N.asarray this way and need a different output representation
        selected = N.asarray([data[box] for box in self.__selectors])
        self._outshape = selected.shape

        return selected

<snip>

But then the problem occurs that it is not possible to store lists of differing lengths like

[1 2 3 4]
[21 22]
[37 38 39]
[56 57 58 59]
[63 64]
[68 69 70 71]
[75 76]
[86 87 88]
[92 93]
[ 97  98  99 100]
[104 105]
[114 115 116]
[136 137 138 139]
[143 144]

with N.asarray

I have now reached the point where I have no idea how to handle this. Do 
you have any ideas, or any plans to implement a function which can 
deal with different event durations?

I really appreciate your help.
 Matthias Ekman