[pymvpa] Zscore creates nan in dataset

gal star gal.star3051 at gmail.com
Sun Jan 18 13:45:58 UTC 2015


Hello,
I'm using zscore, and I receive the following exception:
"Some input data for predict is not finite (NaN or Inf)".

I've checked my pre-processed NIfTI files - they don't contain NaN values,
and yet after running zscore the dataset does contain NaNs.

How can I fix this so I can move forward with
the processing in my script?
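
For reference, the NaNs can be pinned down by checking the dataset right
before and right after the zscore call (a minimal check, assuming the
dataset variable is fds, as in the attached script):

    print numpy.isnan(fds.samples).any()  # False here: data finite before zscore
    zscore(fds, param_est=('targets', ['23']), chunks_attr='chunks')
    print numpy.isnan(fds.samples).any()  # True here: zscore introduced the NaNs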

I've attached the script I'm using.

Thanks,
Gal Star
-------------- next part --------------
from __future__ import division
import getopt
import os
import random
import sys

import numpy
import matplotlib.pyplot as plt
from mvpa2.suite import *


# Command-line arguments: experiment type, subject ID, number of samples.
type = sys.argv[1]  # note: shadows the builtin 'type'
sub = sys.argv[2]
samples = int(sys.argv[3])
#samples = 232.5

img_name = '4D_scans_conc.nii.gz'
#img_name = '4D_scans_brain.nii.gz'
#map_name = 'map.txt'
#map_name = 'balance_map.txt'
#map_name = 'map.txt.2label_balance.txt'
map_name = 'map.txt.4label_balance.txt'
#map_name = 'map.txt.2label'
#map_name = 'map.txt.4label'
#map_name = 'maps/map.txt'
source = '/home/gals/converted_data/sub_brik_data/' + type + '/' + sub + '/'
#source  = '/home/gals/converted_data/sub_brik_data/ester/' + type
#source  = '/home/gals/converted_data/sub_brik_data/gal/'+ sub + '/'
#source  = '/home/gals/converted_data/sub_brik_data/gal/Dov/'

print "type: %s" % type
print "sub: %s" % sub
print "samples: %d" % samples

folds = 2
samples_per_fold = samples / folds  # true division (see the __future__ import)
#samples_per_fold = 24.75
#########################################################
# Read mvpa sample attributes definition from text file #
#########################################################
attr = SampleAttributes(os.path.join(source, map_name))
print "after sampleAttributes"

fds = fmri_dataset(samples=os.path.join(source, img_name),
                   targets=attr.targets,
                   chunks=attr.chunks,
                   mask='/home/gals/masks/brain_mask.nii.gz')
#fds=fmri_dataset(samples=os.path.join(source,img_name),targets=attr.targets,chunks=attr.chunks,mask='/home/gals/converted_data/sub_brik_data/ester/for_gal/mask.nii.gz')
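
# Sanity check (a minimal sketch): the masked data should be entirely
# finite straight after loading, before any preprocessing touches it.
if not numpy.all(numpy.isfinite(fds.samples)):
    print "WARNING: raw dataset already contains NaN/Inf values"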

print "passed fmri dataset"
#print fds.summary()

poly_detrend(fds, polyord=1, chunks_attr='chunks')
print "after detrending"
#print fds.summary()

# remote:
#interesting = numpy.array([l in ['2221','2222','2121','2122','23'] for l in fds.sa.targets])

# recent: keep the conditions of interest plus the '23' baseline used below
interesting = numpy.array([l in ['2111', '2211', '23'] for l in fds.sa.targets])
#interesting = numpy.array([l in ['211','221','3'] for l in fds.sa.targets])

# correct incorrect all
#interesting = numpy.array([l in ['21','22','3'] for l in fds.sa.targets])

fds = fds[interesting]
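
# Diagnostic sketch: with param_est=('targets', ['23']) below, zscore
# estimates mean and std per chunk from the '23' samples only, then divides
# every feature by that std.  Any feature whose baseline std is zero (e.g.
# a constant voxel inside the mask) therefore turns into NaN.  A quick
# per-chunk count of such features:
for chunk in fds.sa['chunks'].unique:
    baseline = fds[(fds.sa.targets == '23') & (fds.sa.chunks == chunk)]
    n_zero = (baseline.samples.std(axis=0) == 0).sum()
    print "chunk %s: %d zero-variance baseline features" % (str(chunk), n_zero)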

#zscore(fds)
zscore(fds, param_est=('targets', ['23']), chunks_attr='chunks')
#zscore(fds, param_est=('targets', ['3']), chunks_attr='chunks')
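
# Quick check: report how many values became non-finite during normalization.
print "non-finite values after zscore: %d" % (~numpy.isfinite(fds.samples)).sum()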

# remote:
#interesting = numpy.array([l in ['2221','2222','2121','2122'] for l in fds.sa.targets])

# recent: drop the '23' baseline again, keeping only the classified conditions
#interesting = numpy.array([l in ['2112','2111','2211','2212'] for l in fds.sa.targets])
interesting = numpy.array([l in ['2111', '2211'] for l in fds.sa.targets])
#interesting = numpy.array([l in ['211','221'] for l in fds.sa.targets])

# correct incorrect all
#interesting = numpy.array([l in ['21','22'] for l in fds.sa.targets])

fds = fds[interesting]

print "samples remaining after selection: %d" % len(fds)

print "after normalization"

# randomize: assign each sample to one of `folds` pseudo-runs
leftover = int((samples_per_fold - int(samples_per_fold)) * folds)
pseudo_runs = [fold for fold in range(folds) for _ in range(int(samples_per_fold))]
print "leftover samples: %d" % leftover

# distribute any leftover samples across the first pseudo-runs
pseudo_runs.extend(range(leftover))

random.shuffle(pseudo_runs)
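# Minimal sanity check: there must be exactly one pseudo-run label per
# sample before attaching them as a sample attribute.
assert len(pseudo_runs) == len(fds), \
    "got %d pseudo-run labels for %d samples" % (len(pseudo_runs), len(fds))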
fds.sa['pseudo_runs'] = pseudo_runs

# Select the 1000 voxels with the highest one-way ANOVA F-scores.
sens = SensitivityBasedFeatureSelection(
    OneWayAnova(),
    FixedNElementTailSelector(1000, tail='upper', mode='select'),
    enable_ca=['sensitivity'])
#    postproc=FxMapper('features', lambda x: x / x.max(), attrfx=None)

#clf = LinearCSVMC()
clf = FeatureSelectionClassifier(LinearCSVMC(), sens)
#cv = CrossValidation(clf, NFoldPartitioner(attr='pseudo_runs'), enable_ca=['stats', 'training_stats'])
cv = CrossValidation(clf, NFoldPartitioner(attr='chunks'), enable_ca=['stats', 'training_stats'])

# CrossValidation returns per-fold error rates; convert the mean to accuracy.
error = cv(fds)
accuracy = 1 - numpy.mean(error)

# Save the ANOVA sensitivity values used for feature selection.
ds = sens.ca.sensitivity.samples
numpy.savetxt(os.path.join(source, 'anova_vals'), ds, delimiter='\n', fmt='%1.2f')

print "The list of results per fold:"
print error.samples

print "Stats on all folds:"
print cv.ca.stats.as_string(summary=True)

print "And The Accuracy:"
print "Accuracy is %f" % accuracy

print "resuls run2:"
print cv.ca.stats.matrices[1].as_string(summary=True)

print "resuls run3:"
print cv.ca.stats.matrices[2].as_string(summary=True)

#print "resuls run18:"
#print cv.ca.stats.matrices[17].as_string(summary=True)

print "resuls run4:"
print cv.ca.stats.matrices[3].as_string(summary=True)


