[med-svn] [python-mne] 79/376: ENH : new cluster level non-parametric statistic on 1D data
Yaroslav Halchenko
debian at onerussian.com
Fri Nov 27 17:22:11 UTC 2015
This is an automated email from the git hooks/post-receive script.
yoh pushed a commit to annotated tag v0.1
in repository python-mne.
commit 70fb56b2450fee3fbd018d6d9a31c76ae98266b8
Author: Alexandre Gramfort <alexandre.gramfort at inria.fr>
Date: Wed Feb 16 16:57:37 2011 -0500
ENH : new cluster level non-parametric statistic on 1D data
---
examples/stats/plot_cluster_stats_evoked.py | 82 +++++++++++++++
mne/stats/__init__.py | 1 +
mne/stats/cluster_level.py | 157 ++++++++++++++++++++++++++++
mne/stats/permutations.py | 63 ++---------
mne/stats/tests/test_cluster_level.py | 39 +++++++
mne/stats/tests/test_permutations.py | 12 +--
6 files changed, 293 insertions(+), 61 deletions(-)
diff --git a/examples/stats/plot_cluster_stats_evoked.py b/examples/stats/plot_cluster_stats_evoked.py
new file mode 100644
index 0000000..7215200
--- /dev/null
+++ b/examples/stats/plot_cluster_stats_evoked.py
@@ -0,0 +1,82 @@
+"""
+=======================================================
+Permutation T-test on sensor data with 1D cluster level
+=======================================================
+
+One tests if the evoked response is significantly different
+between conditions. Multiple comparison problem is adressed
+with cluster level permutation test.
+
+"""
+
+# Authors: Alexandre Gramfort <gramfort at nmr.mgh.harvard.edu>
+#
+# License: BSD (3-clause)
+
+print __doc__
+
+import numpy as np
+
+import mne
+from mne import fiff
+from mne.stats import permutation_1d_cluster_test
+from mne.datasets import sample
+
+###############################################################################
+# Set parameters
+data_path = sample.data_path('..')
+raw_fname = data_path + '/MEG/sample/sample_audvis_filt-0-40_raw.fif'
+event_fname = data_path + '/MEG/sample/sample_audvis_filt-0-40_raw-eve.fif'
+event_id = 1
+tmin = -0.2
+tmax = 0.5
+
+# Setup for reading the raw data
+raw = fiff.setup_read_raw(raw_fname)
+events = mne.read_events(event_fname)
+
+channel = 'MEG 1332'
+include = [channel]
+
+###############################################################################
+# Read epochs for the channel of interest
+picks = fiff.pick_types(raw['info'], meg=False, include=include)
+event_id = 1
+data1, times, channel_names = mne.read_epochs(raw, events, event_id,
+ tmin, tmax, picks=picks, baseline=(None, 0))
+condition1 = np.squeeze(np.array([d['epoch'] for d in data1])) # as 3D matrix
+
+event_id = 2
+data2, times, channel_names = mne.read_epochs(raw, events, event_id,
+ tmin, tmax, picks=picks, baseline=(None, 0))
+condition2 = np.squeeze(np.array([d['epoch'] for d in data2])) # as 3D matrix
+
+###############################################################################
+# Compute statistic
+threshold = 6.0
+T_obs, clusters, cluster_p_values, H0 = \
+ permutation_1d_cluster_test([condition1, condition2],
+ n_permutations=1000, threshold=threshold, tail=1)
+
+###############################################################################
+# Plot
+import pylab as pl
+pl.close('all')
+pl.subplot(211)
+pl.title('Channel : ' + channel)
+pl.plot(times, condition1.mean(axis=0) - condition2.mean(axis=0),
+ label="ERF Contrast (Event 1 - Event 2)")
+pl.ylabel("MEG (T / m)")
+pl.legend()
+pl.subplot(212)
+for i_c, (start, stop) in enumerate(clusters):
+ if cluster_p_values[i_c] <= 0.05:
+ h = pl.axvspan(times[start], times[stop-1], color='r', alpha=0.3)
+ else:
+ pl.axvspan(times[start], times[stop-1], color=(0.3, 0.3, 0.3),
+ alpha=0.3)
+hf = pl.plot(times, T_obs, 'g')
+pl.legend((h, ), ('cluster p-value < 0.05', ))
+pl.xlabel("time (ms)")
+pl.ylabel("f-values")
+pl.show()
diff --git a/mne/stats/__init__.py b/mne/stats/__init__.py
index 954fed4..4b782c8 100644
--- a/mne/stats/__init__.py
+++ b/mne/stats/__init__.py
@@ -1 +1,2 @@
from .permutations import permutation_t_test
+from .cluster_level import permutation_1d_cluster_test
diff --git a/mne/stats/cluster_level.py b/mne/stats/cluster_level.py
new file mode 100644
index 0000000..b93c3f7
--- /dev/null
+++ b/mne/stats/cluster_level.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Author: Thorsten Kranz <thorstenkranz at gmail.com>
+# Alexandre Gramfort <gramfort at nmr.mgh.harvard.edu>
+#
+# License: Simplified BSD
+
+import numpy as np
+from scipy import stats
+from scipy.stats import percentileofscore
+
+
+def f_oneway(*args):
+ """Call scipy.stats.f_oneway, but return only f-value"""
+ return stats.f_oneway(*args)[0]
+
+
+def _find_clusters(x, threshold, tail=0):
+ """For a given 1d-array (test statistic), find all clusters which
+ are above/below a certain threshold. Returns a list of 2-tuples.
+
+ Parameters
+ ----------
+ x: 1D array
+ Data
+ threshold: float
+ Where to threshold the statistic
+ tail : -1 | 0 | 1
+ Type of comparison
+
+ Returns
+ -------
+ clusters: list of tuples
+ Each tuple is a pair of indices (begin/end of cluster)
+
+ Example
+ -------
+ >>> _find_clusters([1, 2, 3, 1], 1.9, tail=1)
+ [(1, 3)]
+ >>> _find_clusters([2, 2, 3, 1], 1.9, tail=1)
+ [(0, 3)]
+ >>> _find_clusters([1, 2, 3, 2], 1.9, tail=1)
+ [(1, 4)]
+ >>> _find_clusters([1, -2, 3, 1], 1.9, tail=0)
+ [(1, 3)]
+ >>> _find_clusters([1, -2, -3, 1], -1.9, tail=-1)
+ [(1, 3)]
+ """
+ if not tail in [-1, 0, 1]:
+ raise ValueError('invalid tail parameter')
+
+ x = np.asanyarray(x)
+
+ x = np.concatenate([np.array([threshold]), x, np.array([threshold])])
+ if tail == -1:
+ x_in = (x < threshold).astype(np.int)
+ elif tail == 1:
+ x_in = (x > threshold).astype(np.int)
+ else:
+ x_in = (np.abs(x) > threshold).astype(np.int)
+
+ x_switch = np.diff(x_in)
+ in_points = np.where(x_switch > 0)[0]
+ out_points = np.where(x_switch < 0)[0]
+ clusters = zip(in_points, out_points)
+ return clusters
+
+
+def permutation_1d_cluster_test(X, stat_fun=f_oneway, threshold=1.67,
+ n_permutations=1000, tail=0):
+ """Cluster-level statistical permutation test
+
+ For a list of 2d-arrays of data, e.g. power values, calculate some
+ statistics for each timepoint (dim 1) over groups. Do a cluster
+ analysis with permutation test for calculating corrected p-values
+
+ Parameters
+ ----------
+ X: list
+ List of 2d-arrays containing the data, dim 1: timepoints, dim 2:
+ elements of groups
+ stat_fun : callable
+ function called to calculate statistics, must accept 1d-arrays as
+ arguments (default: scipy.stats.f_oneway)
+ tail : -1 or 0 or 1 (default = 0)
+ If tail is 1, the statistic is thresholded above threshold.
+ If tail is -1, the statistic is thresholded below threshold.
+ If tail is 0, the statistic is thresholded on both sides of
+ the distribution.
+
+ Returns
+ -------
+ T_obs : array of shape [n_tests]
+ T-statistic observerd for all variables
+ clusters: list of tuples
+ Each tuple is a pair of indices (begin/end of cluster)
+ cluster_pv: array
+ P-value for each cluster
+ H0 : array of shape [n_permutations]
+ Max cluster level stats observed under permutation.
+
+ Notes
+ -----
+ Reference:
+ Cluster permutation algorithm as described in
+ Maris/Oostenveld (2007),
+ "Nonparametric statistical testing of EEG- and MEG-data"
+ Journal of Neuroscience Methods, Vol. 164, No. 1., pp. 177-190.
+ doi:10.1016/j.jneumeth.2007.03.024
+ """
+ X_full = np.concatenate(X, axis=0)
+ n_samples_total = X_full.shape[0]
+ n_samples_per_condition = [x.shape[0] for x in X]
+
+ # Step 1: Calculate Anova (or other stat_fun) for original data
+ # -------------------------------------------------------------
+ T_obs = stat_fun(*X)
+
+ clusters = _find_clusters(T_obs, threshold, tail)
+
+ splits_idx = np.cumsum(n_samples_per_condition)[:-1]
+ # Step 2: If we have some clusters, repeat process on permutated data
+ # -------------------------------------------------------------------
+ if len(clusters) > 0:
+ cluster_stats = [np.sum(T_obs[c[0]:c[1]]) for c in clusters]
+ cluster_pv = np.ones(len(clusters), dtype=np.float)
+ H0 = np.zeros(n_permutations) # histogram
+ for i_s in range(n_permutations):
+ # make list of indices for random data split
+ indices_lists = np.split(np.random.permutation(n_samples_total),
+ splits_idx)
+
+ X_shuffle_list = [X_full[indices] for indices in indices_lists]
+ T_obs_surr = stat_fun(*X_shuffle_list)
+ clusters_perm = _find_clusters(T_obs_surr, threshold, tail)
+
+ if len(clusters_perm) > 0:
+ cluster_stats_perm = [np.sum(T_obs_surr[c[0]:c[1]])
+ for c in clusters_perm]
+ H0[i_s] = max(cluster_stats_perm)
+ else:
+ H0[i_s] = 0
+
+ # for each cluster in original data, calculate p-value as percentile
+ # of its cluster statistics within all cluster statistics in surrogate
+ # data
+ cluster_pv[:] = [percentileofscore(H0,
+ cluster_stats[i_cl])
+ for i_cl in range(len(clusters))]
+ cluster_pv[:] = (100.0 - cluster_pv[:]) / 100.0 # from pct to fraction
+ return T_obs, clusters, cluster_pv, H0
+ else:
+ return T_obs, np.array([]), np.array([]), np.array([])
+
+
+permutation_1d_cluster_test.__test__ = False
diff --git a/mne/stats/permutations.py b/mne/stats/permutations.py
index e32be53..8b1cda7 100644
--- a/mne/stats/permutations.py
+++ b/mne/stats/permutations.py
@@ -78,12 +78,12 @@ def permutation_t_test(X, n_permutations=10000, tail=0):
Returns
-------
+ T_obs : array of shape [n_tests]
+ T-statistic observed for all variables
+
p_values : array of shape [n_tests]
P-values for all the tests (aka variables)
- T0 : array of shape [n_tests]
- T-statistic for all variables
-
H0 : array of shape [n_permutations]
T-statistic obtained by permutations and t-max trick for multiple
comparison.
@@ -109,7 +109,7 @@ def permutation_t_test(X, n_permutations=10000, tail=0):
mu0 = np.mean(X, axis=0)
dof_scaling = sqrt(n_samples / (n_samples - 1.0))
std0 = np.sqrt(X2 - mu0**2) * dof_scaling # get std with variance splitting
- T0 = np.mean(X, axis=0) / (std0 / sqrt(n_samples))
+ T_obs = np.mean(X, axis=0) / (std0 / sqrt(n_samples))
if do_exact:
perms = bin_perm_rep(n_samples, a=1, b=-1)[1:,:]
@@ -124,60 +124,13 @@ def permutation_t_test(X, n_permutations=10000, tail=0):
scaling = float(n_permutations + 1)
if tail == 0:
- p_values = 1.0 - np.searchsorted(H0, np.abs(T0)) / scaling
+ p_values = 1.0 - np.searchsorted(H0, np.abs(T_obs)) / scaling
elif tail == 1:
- p_values = 1.0 - np.searchsorted(H0, T0) / scaling
+ p_values = 1.0 - np.searchsorted(H0, T_obs) / scaling
elif tail == -1:
- p_values = 1.0 - np.searchsorted(H0, -T0) / scaling
+ p_values = 1.0 - np.searchsorted(H0, -T_obs) / scaling
- return p_values, T0, H0
+ return T_obs, p_values, H0
permutation_t_test.__test__ = False # for nosetests
-
-if __name__ == '__main__':
- # 1 sample t-test
- n_samples, n_tests = 30, 5
- n_permutations = 50000
- # n_permutations = 'exact'
- X = np.random.randn(n_samples, n_tests)
- X[:,:2] += 0.6
- p_values, T0, H0 = permutation_t_test(X, n_permutations, tail=1)
- is_significant = p_values < 0.05
- print 80*"-"
- print "-------- 1-sample t-test :"
- print "T stats : ", T0
- print "p_values : ", p_values
- print "Is significant : ", is_significant
-
- print 80*"-"
- print "-------- Comparison analytic vs permutation :"
- p_values, T0, H0 = permutation_t_test(X, n_permutations, tail=1)
- print "--- permutation_t_test :"
- print "T stats : ", T0
- print "p_values : ", p_values
- print "Is significant : ", is_significant
-
- from scipy import stats
- T0, p_values = stats.ttest_1samp(X[:,0], 0)
- print "--- scipy.stats.ttest_1samp :"
- print "T stats : ", T0
- print "p_values : ", p_values
-
- # 2 samples t-test
- X1 = np.random.randn(n_samples, n_tests)
- X2 = np.random.randn(n_samples, n_tests)
- X1[:,:2] += 2
- p_values, T0, H0 = permutation_t_test(X1 - X2, n_permutations)
- print 80*"-"
- print "-------- 2-samples t-test :"
- print "T stats : ", T0
- print "p_values : ", p_values
- print "Is significant : ", is_significant
-
- # import pylab as pl
- # pl.close('all')
- # pl.hist(H0)
- # y_min, y_max = pl.ylim()
- # pl.vlines(T0, y_min, y_max, color='g', linewidth=2, linestyle='--')
- # pl.show()
diff --git a/mne/stats/tests/test_cluster_level.py b/mne/stats/tests/test_cluster_level.py
new file mode 100644
index 0000000..9b6b999
--- /dev/null
+++ b/mne/stats/tests/test_cluster_level.py
@@ -0,0 +1,39 @@
+import numpy as np
+from numpy.testing import assert_equal
+
+from ..cluster_level import permutation_1d_cluster_test
+
+
+def test_permutation_t_test():
+ """Test T-test based on permutations
+ """
+ noiselevel = 20
+
+ normfactor = np.hanning(20).sum()
+
+ condition1 = np.random.randn(40, 350) * noiselevel
+ for c in condition1:
+ c[:] = np.convolve(c, np.hanning(20), mode="same") / normfactor
+
+ condition2 = np.random.randn(33, 350) * noiselevel
+ for c in condition2:
+ c[:] = np.convolve(c, np.hanning(20), mode="same") / normfactor
+
+ pseudoekp = 5 * np.hanning(150)[None,:]
+ condition1[:, 100:250] += pseudoekp
+ condition2[:, 100:250] -= pseudoekp
+
+ T_obs, clusters, cluster_p_values, hist = permutation_1d_cluster_test(
+ [condition1, condition2], n_permutations=500,
+ tail=1)
+ assert_equal(np.sum(cluster_p_values < 0.05), 1)
+
+ T_obs, clusters, cluster_p_values, hist = permutation_1d_cluster_test(
+ [condition1, condition2], n_permutations=500,
+ tail=0)
+ assert_equal(np.sum(cluster_p_values < 0.05), 1)
+
+ T_obs, clusters, cluster_p_values, hist = permutation_1d_cluster_test(
+ [condition1, condition2], n_permutations=500,
+ tail=-1)
+ assert_equal(np.sum(cluster_p_values < 0.05), 0)
diff --git a/mne/stats/tests/test_permutations.py b/mne/stats/tests/test_permutations.py
index 3161dd1..768dbf8 100644
--- a/mne/stats/tests/test_permutations.py
+++ b/mne/stats/tests/test_permutations.py
@@ -15,20 +15,20 @@ def test_permutation_t_test():
X = np.random.randn(n_samples, n_tests)
X[:,:2] += 1
- p_values, T0, H0 = permutation_t_test(X, n_permutations=999, tail=0)
+ T_obs, p_values, H0 = permutation_t_test(X, n_permutations=999, tail=0)
is_significant = p_values < 0.05
assert_array_equal(is_significant, [True, True, False, False, False])
- p_values, T0, H0 = permutation_t_test(X, n_permutations=999, tail=1)
+ T_obs, p_values, H0 = permutation_t_test(X, n_permutations=999, tail=1)
is_significant = p_values < 0.05
assert_array_equal(is_significant, [True, True, False, False, False])
- p_values, T0, H0 = permutation_t_test(X, n_permutations=999, tail=-1)
+ T_obs, p_values, H0 = permutation_t_test(X, n_permutations=999, tail=-1)
is_significant = p_values < 0.05
assert_array_equal(is_significant, [False, False, False, False, False])
X = np.random.randn(18, 1)
- p_values, T0, H0 = permutation_t_test(X[:, [0]], n_permutations='all')
- T0_scipy, p_values_scipy = stats.ttest_1samp(X[:, 0], 0)
- assert_almost_equal(T0[0], T0_scipy, 8)
+ T_obs, p_values, H0 = permutation_t_test(X[:, [0]], n_permutations='all')
+ T_obs_scipy, p_values_scipy = stats.ttest_1samp(X[:, 0], 0)
+ assert_almost_equal(T_obs[0], T_obs_scipy, 8)
assert_almost_equal(p_values[0], p_values_scipy, 2)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-mne.git
More information about the debian-med-commit
mailing list