[med-svn] [SCM] aghermann branch, master, updated. 551e213a23b59b71cba6a9c3a282d1b60e21b854

Andrei Zavada johnhommer at gmail.com
Sun Apr 21 23:17:56 UTC 2013


The following commit has been merged in the master branch:
commit ee9ef082b050fcf1d4f6b54f05011a457f9931b1
Author: Andrei Zavada <johnhommer at gmail.com>
Date:   Fri Apr 12 19:57:18 2013 +0300

    parse and look at individual EDF patient_id fields (patch 1/2)

diff --git a/src/expdesign/recording.cc b/src/expdesign/recording.cc
index 704ba97..aa8ac62 100644
--- a/src/expdesign/recording.cc
+++ b/src/expdesign/recording.cc
@@ -130,7 +130,7 @@ CProfile (CSubject& J, const string& d, const sigfile::SChannel& h,
 	      // anchor zero page, get pagesize from edf^W CBinnedPower^W either goes
 		time_t dima = F.start_time();
 		printf( "CProfile::CProfile(): adding %s of [%s, %s, %s] %zu pages (%zu full, %zu in hypnogram) recorded %s",
-			metrics::name(params.metric), F.subject(), F.session(), F.episode(),
+			metrics::name(params.metric), F.id.c_str(), F.session(), F.episode(),
 			M.total_pages(), M.full_pages(), M.hypnogram().pages(), ctime( &dima));
 
 		if ( pz - pa != (int)M.full_pages() ) {
@@ -185,7 +185,7 @@ CProfile (CRecording& M,
 		pz = (size_t)difftime( M.F().end_time(), _0at) / _pagesize;
 	time_t	dima = M.F().start_time();
 	printf( "CProfile::CProfile(): adding single recording %s of [%s, %s, %s] %zu pages (%zu full, %zu in hypnogram) recorded %s",
-		metrics::name(params.metric), M.F().subject(), M.F().session(), M.F().episode(),
+		metrics::name(params.metric), M.F().id.c_str(), M.F().session(), M.F().episode(),
 		M.total_pages(), M.full_pages(), M.hypnogram().pages(), ctime( &dima));
 
 	if ( pz - pa != (int)M.full_pages() ) {
diff --git a/src/expdesign/recording.hh b/src/expdesign/recording.hh
index a0997d3..e197099 100644
--- a/src/expdesign/recording.hh
+++ b/src/expdesign/recording.hh
@@ -153,7 +153,7 @@ class CRecording {
 		    const metrics::mc::SPPack&);
        ~CRecording ();
 
-	const char* subject() const      {  return _source().subject(); }
+	const char* subject() const      {  return _source().name.c_str(); }
 	const char* session() const      {  return _source().session(); }
 	const char* episode() const      {  return _source().episode(); }
 	const char* channel() const      {  return _source().channel_by_id(_sig_no); }
diff --git a/src/expdesign/tree-scanner.cc b/src/expdesign/tree-scanner.cc
index b1e9d23..e074779 100644
--- a/src/expdesign/tree-scanner.cc
+++ b/src/expdesign/tree-scanner.cc
@@ -151,16 +151,16 @@ register_intree_source( sigfile::CTypedSource&& F,
 		}
 
 		// refuse to register sources of wrong subjects
-		if ( j_name != F().subject() ) {
-			log_message( "%s: file belongs to subject \"%s\", is misplaced here under subject \"%s\"\n",
-				     F().filename(), F().subject(), j_name.c_str());
+		if ( j_name != F().id ) {
+			log_message( "%s: file belongs to subject %s (\"%s\"), is misplaced here under subject \"%s\"\n",
+				     F().filename(), F().id.c_str(), F().name.c_str(), j_name.c_str());
 			return -1;
 		}
 		try {
-			auto existing_group = group_of( F().subject());
+			auto existing_group = group_of( F().id.c_str());
 			if ( g_name != existing_group ) {
-				log_message( "%s: subject \"%s\" belongs to a different group (\"%s\")\n",
-					     F().filename(), F().subject(), existing_group);
+				log_message( "%s: subject %s (\"%s\") belongs to a different group (\"%s\")\n",
+					     F().filename(), F().id.c_str(), F().name.c_str(), existing_group);
 				return -1;
 			}
 		} catch (invalid_argument) {
@@ -189,8 +189,8 @@ register_intree_source( sigfile::CTypedSource&& F,
 			J = &*Ji;
 
 	      // insert/update episode observing start/end times
-		printf( "\nCExpDesign::register_intree_source( file: \"%s\", J: \"%s\", E: \"%s\", D: \"%s\")\n",
-			F().filename(), F().subject(), F().episode(), F().session());
+		printf( "\nCExpDesign::register_intree_source( file: \"%s\", J: %s (\"%s\"), E: \"%s\", D: \"%s\")\n",
+			F().filename(), F().id.c_str(), F().name.c_str(), F().episode(), F().session());
 		switch ( J->measurements[F().session()].add_one(
 				 move(F), fft_params, swu_params, mc_params) ) {  // this will do it
 		case AGH_EPSEQADD_OVERLAP:
diff --git a/src/libsigfile/edf.cc b/src/libsigfile/edf.cc
index 21c63a8..123cab6 100644
--- a/src/libsigfile/edf.cc
+++ b/src/libsigfile/edf.cc
@@ -41,7 +41,7 @@ template int sigfile::CEDFFile::export_original_( const char*, const char*) cons
 
 int
 sigfile::CEDFFile::
-set_subject( const char* s)
+set_patient_id( const char* s)
 {
 	memcpy( header.patient_id, agh::str::pad( s, 80).c_str(), 80);
 	return strlen(s) > 80;
@@ -265,7 +265,7 @@ CEDFFile (const char *fname_, TSubtype subtype_, int flags_,
 	_lay_out_header();
 
 	strncpy( header.version_number, version_string, 8);
-	set_subject( "Mr. Fafa");
+	set_patient_id( "Fafa_1 M X Mr._Fafa");
 	set_recording_id( "Zzz");
 	set_comment( fname_);
 	set_start_time( time(NULL));
@@ -365,9 +365,9 @@ CEDFFile (CEDFFile&& rv)
 	_start_time = rv._start_time;
 	_end_time   = rv._end_time;
 
-	swap( _patient, rv._patient);
-	swap( _episode, rv._episode);
-	swap( _session, rv._session);
+	swap( _patient_id, rv._patient_id);
+	swap( _episode,    rv._episode);
+	swap( _session,    rv._session);
 
 	swap( channels, rv.channels);
 
@@ -512,6 +512,11 @@ _parse_header()
 		_get_next_field( header.data_record_size, 8);
 		_get_next_field( header.n_channels,       4);
 
+		if ( strncmp( header.version_number, version_string, 8) ) {
+			_status |= (bad_version | inoperable);
+			return -2;
+		}
+
 		_subtype =
 			(strncasecmp( header.reserved, "edf+c", 5) == 0)
 			? edfplus_c
@@ -519,11 +524,6 @@ _parse_header()
 			? edfplus_d
 			: edf;
 
-		if ( strncmp( header.version_number, version_string, 8) ) {
-			_status |= (bad_version | inoperable);
-			return -2;
-		}
-
 		size_t	header_length;
 
 		header_length = n_data_records = data_record_size = n_channels = 0;
@@ -542,7 +542,23 @@ _parse_header()
 			return -2;
 		}
 
-		_patient = agh::str::trim( string (header.patient_id, 80));
+		_patient_id = agh::str::trim( string (header.patient_id, 80));
+
+	      // sub-parse patient_id into SSubjectId struct (a blank field may yield no tokens)
+		{
+			auto subfields = agh::str::tokens( _patient_id, " ");
+			if ( subfields.size() != 4 ) {
+				fprintf( stderr, "%s: Nonconforming patient_id\n", filename());
+				SSubjectId::id = SSubjectId::name = subfields.empty() ? string() : subfields.front();
+				SSubjectId::gender = TGender::unknown;
+			} else {
+				auto i = subfields.begin();
+				SSubjectId::id = *i++;
+				SSubjectId::gender = SSubjectId::char_to_gender((*i++)[0]);
+				SSubjectId::dob = SSubjectId::str_to_dob(*i++);
+				SSubjectId::name = agh::str::join( agh::str::tokens(*i++, "_"), " ");
+			}
+		}
 
 	      // deal with episode and session
 		{
@@ -780,7 +796,7 @@ sigfile::CEDFFile::details( bool channels_too) const
 			       " Record length\t: %zu sec\n",
 			       filename(),
 			       subtype_s(),
-			       subject(),
+			       patient_id(),
 			       agh::str::trim( string (header.recording_id, 80)).c_str(),
 			       agh::str::trim( string (header.recording_date, 8)).c_str(),
 			       agh::str::trim( string (header.recording_time, 8)).c_str(),
@@ -849,12 +865,11 @@ sigfile::CEDFFile::explain_edf_status( int status)
 	if ( status & time_unparsable )
 		recv.emplace_back( "* Time field ill-formed");
 	if ( status & nosession )
-		recv.emplace_back(
-			"* No session information in field RecordingID "
-			"(expecting this to appear after "
-			"episode designation followed by a comma)");
+		recv.emplace_back( "* No session information in field RecordingID");
 	if ( status & non1020_channel )
 		recv.emplace_back( "* Channel designation not following the 10-20 system");
+	if ( status & nonconforming_patient_id )
+		recv.emplace_back( "* PatientId not conforming to section 2.1.3.3 of EDF spec");
 	if ( status & nonkemp_signaltype )
 		recv.emplace_back( "* Signal type not listed in Kemp et al");
 	if ( status & dup_channels )
diff --git a/src/libsigfile/edf.hh b/src/libsigfile/edf.hh
index e70c1d2..cd482e8 100644
--- a/src/libsigfile/edf.hh
+++ b/src/libsigfile/edf.hh
@@ -98,8 +98,8 @@ class CEDFFile
 	// identification
 	const char* filename() const
 		{ return _filename.c_str(); }
-	const char* subject() const
-		{ return _patient.c_str(); }
+	const char* patient_id() const
+		{ return _patient_id.c_str(); }
 	const char* recording_id() const
 		{ return header.recording_id; }
 	const char* comment() const
@@ -118,7 +118,7 @@ class CEDFFile
 		{ return n_data_records * data_record_size; }
 
 	// setters
-	int set_subject( const char* s);
+	int set_patient_id( const char* s);
 	int set_recording_id( const char* s);
 	int set_episode( const char* s);
 	int set_session( const char* s);
@@ -484,22 +484,23 @@ class CEDFFile
 
 
 	enum TStatus : int {
-		ok			= 0,
-		bad_header		= (1 <<  0),
-		bad_version		= (1 <<  1),
-		bad_numfld		= (1 <<  2),
-		bad_recording		= (1 <<  3),
-		date_unparsable		= (1 <<  4),
-		time_unparsable		= (1 <<  5),
-		nosession		= (1 <<  6),
-		noepisode		= (1 <<  7),
-		nonkemp_signaltype	= (1 <<  8),
-		non1020_channel		= (1 <<  9),
-		dup_channels		= (1 << 11),
-		nogain			= (1 << 12),
-		sysfail			= (1 << 13),
-		too_many_channels	= (1 << 14),
-		inoperable		= (bad_header
+		ok			 = 0,
+		bad_header		 = (1 <<  0),
+		bad_version		 = (1 <<  1),
+		bad_numfld		 = (1 <<  2),
+		bad_recording		 = (1 <<  3),
+		date_unparsable		 = (1 <<  4),
+		time_unparsable		 = (1 <<  5),
+		nosession		 = (1 <<  6),
+		noepisode		 = (1 <<  7),
+		nonkemp_signaltype	 = (1 <<  8),
+		non1020_channel		 = (1 <<  9),
+		dup_channels		 = (1 << 11),
+		nogain			 = (1 << 12),
+		sysfail			 = (1 << 13),
+		too_many_channels	 = (1 << 14),
+		nonconforming_patient_id = (1 << 15),
+		inoperable		 = (bad_header
 					   | bad_version
 					   | bad_numfld
 					   | bad_recording
@@ -517,7 +518,7 @@ class CEDFFile
 	time_t	_start_time,
 		_end_time;
 
-	string	_patient,
+	string	_patient_id, // this is trimmed, raw; parsed into SSubjectId fields
        // take care of file being named 'episode-1.edf'
 		_episode,
        // loosely/possibly also use RecordingID as session
diff --git a/src/libsigfile/source-base.hh b/src/libsigfile/source-base.hh
index fc8ad43..45bed88 100644
--- a/src/libsigfile/source-base.hh
+++ b/src/libsigfile/source-base.hh
@@ -186,9 +186,81 @@ struct SFilterPack {
 
 
 
+// Subject identification as carried in the EDF+ "local patient identification"
+// header field; see http://www.edfplus.info/specs/edfplus.html#datarecords,
+// section 2.1.3.3.  The field holds four space-separated subfields: code, sex,
+// birthdate (dd-MMM-yyyy) and name, with '_' standing in for spaces in a name.
+struct SSubjectId {
+	string	id,
+		name;
+	time_t	dob = 0;  // 0 when birthdate is unknown or unparsable
+	enum class TGender : char {
+		unknown = 'X', male = 'M', female = 'F'
+	};
+	TGender	gender = TGender::unknown;
+
+	// Map the EDF+ sex subfield character onto TGender, case-insensitively;
+	// anything other than M/m/F/f is reported as unknown.
+	static TGender char_to_gender( char x)
+		{
+			switch ( x ) {
+			case 'M':
+			case 'm':
+				return TGender::male;
+			case 'F':
+			case 'f':
+				return TGender::female;
+			default:
+				return TGender::unknown;
+			}
+		}
+
+	// Return the zero-based month number (0 = Jan .. 11 = Dec) for a
+	// three-letter English month abbreviation, compared case-insensitively,
+	// or -1 if s is not one.
+	static int str_to_english_month( const string& s)
+		{
+			static const char* const months[12] = {
+				"jan", "feb", "mar", "apr", "may", "jun",
+				"jul", "aug", "sep", "oct", "nov", "dec"
+			};
+			for ( int m = 0; m < 12; ++m )
+				if ( strcasecmp( s.c_str(), months[m]) == 0 )
+					return m;
+			return -1;
+		}
+
+	// Parse a birthdate of the form dd-MMM-yyyy (e.g. 02-MAY-1951) into a
+	// time_t at local midnight of that day.  Month names are matched by hand
+	// rather than with strptime, which would depend on the current locale.
+	// Returns 0 if s is malformed or the date cannot be represented.
+	static time_t str_to_dob( const string& s)
+		{
+			struct tm t;
+			memset( &t, '\0', sizeof (t));
+
+			auto ff = agh::str::tokens(s, "-");
+			if ( ff.size() != 3 )
+				return (time_t)0;
+			auto f = ff.begin();
+			try {
+				t.tm_mday = stoi( *f++);
+				t.tm_mon  = str_to_english_month(*f++);
+				if ( t.tm_mon < 0 )  // mktime would silently normalise month -1
+					return (time_t)0;
+				t.tm_year = stoi(*f) - 1900;  // tm_year counts from 1900
+				t.tm_isdst = -1;  // let mktime work out DST
+				const time_t when = mktime( &t);
+				return (when == (time_t)-1) ? (time_t)0 : when;
+			} catch (...) {
+				return (time_t)0;
+			}
+		}
+};
+
 
 
-class CSource {
+class CSource : public SSubjectId {
 	friend class CTypedSource;
     protected:
 	string	_filename;
@@ -216,7 +288,7 @@ class CSource {
 		{
 			return _filename.c_str();
 		}
-	virtual const char* subject()			const = 0;
+	virtual const char* patient_id()		const = 0;
 	virtual const char* recording_id()		const = 0;
 	virtual const char* comment()			const = 0;
 	// probably parsed out of recording_id
@@ -280,7 +352,7 @@ class CSource {
 		}
 
       // setters
-	virtual int set_subject( const char*)	      = 0;
+	virtual int set_patient_id( const char*)      = 0;
 	virtual int set_recording_id( const char*)    = 0;
 	virtual int set_episode( const char*)	      = 0;
 	virtual int set_session( const char*)	      = 0;
diff --git a/src/metrics/mc.cc b/src/metrics/mc.cc
index f4e5afc..26c5e5f 100644
--- a/src/metrics/mc.cc
+++ b/src/metrics/mc.cc
@@ -162,7 +162,7 @@ export_tsv( const string& fname) const
 	fprintf( f, "## Subject: %s;  Session: %s, Episode: %s recorded %.*s;  Channel: %s\n"
 		 "## Total EEG Microcontinuity course (%zu %zu-sec pages) from %g up to %g Hz in bins of %g Hz\n"
 		 "#Page\t",
-		 _using_F().subject(), _using_F().session(), _using_F().episode(),
+		 _using_F().name.c_str(), _using_F().session(), _using_F().episode(),
 		 (int)strlen(asctime_)-1, asctime_,
 		 _using_F().channel_by_id(_using_sig_no),
 		 pages(), Pp.pagesize, Pp.freq_from, Pp.freq_from + Pp.bandwidth * bins(), Pp.bandwidth);
@@ -196,7 +196,7 @@ export_tsv( size_t bin,
 	fprintf( f, "## Microcontinuity profile of\n"
 		 "## Subject: %s;  Session: %s, Episode: %s recorded %.*s;  Channel: %s\n"
 		 "## Course (%zu %zu-sec pages) in range %g-%g Hz\n",
-		 _using_F().subject(), _using_F().session(), _using_F().episode(),
+		 _using_F().name.c_str(), _using_F().session(), _using_F().episode(),
 		 (int)strlen(asctime_)-1, asctime_,
 		 _using_F().channel_by_id(_using_sig_no),
 		 pages(), Pp.pagesize, Pp.freq_from, Pp.freq_from + (bin+1) * Pp.bandwidth);
diff --git a/src/metrics/page-metrics-base.cc b/src/metrics/page-metrics-base.cc
index d7aab1a..3064806 100644
--- a/src/metrics/page-metrics-base.cc
+++ b/src/metrics/page-metrics-base.cc
@@ -217,7 +217,7 @@ export_tsv( const string& fname) const
 	char *asctime_ = asctime( localtime( &sttm));
 	fprintf( f, "## Subject: %s;  Session: %s, Episode: %s recorded %.*s;  Channel: %s\n"
 		 "#Page\t",
-		 _using_F().subject(), _using_F().session(), _using_F().episode(),
+		 _using_F().name.c_str(), _using_F().session(), _using_F().episode(),
 		 (int)strlen(asctime_)-1, asctime_,
 		 _using_F().channel_by_id(_using_sig_no));
 
diff --git a/src/metrics/psd.cc b/src/metrics/psd.cc
index 214b479..6137127 100644
--- a/src/metrics/psd.cc
+++ b/src/metrics/psd.cc
@@ -267,7 +267,7 @@ export_tsv( const string& fname) const
 	fprintf( f, "## Subject: %s;  Session: %s, Episode: %s recorded %.*s;  Channel: %s\n"
 		 "## Total spectral power course (%zu %zu-sec pages) up to %g Hz in bins of %g Hz\n"
 		 "#Page\t",
-		 _using_F().subject(), _using_F().session(), _using_F().episode(),
+		 _using_F().name.c_str(), _using_F().session(), _using_F().episode(),
 		 (int)strlen(asctime_)-1, asctime_,
 		 _using_F().channel_by_id(_using_sig_no),
 		 pages(), Pp.pagesize, _bins*Pp.binsize, Pp.binsize);
@@ -303,7 +303,7 @@ export_tsv( float from, float upto,
 	fprintf( f, "PSD profile of\n"
 		 "## Subject: %s;  Session: %s, Episode: %s recorded %.*s;  Channel: %s\n"
 		 "## Course (%zu %zu-sec pages) in range %g-%g Hz\n",
-		 _using_F().subject(), _using_F().session(), _using_F().episode(),
+		 _using_F().name.c_str(), _using_F().session(), _using_F().episode(),
 		 (int)strlen(asctime_)-1, asctime_,
 		 _using_F().channel_by_id(_using_sig_no),
 		 pages(), Pp.pagesize, from, upto);
diff --git a/src/metrics/swu.cc b/src/metrics/swu.cc
index 36a1312..0c8cafd 100644
--- a/src/metrics/swu.cc
+++ b/src/metrics/swu.cc
@@ -155,7 +155,7 @@ export_tsv( const string& fname) const
 	fprintf( f, "## Subject: %s;  Session: %s, Episode: %s recorded %.*s;  Channel: %s\n"
 		 "## SWU course (%zu %zu-sec pages)\n"
 		 "#Page\tSWU\n",
-		 _using_F().subject(), _using_F().session(), _using_F().episode(),
+		 _using_F().name.c_str(), _using_F().session(), _using_F().episode(),
 		 (int)strlen(asctime_)-1, asctime_,
 		 _using_F().channel_by_id(_using_sig_no),
 		 pages(), Pp.pagesize);
diff --git a/src/tools/edfcat.cc b/src/tools/edfcat.cc
index 62b814e..108810f 100644
--- a/src/tools/edfcat.cc
+++ b/src/tools/edfcat.cc
@@ -390,7 +390,7 @@ exec_prune( const SOperation::SObject& obj)
 			     F.data_record_size,
 			     F.n_data_records);
 
-	G.set_subject( F.subject());
+	G.set_patient_id( F.patient_id());
 	string tmp = F.recording_id();
 	G.set_recording_id( tmp.c_str());
 	tmp = F.comment();
diff --git a/src/ui/mw/admit-one.cc b/src/ui/mw/admit-one.cc
index db595fa..9f597b5 100644
--- a/src/ui/mw/admit-one.cc
+++ b/src/ui/mw/admit-one.cc
@@ -36,7 +36,7 @@ dnd_maybe_admit_one( const char* fname)
 
 		snprintf_buf( "File: <i>%s</i>", fname);
 		gtk_label_set_markup( lEdfImportCaption, __buf__);
-		snprintf_buf( "<b>%s</b>", (*Fp)().subject());
+		snprintf_buf( "<b>%s</b> (%s)", (*Fp)().id.c_str(), (*Fp)().name.c_str());
 		gtk_label_set_markup( lEdfImportSubject, __buf__);
 
 	} catch ( exception& ex) {
@@ -55,7 +55,7 @@ dnd_maybe_admit_one( const char* fname)
 	try {
 		gtk_entry_set_text(
 			eEdfImportGroupEntry,
-			ED->group_of( (*Fp)().subject()));
+			ED->group_of( (*Fp)().id.c_str()));
 		gtk_widget_set_sensitive( (GtkWidget*)eEdfImportGroup, FALSE);
 	} catch (invalid_argument ex) {
 		for ( auto &i : AghGG ) {
@@ -109,7 +109,7 @@ dnd_maybe_admit_one( const char* fname)
 		dest_path = g_strdup_printf( "%s/%s/%s/%s",
 					     ED->session_dir().c_str(),
 					     selected_group,
-					     (*Fp)().subject(),
+					     (*Fp)().id.c_str(),
 					     selected_session);
 		dest = g_strdup_printf( "%s/%s.edf",
 					dest_path,
diff --git a/src/ui/mw/construct.cc b/src/ui/mw/construct.cc
index 72d2658..ef0f36b 100644
--- a/src/ui/mw/construct.cc
+++ b/src/ui/mw/construct.cc
@@ -639,7 +639,6 @@ SExpDesignUIWidgets ()
 			  (GCallback)gtk_tree_view_expand_all,
 			  NULL);
 	G_CONNECT_2 (tvGlobalAnnotations, row, activated);
-	FAFA;
 
 	int c = 0;
 	for ( auto column : {"Recording", "Page(s)", "Channel", "Type", "Label"} ) {

-- 
Sleep experiment manager



More information about the debian-med-commit mailing list