[med-svn] [SCM] aghermann branch, master, updated. 99b1d5a023eee9df74b0e0d6f894516fc79435ad

Sun Jul 7 23:04:06 UTC 2013

The following commit has been merged in the master branch:
commit cf860c0627565c8df59a3ee8fca9d82b77f2eeb0
Author: Andrei Zavada <johnhommer at gmail.com>
Date:   Thu Jun 27 19:56:51 2013 +0300

    WIP

diff --git a/src/libsigfile/edf.cc b/src/libsigfile/edf.cc
index 657d36d..e237229 100644
--- a/src/libsigfile/edf.cc
+++ b/src/libsigfile/edf.cc
@@ -365,6 +365,7 @@ CEDFFile (CEDFFile&& rv)
 	_mmapping     = rv._mmapping;
 	_fd           = rv._fd;
 
+	rv._fd = -1; // for propriety's sake
 	rv._mmapping = (void*)-1;  // will prevent munmap in ~CEDFFile()
 }
 
diff --git a/src/libsigfile/tsv.cc b/src/libsigfile/tsv.cc
index c6283a7..c770a01 100644
--- a/src/libsigfile/tsv.cc
+++ b/src/libsigfile/tsv.cc
@@ -39,9 +39,9 @@ set_start_time( time_t s)
 {
 	char b[9];
 	strftime( b, 9, "%d.%m.%y", localtime(&s));
-	header.recording_date.assign( b);
+	_recording_date.assign( b);
 	strftime( b, 9, "%H.%M.%s", localtime(&s));
-	header.recording_time.assign( b);
+	_recording_time.assign( b);
 
 	return 0;
 }
@@ -67,12 +67,15 @@ CTSVFile (const string& fname_, const int flags_)
 	if ( _parse_header() ) {  // creates channels list
 		if ( not (flags_ & sigfile::CSource::no_field_consistency_check) ) {
 			close( _fd);
+			_fd = -1;
 			throw invalid_argument (explain_status(_status)); // _status set in _parse_header()
 		} else
 			fprintf( stderr, "CTSVFile::CTSVFile(\"%s\") Warning: parse header failed, but proceeding anyway\n", fname_.c_str());
 	}
 	// channels now available
 
+	_read_data();
+
       // ancillary files:
 	if ( not (flags_ & sigfile::CSource::no_ancillary_files) )
 		load_ancillary_files();
@@ -97,8 +100,6 @@ CTSVFile (const string& fname_, const TSubtype subtype_, const int flags_,
 	}
 
       // fill out some essential header fields
-	resize_seconds( recording_time_);
-
 	_subject = {"Fafa_1", "Mr. Fafa"};
 	set_recording_id( "Zzz");
 	set_comment( fname_);
@@ -106,85 +107,40 @@ CTSVFile (const string& fname_, const TSubtype subtype_, const int flags_,
 
 	size_t hi = 0;
 	for ( auto& h : channels_ ) {
-		auto& H = channels[hi];
+		auto& H = channels[hi++];
 		H.ucd = h;
 	}
-}
-
-
 
+	resize_seconds( recording_time_);
+}
 
-// uncomment on demand (also un-dnl AC_CHECK_FUNCS(mremap,,) in configure.ac)
-/*
-size_t
-CTSVFile::
-resize( const size_t new_records)
-{
-	size_t total_samples_per_record = 0;
-	for ( auto& H : channels )
-		total_samples_per_record += H.samples_per_record; // total samplerate
-	size_t old_records
-		= n_data_records;
-	auto new_fsize
-		= header_length + 2 * total_samples_per_record * (n_data_records = new_records);
-
-#if !HAVE_MREMAP
-	_mmapping =
-		mremap( _mmapping,
-			_fsize,
-			new_fsize,
-			0|MREMAP_MAYMOVE);
-#else
-	void *_m2 =
-		mmap( NULL,
-		      new_fsize,
-		      PROT_READ | PROT_WRITE, MAP_SHARED,
-		      _fd,
-		      0);
-	memmove( _m2, _mmapping, _fsize);
-	munmap( _mmapping, _fsize);
-	_mmapping = _m2;
-#endif
-
-	if ( _mmapping == (void*)-1 ) {
-		close( _fd);
-		throw length_error ("CTSVFile::resize(): mmap error");
-	}
 
-	_fsize = new_fsize;
-	return old_records;
-}
 
-*/
 
 CTSVFile::
 CTSVFile (CTSVFile&& rv)
       : CSource (move(rv))
 {
-	header = rv.header; // no need to re-layout as we don't mremap
-	n_data_records   = rv.n_data_records;
-	data_record_size = rv.data_record_size;
+	swap( _patient_id,   rv._patient_id);
+	swap( _recording_id, rv._recording_id);
+	swap( _recording_date, rv._recording_date);
+	swap( _recording_time, rv._recording_time);
+	swap( _episode,    rv._episode);
+	swap( _session,    rv._session);
+	swap( _comment, rv._comment);
+
+	swap( metadata, rv.metadata);
 
 	_subtype    = rv._subtype;
 	_start_time = rv._start_time;
 	_end_time   = rv._end_time;
 
-	swap( _patient_id, rv._patient_id);
-	swap( _episode,    rv._episode);
-	swap( _session,    rv._session);
 
 	swap( channels, rv.channels);
 	swap( common_annotations, rv.common_annotations);
 
-	header_length = rv.header_length;
-	_fsize        = rv._fsize;
-	_fld_pos      = rv._fld_pos;
-	_total_samples_per_record =
-		       rv._total_samples_per_record;
-	_mmapping     = rv._mmapping;
-	_fd           = rv._fd;
-
-	rv._mmapping = (void*)-1;  // will prevent munmap in ~CTSVFile()
+	_fd = rv._fd;
+	rv._fd = -1;
 }
 
 
@@ -208,273 +164,47 @@ CTSVFile::
 _parse_header()
 {
 	size_t	n_channels;
-	try {
-		_fld_pos = 0;
-		_get_next_field( header.version_number,   8);
-		_get_next_field( header.patient_id,      80);
-		_get_next_field( header.recording_id,    80);
-		_get_next_field( header.recording_date,   8);
-		_get_next_field( header.recording_time,   8);
-		_get_next_field( header.header_length,    8);
-		_get_next_field( header.reserved,        44);
-		_get_next_field( header.n_data_records,   8);
-		_get_next_field( header.data_record_size, 8);
-		_get_next_field( header.n_channels,       4);
-
-		if ( strncmp( header.version_number, version_string, 8) ) {
-			_status |= (bad_version | inoperable);
-			return -2;
-		}
-
-		_subtype =
-			(strncasecmp( header.reserved, "edf+c", 5) == 0)
-			? edfplus_c
-			: (strncasecmp( header.reserved, "edf+d", 5) == 0)
-			? edfplus_d
-			: edf;
-
-		size_t	header_length;
-
-		header_length = n_data_records = data_record_size = n_channels = 0;
-		sscanf( header.header_length,    "%8zu", &header_length);
-		sscanf( header.n_data_records,   "%8zu", &n_data_records);
-		sscanf( header.data_record_size, "%8lg", &data_record_size); // edf+ supports fractions
-		sscanf( header.n_channels,       "%4zu", &n_channels);
-
-		if ( !header_length || !n_data_records || !data_record_size || !n_channels ) {
-			_status |= bad_numfld;
-			if ( not (flags() & no_field_consistency_check) )
-				return -2;
-		}
-		if ( n_channels == 0 )  {
-			_status |= inoperable;
-			return -2;
-		}
-
-		_patient_id = trim( string (header.patient_id, 80));
-
-	      // sub-parse patient_id into SSubjectId struct
-		{
-			auto subfields = tokens( _patient_id, " ");
-			if ( unlikely (_patient_id.empty()) ) {
-				_status |= missing_patient_id;
-			} else if ( subfields.size() < 4 ) {
-				_subject.id = subfields.front();
-				_status |= nonconforming_patient_id;
-			} else {
-				if ( subfields.size() > 4 )
-					_status |= extra_patientid_subfields;
-				auto i = subfields.begin();
-				_subject.id = *i++;
-				_subject.gender = agh::SSubjectId::char_to_gender((*i++)[0]);
-				_subject.dob = agh::SSubjectId::str_to_dob(*i++);
-				_subject.name = join( tokens(*i++, "_"), " ");
-				if ( not _subject.valid() )
-					_status |= invalid_subject_details;
-			}
-		}
+	_subtype = TSubtype::tsv;
 
-	      // deal with episode and session
-		{
-		      // (a) parsed from RecordingID_raw
-			char int_session[81], int_episode[81];
-			string rec_id_isolated (trim( string (header.recording_id, 80)));
+	// deal with episode and session
+	{
+		// (a) parsed from RecordingID_raw
+		char int_session[81], int_episode[81];
+		string rec_id_isolated (trim( _recording_id));
 #define T "%80[-a-zA-Z0-9 _]"
-			if ( sscanf( rec_id_isolated.c_str(), T ", " T,     int_episode, int_session) == 2 ||
-			     sscanf( rec_id_isolated.c_str(), T ": " T,     int_session, int_episode) == 2 ||
-			     sscanf( rec_id_isolated.c_str(), T "/"  T,     int_session, int_episode) == 2 ||
-			     sscanf( rec_id_isolated.c_str(), T " (" T ")", int_session, int_episode) == 2 )
-				;
-			else
-				_status |= (nosession | noepisode);
+		if ( sscanf( rec_id_isolated.c_str(), T ", " T,     int_episode, int_session) == 2 ||
+		     sscanf( rec_id_isolated.c_str(), T ": " T,     int_session, int_episode) == 2 ||
+		     sscanf( rec_id_isolated.c_str(), T "/"  T,     int_session, int_episode) == 2 ||
+		     sscanf( rec_id_isolated.c_str(), T " (" T ")", int_session, int_episode) == 2 )
+			;
+		else
+			_status |= (nosession | noepisode);
 #undef T
-		      // (b) identified from file name
-			string fn_episode;
-			size_t basename_start = _filename.rfind( '/');
-			fn_episode =
-				_filename.substr(
-					basename_start + 1,
-					_filename.size() - basename_start - 4 /* strlen(".edf") */ - 1);
-			// chip away '-1' if present
-			if ( fn_episode.size() >= 3 /* strlen("a-1") */ ) {
-				size_t sz = fn_episode.size();
-				if ( fn_episode[sz-2] == '-' && isdigit(fn_episode[sz-1]) )
-					fn_episode.erase( sz-2, 2);
-			}
-
-			if ( _status & noepisode ) { // (a) failed
-				_episode.assign( fn_episode);    // use RecordingID_raw as Session
-				_session.assign( rec_id_isolated);
-			} else {
-				_episode.assign( int_episode);
-				_session.assign( int_session);
-			}
-		}
-
-		{
-			struct tm ts;
-			char *p;
-			//memset( &ts, 0, sizeof(struct tm));
-			ts.tm_isdst = 0;  // importantly
-			string tmp (header.recording_date, 8);
-			p = strptime( tmp.c_str(), "%d.%m.%y", &ts);
-			if ( p == NULL || *p != '\0' ) {
-				_status |= date_unparsable;
-				if ( not (flags() & no_field_consistency_check) )
-					return -2;
-			}
-			tmp = {string (header.recording_time, 8)};
-			p = strptime( tmp.c_str(), "%H.%M.%S", &ts);
-			if ( p == NULL || *p != '\0' ) {
-				_status |= time_unparsable;
-				if ( not (flags() & no_field_consistency_check) )
-					return -2;
-			}
-
-			// if ( ts.tm_year < 50 )
-			// 	ts.tm_year += 100;
-			_start_time = mktime( &ts);
-			if ( _start_time == (time_t)-1 )
-				_status |= (date_unparsable|time_unparsable);
-			else
-				_end_time = _start_time + n_data_records * data_record_size;
+		// (b) identified from file name
+		string fn_episode;
+		size_t basename_start = _filename.rfind( '/');
+		fn_episode =
+			_filename.substr(
+				basename_start + 1,
+				_filename.size() - basename_start - 4 /* strlen(".edf") */ - 1);
+		// chip away '-1' if present
+		if ( fn_episode.size() >= 3 /* strlen("a-1") */ ) {
+			size_t sz = fn_episode.size();
+			if ( fn_episode[sz-2] == '-' && isdigit(fn_episode[sz-1]) )
+				fn_episode.erase( sz-2, 2);
 		}
 
-		if ( n_channels > max_channels ) {
-			_status |= bad_numfld;
-			if ( not (flags() & no_field_consistency_check) )
-				return -2;
+		if ( _status & noepisode ) { // (a) failed
+			_episode.assign( fn_episode);    // use RecordingID_raw as Session
+			_session.assign( rec_id_isolated);
 		} else {
-			channels.resize( n_channels);
-
-		      // determine & validate signal types
-			for ( auto &H : channels ) {
-				_get_next_field( H.header.label, 16);
-				string isolated_label = trim( string (H.header.label, 16));
-
-				if ( isolated_label == sigfile::edf_annotations_label )
-					H.ucd = {sigfile::SChannel::TType::embedded_annotation, 0};
-				else {
-					auto tt = agh::str::tokens( isolated_label, " ");
-					// parse legacy pre 0.9 specs ("EEG F3" etc)
-					if ( tt.size() > 1 ) {
-						string suggested_type = tt.front();
-						H.ucd = {(tt.pop_front(), agh::str::join( tt, " "))};
-						if ( suggested_type != H.ucd.type_s() )
-							_status |= recognised_channel_conflicting_type;
-					} else {
-						H.ucd = sigfile::SChannel (isolated_label);
-
-						if ( H.ucd.type() == sigfile::SChannel::TType::eeg &&
-						     H.ucd.idx()  == sigfile::EEG::custom )
-							_status |= non1020_channel;
-						if ( H.ucd.type() == SChannel::SChannel::TType::other )
-							_status |= nonkemp_signaltype;
-					}
-				}
-			}
-			for ( auto &H : channels )
-				H.transducer_type =
-					trim( string (_get_next_field( H.header.transducer_type, 80), 80));
-
-			for ( auto &H : channels )
-				H.physical_dim =
-					trim( string (_get_next_field( H.header.physical_dim, 8), 8));
-
-			for ( auto &H : channels ) {
-				_get_next_field( H.header.physical_min, 8);
-				if ( H.ucd.type() == sigfile::SChannel::TType::embedded_annotation )
-					continue;
-				if ( sscanf( H.header.physical_min, "%8lg",
-					     &H.physical_min) != 1 ) {
-					_status |= bad_numfld;
-					if ( not (flags() & no_field_consistency_check) )
-						return -2;
-				}
-			}
-			for ( auto &H : channels ) {
-				_get_next_field( H.header.physical_max, 8);
-				if ( H.ucd.type() == sigfile::SChannel::TType::embedded_annotation )
-					continue;
-				if ( sscanf( H.header.physical_max, "%8lg",
-					     &H.physical_max) != 1 ) {
-					_status |= bad_numfld;
-					if ( not (flags() & no_field_consistency_check) )
-						return -2;
-				}
-			}
-
-			for ( auto &H : channels ) {
-				_get_next_field( H.header.digital_min, 8);
-				if ( H.ucd.type() == sigfile::SChannel::TType::embedded_annotation )
-					continue;
-				if ( sscanf( H.header.digital_min, "%8d",
-					     &H.digital_min) != 1 ) {
-					_status |= bad_numfld;
-					if ( not (flags() & no_field_consistency_check) )
-						return -2;
-				}
-			}
-			for ( auto &H : channels ) {
-				_get_next_field( H.header.digital_max, 8);
-				if ( H.ucd.type() == sigfile::SChannel::TType::embedded_annotation )
-					continue;
-				if ( sscanf( H.header.digital_max, "%8d",
-					     &H.digital_max) != 1 ) {
-					_status |= bad_numfld;
-					if ( not (flags() & no_field_consistency_check) )
-						return -2;
-				}
-			}
-
-			for ( auto &H : channels )
-				H.filtering_info.assign(
-					trim( string (_get_next_field( H.header.filtering_info, 80), 80)));
-
-			for ( auto &H : channels ) {
-				char *tail;
-				string t {trim( string (_get_next_field( H.header.samples_per_record, 8), 8))};
-				H.samples_per_record =
-					strtoul( t.c_str(), &tail, 10);
-				if ( tail == NULL || *tail != '\0' ) {
-					_status |= bad_numfld;
-					if ( not (flags() & no_field_consistency_check) )
-						return -2;
-				}
-			}
-
-			for ( auto &H : channels )
-				H.reserved.assign(
-					trim( string (_get_next_field( H.header.reserved, 32), 32)));
+			_episode.assign( int_episode);
+			_session.assign( int_session);
 		}
-	} catch (TStatus ex) {
-		return -1;
-	} catch (invalid_argument ex) {
-		_status |= bad_numfld;
-		if ( not (flags() & no_field_consistency_check) )
-			return -3;
 	}
 
-      // calculate gain
-	for ( auto &H : channels )
-		if ( H.ucd.type() != sigfile::SChannel::TType::embedded_annotation ) {
-			if ( H.physical_max <= H.physical_min ||
-			     H.digital_max  <= H.digital_min  )
-				_status |= nogain;
-			H.scale =
-				(H.physical_max - H.physical_min) /
-				(H.digital_max  - H.digital_min );
-		}
 
-
-      // convenience field
-	_total_samples_per_record = 0;
-	for ( auto &H : channels ) {
-		H._at = _total_samples_per_record;
-		_total_samples_per_record += H.samples_per_record;
-	}
-
-      // are channels unique?
+	// are channels unique?
 	for ( auto &H : channels )
 		for ( auto &J : channels ) {
 			if ( &J != &H && J.ucd == H.ucd ) {
@@ -488,77 +218,11 @@ outer_break:
 }
 
 
-
-
-
-
-
-
 int
 CTSVFile::
-_extract_embedded_annotations()
+_read_data()
 {
-	auto S = find( channels.begin(), channels.end(), sigfile::edf_annotations_label);
-	if ( S == channels.end() )
-		return 0;
-	auto& AH = *S;
-
-	size_t alen = AH.samples_per_record * 2;
-
-	for ( size_t r = 0; r < n_data_records; ++r ) {
-		char   *this_a =
-			(char*)_mmapping + header_length
-			+ r * _total_samples_per_record * 2	// full records before
-			+ AH._at * 2;				// offset to our samples
-
-		if ( (this_a[0] == '+'   || this_a[0] == '-') &&
-		     (isdigit(this_a[1]) || this_a[1] == '.') ) {
-
-			string	abuf (this_a, alen); // NULL-terminated, possibly at pos <alen
-
-			float	offset,
-				duration;
-			const char
-				*offset_p = abuf.c_str(),
-				*duration_p,
-				*tals_p;
-
-			while ( (tals_p = strchr( offset_p, 20)) ) {
-				// determine if we have duration
-				try {
-					if ( (duration = 0.,
-					      (duration_p = strchr( offset_p, 21))) &&
-					     duration_p < tals_p ) {
-						offset = stof( string (offset_p, duration_p - offset_p));
-						if ( *duration_p != 20 )
-							duration = stof( string (duration_p, tals_p - duration_p));
-					} else {
-						offset = stof( string (offset_p, tals_p - offset_p));
-					}
-				} catch (...) {
-					break;
-				}
-
-				if ( offset_p == this_a && *tals_p == 20 ) // no TALs, it's an explicit record timestamp, not an annotation
-					_record_offsets.push_back( offset);
-
-				else {
-					auto tals = tokens_trimmed( tals_p, (char)20);
-					for ( auto& t : tals )
-						if ( not t.empty() ) {
-							common_annotations.emplace_back(
-								offset,
-								offset + duration,
-								t,
-								SAnnotation::TType::plain);
-						}
-				}
-
-				offset_p = tals_p + strlen(tals_p) + 1;
-			}
-		}
-	}
-
+	
 	return 0;
 }
 
@@ -568,6 +232,7 @@ _extract_embedded_annotations()
 
 
 
+
 string
 CTSVFile::
 details( const int which) const
@@ -600,8 +265,8 @@ details( const int which) const
 			  filename(),
 			  subtype_s(),
 			  patient_id(),
-			  trim( string (header.recording_id, 80)).c_str(),
-			  trim( string (header.recording_date, 8)).c_str(),
+			  recording_id.c_str(),
+			  recording_date, 8)).c_str(),
 			  trim( string (header.recording_time, 8)).c_str(),
 			  channels.size(),
 			  n_data_records,
diff --git a/src/libsigfile/tsv.hh b/src/libsigfile/tsv.hh
index c046975..3812e3d 100644
--- a/src/libsigfile/tsv.hh
+++ b/src/libsigfile/tsv.hh
@@ -90,15 +90,15 @@ class CTSVFile
 
 	// identification
 	const char* patient_id() const
-		{ return header.patient_id.c_str(); }
+		{ return _patient_id.c_str(); }
 	const char* recording_id() const
-		{ return header.recording_id.c_str(); }
+		{ return _recording_id.c_str(); }
 	const char* comment() const
-		{ return header.comment.c_str(); }
+		{ return _comment.c_str(); }
 	const char* episode() const
-		{ return header._episode.c_str(); }
+		{ return _episode.c_str(); }
 	const char* session() const
-		{ return header._session.c_str(); }
+		{ return _session.c_str(); }
 
 	// times
 	time_t start_time() const
@@ -111,27 +111,27 @@ class CTSVFile
 	// setters
 	int set_patient_id( const string& s)
 		{
-			header.patient_id = s;
+			_patient_id = s;
 			return 0;
 		}
 	int set_recording_id( const string& s)
 		{
-			header.recording_id = s;
+			_recording_id = s;
 			return 0;
 		}
 	int set_episode( const string& s) // assigning to _episode or _session directly won't have a lasting effect; think again.
 		{
-			header._episode = s;
+			_episode = s;
 			return 0;
 		}
 	int set_session( const string& s)
 		{
-			header._session = s;
+			_session = s;
 			return 0;
 		}
 	int set_comment( const string& s)
 		{
-			header.comment = s;
+			_comment = s;
 			return 0;
 		}
 
@@ -267,19 +267,6 @@ class CTSVFile
 
 	sigproc::TWinType af_dampen_window_type; // master copy
 
-      // header
-	struct SHeader {
-		string	patient_id,
-			recording_id,
-			recording_date,
-			recording_time,
-			comment;
-
-		string	_episode,
-			_session;
-	};
-	SHeader header;
-
 	map<string,string>
 		metadata;
 
@@ -373,6 +360,16 @@ class CTSVFile
 	static string explain_status( int);
 
     private:
+      // header... why is it private?
+	string	_patient_id,
+		_recording_id,
+		_recording_date,
+		_recording_time,
+		_comment;
+
+	string	_episode,
+		_session;
+
 	TSubtype _subtype;
 
 	size_t	_samplerate;
@@ -382,6 +379,7 @@ class CTSVFile
 	int	_fd;
 
 	int _parse_header();
+	int _read_data();
 };
 
 

-- 
Sleep experiment manager