[med-svn] [SCM] aghermann branch, master, updated. 3603e7ef1289f9ec79a3eb415b6d141166e9498a
Andrei Zavada
johnhommer at gmail.com
Sun Jul 14 22:28:25 UTC 2013
The following commit has been merged in the master branch:
commit da05a9c9b36279b5d82a3e0121bbf1199e6b323b
Author: Andrei Zavada <johnhommer at gmail.com>
Date: Mon Jul 15 01:24:16 2013 +0300
CTSVFile now functional
diff --git a/src/libsigfile/tsv.cc b/src/libsigfile/tsv.cc
index 2c11920..c37ba07 100644
--- a/src/libsigfile/tsv.cc
+++ b/src/libsigfile/tsv.cc
@@ -72,7 +72,8 @@ CTSVFile (const string& fname_, const int flags_)
}
// channels now available
- _read_data();
+ if ( _read_data() )
+ throw invalid_argument (explain_status(_status)); // _status set in _parse_header()
if ( not (flags_ & CSource::no_ancillary_files) )
load_ancillary_files();
@@ -88,12 +89,13 @@ CTSVFile (const string& fname_, const TSubtype subtype_, const int flags_,
const double recording_time_)
: CSource (fname_, flags_),
_subtype (subtype_),
- _samplerate (samplerate_)
+ _samplerate (samplerate_),
+ _line0 (nullptr)
{
_f = fopen( fname_.c_str(), "r");
if ( !_f ) {
fprintf( stderr, "CTSVFile::CTSVFile(\"%s\"): Failed to open file for writing\n", fname_.c_str());
- throw invalid_argument (explain_status(_status |= TStatus::sysfail));
+ throw invalid_argument (explain_status(_status |= CSource::TStatus::sysfail));
}
// fill out some essential header fields
@@ -138,6 +140,11 @@ CTSVFile (CTSVFile&& rv)
_f = rv._f;
rv._f = nullptr;
+
+ _line0_mallocked_bytes = rv._line0_mallocked_bytes;
+ _line0 = rv._line0;
+ rv._line0 = nullptr;
+
}
@@ -146,6 +153,8 @@ CTSVFile::
{
if ( not (flags() & sigfile::CSource::no_ancillary_files) )
save_ancillary_files();
+ if ( _line0 )
+ free( (void*)_line0);
}
@@ -156,29 +165,54 @@ int
CTSVFile::
_parse_header()
{
- size_t n_channels;
-
// 1. read metadata
-
regex_t RE;
- assert (0 == regcomp( &RE, "^#\\W*(\\w+)\\W*(:|=)\\", REG_EXTENDED));
- regmatch_t M[1+2];
-
- size_t n = 4096;
- char *line = (char*)malloc( n);
- while ( getline( &line, &n, _f) > 0 ) {
- if ( regexec( &RE, line, 1+2, M, 0) == 0 ) {
- metadata[string (line, M[1].rm_so, M[1].rm_eo)] =
- string (line, M[2].rm_so, M[2].rm_eo);
- printf( "matched metadata [%s] = %s\n", string (line, M[1].rm_so, M[1].rm_eo).c_str(), string (line, M[2].rm_so, M[2].rm_eo).c_str());
- } else
- if ( line[0] != '#' )
- break; // end of header
+ assert (0 == regcomp( &RE, "^#\\W*([a-zA-Z_][a-zA-Z_0-9]*)\\W*(:|=)\\W*(.+)\\W*\n", REG_EXTENDED));
+ regmatch_t M[1+1+2];
+
+ _line0_mallocked_bytes = 4096;
+ _line0 = (char*)malloc( _line0_mallocked_bytes);
+
+ while ( getline( &_line0, &_line0_mallocked_bytes, _f) != -1 ) {
+ if ( _line0[0] == '\n' )
+ continue;
+ if ( regexec( &RE, _line0, 1+1+2, M, 0) == 0 ) {
+ string K = agh::str::trim( string (_line0, M[1].rm_so, (M[1].rm_eo - M[1].rm_so))),
+ V = agh::str::trim( string (_line0, M[3].rm_so, (M[3].rm_eo - M[3].rm_so)));
+ metadata[K] = V;
+ } else if ( _line0[0] != '#' )
+ break; // end of header
}
- free( (void*)line);
// 2. pick essential bits
-
+ if ( metadata.find( "recording_id") == metadata.end() ) {
+ fprintf( stderr, "No session/episode in header\n");
+ _status |= (nosession | noepisode);
+ return -1;
+ }
+ _recording_id = metadata["recording_id"];
+
+ if ( metadata.find( "patient_id") == metadata.end() ) {
+ fprintf( stderr, "No patient_id in header\n");
+ _status |= (nosession | noepisode);
+ return -1;
+ }
+ _patient_id = metadata["patient_id"];
+
+ if ( metadata.find( "samplerate") == metadata.end() ||
+ (_samplerate = stoi( metadata["samplerate"])) > 2048 ) {
+ fprintf( stderr, "Samplerate missing or too high in header\n");
+ _status |= bad_header;
+ return -1;
+ }
+
+ if ( metadata.find( "channels") == metadata.end() ) {
+ fprintf( stderr, "No channels in header\n");
+ _status |= bad_header;
+ return -1;
+ }
+ for ( const auto& h : agh::str::tokens( metadata["channels"], " ,;\t") )
+ channels.emplace_back( h);
// 3. deal with episode and session
int parsed_with_issues;
@@ -187,7 +221,7 @@ _parse_header()
if ( parsed_with_issues )
_status |= (nosession | noepisode);
- // are channels unique?
+ // 4. are channels unique?
for ( auto &H : channels )
for ( auto &J : channels ) {
if ( &J != &H && J.ucd == H.ucd ) {
@@ -197,8 +231,12 @@ _parse_header()
}
outer_break:
- // 4. read one line of channel data, figure subtype and number of channels
-
+ // 4. sample one line of channel data
+ if ( agh::str::tokens( _line0, "\t;, ").size() != channels.size() ) {
+ fprintf( stderr, "Number of channels declared in header (%zu) different from number of columns of data\n", channels.size());
+ _status |= bad_channel_count;
+ return -1;
+ }
return 0;
}
@@ -208,7 +246,37 @@ int
CTSVFile::
_read_data()
{
-
+ vector<vector<double>> c2 (channels.size());
+
+ // _line0 contains the first row of data already (it is the
+ // first line not beginning with a #)
+ size_t r, ll = 0;
+ do {
+ for ( r = 0; r < channels.size(); ++r ) {
+ double x;
+ if ( 1 != fscanf( _f, "%lg", &x) )
+ goto outer_break;
+ c2[r].push_back( x);
+ }
+ ++ll;
+ } while ( getline( &_line0, &_line0_mallocked_bytes, _f) > 0 );
+
+outer_break:
+
+ if ( r != 0 && r != channels.size() ) {
+ fprintf( stderr, "Number of data read (%zu) not a multiple of channel count (%zu)\n", r, channels.size());
+ _status |= bad_channel_count;
+ return -1;
+ }
+
+ printf( "read %zu samples in %zu channels\n", ll/channels.size(), channels.size());
+ // vector -> valarray
+ for ( size_t h = 0; h < channels.size(); ++h ) {
+ channels[h].data.resize( ll);
+ for ( size_t i = 0; i < ll; ++i )
+ channels[h].data[i] = c2[h][i];
+ }
+
return 0;
}
@@ -249,37 +317,33 @@ CTSVFile::
details( const int which) const
{
ostringstream recv;
- if ( _status & bad_header )
- recv << "Bad header, or no file\n";
- else {
- char b[20];
- recv << agh::str::sasprintf(
- "File\t: %s\n"
- " subtype\t: %s\n"
- " PatientID\t: %s\n"
- " RecordingID\t: %s\n"
- " Start time\t: %s\n"
- " Duration\t: %s\n"
- " # of channels\t: %zu\n"
- " Sample rate\t: %zu\n",
- filename(),
- subtype_s(),
- patient_id(),
- recording_id(),
- (strftime( b, 20, "%F %T", localtime(&_start_time)), b),
- agh::str::dhms( recording_time()).c_str(),
- channels.size(),
- _samplerate);
-
- if ( which & with_channels ) {
- size_t i = 0;
- for ( auto &H : channels )
- recv << agh::str::sasprintf(
- " Channel %zu:\n"
- " Label\t: %s\n",
- ++i,
- H.ucd.name());
- }
+ char b[20];
+ recv << agh::str::sasprintf(
+ "File\t: %s\n"
+ " subtype\t: %s\n"
+ " PatientID\t: %s\n"
+ " RecordingID\t: %s\n"
+ " Start time\t: %s\n"
+ " Duration\t: %s\n"
+ " # of channels\t: %zu\n"
+ " Sample rate\t: %zu\n",
+ filename(),
+ subtype_s(),
+ patient_id(),
+ recording_id(),
+ (strftime( b, 20, "%F %T", localtime(&_start_time)), b),
+ agh::str::dhms( recording_time()).c_str(),
+ channels.size(),
+ _samplerate);
+
+ if ( which & with_channels ) {
+ size_t i = 0;
+ for ( auto &H : channels )
+ recv << agh::str::sasprintf(
+ " Channel %zu:\n"
+ " Label\t: %s\n",
+ ++i,
+ H.ucd.name());
}
return recv.str();
@@ -324,6 +388,8 @@ explain_status( const int status)
recv.emplace_back( string("* Number of channels grearter than ") + to_string(max_channels));
if ( status & extra_patientid_subfields )
recv.emplace_back( "* Extra subfields in PatientId");
+ if ( status & bad_channel_count )
+ recv.emplace_back( "* Number of channels declared in header different from number of columns of data");
return join(recv, "\n");
}
diff --git a/src/libsigfile/tsv.hh b/src/libsigfile/tsv.hh
index a80edb6..b996238 100644
--- a/src/libsigfile/tsv.hh
+++ b/src/libsigfile/tsv.hh
@@ -107,7 +107,7 @@ class CTSVFile
time_t end_time() const
{ return _end_time; }
double recording_time() const // in seconds
- { return channels.front().data.size() * _samplerate; } // all channels have the same sr, obviously
+ { return (double)channels.front().data.size() / _samplerate; } // all channels have the same sr, obviously
// setters
int set_patient_id( const string& s)
@@ -260,6 +260,10 @@ class CTSVFile
// channels
struct SSignal {
+ SSignal (const SChannel& ch)
+ : ucd (ch)
+ {}
+
SChannel
ucd; // Universal Channel Designation, епта
@@ -348,6 +352,8 @@ class CTSVFile
_end_time;
FILE *_f;
+ char *_line0;
+ size_t _line0_mallocked_bytes;
int _parse_header();
int _read_data();
--
Sleep experiment manager
More information about the debian-med-commit mailing list