From e3f6527274d1f68c01571f3019f1400c6658920a Mon Sep 17 00:00:00 2001 From: John Poole Date: Mon, 6 Apr 2026 11:36:29 -0700 Subject: [PATCH] Revised Database to include 2 additional fields suggested by ChatGPT --- .../scripts/import_satellite_logs.pl | 415 +++++++++--------- .../sql/satellite_data_schema.sql | 237 +++++----- 2 files changed, 323 insertions(+), 329 deletions(-) diff --git a/exercises/18_GPS_Field_QA/scripts/import_satellite_logs.pl b/exercises/18_GPS_Field_QA/scripts/import_satellite_logs.pl index f832f3b..4481210 100644 --- a/exercises/18_GPS_Field_QA/scripts/import_satellite_logs.pl +++ b/exercises/18_GPS_Field_QA/scripts/import_satellite_logs.pl @@ -13,8 +13,10 @@ # Notes: # * Imports one or more CSV files into tables logs and log_data. # * Preserves all leading hash-prefixed header lines in logs.raw_header_text. -# * Uses the file's own CSV header row when present; otherwise falls back to -# the expected project header defined in this script. +# * Uses the file's own CSV header row when present. +# * When no CSV header row is present, it falls back by column count: +# - legacy schema without sample_seq/ms_since_run_start +# - enhanced schema with sample_seq/ms_since_run_start use strict; use warnings; @@ -26,7 +28,7 @@ use File::Basename qw(basename); use Getopt::Long qw(GetOptions); use Text::CSV_XS; -my $DEFAULT_HEADER = join ',', qw( +my @LEGACY_COLUMNS = qw( record_type timestamp_utc board_id gnss_chip firmware_exercise_name firmware_version boot_timestamp_utc run_id fix_type fix_dimension sats_in_view sat_seen sats_used hdop vdop pdop latitude longitude altitude_m speed_mps course_deg pps_seen @@ -36,6 +38,19 @@ sat_talker sat_constellation sat_prn sat_elevation_deg sat_azimuth_deg sat_snr sat_used_in_solution ); +my @ENHANCED_COLUMNS = qw( +record_type timestamp_utc board_id gnss_chip firmware_exercise_name firmware_version +boot_timestamp_utc run_id sample_seq ms_since_run_start fix_type fix_dimension +sats_in_view sat_seen sats_used hdop vdop pdop latitude longitude altitude_m +speed_mps course_deg pps_seen quality_class gps_count galileo_count glonass_count +beidou_count navic_count qzss_count sbas_count mean_cn0 max_cn0 age_of_fix_ms +ttff_ms longest_no_fix_ms sat_talker sat_constellation sat_prn sat_elevation_deg +sat_azimuth_deg sat_snr sat_used_in_solution +); + +my $LEGACY_HEADER = join ',', @LEGACY_COLUMNS; +my $ENHANCED_HEADER = join ',', @ENHANCED_COLUMNS; + my %opt = ( dbname => 'satellite_data', host => 'localhost', @@ -60,20 +75,20 @@ if ($opt{help} || !@ARGV) { exit 0; } -my $dsn = sprintf 'dbi:Pg:dbname=%s;host=%s;port=%d', - $opt{dbname}, $opt{host}, $opt{port}; +my $dsn = sprintf('dbi:Pg:dbname=%s;host=%s;port=%d', $opt{dbname}, $opt{host}, $opt{port}); +my $dbh = DBI->connect( + $dsn, + $opt{user}, + $opt{password}, + { + RaiseError => 1, + AutoCommit => 1, + PrintError => 0, + pg_enable_utf8 => 1, + } +) or die DBI->errstr; -my %dbh_attr = ( - RaiseError => 1, - AutoCommit => 1, - PrintError => 0, - pg_enable_utf8 => 1, -); - -my $dbh = DBI->connect($dsn, $opt{user}, $opt{password}, \%dbh_attr) - or die "Unable to connect to PostgreSQL\n"; - -$dbh->do(sprintf 'SET search_path TO %s', $dbh->quote_identifier($opt{schema})); +$dbh->do("SET search_path TO $opt{schema}"); for my $file (@ARGV) { import_file($dbh, $file, \%opt); @@ -82,95 +97,89 @@ for my $file (@ARGV) { $dbh->disconnect; exit 0; -sub usage { - return <<'USAGE'; -Usage: - perl import_satellite_logs.pl [options] file1.csv [file2.csv ...] - -Options: - --dbname NAME PostgreSQL database name. Default: satellite_data - --host HOST PostgreSQL host. Default: localhost - --port PORT PostgreSQL port. Default: 5432 - --user USER PostgreSQL user name - --password PASS PostgreSQL password - --schema NAME Target schema. Default: public - --header-line TEXT Override the expected CSV header line when file lacks one - --notes TEXT Import notes stored in logs.import_notes - --help Show this help text -USAGE -} - sub import_file { my ($dbh, $file, $opt) = @_; - open my $fh, '<:encoding(UTF-8)', $file - or die "Unable to open $file: $!\n"; - - my $file_text = do { local $/; <$fh> }; + open my $fh, '<:raw', $file or die "Cannot open $file: $!\n"; + local $/; + my $blob = <$fh>; close $fh; - my $sha256 = sha256_hex($file_text); - my $file_size_bytes = length $file_text; + my $sha256 = sha256_hex($blob // ''); + my $file_size = -s $file; - my @lines = split /\n/, $file_text, -1; - my @comment_lines; - my $header_line; + open my $in, '<:encoding(UTF-8)', $file or die "Cannot open $file: $!\n"; + + my @header_lines; + my $csv_header_line; my @data_lines; - my $saw_header = 0; - while (@lines) { - my $line = shift @lines; - next if !defined $line; + while (my $line = <$in>) { + chomp $line; + $line =~ s/\r\z//; + + next if $line =~ /^\s*$/ && !@data_lines && !defined $csv_header_line && !@header_lines; if ($line =~ /^#/) { - push @comment_lines, $line; + push @header_lines, $line; next; } - if ($line =~ /^\s*$/ && !@data_lines && !$saw_header) { - next; - } - - if (!$saw_header && $line =~ /^record_type,/) { - $header_line = $line; - $saw_header = 1; + if (!defined $csv_header_line && $line =~ /^record_type,/) { + $csv_header_line = $line; next; } push @data_lines, $line; - push @data_lines, @lines; - last; + } + close $in; + + die "No CSV data rows found in $file\n" if !@data_lines; + + if (!defined $csv_header_line) { + if (defined $opt->{header_line}) { + $csv_header_line = $opt->{header_line}; + } + else { + my $count = count_csv_fields($data_lines[0]); + if ($count == scalar(@ENHANCED_COLUMNS)) { + $csv_header_line = $ENHANCED_HEADER; + } + elsif ($count == scalar(@LEGACY_COLUMNS)) { + $csv_header_line = $LEGACY_HEADER; + } + else { + die sprintf( + "Unable to infer header for %s: first data row has %d fields, expected %d (legacy) or %d (enhanced).\n", + $file, + $count, + scalar(@LEGACY_COLUMNS), + scalar(@ENHANCED_COLUMNS), + ); + } + } } - @data_lines = grep { defined $_ && $_ !~ /^\s*$/ } @data_lines; + my @columns = parse_header_columns($csv_header_line); - $header_line ||= $opt->{header_line} || $DEFAULT_HEADER; + my %allowed = map { $_ => 1 } qw( + record_type timestamp_utc board_id gnss_chip firmware_exercise_name firmware_version + boot_timestamp_utc run_id sample_seq ms_since_run_start fix_type fix_dimension + sats_in_view sat_seen sats_used hdop vdop pdop latitude longitude altitude_m + speed_mps course_deg pps_seen quality_class gps_count galileo_count glonass_count + beidou_count navic_count qzss_count sbas_count mean_cn0 max_cn0 age_of_fix_ms + ttff_ms longest_no_fix_ms sat_talker sat_constellation sat_prn sat_elevation_deg + sat_azimuth_deg sat_snr sat_used_in_solution + ); - my $raw_header_text = @comment_lines ? join("\n", @comment_lines) . "\n" : undef; - - my $csv = Text::CSV_XS->new({ - binary => 1, - auto_diag => 1, - allow_loose_quotes => 1, - allow_loose_escapes => 1, - }); - - $csv->parse($header_line); - my @header = $csv->fields; - - my %idx; - for my $i (0 .. $#header) { - $idx{$header[$i]} = $i; + for my $col (@columns) { + die "Unexpected column '$col' in $file\n" if !$allowed{$col}; } - my @required = qw(record_type timestamp_utc board_id gnss_chip run_id); - for my $name (@required) { - die "Header is missing required column: $name\n" if !exists $idx{$name}; - } + my $raw_header_text = join("\n", @header_lines); + $raw_header_text .= "\n" if length $raw_header_text; - $dbh->begin_work; - - my $log_insert_sql = <<'SQL'; + my $insert_log_sql = <<'SQL'; INSERT INTO logs ( source_filename, source_path, @@ -183,115 +192,89 @@ INSERT INTO logs ( RETURNING log_id SQL - my $log_sth = $dbh->prepare($log_insert_sql); - $log_sth->execute( + my ($log_id) = $dbh->selectrow_array( + $insert_log_sql, + undef, basename($file), $file, $sha256, - $file_size_bytes, + $file_size, $raw_header_text, - $header_line, + $csv_header_line, $opt->{import_notes}, ); - my ($log_id) = $log_sth->fetchrow_array; - my $data_insert_sql = <<'SQL'; -INSERT INTO log_data ( - log_id, row_num, record_type, timestamp_utc, board_id, gnss_chip, - firmware_exercise_name, firmware_version, boot_timestamp_utc, run_id, - fix_type, fix_dimension, sats_in_view, sat_seen, sats_used, - hdop, vdop, pdop, latitude, longitude, altitude_m, speed_mps, course_deg, - pps_seen, quality_class, gps_count, galileo_count, glonass_count, - beidou_count, navic_count, qzss_count, sbas_count, mean_cn0, max_cn0, - age_of_fix_ms, ttff_ms, longest_no_fix_ms, sat_talker, sat_constellation, - sat_prn, sat_elevation_deg, sat_azimuth_deg, sat_snr, sat_used_in_solution -) VALUES ( - ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, - ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? -) -SQL + my @insert_columns = ( + 'log_id', 'source_row_number', @columns, 'raw_csv_line' + ); - my $data_sth = $dbh->prepare($data_insert_sql); + my $placeholders = join ', ', ('?') x @insert_columns; + my $insert_sql = sprintf( + 'INSERT INTO log_data (%s) VALUES (%s)', + join(', ', @insert_columns), + $placeholders, + ); + + my $csv = Text::CSV_XS->new({ + binary => 1, + auto_diag => 1, + allow_loose_quotes => 1, + allow_loose_escapes => 1, + blank_is_undef => 0, + }); + + my $sth = $dbh->prepare($insert_sql); my ($row_count, $sample_count, $satellite_count) = (0, 0, 0); - my ($first_ts, $last_ts); - my ($board_id, $gnss_chip, $firmware_exercise_name, $firmware_version, $boot_ts, $run_id); + my ($first_ts, $last_ts, $board_id, $gnss_chip, $fw_name, $fw_ver, $boot_ts, $run_id); - ROW: - for my $line (@data_lines) { - next ROW if $line =~ /^\s*$/; + $dbh->begin_work; - $csv->parse($line); - my @f = $csv->fields; + for my $i (0 .. $#data_lines) { + my $line = $data_lines[$i]; + next if $line =~ /^\s*$/; - my %row; - for my $name (@header) { - my $value = $f[$idx{$name}]; - $row{$name} = normalize_value($value); + $csv->parse($line) or die "CSV parse failed in $file line @{[$i+1]}: " . $csv->error_diag . "\n"; + my @fields = $csv->fields; + + if (@fields != @columns) { + die sprintf( + "Column mismatch in %s data row %d: got %d fields, expected %d\nLine: %s\n", + $file, $i + 1, scalar(@fields), scalar(@columns), $line + ); } - ++$row_count; - ++$sample_count if defined $row{record_type} && $row{record_type} eq 'sample'; - ++$satellite_count if defined $row{record_type} && $row{record_type} eq 'satellite'; + my %row; + @row{@columns} = @fields; + + normalize_row(\%row); + + my $record_type = $row{record_type} // ''; + $sample_count++ if $record_type eq 'sample'; + $satellite_count++ if $record_type eq 'satellite'; + $row_count++; $first_ts //= $row{timestamp_utc}; - $last_ts = $row{timestamp_utc} if defined $row{timestamp_utc}; + $last_ts = $row{timestamp_utc} if defined $row{timestamp_utc}; + $board_id //= $row{board_id}; + $gnss_chip //= $row{gnss_chip}; + $fw_name //= $row{firmware_exercise_name}; + $fw_ver //= $row{firmware_version}; + $boot_ts //= $row{boot_timestamp_utc}; + $run_id //= $row{run_id}; - $board_id //= $row{board_id}; - $gnss_chip //= $row{gnss_chip}; - $firmware_exercise_name //= $row{firmware_exercise_name}; - $firmware_version //= $row{firmware_version}; - $boot_ts //= $row{boot_timestamp_utc}; - $run_id //= $row{run_id}; - - $data_sth->execute( + my @values = ( $log_id, - $row_count, - $row{record_type}, - $row{timestamp_utc}, - $row{board_id}, - $row{gnss_chip}, - $row{firmware_exercise_name}, - $row{firmware_version}, - $row{boot_timestamp_utc}, - $row{run_id}, - $row{fix_type}, - to_int($row{fix_dimension}), - to_int($row{sats_in_view}), - to_int($row{sat_seen}), - to_int($row{sats_used}), - to_num($row{hdop}), - to_num($row{vdop}), - to_num($row{pdop}), - to_num($row{latitude}), - to_num($row{longitude}), - to_num($row{altitude_m}), - to_num($row{speed_mps}), - to_num($row{course_deg}), - to_bool($row{pps_seen}), - $row{quality_class}, - to_int($row{gps_count}), - to_int($row{galileo_count}), - to_int($row{glonass_count}), - to_int($row{beidou_count}), - to_int($row{navic_count}), - to_int($row{qzss_count}), - to_int($row{sbas_count}), - to_num($row{mean_cn0}), - to_num($row{max_cn0}), - to_int($row{age_of_fix_ms}), - to_int($row{ttff_ms}), - to_int($row{longest_no_fix_ms}), - $row{sat_talker}, - $row{sat_constellation}, - to_int($row{sat_prn}), - to_int($row{sat_elevation_deg}), - to_int($row{sat_azimuth_deg}), - to_num($row{sat_snr}), - to_bool($row{sat_used_in_solution}), + $i + 1, + (map { $row{$_} } @columns), + $line, ); + + $sth->execute(@values); } + $dbh->commit; + my $update_sql = <<'SQL'; UPDATE logs SET board_id = ?, @@ -308,12 +291,13 @@ UPDATE logs WHERE log_id = ? SQL - my $update_sth = $dbh->prepare($update_sql); - $update_sth->execute( + $dbh->do( + $update_sql, + undef, $board_id, $gnss_chip, - $firmware_exercise_name, - $firmware_version, + $fw_name, + $fw_ver, $boot_ts, $run_id, $first_ts, @@ -324,39 +308,72 @@ SQL $log_id, ); - $dbh->commit; - - print STDERR sprintf( - "Imported %s => log_id=%d rows=%d samples=%d satellites=%d\n", - $file, $log_id, $row_count, $sample_count, $satellite_count, - ); + print "Imported $file => log_id=$log_id rows=$row_count samples=$sample_count satellites=$satellite_count\n"; } -sub normalize_value { - my ($value) = @_; - return undef if !defined $value; - $value =~ s/^\s+//; - $value =~ s/\s+$//; - return undef if $value eq ''; - return $value; +sub parse_header_columns { + my ($line) = @_; + my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 1 }); + $csv->parse($line) or die "Cannot parse header line: " . $csv->error_diag . "\n"; + my @cols = $csv->fields; + s/^\s+|\s+$//g for @cols; + return @cols; } -sub to_int { - my ($value) = @_; - return undef if !defined $value; - return int($value); +sub count_csv_fields { + my ($line) = @_; + my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 1 }); + $csv->parse($line) or die "Cannot parse first data row while inferring header: " . $csv->error_diag . "\n"; + my @fields = $csv->fields; + return scalar @fields; } -sub to_num { - my ($value) = @_; - return undef if !defined $value; - return $value + 0; +sub normalize_row { + my ($row) = @_; + + for my $key (keys %{$row}) { + next if !defined $row->{$key}; + $row->{$key} =~ s/^\s+//; + $row->{$key} =~ s/\s+$//; + $row->{$key} = undef if $row->{$key} eq ''; + } + + for my $bool_key (qw(pps_seen sat_used_in_solution)) { + next if !exists $row->{$bool_key}; + next if !defined $row->{$bool_key}; + if ($row->{$bool_key} =~ /^(?:1|true|t|yes|y)$/i) { + $row->{$bool_key} = 'true'; + } + elsif ($row->{$bool_key} =~ /^(?:0|false|f|no|n)$/i) { + $row->{$bool_key} = 'false'; + } + } } -sub to_bool { - my ($value) = @_; - return undef if !defined $value; - return 1 if $value =~ /^(?:1|true|t|yes|y)$/i; - return 0 if $value =~ /^(?:0|false|f|no|n)$/i; - return undef; +sub usage { + return <<'USAGE'; +Usage: + perl import_satellite_logs.pl [options] file1.csv [file2.csv ...] + +Options: + --dbname NAME Database name. Default: satellite_data + --host HOST PostgreSQL host. Default: localhost + --port PORT PostgreSQL port. Default: 5432 + --user USER PostgreSQL user name + --password PASS PostgreSQL password + --schema NAME Schema name. Default: public + --header-line LINE Override the CSV header line when the file lacks one + --notes TEXT Optional import note stored in logs.import_notes + --help Show this help + +Examples: + createdb satellite_data + psql -d satellite_data -f satellite_data_schema.sql + + perl import_satellite_logs.pl \ + --dbname satellite_data \ + --host localhost \ + --user jlpoole \ + /path/to/20260406_175441_GUY.csv +USAGE } diff --git a/exercises/18_GPS_Field_QA/sql/satellite_data_schema.sql b/exercises/18_GPS_Field_QA/sql/satellite_data_schema.sql index 69b888d..789a955 100644 --- a/exercises/18_GPS_Field_QA/sql/satellite_data_schema.sql +++ b/exercises/18_GPS_Field_QA/sql/satellite_data_schema.sql @@ -9,12 +9,15 @@ -- Schema for importing GNSS field QA CSV logs generated by T-Beam units. -- A log file is recorded in table logs, and each CSV row is stored in -- table log_data with a foreign-key reference back to logs. +-- +-- Notes: +-- This revision adds support for the enhanced logger fields: +-- * sample_seq +-- * ms_since_run_start +-- The importer can still load older files that do not contain those fields. BEGIN; --- Optional convenience note: --- CREATE DATABASE satellite_data; - CREATE TABLE IF NOT EXISTS logs ( log_id bigserial PRIMARY KEY, source_filename text NOT NULL, @@ -39,71 +42,57 @@ CREATE TABLE IF NOT EXISTS logs ( sample_count integer NOT NULL DEFAULT 0, satellite_count integer NOT NULL DEFAULT 0, - CONSTRAINT logs_source_filename_ck CHECK (btrim(source_filename) <> ''), - CONSTRAINT logs_csv_header_line_ck CHECK (btrim(csv_header_line) <> ''), - CONSTRAINT logs_row_count_ck CHECK (row_count >= 0), - CONSTRAINT logs_sample_count_ck CHECK (sample_count >= 0), - CONSTRAINT logs_satellite_count_ck CHECK (satellite_count >= 0) + CONSTRAINT logs_source_filename_key UNIQUE (source_filename, file_sha256) ); COMMENT ON TABLE logs IS -'One row per imported CSV file. Stores file-level provenance, importer notes, raw hash-prefixed header text, the effective CSV column header line, and summary counts for the import.'; +'One row per imported CSV log file. Stores file provenance, hash, raw hash-prefixed header text, CSV header line, and summary counts.'; COMMENT ON COLUMN logs.log_id IS -'Surrogate primary key for one imported log file.'; +'Primary key for this imported file.'; COMMENT ON COLUMN logs.source_filename IS -'Base filename of the imported CSV file, such as 20260406_175441_GUY.csv.'; +'Filename of the imported CSV file, e.g. 20260406_175441_GUY.csv.'; COMMENT ON COLUMN logs.source_path IS -'Full or relative filesystem path used at import time.'; +'Path used at import time. Useful for provenance when files are staged from different directories.'; COMMENT ON COLUMN logs.file_sha256 IS -'SHA-256 digest of the file contents for provenance and duplicate detection.'; +'SHA-256 digest of the source file. Helps prevent duplicate imports and supports provenance audits.'; COMMENT ON COLUMN logs.file_size_bytes IS -'File size in bytes at import time.'; +'Size of the source file in bytes.'; COMMENT ON COLUMN logs.raw_header_text IS -'All leading lines in the source file whose first character is #. This is the free-form metadata header preserved exactly as found.'; +'All leading hash-prefixed lines from the file, preserved verbatim as a text block.'; COMMENT ON COLUMN logs.csv_header_line IS -'The effective CSV column header line used for import. This may come from the file itself or from the importer''s expected header when the file has no explicit header row.'; +'Effective CSV column header line used by the importer, either from the file or from importer fallback logic.'; COMMENT ON COLUMN logs.imported_at IS -'UTC timestamp when the file was imported into PostgreSQL.'; +'Timestamp when the file was imported into PostgreSQL.'; COMMENT ON COLUMN logs.import_notes IS -'Optional notes about the import, schema assumptions, or anomalies observed during ingestion.'; +'Optional free-form notes supplied at import time.'; COMMENT ON COLUMN logs.board_id IS -'Identifier of the T-Beam or other logger board, for example GUY, AMY, or CY, copied from the data rows when available.'; +'Board identifier observed in the file, e.g. GUY, AMY, or CY.'; COMMENT ON COLUMN logs.gnss_chip IS -'GNSS receiver model reported by the firmware, such as MAX-M10S or L76K.'; +'GNSS receiver chip or module name, e.g. MAX-M10S or L76K.'; COMMENT ON COLUMN logs.firmware_exercise_name IS -'Firmware exercise or program name that generated the log, useful for tracking logger behavior across exercises.'; +'Firmware exercise or logger program name that produced the file.'; COMMENT ON COLUMN logs.firmware_version IS -'Firmware version string or build identifier reported by the logger.'; +'Firmware build or version string written by the device.'; COMMENT ON COLUMN logs.boot_timestamp_utc IS -'UTC timestamp that the device believed it booted. Useful for relating run timing back to power-up timing.'; +'UTC timestamp representing when the device booted, as reported by the firmware.'; COMMENT ON COLUMN logs.run_id IS -'Run/session identifier generated by the firmware for this data collection session.'; +'Run identifier shared by all rows from one logger session.'; COMMENT ON COLUMN logs.first_timestamp_utc IS -'Earliest timestamp_utc found in the imported data rows.'; +'First UTC sample timestamp found in this file.'; COMMENT ON COLUMN logs.last_timestamp_utc IS -'Latest timestamp_utc found in the imported data rows.'; +'Last UTC sample timestamp found in this file.'; COMMENT ON COLUMN logs.row_count IS -'Total number of imported data rows for this file, including both sample and satellite rows.'; +'Total number of imported CSV data rows in the file.'; COMMENT ON COLUMN logs.sample_count IS -'Count of rows whose record_type is sample.'; +'Number of imported rows where record_type = sample.'; COMMENT ON COLUMN logs.satellite_count IS -'Count of rows whose record_type is satellite.'; - -CREATE UNIQUE INDEX IF NOT EXISTS logs_source_sha256_uq - ON logs (file_sha256) - WHERE file_sha256 IS NOT NULL; - -CREATE INDEX IF NOT EXISTS logs_run_id_idx - ON logs (run_id); - -CREATE INDEX IF NOT EXISTS logs_board_id_idx - ON logs (board_id); +'Number of imported rows where record_type = satellite.'; CREATE TABLE IF NOT EXISTS log_data ( log_data_id bigserial PRIMARY KEY, log_id bigint NOT NULL REFERENCES logs(log_id) ON DELETE CASCADE, - row_num integer NOT NULL, + source_row_number integer NOT NULL, record_type text NOT NULL, timestamp_utc timestamptz, @@ -113,20 +102,22 @@ CREATE TABLE IF NOT EXISTS log_data ( firmware_version text, boot_timestamp_utc timestamptz, run_id text, + sample_seq bigint, + ms_since_run_start bigint, fix_type text, - fix_dimension smallint, + fix_dimension integer, sats_in_view integer, sat_seen integer, sats_used integer, hdop numeric(8,3), vdop numeric(8,3), pdop numeric(8,3), - latitude numeric(11,8), - longitude numeric(11,8), - altitude_m numeric(10,2), - speed_mps numeric(10,3), - course_deg numeric(7,3), + latitude double precision, + longitude double precision, + altitude_m numeric(12,3), + speed_mps numeric(12,3), + course_deg numeric(12,3), pps_seen boolean, quality_class text, @@ -140,154 +131,140 @@ CREATE TABLE IF NOT EXISTS log_data ( mean_cn0 numeric(8,3), max_cn0 numeric(8,3), - age_of_fix_ms integer, - ttff_ms integer, - longest_no_fix_ms integer, + age_of_fix_ms bigint, + ttff_ms bigint, + longest_no_fix_ms bigint, sat_talker text, sat_constellation text, sat_prn integer, - sat_elevation_deg integer, - sat_azimuth_deg integer, + sat_elevation_deg numeric(8,3), + sat_azimuth_deg numeric(8,3), sat_snr numeric(8,3), sat_used_in_solution boolean, - CONSTRAINT log_data_log_id_row_num_uq UNIQUE (log_id, row_num), - CONSTRAINT log_data_row_num_ck CHECK (row_num >= 1), - CONSTRAINT log_data_record_type_ck CHECK (record_type IN ('sample', 'satellite')), - CONSTRAINT log_data_fix_dimension_ck CHECK (fix_dimension IS NULL OR fix_dimension BETWEEN 0 AND 9), - CONSTRAINT log_data_latitude_ck CHECK (latitude IS NULL OR latitude BETWEEN -90 AND 90), - CONSTRAINT log_data_longitude_ck CHECK (longitude IS NULL OR longitude BETWEEN -180 AND 180), - CONSTRAINT log_data_course_deg_ck CHECK (course_deg IS NULL OR course_deg >= 0 AND course_deg < 360), - CONSTRAINT log_data_sat_elevation_deg_ck CHECK (sat_elevation_deg IS NULL OR sat_elevation_deg BETWEEN 0 AND 90), - CONSTRAINT log_data_sat_azimuth_deg_ck CHECK (sat_azimuth_deg IS NULL OR sat_azimuth_deg BETWEEN 0 AND 359), - CONSTRAINT log_data_sat_counts_nonnegative_ck CHECK ( - coalesce(sats_in_view, 0) >= 0 AND - coalesce(sat_seen, 0) >= 0 AND - coalesce(sats_used, 0) >= 0 AND - coalesce(gps_count, 0) >= 0 AND - coalesce(galileo_count, 0) >= 0 AND - coalesce(glonass_count, 0) >= 0 AND - coalesce(beidou_count, 0) >= 0 AND - coalesce(navic_count, 0) >= 0 AND - coalesce(qzss_count, 0) >= 0 AND - coalesce(sbas_count, 0) >= 0 - ), - CONSTRAINT log_data_ms_nonnegative_ck CHECK ( - coalesce(age_of_fix_ms, 0) >= 0 AND - coalesce(ttff_ms, 0) >= 0 AND - coalesce(longest_no_fix_ms, 0) >= 0 - ) + raw_csv_line text, + + CONSTRAINT log_data_record_type_chk CHECK (record_type IN ('sample', 'satellite')) ); COMMENT ON TABLE log_data IS -'One row per CSV data record. Stores both sample rows and per-satellite rows, preserving the file''s mixed row model in one typed table.'; +'One row per CSV data row from a GNSS logger file. Stores both sample rows and per-satellite rows.'; COMMENT ON COLUMN log_data.log_data_id IS -'Surrogate primary key for one imported data row.'; +'Primary key for one imported CSV data row.'; COMMENT ON COLUMN log_data.log_id IS -'Foreign key back to logs.log_id identifying which source file this row came from.'; -COMMENT ON COLUMN log_data.row_num IS -'1-based row number within the imported data file, excluding comment lines and any header line.'; +'Foreign key to logs.log_id, linking this row back to the source file.'; +COMMENT ON COLUMN log_data.source_row_number IS +'1-based row number within the CSV data section, excluding preserved hash-prefixed header lines.'; COMMENT ON COLUMN log_data.record_type IS -'Kind of row: sample for one epoch summary row, or satellite for one satellite observed at that epoch.'; +'Logical row type. sample = one epoch summary row. satellite = one satellite snapshot tied to a sample epoch.'; COMMENT ON COLUMN log_data.timestamp_utc IS -'UTC timestamp attached to the row by the GNSS logger.'; +'UTC time for the sample epoch, as reported by the GNSS receiver.'; COMMENT ON COLUMN log_data.board_id IS -'Identifier of the data-collecting board, such as GUY, AMY, or CY.'; +'Board identifier such as GUY, AMY, or CY.'; COMMENT ON COLUMN log_data.gnss_chip IS -'GNSS receiver model, such as MAX-M10S or L76K.'; +'GNSS module name, for example MAX-M10S or L76K.'; COMMENT ON COLUMN log_data.firmware_exercise_name IS -'Firmware exercise/program name that emitted the row.'; +'Name of the firmware exercise or logger mode that generated the row.'; COMMENT ON COLUMN log_data.firmware_version IS -'Firmware version/build identifier used when the row was logged.'; +'Firmware build or version string embedded in the row.'; COMMENT ON COLUMN log_data.boot_timestamp_utc IS -'Device boot timestamp in UTC as reported by the logger.'; +'UTC timestamp representing when the device booted, according to firmware.'; COMMENT ON COLUMN log_data.run_id IS -'Run/session identifier assigned by the firmware.'; +'Run identifier shared across one logger session.'; +COMMENT ON COLUMN log_data.sample_seq IS +'Sequential sample number within a run. Starts at 1 when a new log begins. Satellite rows inherit the parent sample sequence value.'; +COMMENT ON COLUMN log_data.ms_since_run_start IS +'Monotonic milliseconds elapsed since the log file was opened. Useful for jitter, gap, and SD-write-impact analysis.'; COMMENT ON COLUMN log_data.fix_type IS -'GNSS fix classification such as NO_FIX, 2D, 3D, DGPS, or similar receiver-reported state.'; +'Fix quality label such as NO_FIX, 2D, 3D, or DGPS depending on what the firmware emits.'; COMMENT ON COLUMN log_data.fix_dimension IS -'Numeric dimensionality of the solution. Typical values are 1 for no fix, 2 for horizontal-only, and 3 for full 3D.'; +'Numeric dimension of the position fix, typically 0, 2, or 3.'; COMMENT ON COLUMN log_data.sats_in_view IS -'Receiver-reported total satellites in view at the epoch, if provided by the firmware.'; +'Count of satellites reportedly in view at this epoch according to the receiver summary.'; COMMENT ON COLUMN log_data.sat_seen IS -'Count of satellites actually emitted as satellite rows or otherwise counted by the logger at the epoch. May differ from sats_in_view depending on receiver API behavior.'; +'Count of satellites actually observed or emitted by the logger for this epoch. This may differ from sats_in_view depending on firmware logic.'; COMMENT ON COLUMN log_data.sats_used IS -'Number of satellites used by the receiver in the navigation solution at the epoch.'; +'Count of satellites used in the navigation solution at this epoch.'; COMMENT ON COLUMN log_data.hdop IS -'Horizontal Dilution of Precision. Lower values indicate better horizontal geometry.'; +'Horizontal Dilution of Precision. Lower values generally indicate better horizontal geometry.'; COMMENT ON COLUMN log_data.vdop IS -'Vertical Dilution of Precision. Lower values indicate better vertical geometry.'; +'Vertical Dilution of Precision. Lower values generally indicate better vertical geometry.'; COMMENT ON COLUMN log_data.pdop IS -'Position Dilution of Precision, a combined geometry indicator for 3D positioning.'; +'Position Dilution of Precision. Combined geometry indicator for the position solution.'; COMMENT ON COLUMN log_data.latitude IS -'Latitude in decimal degrees referenced to the receiver''s current navigation solution.'; +'Latitude in decimal degrees.'; COMMENT ON COLUMN log_data.longitude IS -'Longitude in decimal degrees referenced to the receiver''s current navigation solution.'; +'Longitude in decimal degrees.'; COMMENT ON COLUMN log_data.altitude_m IS -'Altitude in meters as reported by the GNSS receiver.'; +'Altitude in meters, generally above mean sea level according to receiver output.'; COMMENT ON COLUMN log_data.speed_mps IS -'Ground speed in meters per second.'; +'Receiver-reported speed over ground in meters per second.'; COMMENT ON COLUMN log_data.course_deg IS -'Course over ground in degrees clockwise from true north.'; +'Receiver-reported course over ground in degrees.'; COMMENT ON COLUMN log_data.pps_seen IS -'Boolean indicator that a one-pulse-per-second timing event was observed for the epoch.'; +'Boolean indicating whether a PPS pulse was seen by the firmware during this epoch.'; COMMENT ON COLUMN log_data.quality_class IS -'Firmware-defined coarse quality label such as POOR, FAIR, GOOD, or similar.'; +'Human-friendly firmware quality label such as POOR, FAIR, GOOD, or similar.'; COMMENT ON COLUMN log_data.gps_count IS -'Number of GPS satellites counted at the epoch.'; +'Count of GPS satellites observed at this epoch.'; COMMENT ON COLUMN log_data.galileo_count IS -'Number of Galileo satellites counted at the epoch.'; +'Count of Galileo satellites observed at this epoch.'; COMMENT ON COLUMN log_data.glonass_count IS -'Number of GLONASS satellites counted at the epoch.'; +'Count of GLONASS satellites observed at this epoch.'; COMMENT ON COLUMN log_data.beidou_count IS -'Number of BeiDou satellites counted at the epoch.'; +'Count of BeiDou satellites observed at this epoch.'; COMMENT ON COLUMN log_data.navic_count IS -'Number of NavIC/IRNSS satellites counted at the epoch.'; +'Count of NavIC satellites observed at this epoch.'; COMMENT ON COLUMN log_data.qzss_count IS -'Number of QZSS satellites counted at the epoch.'; +'Count of QZSS satellites observed at this epoch.'; COMMENT ON COLUMN log_data.sbas_count IS -'Number of SBAS satellites counted at the epoch.'; +'Count of SBAS satellites observed at this epoch.'; COMMENT ON COLUMN log_data.mean_cn0 IS -'Mean carrier-to-noise-density ratio, typically in dB-Hz, across the satellites considered by the firmware at the epoch.'; +'Mean carrier-to-noise density estimate across observed satellites. Higher values generally indicate stronger signals.'; COMMENT ON COLUMN log_data.max_cn0 IS -'Maximum carrier-to-noise-density ratio, typically in dB-Hz, seen at the epoch.'; +'Maximum carrier-to-noise density estimate among observed satellites for this epoch.'; COMMENT ON COLUMN log_data.age_of_fix_ms IS -'Age in milliseconds of the current fix solution when logged.'; +'Age of the current fix in milliseconds, as reported by the firmware or receiver API.'; COMMENT ON COLUMN log_data.ttff_ms IS -'Time To First Fix in milliseconds for the run or receiver state being reported.'; +'Time to first fix in milliseconds for the run or current acquisition state.'; COMMENT ON COLUMN log_data.longest_no_fix_ms IS -'Longest interval in milliseconds spent without a usable fix during the run so far.'; +'Longest contiguous no-fix interval observed so far in the run, in milliseconds.'; COMMENT ON COLUMN log_data.sat_talker IS -'Talker or source prefix for the satellite row, for example GP, GL, GA, GB, or similar receiver-provided code.'; +'Talker or source prefix associated with the satellite row, such as GP, GA, GL, GB, or GN.'; COMMENT ON COLUMN log_data.sat_constellation IS -'Human-readable constellation name for the satellite row, such as GPS, GALILEO, GLONASS, or BEIDOU.'; +'Constellation name for the satellite row, such as GPS, Galileo, GLONASS, BeiDou, NavIC, QZSS, or SBAS.'; COMMENT ON COLUMN log_data.sat_prn IS 'PRN or SVID number identifying the satellite within its constellation.'; COMMENT ON COLUMN log_data.sat_elevation_deg IS -'Satellite elevation angle in degrees above the horizon.'; +'Satellite elevation angle above the horizon in degrees.'; COMMENT ON COLUMN log_data.sat_azimuth_deg IS -'Satellite azimuth angle in degrees clockwise from north.'; +'Satellite azimuth in degrees clockwise from true north, according to receiver output.'; COMMENT ON COLUMN log_data.sat_snr IS -'Satellite signal-to-noise or similar quality metric as reported by the firmware, commonly in dB-Hz.'; +'Signal-to-noise style quality measure for this satellite row. Depending on firmware, this may be SNR or CN0-like output.'; COMMENT ON COLUMN log_data.sat_used_in_solution IS -'Boolean indicator that this satellite was used in the navigation solution for the epoch.'; +'Boolean indicating whether this specific satellite was used in the navigation solution.'; +COMMENT ON COLUMN log_data.raw_csv_line IS +'Original CSV line preserved verbatim for audit and recovery purposes.'; -CREATE INDEX IF NOT EXISTS log_data_log_id_timestamp_idx - ON log_data (log_id, timestamp_utc); +CREATE INDEX IF NOT EXISTS log_data_log_id_idx + ON log_data(log_id); CREATE INDEX IF NOT EXISTS log_data_run_id_idx - ON log_data (run_id); + ON log_data(run_id); -CREATE INDEX IF NOT EXISTS log_data_board_id_idx - ON log_data (board_id); +CREATE INDEX IF NOT EXISTS log_data_timestamp_idx + ON log_data(timestamp_utc); CREATE INDEX IF NOT EXISTS log_data_record_type_idx - ON log_data (record_type); + ON log_data(record_type); + +CREATE INDEX IF NOT EXISTS log_data_board_run_seq_idx + ON log_data(board_id, run_id, sample_seq, record_type); CREATE INDEX IF NOT EXISTS log_data_satellite_lookup_idx - ON log_data (sat_constellation, sat_prn, timestamp_utc) + ON log_data(sat_constellation, sat_prn, timestamp_utc) WHERE record_type = 'satellite'; COMMIT;