Revised database schema and importer to include 2 additional fields (sample_seq, ms_since_run_start) suggested by ChatGPT

2026-04-06 11:36:29 -07:00 · 2026-04-06 11:36:29 -07:00 · e3f6527274
commit e3f6527274
parent 32ad481fcf
2 changed files with 323 additions and 329 deletions
--- a/exercises/18_GPS_Field_QA/scripts/import_satellite_logs.pl
+++ b/exercises/18_GPS_Field_QA/scripts/import_satellite_logs.pl
@ -13,8 +13,10 @@
 # Notes:
 #   * Imports one or more CSV files into tables logs and log_data.
 #   * Preserves all leading hash-prefixed header lines in logs.raw_header_text.
-#   * Uses the file's own CSV header row when present; otherwise falls back to
-#     the expected project header defined in this script.
+#   * Uses the file's own CSV header row when present.
+#   * When no CSV header row is present, it falls back by column count:
+#       - legacy schema without sample_seq/ms_since_run_start
+#       - enhanced schema with sample_seq/ms_since_run_start

 use strict;
 use warnings;
@ -26,7 +28,7 @@ use File::Basename qw(basename);
 use Getopt::Long qw(GetOptions);
 use Text::CSV_XS;

-my $DEFAULT_HEADER = join ',', qw(
+my @LEGACY_COLUMNS = qw(
 record_type timestamp_utc board_id gnss_chip firmware_exercise_name firmware_version
 boot_timestamp_utc run_id fix_type fix_dimension sats_in_view sat_seen sats_used
 hdop vdop pdop latitude longitude altitude_m speed_mps course_deg pps_seen
@ -36,6 +38,19 @@ sat_talker sat_constellation sat_prn sat_elevation_deg sat_azimuth_deg sat_snr
 sat_used_in_solution
 );

+my @ENHANCED_COLUMNS = qw(
+record_type timestamp_utc board_id gnss_chip firmware_exercise_name firmware_version
+boot_timestamp_utc run_id sample_seq ms_since_run_start fix_type fix_dimension
+sats_in_view sat_seen sats_used hdop vdop pdop latitude longitude altitude_m
+speed_mps course_deg pps_seen quality_class gps_count galileo_count glonass_count
+beidou_count navic_count qzss_count sbas_count mean_cn0 max_cn0 age_of_fix_ms
+ttff_ms longest_no_fix_ms sat_talker sat_constellation sat_prn sat_elevation_deg
+sat_azimuth_deg sat_snr sat_used_in_solution
+);
+
+my $LEGACY_HEADER   = join ',', @LEGACY_COLUMNS;
+my $ENHANCED_HEADER = join ',', @ENHANCED_COLUMNS;
+
 my %opt = (
    dbname => 'satellite_data',
    host   => 'localhost',
@ -60,20 +75,20 @@ if ($opt{help} || !@ARGV) {
    exit 0;
 }

-my $dsn = sprintf 'dbi:Pg:dbname=%s;host=%s;port=%d',
-    $opt{dbname}, $opt{host}, $opt{port};
-
-my %dbh_attr = (
+my $dsn = sprintf('dbi:Pg:dbname=%s;host=%s;port=%d', $opt{dbname}, $opt{host}, $opt{port});
+my $dbh = DBI->connect(
+    $dsn,
+    $opt{user},
+    $opt{password},
+    {
        RaiseError => 1,
        AutoCommit => 1,
        PrintError => 0,
        pg_enable_utf8 => 1,
-);
+    }
+) or die DBI->errstr;

-my $dbh = DBI->connect($dsn, $opt{user}, $opt{password}, \%dbh_attr)
-    or die "Unable to connect to PostgreSQL\n";
-
-$dbh->do(sprintf 'SET search_path TO %s', $dbh->quote_identifier($opt{schema}));
+$dbh->do("SET search_path TO $opt{schema}");

 for my $file (@ARGV) {
    import_file($dbh, $file, \%opt);
@ -82,95 +97,89 @@ for my $file (@ARGV) {
 $dbh->disconnect;
 exit 0;

-sub usage {
-    return <<'USAGE';
-Usage:
-  perl import_satellite_logs.pl [options] file1.csv [file2.csv ...]
-
-Options:
-  --dbname NAME         PostgreSQL database name. Default: satellite_data
-  --host HOST           PostgreSQL host. Default: localhost
-  --port PORT           PostgreSQL port. Default: 5432
-  --user USER           PostgreSQL user name
-  --password PASS       PostgreSQL password
-  --schema NAME         Target schema. Default: public
-  --header-line TEXT    Override the expected CSV header line when file lacks one
-  --notes TEXT          Import notes stored in logs.import_notes
-  --help                Show this help text
-USAGE
-}
-
 sub import_file {
    my ($dbh, $file, $opt) = @_;

-    open my $fh, '<:encoding(UTF-8)', $file
-        or die "Unable to open $file: $!\n";
-
-    my $file_text = do { local $/; <$fh> };
+    open my $fh, '<:raw', $file or die "Cannot open $file: $!\n";
+    # Slurp inside a do-block: a sub-wide `local $/` would also force the line-by-line read below into slurp mode.
+    my $blob = do { local $/; <$fh> };
    close $fh;

-    my $sha256 = sha256_hex($file_text);
-    my $file_size_bytes = length $file_text;
+    my $sha256    = sha256_hex($blob // '');
+    my $file_size = -s $file;

-    my @lines = split /\n/, $file_text, -1;
-    my @comment_lines;
-    my $header_line;
+    open my $in, '<:encoding(UTF-8)', $file or die "Cannot open $file: $!\n";
+
+    my @header_lines;
+    my $csv_header_line;
    my @data_lines;
-    my $saw_header = 0;

-    while (@lines) {
-        my $line = shift @lines;
-        next if !defined $line;
+    while (my $line = <$in>) {
+        chomp $line;
+        $line =~ s/\r\z//;
+
+        next if $line =~ /^\s*$/ && !@data_lines && !defined $csv_header_line;

        if ($line =~ /^#/) {
-            push @comment_lines, $line;
+            push @header_lines, $line;
            next;
        }

-        if ($line =~ /^\s*$/ && !@data_lines && !$saw_header) {
-            next;
-        }
-
-        if (!$saw_header && $line =~ /^record_type,/) {
-            $header_line = $line;
-            $saw_header = 1;
+        if (!defined $csv_header_line && $line =~ /^record_type,/) {
+            $csv_header_line = $line;
            next;
        }

        push @data_lines, $line;
-        push @data_lines, @lines;
-        last;
+    }
+    close $in;
+
+    die "No CSV data rows found in $file\n" if !@data_lines;
+
+    if (!defined $csv_header_line) {
+        if (defined $opt->{header_line}) {
+            $csv_header_line = $opt->{header_line};
+        }
+        else {
+            my $count = count_csv_fields($data_lines[0]);
+            if ($count == scalar(@ENHANCED_COLUMNS)) {
+                $csv_header_line = $ENHANCED_HEADER;
+            }
+            elsif ($count == scalar(@LEGACY_COLUMNS)) {
+                $csv_header_line = $LEGACY_HEADER;
+            }
+            else {
+                die sprintf(
+                    "Unable to infer header for %s: first data row has %d fields, expected %d (legacy) or %d (enhanced).\n",
+                    $file,
+                    $count,
+                    scalar(@LEGACY_COLUMNS),
+                    scalar(@ENHANCED_COLUMNS),
+                );
+            }
+        }
    }

-    @data_lines = grep { defined $_ && $_ !~ /^\s*$/ } @data_lines;
+    my @columns = parse_header_columns($csv_header_line);

-    $header_line ||= $opt->{header_line} || $DEFAULT_HEADER;
+    my %allowed = map { $_ => 1 } qw(
+        record_type timestamp_utc board_id gnss_chip firmware_exercise_name firmware_version
+        boot_timestamp_utc run_id sample_seq ms_since_run_start fix_type fix_dimension
+        sats_in_view sat_seen sats_used hdop vdop pdop latitude longitude altitude_m
+        speed_mps course_deg pps_seen quality_class gps_count galileo_count glonass_count
+        beidou_count navic_count qzss_count sbas_count mean_cn0 max_cn0 age_of_fix_ms
+        ttff_ms longest_no_fix_ms sat_talker sat_constellation sat_prn sat_elevation_deg
+        sat_azimuth_deg sat_snr sat_used_in_solution
+    );

-    my $raw_header_text = @comment_lines ? join("\n", @comment_lines) . "\n" : undef;
-
-    my $csv = Text::CSV_XS->new({
-        binary => 1,
-        auto_diag => 1,
-        allow_loose_quotes => 1,
-        allow_loose_escapes => 1,
-    });
-
-    $csv->parse($header_line);
-    my @header = $csv->fields;
-
-    my %idx;
-    for my $i (0 .. $#header) {
-        $idx{$header[$i]} = $i;
+    for my $col (@columns) {
+        die "Unexpected column '$col' in $file\n" if !$allowed{$col};
    }

-    my @required = qw(record_type timestamp_utc board_id gnss_chip run_id);
-    for my $name (@required) {
-        die "Header is missing required column: $name\n" if !exists $idx{$name};
-    }
+    my $raw_header_text = join("\n", @header_lines);
+    $raw_header_text .= "\n" if length $raw_header_text;

-    $dbh->begin_work;
-
-    my $log_insert_sql = <<'SQL';
+    my $insert_log_sql = <<'SQL';
 INSERT INTO logs (
    source_filename,
    source_path,
@ -183,115 +192,89 @@ INSERT INTO logs (
 RETURNING log_id
 SQL

-    my $log_sth = $dbh->prepare($log_insert_sql);
-    $log_sth->execute(
+    my ($log_id) = $dbh->selectrow_array(
+        $insert_log_sql,
+        undef,
        basename($file),
        $file,
        $sha256,
-        $file_size_bytes,
+        $file_size,
        $raw_header_text,
-        $header_line,
+        $csv_header_line,
        $opt->{import_notes},
    );
-    my ($log_id) = $log_sth->fetchrow_array;

-    my $data_insert_sql = <<'SQL';
-INSERT INTO log_data (
-    log_id, row_num, record_type, timestamp_utc, board_id, gnss_chip,
-    firmware_exercise_name, firmware_version, boot_timestamp_utc, run_id,
-    fix_type, fix_dimension, sats_in_view, sat_seen, sats_used,
-    hdop, vdop, pdop, latitude, longitude, altitude_m, speed_mps, course_deg,
-    pps_seen, quality_class, gps_count, galileo_count, glonass_count,
-    beidou_count, navic_count, qzss_count, sbas_count, mean_cn0, max_cn0,
-    age_of_fix_ms, ttff_ms, longest_no_fix_ms, sat_talker, sat_constellation,
-    sat_prn, sat_elevation_deg, sat_azimuth_deg, sat_snr, sat_used_in_solution
-) VALUES (
-    ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
-    ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
-)
-SQL
+    my @insert_columns = (
+        'log_id', 'source_row_number', @columns, 'raw_csv_line'
+    );

-    my $data_sth = $dbh->prepare($data_insert_sql);
+    my $placeholders = join ', ', ('?') x @insert_columns;
+    my $insert_sql = sprintf(
+        'INSERT INTO log_data (%s) VALUES (%s)',
+        join(', ', @insert_columns),
+        $placeholders,
+    );
+
+    my $csv = Text::CSV_XS->new({
+        binary => 1,
+        auto_diag => 1,
+        allow_loose_quotes => 1,
+        allow_loose_escapes => 1,
+        blank_is_undef => 0,
+    });
+
+    my $sth = $dbh->prepare($insert_sql);

    my ($row_count, $sample_count, $satellite_count) = (0, 0, 0);
-    my ($first_ts, $last_ts);
-    my ($board_id, $gnss_chip, $firmware_exercise_name, $firmware_version, $boot_ts, $run_id);
+    my ($first_ts, $last_ts, $board_id, $gnss_chip, $fw_name, $fw_ver, $boot_ts, $run_id);

-    ROW:
-    for my $line (@data_lines) {
-        next ROW if $line =~ /^\s*$/;
+    $dbh->begin_work;

-        $csv->parse($line);
-        my @f = $csv->fields;
+    for my $i (0 .. $#data_lines) {
+        my $line = $data_lines[$i];
+        next if $line =~ /^\s*$/;

-        my %row;
-        for my $name (@header) {
-            my $value = $f[$idx{$name}];
-            $row{$name} = normalize_value($value);
+        $csv->parse($line) or die "CSV parse failed in $file line @{[$i+1]}: " . $csv->error_diag . "\n";
+        my @fields = $csv->fields;
+
+        if (@fields != @columns) {
+            die sprintf(
+                "Column mismatch in %s data row %d: got %d fields, expected %d\nLine: %s\n",
+                $file, $i + 1, scalar(@fields), scalar(@columns), $line
+            );
        }

-        ++$row_count;
-        ++$sample_count    if defined $row{record_type} && $row{record_type} eq 'sample';
-        ++$satellite_count if defined $row{record_type} && $row{record_type} eq 'satellite';
+        my %row;
+        @row{@columns} = @fields;
+
+        normalize_row(\%row);
+
+        my $record_type = $row{record_type} // '';
+        $sample_count++    if $record_type eq 'sample';
+        $satellite_count++ if $record_type eq 'satellite';
+        $row_count++;

        $first_ts //= $row{timestamp_utc};
        $last_ts    = $row{timestamp_utc} if defined $row{timestamp_utc};
-
        $board_id //= $row{board_id};
        $gnss_chip //= $row{gnss_chip};
-        $firmware_exercise_name //= $row{firmware_exercise_name};
-        $firmware_version       //= $row{firmware_version};
+        $fw_name   //= $row{firmware_exercise_name};
+        $fw_ver    //= $row{firmware_version};
        $boot_ts   //= $row{boot_timestamp_utc};
        $run_id    //= $row{run_id};

-        $data_sth->execute(
+        my @values = (
            $log_id,
-            $row_count,
-            $row{record_type},
-            $row{timestamp_utc},
-            $row{board_id},
-            $row{gnss_chip},
-            $row{firmware_exercise_name},
-            $row{firmware_version},
-            $row{boot_timestamp_utc},
-            $row{run_id},
-            $row{fix_type},
-            to_int($row{fix_dimension}),
-            to_int($row{sats_in_view}),
-            to_int($row{sat_seen}),
-            to_int($row{sats_used}),
-            to_num($row{hdop}),
-            to_num($row{vdop}),
-            to_num($row{pdop}),
-            to_num($row{latitude}),
-            to_num($row{longitude}),
-            to_num($row{altitude_m}),
-            to_num($row{speed_mps}),
-            to_num($row{course_deg}),
-            to_bool($row{pps_seen}),
-            $row{quality_class},
-            to_int($row{gps_count}),
-            to_int($row{galileo_count}),
-            to_int($row{glonass_count}),
-            to_int($row{beidou_count}),
-            to_int($row{navic_count}),
-            to_int($row{qzss_count}),
-            to_int($row{sbas_count}),
-            to_num($row{mean_cn0}),
-            to_num($row{max_cn0}),
-            to_int($row{age_of_fix_ms}),
-            to_int($row{ttff_ms}),
-            to_int($row{longest_no_fix_ms}),
-            $row{sat_talker},
-            $row{sat_constellation},
-            to_int($row{sat_prn}),
-            to_int($row{sat_elevation_deg}),
-            to_int($row{sat_azimuth_deg}),
-            to_num($row{sat_snr}),
-            to_bool($row{sat_used_in_solution}),
+            $i + 1,
+            (map { $row{$_} } @columns),
+            $line,
        );
+
+        $sth->execute(@values);
    }

+    $dbh->commit;
+
    my $update_sql = <<'SQL';
 UPDATE logs
   SET board_id = ?,
@ -308,12 +291,13 @@ UPDATE logs
 WHERE log_id = ?
 SQL

-    my $update_sth = $dbh->prepare($update_sql);
-    $update_sth->execute(
+    $dbh->do(
+        $update_sql,
+        undef,
        $board_id,
        $gnss_chip,
-        $firmware_exercise_name,
-        $firmware_version,
+        $fw_name,
+        $fw_ver,
        $boot_ts,
        $run_id,
        $first_ts,
@ -324,39 +308,72 @@ SQL
        $log_id,
    );

-    $dbh->commit;
-
-    print STDERR sprintf(
-        "Imported %s => log_id=%d rows=%d samples=%d satellites=%d\n",
-        $file, $log_id, $row_count, $sample_count, $satellite_count,
-    );
+    print "Imported $file => log_id=$log_id rows=$row_count samples=$sample_count satellites=$satellite_count\n";
 }

-sub normalize_value {
-    my ($value) = @_;
-    return undef if !defined $value;
-    $value =~ s/^\s+//;
-    $value =~ s/\s+$//;
-    return undef if $value eq '';
-    return $value;
+sub parse_header_columns {
+    my ($line) = @_;
+    my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 1 });
+    $csv->parse($line) or die "Cannot parse header line: " . $csv->error_diag . "\n";
+    my @cols = $csv->fields;
+    s/^\s+|\s+$//g for @cols;
+    return @cols;
 }

-sub to_int {
-    my ($value) = @_;
-    return undef if !defined $value;
-    return int($value);
+sub count_csv_fields {
+    my ($line) = @_;
+    my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 1 });
+    $csv->parse($line) or die "Cannot parse first data row while inferring header: " . $csv->error_diag . "\n";
+    my @fields = $csv->fields;
+    return scalar @fields;
 }

-sub to_num {
-    my ($value) = @_;
-    return undef if !defined $value;
-    return $value + 0;
+sub normalize_row {
+    my ($row) = @_;
+
+    for my $key (keys %{$row}) {
+        next if !defined $row->{$key};
+        $row->{$key} =~ s/^\s+//;
+        $row->{$key} =~ s/\s+$//;
+        $row->{$key} = undef if $row->{$key} eq '';
    }

-sub to_bool {
-    my ($value) = @_;
-    return undef if !defined $value;
-    return 1 if $value =~ /^(?:1|true|t|yes|y)$/i;
-    return 0 if $value =~ /^(?:0|false|f|no|n)$/i;
-    return undef;
+    for my $bool_key (qw(pps_seen sat_used_in_solution)) {
+        next if !exists $row->{$bool_key};
+        next if !defined $row->{$bool_key};
+        if ($row->{$bool_key} =~ /^(?:1|true|t|yes|y)$/i) {
+            $row->{$bool_key} = 'true';
+        }
+        elsif ($row->{$bool_key} =~ /^(?:0|false|f|no|n)$/i) {
+            $row->{$bool_key} = 'false';
+        }
+    }
+}
+
+sub usage {
+    return <<'USAGE';
+Usage:
+  perl import_satellite_logs.pl [options] file1.csv [file2.csv ...]
+
+Options:
+  --dbname NAME         Database name. Default: satellite_data
+  --host HOST           PostgreSQL host. Default: localhost
+  --port PORT           PostgreSQL port. Default: 5432
+  --user USER           PostgreSQL user name
+  --password PASS       PostgreSQL password
+  --schema NAME         Schema name. Default: public
+  --header-line LINE    Override the CSV header line when the file lacks one
+  --notes TEXT          Optional import note stored in logs.import_notes
+  --help                Show this help
+
+Examples:
+  createdb satellite_data
+  psql -d satellite_data -f satellite_data_schema.sql
+
+  perl import_satellite_logs.pl \
+    --dbname satellite_data \
+    --host localhost \
+    --user jlpoole \
+    /path/to/20260406_175441_GUY.csv
+USAGE
 }
--- a/exercises/18_GPS_Field_QA/sql/satellite_data_schema.sql
+++ b/exercises/18_GPS_Field_QA/sql/satellite_data_schema.sql
@ -9,12 +9,15 @@
 --   Schema for importing GNSS field QA CSV logs generated by T-Beam units.
 --   A log file is recorded in table logs, and each CSV row is stored in
 --   table log_data with a foreign-key reference back to logs.
+--
+-- Notes:
+--   This revision adds support for the enhanced logger fields:
+--     * sample_seq
+--     * ms_since_run_start
+--   The importer can still load older files that do not contain those fields.

 BEGIN;

-- Optional convenience note:
-- CREATE DATABASE satellite_data;
-
 CREATE TABLE IF NOT EXISTS logs (
    log_id                      bigserial PRIMARY KEY,
    source_filename             text NOT NULL,
@ -39,71 +42,57 @@ CREATE TABLE IF NOT EXISTS logs (
    sample_count                integer NOT NULL DEFAULT 0,
    satellite_count             integer NOT NULL DEFAULT 0,

-    CONSTRAINT logs_source_filename_ck CHECK (btrim(source_filename) <> ''),
-    CONSTRAINT logs_csv_header_line_ck CHECK (btrim(csv_header_line) <> ''),
-    CONSTRAINT logs_row_count_ck CHECK (row_count >= 0),
-    CONSTRAINT logs_sample_count_ck CHECK (sample_count >= 0),
-    CONSTRAINT logs_satellite_count_ck CHECK (satellite_count >= 0)
+    CONSTRAINT logs_source_filename_key UNIQUE (source_filename, file_sha256)
 );

 COMMENT ON TABLE logs IS
-'One row per imported CSV file. Stores file-level provenance, importer notes, raw hash-prefixed header text, the effective CSV column header line, and summary counts for the import.';
+'One row per imported CSV log file. Stores file provenance, hash, raw hash-prefixed header text, CSV header line, and summary counts.';

 COMMENT ON COLUMN logs.log_id IS
-'Surrogate primary key for one imported log file.';
+'Primary key for this imported file.';
 COMMENT ON COLUMN logs.source_filename IS
-'Base filename of the imported CSV file, such as 20260406_175441_GUY.csv.';
+'Filename of the imported CSV file, e.g. 20260406_175441_GUY.csv.';
 COMMENT ON COLUMN logs.source_path IS
-'Full or relative filesystem path used at import time.';
+'Path used at import time. Useful for provenance when files are staged from different directories.';
 COMMENT ON COLUMN logs.file_sha256 IS
-'SHA-256 digest of the file contents for provenance and duplicate detection.';
+'SHA-256 digest of the source file. Helps prevent duplicate imports and supports provenance audits.';
 COMMENT ON COLUMN logs.file_size_bytes IS
-'File size in bytes at import time.';
+'Size of the source file in bytes.';
 COMMENT ON COLUMN logs.raw_header_text IS
-'All leading lines in the source file whose first character is #. This is the free-form metadata header preserved exactly as found.';
+'All leading hash-prefixed lines from the file, preserved verbatim as a text block.';
 COMMENT ON COLUMN logs.csv_header_line IS
-'The effective CSV column header line used for import. This may come from the file itself or from the importer''s expected header when the file has no explicit header row.';
+'Effective CSV column header line used by the importer, either from the file or from importer fallback logic.';
 COMMENT ON COLUMN logs.imported_at IS
-'UTC timestamp when the file was imported into PostgreSQL.';
+'Timestamp when the file was imported into PostgreSQL.';
 COMMENT ON COLUMN logs.import_notes IS
-'Optional notes about the import, schema assumptions, or anomalies observed during ingestion.';
+'Optional free-form notes supplied at import time.';
 COMMENT ON COLUMN logs.board_id IS
-'Identifier of the T-Beam or other logger board, for example GUY, AMY, or CY, copied from the data rows when available.';
+'Board identifier observed in the file, e.g. GUY, AMY, or CY.';
 COMMENT ON COLUMN logs.gnss_chip IS
-'GNSS receiver model reported by the firmware, such as MAX-M10S or L76K.';
+'GNSS receiver chip or module name, e.g. MAX-M10S or L76K.';
 COMMENT ON COLUMN logs.firmware_exercise_name IS
-'Firmware exercise or program name that generated the log, useful for tracking logger behavior across exercises.';
+'Firmware exercise or logger program name that produced the file.';
 COMMENT ON COLUMN logs.firmware_version IS
-'Firmware version string or build identifier reported by the logger.';
+'Firmware build or version string written by the device.';
 COMMENT ON COLUMN logs.boot_timestamp_utc IS
-'UTC timestamp that the device believed it booted. Useful for relating run timing back to power-up timing.';
+'UTC timestamp representing when the device booted, as reported by the firmware.';
 COMMENT ON COLUMN logs.run_id IS
-'Run/session identifier generated by the firmware for this data collection session.';
+'Run identifier shared by all rows from one logger session.';
 COMMENT ON COLUMN logs.first_timestamp_utc IS
-'Earliest timestamp_utc found in the imported data rows.';
+'First UTC sample timestamp found in this file.';
 COMMENT ON COLUMN logs.last_timestamp_utc IS
-'Latest timestamp_utc found in the imported data rows.';
+'Last UTC sample timestamp found in this file.';
 COMMENT ON COLUMN logs.row_count IS
-'Total number of imported data rows for this file, including both sample and satellite rows.';
+'Total number of imported CSV data rows in the file.';
 COMMENT ON COLUMN logs.sample_count IS
-'Count of rows whose record_type is sample.';
+'Number of imported rows where record_type = sample.';
 COMMENT ON COLUMN logs.satellite_count IS
-'Count of rows whose record_type is satellite.';
-
-CREATE UNIQUE INDEX IF NOT EXISTS logs_source_sha256_uq
-    ON logs (file_sha256)
-    WHERE file_sha256 IS NOT NULL;
-
-CREATE INDEX IF NOT EXISTS logs_run_id_idx
-    ON logs (run_id);
-
-CREATE INDEX IF NOT EXISTS logs_board_id_idx
-    ON logs (board_id);
+'Number of imported rows where record_type = satellite.';

 CREATE TABLE IF NOT EXISTS log_data (
    log_data_id                  bigserial PRIMARY KEY,
    log_id                       bigint NOT NULL REFERENCES logs(log_id) ON DELETE CASCADE,
-    row_num                      integer NOT NULL,
+    source_row_number            integer NOT NULL,

    record_type                  text NOT NULL,
    timestamp_utc                timestamptz,
@ -113,20 +102,22 @@ CREATE TABLE IF NOT EXISTS log_data (
    firmware_version             text,
    boot_timestamp_utc           timestamptz,
    run_id                       text,
+    sample_seq                   bigint,
+    ms_since_run_start           bigint,

    fix_type                     text,
-    fix_dimension                smallint,
+    fix_dimension                integer,
    sats_in_view                 integer,
    sat_seen                     integer,
    sats_used                    integer,
    hdop                         numeric(8,3),
    vdop                         numeric(8,3),
    pdop                         numeric(8,3),
-    latitude                     numeric(11,8),
-    longitude                    numeric(11,8),
-    altitude_m                   numeric(10,2),
-    speed_mps                    numeric(10,3),
-    course_deg                   numeric(7,3),
+    latitude                     double precision,
+    longitude                    double precision,
+    altitude_m                   numeric(12,3),
+    speed_mps                    numeric(12,3),
+    course_deg                   numeric(12,3),
    pps_seen                     boolean,
    quality_class                text,

@ -140,152 +131,138 @@ CREATE TABLE IF NOT EXISTS log_data (

    mean_cn0                     numeric(8,3),
    max_cn0                      numeric(8,3),
-    age_of_fix_ms                integer,
-    ttff_ms                      integer,
-    longest_no_fix_ms            integer,
+    age_of_fix_ms                bigint,
+    ttff_ms                      bigint,
+    longest_no_fix_ms            bigint,

    sat_talker                   text,
    sat_constellation            text,
    sat_prn                      integer,
-    sat_elevation_deg            integer,
-    sat_azimuth_deg              integer,
+    sat_elevation_deg            numeric(8,3),
+    sat_azimuth_deg              numeric(8,3),
    sat_snr                      numeric(8,3),
    sat_used_in_solution         boolean,

-    CONSTRAINT log_data_log_id_row_num_uq UNIQUE (log_id, row_num),
-    CONSTRAINT log_data_row_num_ck CHECK (row_num >= 1),
-    CONSTRAINT log_data_record_type_ck CHECK (record_type IN ('sample', 'satellite')),
-    CONSTRAINT log_data_fix_dimension_ck CHECK (fix_dimension IS NULL OR fix_dimension BETWEEN 0 AND 9),
-    CONSTRAINT log_data_latitude_ck CHECK (latitude IS NULL OR latitude BETWEEN -90 AND 90),
-    CONSTRAINT log_data_longitude_ck CHECK (longitude IS NULL OR longitude BETWEEN -180 AND 180),
-    CONSTRAINT log_data_course_deg_ck CHECK (course_deg IS NULL OR course_deg >= 0 AND course_deg < 360),
-    CONSTRAINT log_data_sat_elevation_deg_ck CHECK (sat_elevation_deg IS NULL OR sat_elevation_deg BETWEEN 0 AND 90),
-    CONSTRAINT log_data_sat_azimuth_deg_ck CHECK (sat_azimuth_deg IS NULL OR sat_azimuth_deg BETWEEN 0 AND 359),
-    CONSTRAINT log_data_sat_counts_nonnegative_ck CHECK (
-        coalesce(sats_in_view, 0) >= 0 AND
-        coalesce(sat_seen, 0) >= 0 AND
-        coalesce(sats_used, 0) >= 0 AND
-        coalesce(gps_count, 0) >= 0 AND
-        coalesce(galileo_count, 0) >= 0 AND
-        coalesce(glonass_count, 0) >= 0 AND
-        coalesce(beidou_count, 0) >= 0 AND
-        coalesce(navic_count, 0) >= 0 AND
-        coalesce(qzss_count, 0) >= 0 AND
-        coalesce(sbas_count, 0) >= 0
-    ),
-    CONSTRAINT log_data_ms_nonnegative_ck CHECK (
-        coalesce(age_of_fix_ms, 0) >= 0 AND
-        coalesce(ttff_ms, 0) >= 0 AND
-        coalesce(longest_no_fix_ms, 0) >= 0
-    )
+    raw_csv_line                 text,
+
+    CONSTRAINT log_data_record_type_chk CHECK (record_type IN ('sample', 'satellite'))
 );

 COMMENT ON TABLE log_data IS
-'One row per CSV data record. Stores both sample rows and per-satellite rows, preserving the file''s mixed row model in one typed table.';
+'One row per CSV data row from a GNSS logger file. Stores both sample rows and per-satellite rows.';

 COMMENT ON COLUMN log_data.log_data_id IS
-'Surrogate primary key for one imported data row.';
+'Primary key for one imported CSV data row.';
 COMMENT ON COLUMN log_data.log_id IS
-'Foreign key back to logs.log_id identifying which source file this row came from.';
-COMMENT ON COLUMN log_data.row_num IS
-'1-based row number within the imported data file, excluding comment lines and any header line.';
+'Foreign key to logs.log_id, linking this row back to the source file.';
+COMMENT ON COLUMN log_data.source_row_number IS
+'1-based row number within the CSV data section, excluding preserved hash-prefixed header lines.';
 COMMENT ON COLUMN log_data.record_type IS
-'Kind of row: sample for one epoch summary row, or satellite for one satellite observed at that epoch.';
+'Logical row type. sample = one epoch summary row. satellite = one satellite snapshot tied to a sample epoch.';
 COMMENT ON COLUMN log_data.timestamp_utc IS
-'UTC timestamp attached to the row by the GNSS logger.';
+'UTC time for the sample epoch, as reported by the GNSS receiver.';
 COMMENT ON COLUMN log_data.board_id IS
-'Identifier of the data-collecting board, such as GUY, AMY, or CY.';
+'Board identifier such as GUY, AMY, or CY.';
 COMMENT ON COLUMN log_data.gnss_chip IS
-'GNSS receiver model, such as MAX-M10S or L76K.';
+'GNSS module name, for example MAX-M10S or L76K.';
 COMMENT ON COLUMN log_data.firmware_exercise_name IS
-'Firmware exercise/program name that emitted the row.';
+'Name of the firmware exercise or logger mode that generated the row.';
 COMMENT ON COLUMN log_data.firmware_version IS
-'Firmware version/build identifier used when the row was logged.';
+'Firmware build or version string embedded in the row.';
 COMMENT ON COLUMN log_data.boot_timestamp_utc IS
-'Device boot timestamp in UTC as reported by the logger.';
+'UTC timestamp representing when the device booted, according to firmware.';
 COMMENT ON COLUMN log_data.run_id IS
-'Run/session identifier assigned by the firmware.';
+'Run identifier shared across one logger session.';
+COMMENT ON COLUMN log_data.sample_seq IS
+'Sequential sample number within a run. Starts at 1 when a new log begins. Satellite rows inherit the parent sample sequence value.';
+COMMENT ON COLUMN log_data.ms_since_run_start IS
+'Monotonic milliseconds elapsed since the log file was opened. Useful for jitter, gap, and SD-write-impact analysis.';
 COMMENT ON COLUMN log_data.fix_type IS
-'GNSS fix classification such as NO_FIX, 2D, 3D, DGPS, or similar receiver-reported state.';
+'Fix quality label such as NO_FIX, 2D, 3D, or DGPS depending on what the firmware emits.';
 COMMENT ON COLUMN log_data.fix_dimension IS
-'Numeric dimensionality of the solution. Typical values are 1 for no fix, 2 for horizontal-only, and 3 for full 3D.';
+'Numeric dimension of the position fix, typically 0, 2, or 3.';
 COMMENT ON COLUMN log_data.sats_in_view IS
-'Receiver-reported total satellites in view at the epoch, if provided by the firmware.';
+'Count of satellites reportedly in view at this epoch according to the receiver summary.';
 COMMENT ON COLUMN log_data.sat_seen IS
-'Count of satellites actually emitted as satellite rows or otherwise counted by the logger at the epoch. May differ from sats_in_view depending on receiver API behavior.';
+'Count of satellites actually observed or emitted by the logger for this epoch. This may differ from sats_in_view depending on firmware logic.';
 COMMENT ON COLUMN log_data.sats_used IS
-'Number of satellites used by the receiver in the navigation solution at the epoch.';
+'Count of satellites used in the navigation solution at this epoch.';
 COMMENT ON COLUMN log_data.hdop IS
-'Horizontal Dilution of Precision. Lower values indicate better horizontal geometry.';
+'Horizontal Dilution of Precision. Lower values generally indicate better horizontal geometry.';
 COMMENT ON COLUMN log_data.vdop IS
-'Vertical Dilution of Precision. Lower values indicate better vertical geometry.';
+'Vertical Dilution of Precision. Lower values generally indicate better vertical geometry.';
 COMMENT ON COLUMN log_data.pdop IS
-'Position Dilution of Precision, a combined geometry indicator for 3D positioning.';
+'Position Dilution of Precision. Combined geometry indicator for the position solution.';
 COMMENT ON COLUMN log_data.latitude IS
-'Latitude in decimal degrees referenced to the receiver''s current navigation solution.';
+'Latitude in decimal degrees.';
 COMMENT ON COLUMN log_data.longitude IS
-'Longitude in decimal degrees referenced to the receiver''s current navigation solution.';
+'Longitude in decimal degrees.';
 COMMENT ON COLUMN log_data.altitude_m IS
-'Altitude in meters as reported by the GNSS receiver.';
+'Altitude in meters, generally above mean sea level according to receiver output.';
 COMMENT ON COLUMN log_data.speed_mps IS
-'Ground speed in meters per second.';
+'Receiver-reported speed over ground in meters per second.';
 COMMENT ON COLUMN log_data.course_deg IS
-'Course over ground in degrees clockwise from true north.';
+'Receiver-reported course over ground in degrees clockwise from true north.';
 COMMENT ON COLUMN log_data.pps_seen IS
-'Boolean indicator that a one-pulse-per-second timing event was observed for the epoch.';
+'Boolean indicating whether a PPS pulse was seen by the firmware during this epoch.';
 COMMENT ON COLUMN log_data.quality_class IS
-'Firmware-defined coarse quality label such as POOR, FAIR, GOOD, or similar.';
+'Human-friendly firmware quality label such as POOR, FAIR, GOOD, or similar.';
 COMMENT ON COLUMN log_data.gps_count IS
-'Number of GPS satellites counted at the epoch.';
+'Count of GPS satellites observed at this epoch.';
 COMMENT ON COLUMN log_data.galileo_count IS
-'Number of Galileo satellites counted at the epoch.';
+'Count of Galileo satellites observed at this epoch.';
 COMMENT ON COLUMN log_data.glonass_count IS
-'Number of GLONASS satellites counted at the epoch.';
+'Count of GLONASS satellites observed at this epoch.';
 COMMENT ON COLUMN log_data.beidou_count IS
-'Number of BeiDou satellites counted at the epoch.';
+'Count of BeiDou satellites observed at this epoch.';
 COMMENT ON COLUMN log_data.navic_count IS
-'Number of NavIC/IRNSS satellites counted at the epoch.';
+'Count of NavIC satellites observed at this epoch.';
 COMMENT ON COLUMN log_data.qzss_count IS
-'Number of QZSS satellites counted at the epoch.';
+'Count of QZSS satellites observed at this epoch.';
 COMMENT ON COLUMN log_data.sbas_count IS
-'Number of SBAS satellites counted at the epoch.';
+'Count of SBAS satellites observed at this epoch.';
 COMMENT ON COLUMN log_data.mean_cn0 IS
-'Mean carrier-to-noise-density ratio, typically in dB-Hz, across the satellites considered by the firmware at the epoch.';
+'Mean carrier-to-noise-density ratio (C/N0), typically in dB-Hz, across observed satellites. Higher values generally indicate stronger signals.';
 COMMENT ON COLUMN log_data.max_cn0 IS
-'Maximum carrier-to-noise-density ratio, typically in dB-Hz, seen at the epoch.';
+'Maximum carrier-to-noise-density ratio (C/N0), typically in dB-Hz, among observed satellites for this epoch.';
 COMMENT ON COLUMN log_data.age_of_fix_ms IS
-'Age in milliseconds of the current fix solution when logged.';
+'Age of the current fix in milliseconds, as reported by the firmware or receiver API.';
 COMMENT ON COLUMN log_data.ttff_ms IS
-'Time To First Fix in milliseconds for the run or receiver state being reported.';
+'Time to first fix in milliseconds for the run or current acquisition state.';
 COMMENT ON COLUMN log_data.longest_no_fix_ms IS
-'Longest interval in milliseconds spent without a usable fix during the run so far.';
+'Longest contiguous no-fix interval observed so far in the run, in milliseconds.';
 COMMENT ON COLUMN log_data.sat_talker IS
-'Talker or source prefix for the satellite row, for example GP, GL, GA, GB, or similar receiver-provided code.';
+'Talker or source prefix associated with the satellite row, such as GP, GA, GL, GB, or GN.';
 COMMENT ON COLUMN log_data.sat_constellation IS
-'Human-readable constellation name for the satellite row, such as GPS, GALILEO, GLONASS, or BEIDOU.';
+'Constellation name for the satellite row, such as GPS, Galileo, GLONASS, BeiDou, NavIC, QZSS, or SBAS.';
 COMMENT ON COLUMN log_data.sat_prn IS
 'PRN or SVID number identifying the satellite within its constellation.';
 COMMENT ON COLUMN log_data.sat_elevation_deg IS
-'Satellite elevation angle in degrees above the horizon.';
+'Satellite elevation angle above the horizon in degrees.';
 COMMENT ON COLUMN log_data.sat_azimuth_deg IS
-'Satellite azimuth angle in degrees clockwise from north.';
+'Satellite azimuth in degrees clockwise from true north, according to receiver output.';
 COMMENT ON COLUMN log_data.sat_snr IS
-'Satellite signal-to-noise or similar quality metric as reported by the firmware, commonly in dB-Hz.';
+'Signal-to-noise style quality measure for this satellite row, commonly in dB-Hz. Depending on firmware, this may be SNR or CN0-like output.';
 COMMENT ON COLUMN log_data.sat_used_in_solution IS
-'Boolean indicator that this satellite was used in the navigation solution for the epoch.';
+'Boolean indicating whether this specific satellite was used in the navigation solution.';
+COMMENT ON COLUMN log_data.raw_csv_line IS
+'Original CSV line preserved verbatim for audit and recovery purposes.';

-CREATE INDEX IF NOT EXISTS log_data_log_id_timestamp_idx
-    ON log_data (log_id, timestamp_utc);
+CREATE UNIQUE INDEX IF NOT EXISTS log_data_log_id_source_row_number_uq
+    ON log_data(log_id, source_row_number);  -- at most one row per source line per log; the leading log_id column still serves per-log lookups

 CREATE INDEX IF NOT EXISTS log_data_run_id_idx
    ON log_data(run_id);

-CREATE INDEX IF NOT EXISTS log_data_board_id_idx
-    ON log_data (board_id);
+CREATE INDEX IF NOT EXISTS log_data_timestamp_idx
+    ON log_data(timestamp_utc);  -- timestamp-only index: usable for time-range filters that do not constrain log_id

 CREATE INDEX IF NOT EXISTS log_data_record_type_idx
    ON log_data(record_type);

+CREATE INDEX IF NOT EXISTS log_data_board_run_seq_idx
+    ON log_data(board_id, run_id, sample_seq, record_type);  -- presumably for pairing a sample row with its satellite rows (same board/run/sample_seq) -- TODO confirm against actual queries
+
 CREATE INDEX IF NOT EXISTS log_data_satellite_lookup_idx
    ON log_data(sat_constellation, sat_prn, timestamp_utc)
    WHERE record_type = 'satellite';