Revised database to include 2 additional fields (sample_seq, ms_since_run_start) suggested by ChatGPT
This commit is contained in:
parent
32ad481fcf
commit
e3f6527274
2 changed files with 323 additions and 329 deletions
|
|
@ -13,8 +13,10 @@
|
|||
# Notes:
|
||||
# * Imports one or more CSV files into tables logs and log_data.
|
||||
# * Preserves all leading hash-prefixed header lines in logs.raw_header_text.
|
||||
# * Uses the file's own CSV header row when present; otherwise falls back to
|
||||
# the expected project header defined in this script.
|
||||
# * Uses the file's own CSV header row when present.
|
||||
# * When no CSV header row is present and --header-line is not given, the header is inferred from the field count of the first data row:
|
||||
# - legacy schema without sample_seq/ms_since_run_start
|
||||
# - enhanced schema with sample_seq/ms_since_run_start
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
|
@ -26,7 +28,7 @@ use File::Basename qw(basename);
|
|||
use Getopt::Long qw(GetOptions);
|
||||
use Text::CSV_XS;
|
||||
|
||||
my $DEFAULT_HEADER = join ',', qw(
|
||||
my @LEGACY_COLUMNS = qw(
|
||||
record_type timestamp_utc board_id gnss_chip firmware_exercise_name firmware_version
|
||||
boot_timestamp_utc run_id fix_type fix_dimension sats_in_view sat_seen sats_used
|
||||
hdop vdop pdop latitude longitude altitude_m speed_mps course_deg pps_seen
|
||||
|
|
@ -36,6 +38,19 @@ sat_talker sat_constellation sat_prn sat_elevation_deg sat_azimuth_deg sat_snr
|
|||
sat_used_in_solution
|
||||
);
|
||||
|
||||
my @ENHANCED_COLUMNS = qw(
|
||||
record_type timestamp_utc board_id gnss_chip firmware_exercise_name firmware_version
|
||||
boot_timestamp_utc run_id sample_seq ms_since_run_start fix_type fix_dimension
|
||||
sats_in_view sat_seen sats_used hdop vdop pdop latitude longitude altitude_m
|
||||
speed_mps course_deg pps_seen quality_class gps_count galileo_count glonass_count
|
||||
beidou_count navic_count qzss_count sbas_count mean_cn0 max_cn0 age_of_fix_ms
|
||||
ttff_ms longest_no_fix_ms sat_talker sat_constellation sat_prn sat_elevation_deg
|
||||
sat_azimuth_deg sat_snr sat_used_in_solution
|
||||
);
|
||||
|
||||
my $LEGACY_HEADER = join ',', @LEGACY_COLUMNS;
|
||||
my $ENHANCED_HEADER = join ',', @ENHANCED_COLUMNS;
|
||||
|
||||
my %opt = (
|
||||
dbname => 'satellite_data',
|
||||
host => 'localhost',
|
||||
|
|
@ -60,20 +75,20 @@ if ($opt{help} || !@ARGV) {
|
|||
exit 0;
|
||||
}
|
||||
|
||||
my $dsn = sprintf 'dbi:Pg:dbname=%s;host=%s;port=%d',
|
||||
$opt{dbname}, $opt{host}, $opt{port};
|
||||
my $dsn = sprintf('dbi:Pg:dbname=%s;host=%s;port=%d', $opt{dbname}, $opt{host}, $opt{port});
|
||||
my $dbh = DBI->connect(
|
||||
$dsn,
|
||||
$opt{user},
|
||||
$opt{password},
|
||||
{
|
||||
RaiseError => 1,
|
||||
AutoCommit => 1,
|
||||
PrintError => 0,
|
||||
pg_enable_utf8 => 1,
|
||||
}
|
||||
) or die DBI->errstr;
|
||||
|
||||
my %dbh_attr = (
|
||||
RaiseError => 1,
|
||||
AutoCommit => 1,
|
||||
PrintError => 0,
|
||||
pg_enable_utf8 => 1,
|
||||
);
|
||||
|
||||
my $dbh = DBI->connect($dsn, $opt{user}, $opt{password}, \%dbh_attr)
|
||||
or die "Unable to connect to PostgreSQL\n";
|
||||
|
||||
$dbh->do(sprintf 'SET search_path TO %s', $dbh->quote_identifier($opt{schema}));
|
||||
$dbh->do("SET search_path TO $opt{schema}");
|
||||
|
||||
for my $file (@ARGV) {
|
||||
import_file($dbh, $file, \%opt);
|
||||
|
|
@ -82,95 +97,89 @@ for my $file (@ARGV) {
|
|||
$dbh->disconnect;
|
||||
exit 0;
|
||||
|
||||
sub usage {
|
||||
return <<'USAGE';
|
||||
Usage:
|
||||
perl import_satellite_logs.pl [options] file1.csv [file2.csv ...]
|
||||
|
||||
Options:
|
||||
--dbname NAME PostgreSQL database name. Default: satellite_data
|
||||
--host HOST PostgreSQL host. Default: localhost
|
||||
--port PORT PostgreSQL port. Default: 5432
|
||||
--user USER PostgreSQL user name
|
||||
--password PASS PostgreSQL password
|
||||
--schema NAME Target schema. Default: public
|
||||
--header-line TEXT Override the expected CSV header line when file lacks one
|
||||
--notes TEXT Import notes stored in logs.import_notes
|
||||
--help Show this help text
|
||||
USAGE
|
||||
}
|
||||
|
||||
sub import_file {
|
||||
my ($dbh, $file, $opt) = @_;
|
||||
|
||||
open my $fh, '<:encoding(UTF-8)', $file
|
||||
or die "Unable to open $file: $!\n";
|
||||
|
||||
my $file_text = do { local $/; <$fh> };
|
||||
open my $fh, '<:raw', $file or die "Cannot open $file: $!\n";
|
||||
local $/;
|
||||
my $blob = <$fh>;
|
||||
close $fh;
|
||||
|
||||
my $sha256 = sha256_hex($file_text);
|
||||
my $file_size_bytes = length $file_text;
|
||||
my $sha256 = sha256_hex($blob // '');
|
||||
my $file_size = -s $file;
|
||||
|
||||
my @lines = split /\n/, $file_text, -1;
|
||||
my @comment_lines;
|
||||
my $header_line;
|
||||
open my $in, '<:encoding(UTF-8)', $file or die "Cannot open $file: $!\n";
|
||||
|
||||
my @header_lines;
|
||||
my $csv_header_line;
|
||||
my @data_lines;
|
||||
my $saw_header = 0;
|
||||
|
||||
while (@lines) {
|
||||
my $line = shift @lines;
|
||||
next if !defined $line;
|
||||
while (my $line = <$in>) {
|
||||
chomp $line;
|
||||
$line =~ s/\r\z//;
|
||||
|
||||
next if $line =~ /^\s*$/ && !@data_lines && !defined $csv_header_line && !@header_lines;
|
||||
|
||||
if ($line =~ /^#/) {
|
||||
push @comment_lines, $line;
|
||||
push @header_lines, $line;
|
||||
next;
|
||||
}
|
||||
|
||||
if ($line =~ /^\s*$/ && !@data_lines && !$saw_header) {
|
||||
next;
|
||||
}
|
||||
|
||||
if (!$saw_header && $line =~ /^record_type,/) {
|
||||
$header_line = $line;
|
||||
$saw_header = 1;
|
||||
if (!defined $csv_header_line && $line =~ /^record_type,/) {
|
||||
$csv_header_line = $line;
|
||||
next;
|
||||
}
|
||||
|
||||
push @data_lines, $line;
|
||||
push @data_lines, @lines;
|
||||
last;
|
||||
}
|
||||
close $in;
|
||||
|
||||
die "No CSV data rows found in $file\n" if !@data_lines;
|
||||
|
||||
if (!defined $csv_header_line) {
|
||||
if (defined $opt->{header_line}) {
|
||||
$csv_header_line = $opt->{header_line};
|
||||
}
|
||||
else {
|
||||
my $count = count_csv_fields($data_lines[0]);
|
||||
if ($count == scalar(@ENHANCED_COLUMNS)) {
|
||||
$csv_header_line = $ENHANCED_HEADER;
|
||||
}
|
||||
elsif ($count == scalar(@LEGACY_COLUMNS)) {
|
||||
$csv_header_line = $LEGACY_HEADER;
|
||||
}
|
||||
else {
|
||||
die sprintf(
|
||||
"Unable to infer header for %s: first data row has %d fields, expected %d (legacy) or %d (enhanced).\n",
|
||||
$file,
|
||||
$count,
|
||||
scalar(@LEGACY_COLUMNS),
|
||||
scalar(@ENHANCED_COLUMNS),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@data_lines = grep { defined $_ && $_ !~ /^\s*$/ } @data_lines;
|
||||
my @columns = parse_header_columns($csv_header_line);
|
||||
|
||||
$header_line ||= $opt->{header_line} || $DEFAULT_HEADER;
|
||||
my %allowed = map { $_ => 1 } qw(
|
||||
record_type timestamp_utc board_id gnss_chip firmware_exercise_name firmware_version
|
||||
boot_timestamp_utc run_id sample_seq ms_since_run_start fix_type fix_dimension
|
||||
sats_in_view sat_seen sats_used hdop vdop pdop latitude longitude altitude_m
|
||||
speed_mps course_deg pps_seen quality_class gps_count galileo_count glonass_count
|
||||
beidou_count navic_count qzss_count sbas_count mean_cn0 max_cn0 age_of_fix_ms
|
||||
ttff_ms longest_no_fix_ms sat_talker sat_constellation sat_prn sat_elevation_deg
|
||||
sat_azimuth_deg sat_snr sat_used_in_solution
|
||||
);
|
||||
|
||||
my $raw_header_text = @comment_lines ? join("\n", @comment_lines) . "\n" : undef;
|
||||
|
||||
my $csv = Text::CSV_XS->new({
|
||||
binary => 1,
|
||||
auto_diag => 1,
|
||||
allow_loose_quotes => 1,
|
||||
allow_loose_escapes => 1,
|
||||
});
|
||||
|
||||
$csv->parse($header_line);
|
||||
my @header = $csv->fields;
|
||||
|
||||
my %idx;
|
||||
for my $i (0 .. $#header) {
|
||||
$idx{$header[$i]} = $i;
|
||||
for my $col (@columns) {
|
||||
die "Unexpected column '$col' in $file\n" if !$allowed{$col};
|
||||
}
|
||||
|
||||
my @required = qw(record_type timestamp_utc board_id gnss_chip run_id);
|
||||
for my $name (@required) {
|
||||
die "Header is missing required column: $name\n" if !exists $idx{$name};
|
||||
}
|
||||
my $raw_header_text = join("\n", @header_lines);
|
||||
$raw_header_text .= "\n" if length $raw_header_text;
|
||||
|
||||
$dbh->begin_work;
|
||||
|
||||
my $log_insert_sql = <<'SQL';
|
||||
my $insert_log_sql = <<'SQL';
|
||||
INSERT INTO logs (
|
||||
source_filename,
|
||||
source_path,
|
||||
|
|
@ -183,115 +192,89 @@ INSERT INTO logs (
|
|||
RETURNING log_id
|
||||
SQL
|
||||
|
||||
my $log_sth = $dbh->prepare($log_insert_sql);
|
||||
$log_sth->execute(
|
||||
my ($log_id) = $dbh->selectrow_array(
|
||||
$insert_log_sql,
|
||||
undef,
|
||||
basename($file),
|
||||
$file,
|
||||
$sha256,
|
||||
$file_size_bytes,
|
||||
$file_size,
|
||||
$raw_header_text,
|
||||
$header_line,
|
||||
$csv_header_line,
|
||||
$opt->{import_notes},
|
||||
);
|
||||
my ($log_id) = $log_sth->fetchrow_array;
|
||||
|
||||
my $data_insert_sql = <<'SQL';
|
||||
INSERT INTO log_data (
|
||||
log_id, row_num, record_type, timestamp_utc, board_id, gnss_chip,
|
||||
firmware_exercise_name, firmware_version, boot_timestamp_utc, run_id,
|
||||
fix_type, fix_dimension, sats_in_view, sat_seen, sats_used,
|
||||
hdop, vdop, pdop, latitude, longitude, altitude_m, speed_mps, course_deg,
|
||||
pps_seen, quality_class, gps_count, galileo_count, glonass_count,
|
||||
beidou_count, navic_count, qzss_count, sbas_count, mean_cn0, max_cn0,
|
||||
age_of_fix_ms, ttff_ms, longest_no_fix_ms, sat_talker, sat_constellation,
|
||||
sat_prn, sat_elevation_deg, sat_azimuth_deg, sat_snr, sat_used_in_solution
|
||||
) VALUES (
|
||||
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
|
||||
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
|
||||
)
|
||||
SQL
|
||||
my @insert_columns = (
|
||||
'log_id', 'source_row_number', @columns, 'raw_csv_line'
|
||||
);
|
||||
|
||||
my $data_sth = $dbh->prepare($data_insert_sql);
|
||||
my $placeholders = join ', ', ('?') x @insert_columns;
|
||||
my $insert_sql = sprintf(
|
||||
'INSERT INTO log_data (%s) VALUES (%s)',
|
||||
join(', ', @insert_columns),
|
||||
$placeholders,
|
||||
);
|
||||
|
||||
my $csv = Text::CSV_XS->new({
|
||||
binary => 1,
|
||||
auto_diag => 1,
|
||||
allow_loose_quotes => 1,
|
||||
allow_loose_escapes => 1,
|
||||
blank_is_undef => 0,
|
||||
});
|
||||
|
||||
my $sth = $dbh->prepare($insert_sql);
|
||||
|
||||
my ($row_count, $sample_count, $satellite_count) = (0, 0, 0);
|
||||
my ($first_ts, $last_ts);
|
||||
my ($board_id, $gnss_chip, $firmware_exercise_name, $firmware_version, $boot_ts, $run_id);
|
||||
my ($first_ts, $last_ts, $board_id, $gnss_chip, $fw_name, $fw_ver, $boot_ts, $run_id);
|
||||
|
||||
ROW:
|
||||
for my $line (@data_lines) {
|
||||
next ROW if $line =~ /^\s*$/;
|
||||
$dbh->begin_work;
|
||||
|
||||
$csv->parse($line);
|
||||
my @f = $csv->fields;
|
||||
for my $i (0 .. $#data_lines) {
|
||||
my $line = $data_lines[$i];
|
||||
next if $line =~ /^\s*$/;
|
||||
|
||||
my %row;
|
||||
for my $name (@header) {
|
||||
my $value = $f[$idx{$name}];
|
||||
$row{$name} = normalize_value($value);
|
||||
$csv->parse($line) or die "CSV parse failed in $file line @{[$i+1]}: " . $csv->error_diag . "\n";
|
||||
my @fields = $csv->fields;
|
||||
|
||||
if (@fields != @columns) {
|
||||
die sprintf(
|
||||
"Column mismatch in %s data row %d: got %d fields, expected %d\nLine: %s\n",
|
||||
$file, $i + 1, scalar(@fields), scalar(@columns), $line
|
||||
);
|
||||
}
|
||||
|
||||
++$row_count;
|
||||
++$sample_count if defined $row{record_type} && $row{record_type} eq 'sample';
|
||||
++$satellite_count if defined $row{record_type} && $row{record_type} eq 'satellite';
|
||||
my %row;
|
||||
@row{@columns} = @fields;
|
||||
|
||||
normalize_row(\%row);
|
||||
|
||||
my $record_type = $row{record_type} // '';
|
||||
$sample_count++ if $record_type eq 'sample';
|
||||
$satellite_count++ if $record_type eq 'satellite';
|
||||
$row_count++;
|
||||
|
||||
$first_ts //= $row{timestamp_utc};
|
||||
$last_ts = $row{timestamp_utc} if defined $row{timestamp_utc};
|
||||
$last_ts = $row{timestamp_utc} if defined $row{timestamp_utc};
|
||||
$board_id //= $row{board_id};
|
||||
$gnss_chip //= $row{gnss_chip};
|
||||
$fw_name //= $row{firmware_exercise_name};
|
||||
$fw_ver //= $row{firmware_version};
|
||||
$boot_ts //= $row{boot_timestamp_utc};
|
||||
$run_id //= $row{run_id};
|
||||
|
||||
$board_id //= $row{board_id};
|
||||
$gnss_chip //= $row{gnss_chip};
|
||||
$firmware_exercise_name //= $row{firmware_exercise_name};
|
||||
$firmware_version //= $row{firmware_version};
|
||||
$boot_ts //= $row{boot_timestamp_utc};
|
||||
$run_id //= $row{run_id};
|
||||
|
||||
$data_sth->execute(
|
||||
my @values = (
|
||||
$log_id,
|
||||
$row_count,
|
||||
$row{record_type},
|
||||
$row{timestamp_utc},
|
||||
$row{board_id},
|
||||
$row{gnss_chip},
|
||||
$row{firmware_exercise_name},
|
||||
$row{firmware_version},
|
||||
$row{boot_timestamp_utc},
|
||||
$row{run_id},
|
||||
$row{fix_type},
|
||||
to_int($row{fix_dimension}),
|
||||
to_int($row{sats_in_view}),
|
||||
to_int($row{sat_seen}),
|
||||
to_int($row{sats_used}),
|
||||
to_num($row{hdop}),
|
||||
to_num($row{vdop}),
|
||||
to_num($row{pdop}),
|
||||
to_num($row{latitude}),
|
||||
to_num($row{longitude}),
|
||||
to_num($row{altitude_m}),
|
||||
to_num($row{speed_mps}),
|
||||
to_num($row{course_deg}),
|
||||
to_bool($row{pps_seen}),
|
||||
$row{quality_class},
|
||||
to_int($row{gps_count}),
|
||||
to_int($row{galileo_count}),
|
||||
to_int($row{glonass_count}),
|
||||
to_int($row{beidou_count}),
|
||||
to_int($row{navic_count}),
|
||||
to_int($row{qzss_count}),
|
||||
to_int($row{sbas_count}),
|
||||
to_num($row{mean_cn0}),
|
||||
to_num($row{max_cn0}),
|
||||
to_int($row{age_of_fix_ms}),
|
||||
to_int($row{ttff_ms}),
|
||||
to_int($row{longest_no_fix_ms}),
|
||||
$row{sat_talker},
|
||||
$row{sat_constellation},
|
||||
to_int($row{sat_prn}),
|
||||
to_int($row{sat_elevation_deg}),
|
||||
to_int($row{sat_azimuth_deg}),
|
||||
to_num($row{sat_snr}),
|
||||
to_bool($row{sat_used_in_solution}),
|
||||
$i + 1,
|
||||
(map { $row{$_} } @columns),
|
||||
$line,
|
||||
);
|
||||
|
||||
$sth->execute(@values);
|
||||
}
|
||||
|
||||
$dbh->commit;
|
||||
|
||||
my $update_sql = <<'SQL';
|
||||
UPDATE logs
|
||||
SET board_id = ?,
|
||||
|
|
@ -308,12 +291,13 @@ UPDATE logs
|
|||
WHERE log_id = ?
|
||||
SQL
|
||||
|
||||
my $update_sth = $dbh->prepare($update_sql);
|
||||
$update_sth->execute(
|
||||
$dbh->do(
|
||||
$update_sql,
|
||||
undef,
|
||||
$board_id,
|
||||
$gnss_chip,
|
||||
$firmware_exercise_name,
|
||||
$firmware_version,
|
||||
$fw_name,
|
||||
$fw_ver,
|
||||
$boot_ts,
|
||||
$run_id,
|
||||
$first_ts,
|
||||
|
|
@ -324,39 +308,72 @@ SQL
|
|||
$log_id,
|
||||
);
|
||||
|
||||
$dbh->commit;
|
||||
|
||||
print STDERR sprintf(
|
||||
"Imported %s => log_id=%d rows=%d samples=%d satellites=%d\n",
|
||||
$file, $log_id, $row_count, $sample_count, $satellite_count,
|
||||
);
|
||||
print "Imported $file => log_id=$log_id rows=$row_count samples=$sample_count satellites=$satellite_count\n";
|
||||
}
|
||||
|
||||
sub normalize_value {
|
||||
my ($value) = @_;
|
||||
return undef if !defined $value;
|
||||
$value =~ s/^\s+//;
|
||||
$value =~ s/\s+$//;
|
||||
return undef if $value eq '';
|
||||
return $value;
|
||||
sub parse_header_columns {
|
||||
my ($line) = @_;
|
||||
my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 1 });
|
||||
$csv->parse($line) or die "Cannot parse header line: " . $csv->error_diag . "\n";
|
||||
my @cols = $csv->fields;
|
||||
s/^\s+|\s+$//g for @cols;
|
||||
return @cols;
|
||||
}
|
||||
|
||||
sub to_int {
|
||||
my ($value) = @_;
|
||||
return undef if !defined $value;
|
||||
return int($value);
|
||||
sub count_csv_fields {
|
||||
my ($line) = @_;
|
||||
my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 1 });
|
||||
$csv->parse($line) or die "Cannot parse first data row while inferring header: " . $csv->error_diag . "\n";
|
||||
my @fields = $csv->fields;
|
||||
return scalar @fields;
|
||||
}
|
||||
|
||||
sub to_num {
|
||||
my ($value) = @_;
|
||||
return undef if !defined $value;
|
||||
return $value + 0;
|
||||
sub normalize_row {
|
||||
my ($row) = @_;
|
||||
|
||||
for my $key (keys %{$row}) {
|
||||
next if !defined $row->{$key};
|
||||
$row->{$key} =~ s/^\s+//;
|
||||
$row->{$key} =~ s/\s+$//;
|
||||
$row->{$key} = undef if $row->{$key} eq '';
|
||||
}
|
||||
|
||||
for my $bool_key (qw(pps_seen sat_used_in_solution)) {
|
||||
next if !exists $row->{$bool_key};
|
||||
next if !defined $row->{$bool_key};
|
||||
if ($row->{$bool_key} =~ /^(?:1|true|t|yes|y)$/i) {
|
||||
$row->{$bool_key} = 'true';
|
||||
}
|
||||
elsif ($row->{$bool_key} =~ /^(?:0|false|f|no|n)$/i) {
|
||||
$row->{$bool_key} = 'false';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub to_bool {
|
||||
my ($value) = @_;
|
||||
return undef if !defined $value;
|
||||
return 1 if $value =~ /^(?:1|true|t|yes|y)$/i;
|
||||
return 0 if $value =~ /^(?:0|false|f|no|n)$/i;
|
||||
return undef;
|
||||
sub usage {
|
||||
return <<'USAGE';
|
||||
Usage:
|
||||
perl import_satellite_logs.pl [options] file1.csv [file2.csv ...]
|
||||
|
||||
Options:
|
||||
--dbname NAME Database name. Default: satellite_data
|
||||
--host HOST PostgreSQL host. Default: localhost
|
||||
--port PORT PostgreSQL port. Default: 5432
|
||||
--user USER PostgreSQL user name
|
||||
--password PASS PostgreSQL password
|
||||
--schema NAME Schema name. Default: public
|
||||
--header-line LINE Override the CSV header line when the file lacks one
|
||||
--notes TEXT Optional import note stored in logs.import_notes
|
||||
--help Show this help
|
||||
|
||||
Examples:
|
||||
createdb satellite_data
|
||||
psql -d satellite_data -f satellite_data_schema.sql
|
||||
|
||||
perl import_satellite_logs.pl \
|
||||
--dbname satellite_data \
|
||||
--host localhost \
|
||||
--user jlpoole \
|
||||
/path/to/20260406_175441_GUY.csv
|
||||
USAGE
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue