Revised database schema to include two additional fields (sample_seq and ms_since_run_start) suggested by ChatGPT

This commit is contained in:
John Poole 2026-04-06 11:36:29 -07:00
commit e3f6527274
2 changed files with 323 additions and 329 deletions

View file

@ -9,12 +9,15 @@
-- Schema for importing GNSS field QA CSV logs generated by T-Beam units.
-- A log file is recorded in table logs, and each CSV row is stored in
-- table log_data with a foreign-key reference back to logs.
--
-- Notes:
-- This revision adds support for the enhanced logger fields:
-- * sample_seq
-- * ms_since_run_start
-- The importer can still load older files that do not contain those fields.
BEGIN;
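-- Optional convenience note: create the database separately before running
-- this script (PostgreSQL does not allow CREATE DATABASE inside a transaction block):
-- CREATE DATABASE satellite_data;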
CREATE TABLE IF NOT EXISTS logs (
log_id bigserial PRIMARY KEY,
source_filename text NOT NULL,
@ -39,71 +42,57 @@ CREATE TABLE IF NOT EXISTS logs (
sample_count integer NOT NULL DEFAULT 0,
satellite_count integer NOT NULL DEFAULT 0,
CONSTRAINT logs_source_filename_ck CHECK (btrim(source_filename) <> ''),
CONSTRAINT logs_csv_header_line_ck CHECK (btrim(csv_header_line) <> ''),
CONSTRAINT logs_row_count_ck CHECK (row_count >= 0),
CONSTRAINT logs_sample_count_ck CHECK (sample_count >= 0),
CONSTRAINT logs_satellite_count_ck CHECK (satellite_count >= 0)
CONSTRAINT logs_source_filename_key UNIQUE (source_filename, file_sha256)
);
COMMENT ON TABLE logs IS
'One row per imported CSV file. Stores file-level provenance, importer notes, raw hash-prefixed header text, the effective CSV column header line, and summary counts for the import.';
'One row per imported CSV log file. Stores file provenance, hash, raw hash-prefixed header text, CSV header line, and summary counts.';
COMMENT ON COLUMN logs.log_id IS
'Surrogate primary key for one imported log file.';
'Primary key for this imported file.';
COMMENT ON COLUMN logs.source_filename IS
'Base filename of the imported CSV file, such as 20260406_175441_GUY.csv.';
'Filename of the imported CSV file, e.g. 20260406_175441_GUY.csv.';
COMMENT ON COLUMN logs.source_path IS
'Full or relative filesystem path used at import time.';
'Path used at import time. Useful for provenance when files are staged from different directories.';
COMMENT ON COLUMN logs.file_sha256 IS
'SHA-256 digest of the file contents for provenance and duplicate detection.';
'SHA-256 digest of the source file. Helps prevent duplicate imports and supports provenance audits.';
COMMENT ON COLUMN logs.file_size_bytes IS
'File size in bytes at import time.';
'Size of the source file in bytes.';
COMMENT ON COLUMN logs.raw_header_text IS
'All leading lines in the source file whose first character is #. This is the free-form metadata header preserved exactly as found.';
'All leading hash-prefixed lines from the file, preserved verbatim as a text block.';
COMMENT ON COLUMN logs.csv_header_line IS
'The effective CSV column header line used for import. This may come from the file itself or from the importer''s expected header when the file has no explicit header row.';
'Effective CSV column header line used by the importer, either from the file or from importer fallback logic.';
COMMENT ON COLUMN logs.imported_at IS
'UTC timestamp when the file was imported into PostgreSQL.';
'Timestamp when the file was imported into PostgreSQL.';
COMMENT ON COLUMN logs.import_notes IS
'Optional notes about the import, schema assumptions, or anomalies observed during ingestion.';
'Optional free-form notes supplied at import time.';
COMMENT ON COLUMN logs.board_id IS
'Identifier of the T-Beam or other logger board, for example GUY, AMY, or CY, copied from the data rows when available.';
'Board identifier observed in the file, e.g. GUY, AMY, or CY.';
COMMENT ON COLUMN logs.gnss_chip IS
'GNSS receiver model reported by the firmware, such as MAX-M10S or L76K.';
'GNSS receiver chip or module name, e.g. MAX-M10S or L76K.';
COMMENT ON COLUMN logs.firmware_exercise_name IS
'Firmware exercise or program name that generated the log, useful for tracking logger behavior across exercises.';
'Firmware exercise or logger program name that produced the file.';
COMMENT ON COLUMN logs.firmware_version IS
'Firmware version string or build identifier reported by the logger.';
'Firmware build or version string written by the device.';
COMMENT ON COLUMN logs.boot_timestamp_utc IS
'UTC timestamp that the device believed it booted. Useful for relating run timing back to power-up timing.';
'UTC timestamp representing when the device booted, as reported by the firmware.';
COMMENT ON COLUMN logs.run_id IS
'Run/session identifier generated by the firmware for this data collection session.';
'Run identifier shared by all rows from one logger session.';
COMMENT ON COLUMN logs.first_timestamp_utc IS
'Earliest timestamp_utc found in the imported data rows.';
'First UTC sample timestamp found in this file.';
COMMENT ON COLUMN logs.last_timestamp_utc IS
'Latest timestamp_utc found in the imported data rows.';
'Last UTC sample timestamp found in this file.';
COMMENT ON COLUMN logs.row_count IS
'Total number of imported data rows for this file, including both sample and satellite rows.';
'Total number of imported CSV data rows in the file.';
COMMENT ON COLUMN logs.sample_count IS
'Count of rows whose record_type is sample.';
'Number of imported rows where record_type = sample.';
COMMENT ON COLUMN logs.satellite_count IS
'Count of rows whose record_type is satellite.';
CREATE UNIQUE INDEX IF NOT EXISTS logs_source_sha256_uq
ON logs (file_sha256)
WHERE file_sha256 IS NOT NULL;
CREATE INDEX IF NOT EXISTS logs_run_id_idx
ON logs (run_id);
CREATE INDEX IF NOT EXISTS logs_board_id_idx
ON logs (board_id);
'Number of imported rows where record_type = satellite.';
CREATE TABLE IF NOT EXISTS log_data (
log_data_id bigserial PRIMARY KEY,
log_id bigint NOT NULL REFERENCES logs(log_id) ON DELETE CASCADE,
row_num integer NOT NULL,
source_row_number integer NOT NULL,
record_type text NOT NULL,
timestamp_utc timestamptz,
@ -113,20 +102,22 @@ CREATE TABLE IF NOT EXISTS log_data (
firmware_version text,
boot_timestamp_utc timestamptz,
run_id text,
sample_seq bigint,
ms_since_run_start bigint,
fix_type text,
fix_dimension smallint,
fix_dimension integer,
sats_in_view integer,
sat_seen integer,
sats_used integer,
hdop numeric(8,3),
vdop numeric(8,3),
pdop numeric(8,3),
latitude numeric(11,8),
longitude numeric(11,8),
altitude_m numeric(10,2),
speed_mps numeric(10,3),
course_deg numeric(7,3),
latitude double precision,
longitude double precision,
altitude_m numeric(12,3),
speed_mps numeric(12,3),
course_deg numeric(12,3),
pps_seen boolean,
quality_class text,
@ -140,154 +131,140 @@ CREATE TABLE IF NOT EXISTS log_data (
mean_cn0 numeric(8,3),
max_cn0 numeric(8,3),
age_of_fix_ms integer,
ttff_ms integer,
longest_no_fix_ms integer,
age_of_fix_ms bigint,
ttff_ms bigint,
longest_no_fix_ms bigint,
sat_talker text,
sat_constellation text,
sat_prn integer,
sat_elevation_deg integer,
sat_azimuth_deg integer,
sat_elevation_deg numeric(8,3),
sat_azimuth_deg numeric(8,3),
sat_snr numeric(8,3),
sat_used_in_solution boolean,
CONSTRAINT log_data_log_id_row_num_uq UNIQUE (log_id, row_num),
CONSTRAINT log_data_row_num_ck CHECK (row_num >= 1),
CONSTRAINT log_data_record_type_ck CHECK (record_type IN ('sample', 'satellite')),
CONSTRAINT log_data_fix_dimension_ck CHECK (fix_dimension IS NULL OR fix_dimension BETWEEN 0 AND 9),
CONSTRAINT log_data_latitude_ck CHECK (latitude IS NULL OR latitude BETWEEN -90 AND 90),
CONSTRAINT log_data_longitude_ck CHECK (longitude IS NULL OR longitude BETWEEN -180 AND 180),
CONSTRAINT log_data_course_deg_ck CHECK (course_deg IS NULL OR course_deg >= 0 AND course_deg < 360),
CONSTRAINT log_data_sat_elevation_deg_ck CHECK (sat_elevation_deg IS NULL OR sat_elevation_deg BETWEEN 0 AND 90),
CONSTRAINT log_data_sat_azimuth_deg_ck CHECK (sat_azimuth_deg IS NULL OR sat_azimuth_deg BETWEEN 0 AND 359),
CONSTRAINT log_data_sat_counts_nonnegative_ck CHECK (
coalesce(sats_in_view, 0) >= 0 AND
coalesce(sat_seen, 0) >= 0 AND
coalesce(sats_used, 0) >= 0 AND
coalesce(gps_count, 0) >= 0 AND
coalesce(galileo_count, 0) >= 0 AND
coalesce(glonass_count, 0) >= 0 AND
coalesce(beidou_count, 0) >= 0 AND
coalesce(navic_count, 0) >= 0 AND
coalesce(qzss_count, 0) >= 0 AND
coalesce(sbas_count, 0) >= 0
),
CONSTRAINT log_data_ms_nonnegative_ck CHECK (
coalesce(age_of_fix_ms, 0) >= 0 AND
coalesce(ttff_ms, 0) >= 0 AND
coalesce(longest_no_fix_ms, 0) >= 0
)
raw_csv_line text,
CONSTRAINT log_data_record_type_chk CHECK (record_type IN ('sample', 'satellite'))
);
COMMENT ON TABLE log_data IS
'One row per CSV data record. Stores both sample rows and per-satellite rows, preserving the file''s mixed row model in one typed table.';
'One row per CSV data row from a GNSS logger file. Stores both sample rows and per-satellite rows.';
COMMENT ON COLUMN log_data.log_data_id IS
'Surrogate primary key for one imported data row.';
'Primary key for one imported CSV data row.';
COMMENT ON COLUMN log_data.log_id IS
'Foreign key back to logs.log_id identifying which source file this row came from.';
COMMENT ON COLUMN log_data.row_num IS
'1-based row number within the imported data file, excluding comment lines and any header line.';
'Foreign key to logs.log_id, linking this row back to the source file.';
COMMENT ON COLUMN log_data.source_row_number IS
'1-based row number within the CSV data section, excluding preserved hash-prefixed header lines.';
COMMENT ON COLUMN log_data.record_type IS
'Kind of row: sample for one epoch summary row, or satellite for one satellite observed at that epoch.';
'Logical row type. sample = one epoch summary row. satellite = one satellite snapshot tied to a sample epoch.';
COMMENT ON COLUMN log_data.timestamp_utc IS
'UTC timestamp attached to the row by the GNSS logger.';
'UTC time for the sample epoch, as reported by the GNSS receiver.';
COMMENT ON COLUMN log_data.board_id IS
'Identifier of the data-collecting board, such as GUY, AMY, or CY.';
'Board identifier such as GUY, AMY, or CY.';
COMMENT ON COLUMN log_data.gnss_chip IS
'GNSS receiver model, such as MAX-M10S or L76K.';
'GNSS module name, for example MAX-M10S or L76K.';
COMMENT ON COLUMN log_data.firmware_exercise_name IS
'Firmware exercise/program name that emitted the row.';
'Name of the firmware exercise or logger mode that generated the row.';
COMMENT ON COLUMN log_data.firmware_version IS
'Firmware version/build identifier used when the row was logged.';
'Firmware build or version string embedded in the row.';
COMMENT ON COLUMN log_data.boot_timestamp_utc IS
'Device boot timestamp in UTC as reported by the logger.';
'UTC timestamp representing when the device booted, according to firmware.';
COMMENT ON COLUMN log_data.run_id IS
'Run/session identifier assigned by the firmware.';
'Run identifier shared across one logger session.';
COMMENT ON COLUMN log_data.sample_seq IS
'Sequential sample number within a run. Starts at 1 when a new log begins. Satellite rows inherit the parent sample sequence value.';
COMMENT ON COLUMN log_data.ms_since_run_start IS
'Monotonic milliseconds elapsed since the log file was opened. Useful for jitter, gap, and SD-write-impact analysis.';
COMMENT ON COLUMN log_data.fix_type IS
'GNSS fix classification such as NO_FIX, 2D, 3D, DGPS, or similar receiver-reported state.';
'Fix quality label such as NO_FIX, 2D, 3D, or DGPS depending on what the firmware emits.';
COMMENT ON COLUMN log_data.fix_dimension IS
'Numeric dimensionality of the solution. Typical values are 1 for no fix, 2 for horizontal-only, and 3 for full 3D.';
'Numeric dimension of the position fix, typically 0, 2, or 3.';
COMMENT ON COLUMN log_data.sats_in_view IS
'Receiver-reported total satellites in view at the epoch, if provided by the firmware.';
'Count of satellites reportedly in view at this epoch according to the receiver summary.';
COMMENT ON COLUMN log_data.sat_seen IS
'Count of satellites actually emitted as satellite rows or otherwise counted by the logger at the epoch. May differ from sats_in_view depending on receiver API behavior.';
'Count of satellites actually observed or emitted by the logger for this epoch. This may differ from sats_in_view depending on firmware logic.';
COMMENT ON COLUMN log_data.sats_used IS
'Number of satellites used by the receiver in the navigation solution at the epoch.';
'Count of satellites used in the navigation solution at this epoch.';
COMMENT ON COLUMN log_data.hdop IS
'Horizontal Dilution of Precision. Lower values indicate better horizontal geometry.';
'Horizontal Dilution of Precision. Lower values generally indicate better horizontal geometry.';
COMMENT ON COLUMN log_data.vdop IS
'Vertical Dilution of Precision. Lower values indicate better vertical geometry.';
'Vertical Dilution of Precision. Lower values generally indicate better vertical geometry.';
COMMENT ON COLUMN log_data.pdop IS
'Position Dilution of Precision, a combined geometry indicator for 3D positioning.';
'Position Dilution of Precision. Combined geometry indicator for the position solution.';
COMMENT ON COLUMN log_data.latitude IS
'Latitude in decimal degrees referenced to the receiver''s current navigation solution.';
'Latitude in decimal degrees.';
COMMENT ON COLUMN log_data.longitude IS
'Longitude in decimal degrees referenced to the receiver''s current navigation solution.';
'Longitude in decimal degrees.';
COMMENT ON COLUMN log_data.altitude_m IS
'Altitude in meters as reported by the GNSS receiver.';
'Altitude in meters, generally above mean sea level according to receiver output.';
COMMENT ON COLUMN log_data.speed_mps IS
'Ground speed in meters per second.';
'Receiver-reported speed over ground in meters per second.';
COMMENT ON COLUMN log_data.course_deg IS
'Course over ground in degrees clockwise from true north.';
'Receiver-reported course over ground in degrees.';
COMMENT ON COLUMN log_data.pps_seen IS
'Boolean indicator that a one-pulse-per-second timing event was observed for the epoch.';
'Boolean indicating whether a PPS pulse was seen by the firmware during this epoch.';
COMMENT ON COLUMN log_data.quality_class IS
'Firmware-defined coarse quality label such as POOR, FAIR, GOOD, or similar.';
'Human-friendly firmware quality label such as POOR, FAIR, GOOD, or similar.';
COMMENT ON COLUMN log_data.gps_count IS
'Number of GPS satellites counted at the epoch.';
'Count of GPS satellites observed at this epoch.';
COMMENT ON COLUMN log_data.galileo_count IS
'Number of Galileo satellites counted at the epoch.';
'Count of Galileo satellites observed at this epoch.';
COMMENT ON COLUMN log_data.glonass_count IS
'Number of GLONASS satellites counted at the epoch.';
'Count of GLONASS satellites observed at this epoch.';
COMMENT ON COLUMN log_data.beidou_count IS
'Number of BeiDou satellites counted at the epoch.';
'Count of BeiDou satellites observed at this epoch.';
COMMENT ON COLUMN log_data.navic_count IS
'Number of NavIC/IRNSS satellites counted at the epoch.';
'Count of NavIC satellites observed at this epoch.';
COMMENT ON COLUMN log_data.qzss_count IS
'Number of QZSS satellites counted at the epoch.';
'Count of QZSS satellites observed at this epoch.';
COMMENT ON COLUMN log_data.sbas_count IS
'Number of SBAS satellites counted at the epoch.';
'Count of SBAS satellites observed at this epoch.';
COMMENT ON COLUMN log_data.mean_cn0 IS
'Mean carrier-to-noise-density ratio, typically in dB-Hz, across the satellites considered by the firmware at the epoch.';
'Mean carrier-to-noise density estimate across observed satellites. Higher values generally indicate stronger signals.';
COMMENT ON COLUMN log_data.max_cn0 IS
'Maximum carrier-to-noise-density ratio, typically in dB-Hz, seen at the epoch.';
'Maximum carrier-to-noise density estimate among observed satellites for this epoch.';
COMMENT ON COLUMN log_data.age_of_fix_ms IS
'Age in milliseconds of the current fix solution when logged.';
'Age of the current fix in milliseconds, as reported by the firmware or receiver API.';
COMMENT ON COLUMN log_data.ttff_ms IS
'Time To First Fix in milliseconds for the run or receiver state being reported.';
'Time to first fix in milliseconds for the run or current acquisition state.';
COMMENT ON COLUMN log_data.longest_no_fix_ms IS
'Longest interval in milliseconds spent without a usable fix during the run so far.';
'Longest contiguous no-fix interval observed so far in the run, in milliseconds.';
COMMENT ON COLUMN log_data.sat_talker IS
'Talker or source prefix for the satellite row, for example GP, GL, GA, GB, or similar receiver-provided code.';
'Talker or source prefix associated with the satellite row, such as GP, GA, GL, GB, or GN.';
COMMENT ON COLUMN log_data.sat_constellation IS
'Human-readable constellation name for the satellite row, such as GPS, GALILEO, GLONASS, or BEIDOU.';
'Constellation name for the satellite row, such as GPS, Galileo, GLONASS, BeiDou, NavIC, QZSS, or SBAS.';
COMMENT ON COLUMN log_data.sat_prn IS
'PRN or SVID number identifying the satellite within its constellation.';
COMMENT ON COLUMN log_data.sat_elevation_deg IS
'Satellite elevation angle in degrees above the horizon.';
'Satellite elevation angle above the horizon in degrees.';
COMMENT ON COLUMN log_data.sat_azimuth_deg IS
'Satellite azimuth angle in degrees clockwise from north.';
'Satellite azimuth in degrees clockwise from true north, according to receiver output.';
COMMENT ON COLUMN log_data.sat_snr IS
'Satellite signal-to-noise or similar quality metric as reported by the firmware, commonly in dB-Hz.';
'Signal-to-noise style quality measure for this satellite row. Depending on firmware, this may be SNR or CN0-like output.';
COMMENT ON COLUMN log_data.sat_used_in_solution IS
'Boolean indicator that this satellite was used in the navigation solution for the epoch.';
'Boolean indicating whether this specific satellite was used in the navigation solution.';
COMMENT ON COLUMN log_data.raw_csv_line IS
'Original CSV line preserved verbatim for audit and recovery purposes.';
CREATE INDEX IF NOT EXISTS log_data_log_id_timestamp_idx
ON log_data (log_id, timestamp_utc);
CREATE INDEX IF NOT EXISTS log_data_log_id_idx
ON log_data(log_id);
CREATE INDEX IF NOT EXISTS log_data_run_id_idx
ON log_data (run_id);
ON log_data(run_id);
CREATE INDEX IF NOT EXISTS log_data_board_id_idx
ON log_data (board_id);
CREATE INDEX IF NOT EXISTS log_data_timestamp_idx
ON log_data(timestamp_utc);
CREATE INDEX IF NOT EXISTS log_data_record_type_idx
ON log_data (record_type);
ON log_data(record_type);
CREATE INDEX IF NOT EXISTS log_data_board_run_seq_idx
ON log_data(board_id, run_id, sample_seq, record_type);
CREATE INDEX IF NOT EXISTS log_data_satellite_lookup_idx
ON log_data (sat_constellation, sat_prn, timestamp_utc)
ON log_data(sat_constellation, sat_prn, timestamp_utc)
WHERE record_type = 'satellite';
COMMIT;