ble-reticulum/migration/scripts/import_codex_review.pl

456 lines
12 KiB
Perl
Raw Normal View History

#!/usr/bin/env perl
use strict;
use warnings;
use Getopt::Long qw(GetOptions);
use File::Basename qw(dirname);
use Cwd qw(abs_path);
my $script_dir = dirname(abs_path($0));
my $default_db = "$script_dir/ble_migration.sqlite";
my $default_md = "$script_dir/Codex_response_20260616_1514.md";
my $default_inventory = "$script_dir/ble_symbols_20260516_1429.txt";
my $db_path = $default_db;
my $markdown_path = $default_md;
my $inventory_path = $default_inventory;
my $reviewer = 'codex';
my $dry_run = 0;
my $help = 0;
GetOptions(
'db=s' => \$db_path,
'markdown=s' => \$markdown_path,
'inventory=s' => \$inventory_path,
'reviewer=s' => \$reviewer,
'dry-run' => \$dry_run,
'help' => \$help,
) or usage(2);
usage(0) if $help;
die "Markdown file not found: $markdown_path\n" unless -f $markdown_path;
die "SQLite database not found: $db_path\n" unless -f $db_path;
die "Inventory file not found: $inventory_path\n" unless -f $inventory_path;
my $inventory = load_inventory($inventory_path);
my @rows = parse_markdown_tables($markdown_path, $inventory);
die "No symbol rows found in $markdown_path\n" unless @rows;
my $sql = build_sql(\@rows, $reviewer);
if ($dry_run) {
print $sql;
exit 0;
}
open(my $sqlite, '|-', 'sqlite3', $db_path)
or die "Unable to start sqlite3 for $db_path: $!\n";
print {$sqlite} $sql;
close($sqlite) or die "sqlite3 import failed for $db_path\n";
print "Imported " . scalar(@rows) . " symbol review rows into $db_path\n";
sub usage {
my ($exit_code) = @_;
print <<"USAGE";
Usage:
perl migration/import_codex_review.pl [options]
Options:
--db PATH SQLite database path
default: migration/ble_migration.sqlite
--markdown PATH Codex Markdown review path
default: migration/Codex_response_20260616_1514.md
--inventory PATH symbol inventory used to infer class_name for unqualified
method rows
default: migration/ble_symbols_20260516_1429.txt
--reviewer NAME reviewer value for reviews table
default: codex
--dry-run print SQL instead of applying it
--help show this help
The importer is idempotent for symbols: it upserts by the schema's unique key
(source_file, class_name, symbol_name, line_number). It appends one review row
per imported Markdown table row each time it is run.
USAGE
exit $exit_code;
}
sub load_inventory {
my ($path) = @_;
open(my $fh, '<', $path) or die "Unable to read $path: $!\n";
my %inventory;
my $current_file;
my $current_class = '';
while (my $line = <$fh>) {
chomp $line;
if ($line =~ /^(\S+\.py)\s*$/) {
$current_file = $1;
$current_class = '';
next;
}
next unless defined $current_file;
if ($line =~ /^\s{4}class\s+([A-Za-z_][A-Za-z0-9_]*)\s+line\s+(\d+)/) {
my ($class, $line_number) = ($1, $2);
$current_class = $class;
my $key = inventory_key($current_file, $class, $line_number);
$inventory{$key} = {
symbol_type => 'class',
class_name => '',
symbol_name => $class,
};
next;
}
if ($line =~ /^\s{8}(?:async\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)\([^)]*\)\s+line\s+(\d+)/) {
my ($method, $line_number) = ($1, $2);
my $key = inventory_key($current_file, $method, $line_number);
$inventory{$key} = {
symbol_type => 'method',
class_name => $current_class,
symbol_name => $method,
};
next;
}
if ($line =~ /^\s{4}def\s+([A-Za-z_][A-Za-z0-9_]*)\([^)]*\)\s+line\s+(\d+)/) {
my ($function, $line_number) = ($1, $2);
my $key = inventory_key($current_file, $function, $line_number);
$inventory{$key} = {
symbol_type => 'function',
class_name => '',
symbol_name => $function,
};
next;
}
}
close($fh);
return \%inventory;
}
sub inventory_key {
my ($file, $symbol, $line_number) = @_;
return join('|', basename_only($file), $symbol, defined($line_number) ? $line_number : '');
}
sub parse_markdown_tables {
my ($path, $inventory) = @_;
open(my $fh, '<', $path) or die "Unable to read $path: $!\n";
my @rows;
my $current_file;
my $in_expected_table = 0;
while (my $line = <$fh>) {
chomp $line;
if ($line =~ /^\|\s*file path\s*\|\s*symbol\s*\|\s*line\s*\|\s*tag\s*\|\s*phase-1 C\+\+ candidate\s*\|/i) {
$in_expected_table = 1;
next;
}
if ($in_expected_table && $line =~ /^\|\s*-+/) {
next;
}
if ($in_expected_table && $line !~ /^\|/) {
$in_expected_table = 0;
next;
}
next unless $in_expected_table;
my @cells = split_markdown_row($line);
next unless @cells == 8;
my ($file, $symbol, $line_number, $tag, $candidate, $rationale, $callers, $callees) = @cells;
next if lc($file) eq 'file path';
$file = clean_cell($file);
if ($file eq 'same') {
die "Encountered 'same' file before an explicit file path\n" unless defined $current_file;
$file = $current_file;
} else {
$current_file = $file;
}
$symbol = clean_cell($symbol);
$tag = clean_tag(clean_cell($tag));
$candidate = lc(clean_cell($candidate)) eq 'yes' ? 1 : 0;
$line_number = clean_cell($line_number);
$line_number = $line_number =~ /(\d+)/ ? $1 : undef;
$rationale = clean_cell($rationale);
$callers = clean_cell($callers);
$callees = clean_cell($callees);
my ($symbol_type, $class_name, $symbol_name);
my $inventory_key = inventory_key($file, symbol_lookup_name($symbol), $line_number);
if (exists $inventory->{$inventory_key}) {
$symbol_type = $inventory->{$inventory_key}->{symbol_type};
$class_name = $inventory->{$inventory_key}->{class_name};
$symbol_name = $inventory->{$inventory_key}->{symbol_name};
} else {
($symbol_type, $class_name, $symbol_name) = infer_symbol_shape($symbol);
$class_name = '' unless defined $class_name;
}
my $phase = $candidate ? '1_candidate' : '0_inventory';
my $confidence = $symbol =~ /\+\s*all\s+methods/i ? 'medium' : 'high';
my $notes = "Imported from " . basename_only($path) . "; original symbol cell: $symbol";
push @rows, {
source_file => $file,
symbol_name => $symbol_name,
symbol_type => $symbol_type,
class_name => $class_name,
line_number => $line_number,
tag => $tag,
phase => $phase,
status => 'REVIEWED',
cpp_candidate => $candidate,
confidence => $confidence,
rationale => $rationale,
callers => $callers,
callees => $callees,
notes => $notes,
};
}
close($fh);
return @rows;
}
sub split_markdown_row {
my ($line) = @_;
$line =~ s/^\|//;
$line =~ s/\|$//;
my @cells = split /\|/, $line, -1;
for my $cell (@cells) {
$cell =~ s/^\s+//;
$cell =~ s/\s+$//;
}
return @cells;
}
sub clean_cell {
my ($value) = @_;
$value = '' unless defined $value;
$value =~ s/<br\s*\/?>/\n/gi;
$value =~ s/`([^`]*)`/$1/g;
$value =~ s/^\s+//;
$value =~ s/\s+$//;
return $value;
}
sub clean_tag {
my ($tag) = @_;
$tag =~ s/^\[//;
$tag =~ s/\]$//;
$tag = uc($tag || 'UNKNOWN');
return $tag =~ /^(CORE|GLUE|PLATFORM|TEST|UNKNOWN)$/ ? $tag : 'UNKNOWN';
}
sub infer_symbol_shape {
my ($symbol) = @_;
if ($symbol =~ /^([A-Za-z_][A-Za-z0-9_]*)\s*\+\s*all\s+methods\b/i) {
return ('class_group', undef, $symbol);
}
if ($symbol =~ /^([A-Za-z_][A-Za-z0-9_]*)\.([A-Za-z_][A-Za-z0-9_]*)$/) {
return ('method', $1, $2);
}
if ($symbol =~ /^_[A-Za-z_][A-Za-z0-9_]*$/ || $symbol =~ /^[a-z_][A-Za-z0-9_]*$/) {
return ('function', undef, $symbol);
}
if ($symbol =~ /^[A-Z][A-Za-z0-9_]*$/) {
return ('class', undef, $symbol);
}
return ('symbol', undef, $symbol);
}
sub symbol_lookup_name {
my ($symbol) = @_;
if ($symbol =~ /^([A-Za-z_][A-Za-z0-9_]*)\.([A-Za-z_][A-Za-z0-9_]*)$/) {
return $2;
}
if ($symbol =~ /^([A-Za-z_][A-Za-z0-9_]*)\s*\+\s*all\s+methods\b/i) {
return $1;
}
return $symbol;
}
sub build_sql {
my ($rows, $reviewer_name) = @_;
my $sql = <<"SQL";
.bail on
PRAGMA foreign_keys = ON;
BEGIN;
CREATE TEMP TABLE import_symbols (
import_order INTEGER PRIMARY KEY,
source_file TEXT NOT NULL,
symbol_name TEXT NOT NULL,
symbol_type TEXT,
class_name TEXT,
line_number INTEGER,
tag TEXT NOT NULL,
phase TEXT NOT NULL,
status TEXT NOT NULL,
cpp_candidate INTEGER NOT NULL,
confidence TEXT,
rationale TEXT,
callers TEXT,
callees TEXT,
notes TEXT
);
SQL
my $i = 0;
for my $row (@{$rows}) {
$i++;
$sql .= "INSERT INTO import_symbols VALUES ("
. join(', ',
$i,
sql_quote($row->{source_file}),
sql_quote($row->{symbol_name}),
sql_quote($row->{symbol_type}),
sql_quote($row->{class_name}),
sql_integer($row->{line_number}),
sql_quote($row->{tag}),
sql_quote($row->{phase}),
sql_quote($row->{status}),
$row->{cpp_candidate} ? 1 : 0,
sql_quote($row->{confidence}),
sql_quote($row->{rationale}),
sql_quote($row->{callers}),
sql_quote($row->{callees}),
sql_quote($row->{notes}),
)
. ");\n";
}
$sql .= <<"SQL";
CREATE TEMP TABLE import_existing AS
SELECT
i.import_order,
s.symbol_id,
s.tag AS old_tag,
s.status AS old_status
FROM import_symbols i
JOIN symbols s
ON s.source_file = i.source_file
AND COALESCE(s.class_name, '') = COALESCE(i.class_name, '')
AND s.symbol_name = i.symbol_name
AND COALESCE(s.line_number, -1) = COALESCE(i.line_number, -1);
INSERT INTO symbols (
source_file,
symbol_name,
symbol_type,
class_name,
line_number,
tag,
phase,
status,
cpp_candidate,
confidence,
rationale,
callers,
callees,
notes,
updated_at
)
SELECT
source_file,
symbol_name,
symbol_type,
class_name,
line_number,
tag,
phase,
status,
cpp_candidate,
confidence,
rationale,
callers,
callees,
notes,
CURRENT_TIMESTAMP
FROM import_symbols
WHERE 1
ON CONFLICT(source_file, class_name, symbol_name, line_number) DO UPDATE SET
symbol_type = excluded.symbol_type,
tag = excluded.tag,
phase = excluded.phase,
status = excluded.status,
cpp_candidate = excluded.cpp_candidate,
confidence = excluded.confidence,
rationale = excluded.rationale,
callers = excluded.callers,
callees = excluded.callees,
notes = excluded.notes,
updated_at = CURRENT_TIMESTAMP;
INSERT INTO reviews (
symbol_id,
reviewer,
old_tag,
new_tag,
old_status,
new_status,
note
)
SELECT
s.symbol_id,
@{[sql_quote($reviewer_name)]},
e.old_tag,
i.tag,
e.old_status,
i.status,
'Imported from Codex Markdown review. Rationale: ' || COALESCE(i.rationale, '')
FROM import_symbols i
JOIN symbols s
ON s.source_file = i.source_file
AND COALESCE(s.class_name, '') = COALESCE(i.class_name, '')
AND s.symbol_name = i.symbol_name
AND COALESCE(s.line_number, -1) = COALESCE(i.line_number, -1)
LEFT JOIN import_existing e
ON e.import_order = i.import_order;
COMMIT;
SQL
return $sql;
}
sub sql_quote {
my ($value) = @_;
return 'NULL' unless defined $value;
$value =~ s/'/''/g;
return "'$value'";
}
sub sql_integer {
my ($value) = @_;
return 'NULL' unless defined $value && $value =~ /^\d+$/;
return $value;
}
sub basename_only {
my ($path) = @_;
$path =~ s{.*/}{};
return $path;
}