2026-05-16 15:46:53 -07:00
|
|
|
#!/usr/bin/env perl
|
2026-05-16 17:19:26 -07:00
|
|
|
#
|
|
|
|
|
# Codex
|
|
|
|
|
#
|
|
|
|
|
|
2026-05-16 15:46:53 -07:00
|
|
|
use strict;
|
|
|
|
|
use warnings;
|
|
|
|
|
use Getopt::Long qw(GetOptions);
|
|
|
|
|
use File::Basename qw(dirname);
|
|
|
|
|
use Cwd qw(abs_path);
|
|
|
|
|
|
|
|
|
|
my $script_dir = dirname(abs_path($0));
|
2026-05-16 17:19:26 -07:00
|
|
|
my $default_db = "ble_migration.sqlite";
|
|
|
|
|
my $default_md = "Codex_response_20260616_1514.md";
|
|
|
|
|
my $default_inventory = "ble_symbols_20260516_1429.txt";
|
2026-05-16 15:46:53 -07:00
|
|
|
|
|
|
|
|
my $db_path = $default_db;
|
|
|
|
|
my $markdown_path = $default_md;
|
|
|
|
|
my $inventory_path = $default_inventory;
|
|
|
|
|
my $reviewer = 'codex';
|
|
|
|
|
my $dry_run = 0;
|
|
|
|
|
my $help = 0;
|
|
|
|
|
|
|
|
|
|
GetOptions(
|
|
|
|
|
'db=s' => \$db_path,
|
|
|
|
|
'markdown=s' => \$markdown_path,
|
|
|
|
|
'inventory=s' => \$inventory_path,
|
|
|
|
|
'reviewer=s' => \$reviewer,
|
|
|
|
|
'dry-run' => \$dry_run,
|
|
|
|
|
'help' => \$help,
|
|
|
|
|
) or usage(2);
|
|
|
|
|
|
|
|
|
|
usage(0) if $help;
|
|
|
|
|
|
|
|
|
|
die "Markdown file not found: $markdown_path\n" unless -f $markdown_path;
|
|
|
|
|
die "SQLite database not found: $db_path\n" unless -f $db_path;
|
|
|
|
|
die "Inventory file not found: $inventory_path\n" unless -f $inventory_path;
|
|
|
|
|
|
|
|
|
|
my $inventory = load_inventory($inventory_path);
|
|
|
|
|
my @rows = parse_markdown_tables($markdown_path, $inventory);
|
|
|
|
|
die "No symbol rows found in $markdown_path\n" unless @rows;
|
|
|
|
|
|
|
|
|
|
my $sql = build_sql(\@rows, $reviewer);
|
|
|
|
|
|
|
|
|
|
if ($dry_run) {
|
|
|
|
|
print $sql;
|
|
|
|
|
exit 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
open(my $sqlite, '|-', 'sqlite3', $db_path)
|
|
|
|
|
or die "Unable to start sqlite3 for $db_path: $!\n";
|
|
|
|
|
print {$sqlite} $sql;
|
|
|
|
|
close($sqlite) or die "sqlite3 import failed for $db_path\n";
|
|
|
|
|
|
|
|
|
|
print "Imported " . scalar(@rows) . " symbol review rows into $db_path\n";
|
|
|
|
|
|
|
|
|
|
sub usage {
|
|
|
|
|
my ($exit_code) = @_;
|
|
|
|
|
print <<"USAGE";
|
|
|
|
|
Usage:
|
|
|
|
|
perl migration/import_codex_review.pl [options]
|
|
|
|
|
|
|
|
|
|
Options:
|
|
|
|
|
--db PATH SQLite database path
|
|
|
|
|
default: migration/ble_migration.sqlite
|
|
|
|
|
--markdown PATH Codex Markdown review path
|
|
|
|
|
default: migration/Codex_response_20260616_1514.md
|
|
|
|
|
--inventory PATH symbol inventory used to infer class_name for unqualified
|
|
|
|
|
method rows
|
|
|
|
|
default: migration/ble_symbols_20260516_1429.txt
|
|
|
|
|
--reviewer NAME reviewer value for reviews table
|
|
|
|
|
default: codex
|
|
|
|
|
--dry-run print SQL instead of applying it
|
|
|
|
|
--help show this help
|
|
|
|
|
|
|
|
|
|
The importer is idempotent for symbols: it upserts by the schema's unique key
|
|
|
|
|
(source_file, class_name, symbol_name, line_number). It appends one review row
|
|
|
|
|
per imported Markdown table row each time it is run.
|
|
|
|
|
USAGE
|
|
|
|
|
exit $exit_code;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub load_inventory {
|
|
|
|
|
my ($path) = @_;
|
|
|
|
|
open(my $fh, '<', $path) or die "Unable to read $path: $!\n";
|
|
|
|
|
|
|
|
|
|
my %inventory;
|
|
|
|
|
my $current_file;
|
|
|
|
|
my $current_class = '';
|
|
|
|
|
|
|
|
|
|
while (my $line = <$fh>) {
|
|
|
|
|
chomp $line;
|
|
|
|
|
|
|
|
|
|
if ($line =~ /^(\S+\.py)\s*$/) {
|
|
|
|
|
$current_file = $1;
|
|
|
|
|
$current_class = '';
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
next unless defined $current_file;
|
|
|
|
|
|
|
|
|
|
if ($line =~ /^\s{4}class\s+([A-Za-z_][A-Za-z0-9_]*)\s+line\s+(\d+)/) {
|
|
|
|
|
my ($class, $line_number) = ($1, $2);
|
|
|
|
|
$current_class = $class;
|
|
|
|
|
my $key = inventory_key($current_file, $class, $line_number);
|
|
|
|
|
$inventory{$key} = {
|
|
|
|
|
symbol_type => 'class',
|
|
|
|
|
class_name => '',
|
|
|
|
|
symbol_name => $class,
|
|
|
|
|
};
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ($line =~ /^\s{8}(?:async\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)\([^)]*\)\s+line\s+(\d+)/) {
|
|
|
|
|
my ($method, $line_number) = ($1, $2);
|
|
|
|
|
my $key = inventory_key($current_file, $method, $line_number);
|
|
|
|
|
$inventory{$key} = {
|
|
|
|
|
symbol_type => 'method',
|
|
|
|
|
class_name => $current_class,
|
|
|
|
|
symbol_name => $method,
|
|
|
|
|
};
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ($line =~ /^\s{4}def\s+([A-Za-z_][A-Za-z0-9_]*)\([^)]*\)\s+line\s+(\d+)/) {
|
|
|
|
|
my ($function, $line_number) = ($1, $2);
|
|
|
|
|
my $key = inventory_key($current_file, $function, $line_number);
|
|
|
|
|
$inventory{$key} = {
|
|
|
|
|
symbol_type => 'function',
|
|
|
|
|
class_name => '',
|
|
|
|
|
symbol_name => $function,
|
|
|
|
|
};
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
close($fh);
|
|
|
|
|
return \%inventory;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub inventory_key {
|
|
|
|
|
my ($file, $symbol, $line_number) = @_;
|
|
|
|
|
return join('|', basename_only($file), $symbol, defined($line_number) ? $line_number : '');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub parse_markdown_tables {
|
|
|
|
|
my ($path, $inventory) = @_;
|
|
|
|
|
open(my $fh, '<', $path) or die "Unable to read $path: $!\n";
|
|
|
|
|
|
|
|
|
|
my @rows;
|
|
|
|
|
my $current_file;
|
|
|
|
|
my $in_expected_table = 0;
|
|
|
|
|
|
|
|
|
|
while (my $line = <$fh>) {
|
|
|
|
|
chomp $line;
|
|
|
|
|
|
|
|
|
|
if ($line =~ /^\|\s*file path\s*\|\s*symbol\s*\|\s*line\s*\|\s*tag\s*\|\s*phase-1 C\+\+ candidate\s*\|/i) {
|
|
|
|
|
$in_expected_table = 1;
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ($in_expected_table && $line =~ /^\|\s*-+/) {
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ($in_expected_table && $line !~ /^\|/) {
|
|
|
|
|
$in_expected_table = 0;
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
next unless $in_expected_table;
|
|
|
|
|
|
|
|
|
|
my @cells = split_markdown_row($line);
|
|
|
|
|
next unless @cells == 8;
|
|
|
|
|
|
|
|
|
|
my ($file, $symbol, $line_number, $tag, $candidate, $rationale, $callers, $callees) = @cells;
|
|
|
|
|
next if lc($file) eq 'file path';
|
|
|
|
|
|
|
|
|
|
$file = clean_cell($file);
|
|
|
|
|
if ($file eq 'same') {
|
|
|
|
|
die "Encountered 'same' file before an explicit file path\n" unless defined $current_file;
|
|
|
|
|
$file = $current_file;
|
|
|
|
|
} else {
|
|
|
|
|
$current_file = $file;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
$symbol = clean_cell($symbol);
|
|
|
|
|
$tag = clean_tag(clean_cell($tag));
|
|
|
|
|
$candidate = lc(clean_cell($candidate)) eq 'yes' ? 1 : 0;
|
|
|
|
|
$line_number = clean_cell($line_number);
|
|
|
|
|
$line_number = $line_number =~ /(\d+)/ ? $1 : undef;
|
|
|
|
|
|
|
|
|
|
$rationale = clean_cell($rationale);
|
|
|
|
|
$callers = clean_cell($callers);
|
|
|
|
|
$callees = clean_cell($callees);
|
|
|
|
|
|
|
|
|
|
my ($symbol_type, $class_name, $symbol_name);
|
|
|
|
|
my $inventory_key = inventory_key($file, symbol_lookup_name($symbol), $line_number);
|
|
|
|
|
if (exists $inventory->{$inventory_key}) {
|
|
|
|
|
$symbol_type = $inventory->{$inventory_key}->{symbol_type};
|
|
|
|
|
$class_name = $inventory->{$inventory_key}->{class_name};
|
|
|
|
|
$symbol_name = $inventory->{$inventory_key}->{symbol_name};
|
|
|
|
|
} else {
|
|
|
|
|
($symbol_type, $class_name, $symbol_name) = infer_symbol_shape($symbol);
|
|
|
|
|
$class_name = '' unless defined $class_name;
|
|
|
|
|
}
|
|
|
|
|
my $phase = $candidate ? '1_candidate' : '0_inventory';
|
|
|
|
|
my $confidence = $symbol =~ /\+\s*all\s+methods/i ? 'medium' : 'high';
|
|
|
|
|
my $notes = "Imported from " . basename_only($path) . "; original symbol cell: $symbol";
|
|
|
|
|
|
|
|
|
|
push @rows, {
|
|
|
|
|
source_file => $file,
|
|
|
|
|
symbol_name => $symbol_name,
|
|
|
|
|
symbol_type => $symbol_type,
|
|
|
|
|
class_name => $class_name,
|
|
|
|
|
line_number => $line_number,
|
|
|
|
|
tag => $tag,
|
|
|
|
|
phase => $phase,
|
|
|
|
|
status => 'REVIEWED',
|
|
|
|
|
cpp_candidate => $candidate,
|
|
|
|
|
confidence => $confidence,
|
|
|
|
|
rationale => $rationale,
|
|
|
|
|
callers => $callers,
|
|
|
|
|
callees => $callees,
|
|
|
|
|
notes => $notes,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
close($fh);
|
|
|
|
|
return @rows;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub split_markdown_row {
|
|
|
|
|
my ($line) = @_;
|
|
|
|
|
$line =~ s/^\|//;
|
|
|
|
|
$line =~ s/\|$//;
|
|
|
|
|
my @cells = split /\|/, $line, -1;
|
|
|
|
|
for my $cell (@cells) {
|
|
|
|
|
$cell =~ s/^\s+//;
|
|
|
|
|
$cell =~ s/\s+$//;
|
|
|
|
|
}
|
|
|
|
|
return @cells;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub clean_cell {
|
|
|
|
|
my ($value) = @_;
|
|
|
|
|
$value = '' unless defined $value;
|
|
|
|
|
$value =~ s/<br\s*\/?>/\n/gi;
|
|
|
|
|
$value =~ s/`([^`]*)`/$1/g;
|
|
|
|
|
$value =~ s/^\s+//;
|
|
|
|
|
$value =~ s/\s+$//;
|
|
|
|
|
return $value;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub clean_tag {
|
|
|
|
|
my ($tag) = @_;
|
|
|
|
|
$tag =~ s/^\[//;
|
|
|
|
|
$tag =~ s/\]$//;
|
|
|
|
|
$tag = uc($tag || 'UNKNOWN');
|
|
|
|
|
return $tag =~ /^(CORE|GLUE|PLATFORM|TEST|UNKNOWN)$/ ? $tag : 'UNKNOWN';
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub infer_symbol_shape {
|
|
|
|
|
my ($symbol) = @_;
|
|
|
|
|
|
|
|
|
|
if ($symbol =~ /^([A-Za-z_][A-Za-z0-9_]*)\s*\+\s*all\s+methods\b/i) {
|
|
|
|
|
return ('class_group', undef, $symbol);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ($symbol =~ /^([A-Za-z_][A-Za-z0-9_]*)\.([A-Za-z_][A-Za-z0-9_]*)$/) {
|
|
|
|
|
return ('method', $1, $2);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ($symbol =~ /^_[A-Za-z_][A-Za-z0-9_]*$/ || $symbol =~ /^[a-z_][A-Za-z0-9_]*$/) {
|
|
|
|
|
return ('function', undef, $symbol);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ($symbol =~ /^[A-Z][A-Za-z0-9_]*$/) {
|
|
|
|
|
return ('class', undef, $symbol);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ('symbol', undef, $symbol);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub symbol_lookup_name {
|
|
|
|
|
my ($symbol) = @_;
|
|
|
|
|
if ($symbol =~ /^([A-Za-z_][A-Za-z0-9_]*)\.([A-Za-z_][A-Za-z0-9_]*)$/) {
|
|
|
|
|
return $2;
|
|
|
|
|
}
|
|
|
|
|
if ($symbol =~ /^([A-Za-z_][A-Za-z0-9_]*)\s*\+\s*all\s+methods\b/i) {
|
|
|
|
|
return $1;
|
|
|
|
|
}
|
|
|
|
|
return $symbol;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub build_sql {
|
|
|
|
|
my ($rows, $reviewer_name) = @_;
|
|
|
|
|
|
|
|
|
|
my $sql = <<"SQL";
|
|
|
|
|
.bail on
|
|
|
|
|
PRAGMA foreign_keys = ON;
|
|
|
|
|
BEGIN;
|
|
|
|
|
|
|
|
|
|
CREATE TEMP TABLE import_symbols (
|
|
|
|
|
import_order INTEGER PRIMARY KEY,
|
|
|
|
|
source_file TEXT NOT NULL,
|
|
|
|
|
symbol_name TEXT NOT NULL,
|
|
|
|
|
symbol_type TEXT,
|
|
|
|
|
class_name TEXT,
|
|
|
|
|
line_number INTEGER,
|
|
|
|
|
tag TEXT NOT NULL,
|
|
|
|
|
phase TEXT NOT NULL,
|
|
|
|
|
status TEXT NOT NULL,
|
|
|
|
|
cpp_candidate INTEGER NOT NULL,
|
|
|
|
|
confidence TEXT,
|
|
|
|
|
rationale TEXT,
|
|
|
|
|
callers TEXT,
|
|
|
|
|
callees TEXT,
|
|
|
|
|
notes TEXT
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
SQL
|
|
|
|
|
|
|
|
|
|
my $i = 0;
|
|
|
|
|
for my $row (@{$rows}) {
|
|
|
|
|
$i++;
|
|
|
|
|
$sql .= "INSERT INTO import_symbols VALUES ("
|
|
|
|
|
. join(', ',
|
|
|
|
|
$i,
|
|
|
|
|
sql_quote($row->{source_file}),
|
|
|
|
|
sql_quote($row->{symbol_name}),
|
|
|
|
|
sql_quote($row->{symbol_type}),
|
|
|
|
|
sql_quote($row->{class_name}),
|
|
|
|
|
sql_integer($row->{line_number}),
|
|
|
|
|
sql_quote($row->{tag}),
|
|
|
|
|
sql_quote($row->{phase}),
|
|
|
|
|
sql_quote($row->{status}),
|
|
|
|
|
$row->{cpp_candidate} ? 1 : 0,
|
|
|
|
|
sql_quote($row->{confidence}),
|
|
|
|
|
sql_quote($row->{rationale}),
|
|
|
|
|
sql_quote($row->{callers}),
|
|
|
|
|
sql_quote($row->{callees}),
|
|
|
|
|
sql_quote($row->{notes}),
|
|
|
|
|
)
|
|
|
|
|
. ");\n";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
$sql .= <<"SQL";
|
|
|
|
|
|
|
|
|
|
CREATE TEMP TABLE import_existing AS
|
|
|
|
|
SELECT
|
|
|
|
|
i.import_order,
|
|
|
|
|
s.symbol_id,
|
|
|
|
|
s.tag AS old_tag,
|
|
|
|
|
s.status AS old_status
|
|
|
|
|
FROM import_symbols i
|
|
|
|
|
JOIN symbols s
|
|
|
|
|
ON s.source_file = i.source_file
|
|
|
|
|
AND COALESCE(s.class_name, '') = COALESCE(i.class_name, '')
|
|
|
|
|
AND s.symbol_name = i.symbol_name
|
|
|
|
|
AND COALESCE(s.line_number, -1) = COALESCE(i.line_number, -1);
|
|
|
|
|
|
|
|
|
|
INSERT INTO symbols (
|
|
|
|
|
source_file,
|
|
|
|
|
symbol_name,
|
|
|
|
|
symbol_type,
|
|
|
|
|
class_name,
|
|
|
|
|
line_number,
|
|
|
|
|
tag,
|
|
|
|
|
phase,
|
|
|
|
|
status,
|
|
|
|
|
cpp_candidate,
|
|
|
|
|
confidence,
|
|
|
|
|
rationale,
|
|
|
|
|
callers,
|
|
|
|
|
callees,
|
|
|
|
|
notes,
|
|
|
|
|
updated_at
|
|
|
|
|
)
|
|
|
|
|
SELECT
|
|
|
|
|
source_file,
|
|
|
|
|
symbol_name,
|
|
|
|
|
symbol_type,
|
|
|
|
|
class_name,
|
|
|
|
|
line_number,
|
|
|
|
|
tag,
|
|
|
|
|
phase,
|
|
|
|
|
status,
|
|
|
|
|
cpp_candidate,
|
|
|
|
|
confidence,
|
|
|
|
|
rationale,
|
|
|
|
|
callers,
|
|
|
|
|
callees,
|
|
|
|
|
notes,
|
|
|
|
|
CURRENT_TIMESTAMP
|
|
|
|
|
FROM import_symbols
|
|
|
|
|
WHERE 1
|
|
|
|
|
ON CONFLICT(source_file, class_name, symbol_name, line_number) DO UPDATE SET
|
|
|
|
|
symbol_type = excluded.symbol_type,
|
|
|
|
|
tag = excluded.tag,
|
|
|
|
|
phase = excluded.phase,
|
|
|
|
|
status = excluded.status,
|
|
|
|
|
cpp_candidate = excluded.cpp_candidate,
|
|
|
|
|
confidence = excluded.confidence,
|
|
|
|
|
rationale = excluded.rationale,
|
|
|
|
|
callers = excluded.callers,
|
|
|
|
|
callees = excluded.callees,
|
|
|
|
|
notes = excluded.notes,
|
|
|
|
|
updated_at = CURRENT_TIMESTAMP;
|
|
|
|
|
|
|
|
|
|
INSERT INTO reviews (
|
|
|
|
|
symbol_id,
|
|
|
|
|
reviewer,
|
|
|
|
|
old_tag,
|
|
|
|
|
new_tag,
|
|
|
|
|
old_status,
|
|
|
|
|
new_status,
|
|
|
|
|
note
|
|
|
|
|
)
|
|
|
|
|
SELECT
|
|
|
|
|
s.symbol_id,
|
|
|
|
|
@{[sql_quote($reviewer_name)]},
|
|
|
|
|
e.old_tag,
|
|
|
|
|
i.tag,
|
|
|
|
|
e.old_status,
|
|
|
|
|
i.status,
|
|
|
|
|
'Imported from Codex Markdown review. Rationale: ' || COALESCE(i.rationale, '')
|
|
|
|
|
FROM import_symbols i
|
|
|
|
|
JOIN symbols s
|
|
|
|
|
ON s.source_file = i.source_file
|
|
|
|
|
AND COALESCE(s.class_name, '') = COALESCE(i.class_name, '')
|
|
|
|
|
AND s.symbol_name = i.symbol_name
|
|
|
|
|
AND COALESCE(s.line_number, -1) = COALESCE(i.line_number, -1)
|
|
|
|
|
LEFT JOIN import_existing e
|
|
|
|
|
ON e.import_order = i.import_order;
|
|
|
|
|
|
|
|
|
|
COMMIT;
|
|
|
|
|
SQL
|
|
|
|
|
|
|
|
|
|
return $sql;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub sql_quote {
|
|
|
|
|
my ($value) = @_;
|
|
|
|
|
return 'NULL' unless defined $value;
|
|
|
|
|
$value =~ s/'/''/g;
|
|
|
|
|
return "'$value'";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub sql_integer {
|
|
|
|
|
my ($value) = @_;
|
|
|
|
|
return 'NULL' unless defined $value && $value =~ /^\d+$/;
|
|
|
|
|
return $value;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub basename_only {
|
|
|
|
|
my ($path) = @_;
|
|
|
|
|
$path =~ s{.*/}{};
|
|
|
|
|
return $path;
|
|
|
|
|
}
|