#!/usr/bin/env perl # # Codex # use strict; use warnings; use Getopt::Long qw(GetOptions); use File::Basename qw(dirname); use Cwd qw(abs_path); my $script_dir = dirname(abs_path($0)); my $default_db = "ble_migration.sqlite"; my $default_md = "Codex_response_20260616_1514.md"; my $default_inventory = "ble_symbols_20260516_1429.txt"; my $db_path = $default_db; my $markdown_path = $default_md; my $inventory_path = $default_inventory; my $reviewer = 'codex'; my $dry_run = 0; my $help = 0; GetOptions( 'db=s' => \$db_path, 'markdown=s' => \$markdown_path, 'inventory=s' => \$inventory_path, 'reviewer=s' => \$reviewer, 'dry-run' => \$dry_run, 'help' => \$help, ) or usage(2); usage(0) if $help; die "Markdown file not found: $markdown_path\n" unless -f $markdown_path; die "SQLite database not found: $db_path\n" unless -f $db_path; die "Inventory file not found: $inventory_path\n" unless -f $inventory_path; my $inventory = load_inventory($inventory_path); my @rows = parse_markdown_tables($markdown_path, $inventory); die "No symbol rows found in $markdown_path\n" unless @rows; my $sql = build_sql(\@rows, $reviewer); if ($dry_run) { print $sql; exit 0; } open(my $sqlite, '|-', 'sqlite3', $db_path) or die "Unable to start sqlite3 for $db_path: $!\n"; print {$sqlite} $sql; close($sqlite) or die "sqlite3 import failed for $db_path\n"; print "Imported " . scalar(@rows) . " symbol review rows into $db_path\n"; sub usage { my ($exit_code) = @_; print <<"USAGE"; Usage: perl migration/import_codex_review.pl [options] Options: --db PATH SQLite database path default: migration/ble_migration.sqlite --markdown PATH Codex Markdown review path default: migration/Codex_response_20260616_1514.md --inventory PATH symbol inventory used to infer class_name for unqualified method rows default: migration/ble_symbols_20260516_1429.txt --reviewer NAME reviewer value for reviews table default: codex --dry-run print SQL instead of applying it --help show this help The importer is idempotent for symbols: it upserts by the schema's unique key (source_file, class_name, symbol_name, line_number). It appends one review row per imported Markdown table row each time it is run. USAGE exit $exit_code; } sub load_inventory { my ($path) = @_; open(my $fh, '<', $path) or die "Unable to read $path: $!\n"; my %inventory; my $current_file; my $current_class = ''; while (my $line = <$fh>) { chomp $line; if ($line =~ /^(\S+\.py)\s*$/) { $current_file = $1; $current_class = ''; next; } next unless defined $current_file; if ($line =~ /^\s{4}class\s+([A-Za-z_][A-Za-z0-9_]*)\s+line\s+(\d+)/) { my ($class, $line_number) = ($1, $2); $current_class = $class; my $key = inventory_key($current_file, $class, $line_number); $inventory{$key} = { symbol_type => 'class', class_name => '', symbol_name => $class, }; next; } if ($line =~ /^\s{8}(?:async\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)\([^)]*\)\s+line\s+(\d+)/) { my ($method, $line_number) = ($1, $2); my $key = inventory_key($current_file, $method, $line_number); $inventory{$key} = { symbol_type => 'method', class_name => $current_class, symbol_name => $method, }; next; } if ($line =~ /^\s{4}def\s+([A-Za-z_][A-Za-z0-9_]*)\([^)]*\)\s+line\s+(\d+)/) { my ($function, $line_number) = ($1, $2); my $key = inventory_key($current_file, $function, $line_number); $inventory{$key} = { symbol_type => 'function', class_name => '', symbol_name => $function, }; next; } } close($fh); return \%inventory; } sub inventory_key { my ($file, $symbol, $line_number) = @_; return join('|', basename_only($file), $symbol, defined($line_number) ? $line_number : ''); } sub parse_markdown_tables { my ($path, $inventory) = @_; open(my $fh, '<', $path) or die "Unable to read $path: $!\n"; my @rows; my $current_file; my $in_expected_table = 0; while (my $line = <$fh>) { chomp $line; if ($line =~ /^\|\s*file path\s*\|\s*symbol\s*\|\s*line\s*\|\s*tag\s*\|\s*phase-1 C\+\+ candidate\s*\|/i) { $in_expected_table = 1; next; } if ($in_expected_table && $line =~ /^\|\s*-+/) { next; } if ($in_expected_table && $line !~ /^\|/) { $in_expected_table = 0; next; } next unless $in_expected_table; my @cells = split_markdown_row($line); next unless @cells == 8; my ($file, $symbol, $line_number, $tag, $candidate, $rationale, $callers, $callees) = @cells; next if lc($file) eq 'file path'; $file = clean_cell($file); if ($file eq 'same') { die "Encountered 'same' file before an explicit file path\n" unless defined $current_file; $file = $current_file; } else { $current_file = $file; } $symbol = clean_cell($symbol); $tag = clean_tag(clean_cell($tag)); $candidate = lc(clean_cell($candidate)) eq 'yes' ? 1 : 0; $line_number = clean_cell($line_number); $line_number = $line_number =~ /(\d+)/ ? $1 : undef; $rationale = clean_cell($rationale); $callers = clean_cell($callers); $callees = clean_cell($callees); my ($symbol_type, $class_name, $symbol_name); my $inventory_key = inventory_key($file, symbol_lookup_name($symbol), $line_number); if (exists $inventory->{$inventory_key}) { $symbol_type = $inventory->{$inventory_key}->{symbol_type}; $class_name = $inventory->{$inventory_key}->{class_name}; $symbol_name = $inventory->{$inventory_key}->{symbol_name}; } else { ($symbol_type, $class_name, $symbol_name) = infer_symbol_shape($symbol); $class_name = '' unless defined $class_name; } my $phase = $candidate ? '1_candidate' : '0_inventory'; my $confidence = $symbol =~ /\+\s*all\s+methods/i ? 'medium' : 'high'; my $notes = "Imported from " . basename_only($path) . "; original symbol cell: $symbol"; push @rows, { source_file => $file, symbol_name => $symbol_name, symbol_type => $symbol_type, class_name => $class_name, line_number => $line_number, tag => $tag, phase => $phase, status => 'REVIEWED', cpp_candidate => $candidate, confidence => $confidence, rationale => $rationale, callers => $callers, callees => $callees, notes => $notes, }; } close($fh); return @rows; } sub split_markdown_row { my ($line) = @_; $line =~ s/^\|//; $line =~ s/\|$//; my @cells = split /\|/, $line, -1; for my $cell (@cells) { $cell =~ s/^\s+//; $cell =~ s/\s+$//; } return @cells; } sub clean_cell { my ($value) = @_; $value = '' unless defined $value; $value =~ s//\n/gi; $value =~ s/`([^`]*)`/$1/g; $value =~ s/^\s+//; $value =~ s/\s+$//; return $value; } sub clean_tag { my ($tag) = @_; $tag =~ s/^\[//; $tag =~ s/\]$//; $tag = uc($tag || 'UNKNOWN'); return $tag =~ /^(CORE|GLUE|PLATFORM|TEST|UNKNOWN)$/ ? $tag : 'UNKNOWN'; } sub infer_symbol_shape { my ($symbol) = @_; if ($symbol =~ /^([A-Za-z_][A-Za-z0-9_]*)\s*\+\s*all\s+methods\b/i) { return ('class_group', undef, $symbol); } if ($symbol =~ /^([A-Za-z_][A-Za-z0-9_]*)\.([A-Za-z_][A-Za-z0-9_]*)$/) { return ('method', $1, $2); } if ($symbol =~ /^_[A-Za-z_][A-Za-z0-9_]*$/ || $symbol =~ /^[a-z_][A-Za-z0-9_]*$/) { return ('function', undef, $symbol); } if ($symbol =~ /^[A-Z][A-Za-z0-9_]*$/) { return ('class', undef, $symbol); } return ('symbol', undef, $symbol); } sub symbol_lookup_name { my ($symbol) = @_; if ($symbol =~ /^([A-Za-z_][A-Za-z0-9_]*)\.([A-Za-z_][A-Za-z0-9_]*)$/) { return $2; } if ($symbol =~ /^([A-Za-z_][A-Za-z0-9_]*)\s*\+\s*all\s+methods\b/i) { return $1; } return $symbol; } sub build_sql { my ($rows, $reviewer_name) = @_; my $sql = <<"SQL"; .bail on PRAGMA foreign_keys = ON; BEGIN; CREATE TEMP TABLE import_symbols ( import_order INTEGER PRIMARY KEY, source_file TEXT NOT NULL, symbol_name TEXT NOT NULL, symbol_type TEXT, class_name TEXT, line_number INTEGER, tag TEXT NOT NULL, phase TEXT NOT NULL, status TEXT NOT NULL, cpp_candidate INTEGER NOT NULL, confidence TEXT, rationale TEXT, callers TEXT, callees TEXT, notes TEXT ); SQL my $i = 0; for my $row (@{$rows}) { $i++; $sql .= "INSERT INTO import_symbols VALUES (" . join(', ', $i, sql_quote($row->{source_file}), sql_quote($row->{symbol_name}), sql_quote($row->{symbol_type}), sql_quote($row->{class_name}), sql_integer($row->{line_number}), sql_quote($row->{tag}), sql_quote($row->{phase}), sql_quote($row->{status}), $row->{cpp_candidate} ? 1 : 0, sql_quote($row->{confidence}), sql_quote($row->{rationale}), sql_quote($row->{callers}), sql_quote($row->{callees}), sql_quote($row->{notes}), ) . ");\n"; } $sql .= <<"SQL"; CREATE TEMP TABLE import_existing AS SELECT i.import_order, s.symbol_id, s.tag AS old_tag, s.status AS old_status FROM import_symbols i JOIN symbols s ON s.source_file = i.source_file AND COALESCE(s.class_name, '') = COALESCE(i.class_name, '') AND s.symbol_name = i.symbol_name AND COALESCE(s.line_number, -1) = COALESCE(i.line_number, -1); INSERT INTO symbols ( source_file, symbol_name, symbol_type, class_name, line_number, tag, phase, status, cpp_candidate, confidence, rationale, callers, callees, notes, updated_at ) SELECT source_file, symbol_name, symbol_type, class_name, line_number, tag, phase, status, cpp_candidate, confidence, rationale, callers, callees, notes, CURRENT_TIMESTAMP FROM import_symbols WHERE 1 ON CONFLICT(source_file, class_name, symbol_name, line_number) DO UPDATE SET symbol_type = excluded.symbol_type, tag = excluded.tag, phase = excluded.phase, status = excluded.status, cpp_candidate = excluded.cpp_candidate, confidence = excluded.confidence, rationale = excluded.rationale, callers = excluded.callers, callees = excluded.callees, notes = excluded.notes, updated_at = CURRENT_TIMESTAMP; INSERT INTO reviews ( symbol_id, reviewer, old_tag, new_tag, old_status, new_status, note ) SELECT s.symbol_id, @{[sql_quote($reviewer_name)]}, e.old_tag, i.tag, e.old_status, i.status, 'Imported from Codex Markdown review. Rationale: ' || COALESCE(i.rationale, '') FROM import_symbols i JOIN symbols s ON s.source_file = i.source_file AND COALESCE(s.class_name, '') = COALESCE(i.class_name, '') AND s.symbol_name = i.symbol_name AND COALESCE(s.line_number, -1) = COALESCE(i.line_number, -1) LEFT JOIN import_existing e ON e.import_order = i.import_order; COMMIT; SQL return $sql; } sub sql_quote { my ($value) = @_; return 'NULL' unless defined $value; $value =~ s/'/''/g; return "'$value'"; } sub sql_integer { my ($value) = @_; return 'NULL' unless defined $value && $value =~ /^\d+$/; return $value; } sub basename_only { my ($path) = @_; $path =~ s{.*/}{}; return $path; }