""" Tests for HCI Error Fixes (Event-Driven D-Bus Monitoring) Tests the fixes for HCI errors on BCM43xx single-radio Bluetooth chips. The root cause was D-Bus monitoring threads polling every 0.5s, causing radio contention with advertising/scanning operations. Fixes tested: 1. Event-driven D-Bus monitor: Uses asyncio.Event instead of polling 2. Stale poll improvements: Uses threading.Event.wait() instead of busy-wait 3. Stop() shutdown behavior: Uses call_soon_threadsafe for immediate stop Reference: /tmp/hci_error_analysis.md """ import pytest import sys import os import asyncio import threading import time from unittest.mock import Mock, MagicMock, AsyncMock, patch, PropertyMock # Add src to path sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../src')) # Mock RNS module before importing import RNS if not hasattr(RNS, 'LOG_INFO'): RNS.LOG_CRITICAL = 0 RNS.LOG_ERROR = 1 RNS.LOG_WARNING = 2 RNS.LOG_NOTICE = 3 RNS.LOG_INFO = 4 RNS.LOG_VERBOSE = 5 RNS.LOG_DEBUG = 6 RNS.LOG_EXTREME = 7 RNS.log = Mock() class TestEventDrivenDBusMonitor: """Test event-driven D-Bus monitoring (replaces polling).""" @pytest.fixture def mock_driver(self): """Create mock driver with required attributes.""" driver = Mock() driver._peers = {} driver._peers_lock = threading.RLock() driver._log = Mock() driver._handle_peripheral_disconnected = Mock() return driver @pytest.fixture def mock_gatt_server(self, mock_driver): """Create mock GATT server with event-driven monitoring setup.""" from ble_reticulum.linux_bluetooth_driver import BluezeroGATTServer server = Mock(spec=BluezeroGATTServer) server.driver = mock_driver server.stop_event = threading.Event() server.connected_centrals = {} server.centrals_lock = threading.RLock() server._log = Mock() server._handle_central_disconnected = Mock() server.log_prefix = "[TEST]" # Event-driven shutdown attributes server._monitor_loop = None server._async_stop_event = None return server def test_async_stop_event_initialized(self, mock_gatt_server): """Test that _async_stop_event is initialized to None before monitoring starts.""" assert mock_gatt_server._async_stop_event is None assert mock_gatt_server._monitor_loop is None def test_async_event_setup_in_monitor_loop(self): """Test that asyncio.Event and loop reference are set up correctly in monitor_loop.""" async def async_test(): loop = asyncio.get_running_loop() async_stop = asyncio.Event() # These would be set on self in the real implementation _monitor_loop = loop _async_stop_event = async_stop # Verify they're set correctly assert _monitor_loop is loop assert _async_stop_event is async_stop assert isinstance(_async_stop_event, asyncio.Event) assert not _async_stop_event.is_set() asyncio.run(async_test()) def test_async_event_wait_blocks_until_set(self): """Test that await async_stop.wait() blocks until event is set.""" async def async_test(): async_stop = asyncio.Event() wait_completed = False async def wait_for_stop(): nonlocal wait_completed await async_stop.wait() wait_completed = True # Start waiting wait_task = asyncio.create_task(wait_for_stop()) # Should still be waiting await asyncio.sleep(0.1) assert not wait_completed # Set the event async_stop.set() # Should complete quickly await asyncio.wait_for(wait_task, timeout=1.0) assert wait_completed asyncio.run(async_test()) def test_async_event_wakes_immediately_when_set(self): """Test that async event wakes immediately (no 5s delay like polling).""" async def async_test(): async_stop = asyncio.Event() wake_time = None async def wait_for_stop(): nonlocal wake_time await async_stop.wait() wake_time = time.time() # Start waiting wait_task = asyncio.create_task(wait_for_stop()) await asyncio.sleep(0.01) # Let task start # Set event and measure response time set_time = time.time() async_stop.set() await asyncio.wait_for(wait_task, timeout=1.0) # Response should be immediate (< 100ms vs 5000ms polling) response_time = wake_time - set_time assert response_time < 0.1, f"Response time {response_time}s should be < 0.1s" asyncio.run(async_test()) def test_call_soon_threadsafe_sets_event(self): """Test that call_soon_threadsafe can set async event from another thread.""" async def async_test(): async_stop = asyncio.Event() loop = asyncio.get_running_loop() event_was_set = False async def wait_for_stop(): nonlocal event_was_set await async_stop.wait() event_was_set = True # Start waiting in async context wait_task = asyncio.create_task(wait_for_stop()) await asyncio.sleep(0.01) # Set event from sync code (simulating stop() call) # In real implementation this would be from another thread loop.call_soon_threadsafe(async_stop.set) await asyncio.wait_for(wait_task, timeout=1.0) assert event_was_set asyncio.run(async_test()) def test_call_soon_threadsafe_from_thread(self): """Test that call_soon_threadsafe works from a separate thread.""" event_set_in_loop = threading.Event() loop_started = threading.Event() stored_loop = None stored_event = None async def async_main(): nonlocal stored_loop, stored_event async_stop = asyncio.Event() loop = asyncio.get_running_loop() # Store for cross-thread access stored_loop = loop stored_event = async_stop loop_started.set() # Wait for signal await async_stop.wait() event_set_in_loop.set() # Run async code in thread async_thread = threading.Thread( target=lambda: asyncio.run(async_main()), daemon=True ) async_thread.start() # Wait for loop to start loop_started.wait(timeout=2.0) assert stored_loop is not None assert stored_event is not None # Signal from main thread stored_loop.call_soon_threadsafe(stored_event.set) # Verify event was set event_set_in_loop.wait(timeout=2.0) assert event_set_in_loop.is_set() async_thread.join(timeout=1.0) def test_no_polling_loop_in_monitor(self): """Test that there's no periodic polling - just event wait.""" async def async_test(): # The implementation should NOT have this pattern: # while not self.stop_event.is_set(): # await asyncio.sleep(0.5) # BAD - polling # Instead it should use: # await async_stop.wait() # GOOD - event-driven async_stop = asyncio.Event() iterations = 0 async def event_driven_wait(): nonlocal iterations # This is the correct pattern - single wait, no loop iterations await async_stop.wait() iterations = 1 # Only one "iteration" - the wait itself # Test event-driven pattern async_stop.set() # Set immediately for test await event_driven_wait() # Event-driven should only have 1 "iteration" assert iterations == 1 asyncio.run(async_test()) class TestStalePollImprovements: """Test stale connection polling improvements.""" @pytest.fixture def mock_gatt_server(self): """Create mock GATT server with polling setup.""" server = Mock() server.stop_event = threading.Event() server.connected_centrals = {} server.centrals_lock = threading.RLock() server._log = Mock() server._handle_central_disconnected = Mock() server.log_prefix = "[TEST]" return server def test_event_wait_used_instead_of_busy_loop(self): """Test that threading.Event.wait(timeout) is used instead of busy-wait.""" stop_event = threading.Event() wait_called = False # The implementation should use: # if self.stop_event.wait(timeout=300.0): # break # Not the old pattern: # for _ in range(240): # if self.stop_event.is_set(): # break # time.sleep(0.5) def proper_wait_pattern(): nonlocal wait_called if stop_event.wait(timeout=0.1): # Short timeout for test wait_called = True return True return False # Should return False when not set result = proper_wait_pattern() assert not result assert not wait_called # Should return True when set stop_event.set() result = proper_wait_pattern() assert result assert wait_called def test_immediate_stop_response(self): """Test that stop signal is responded to immediately (not after 0.5s polls).""" stop_event = threading.Event() response_time = None thread_exited = threading.Event() def wait_loop(): nonlocal response_time start = time.time() # Using Event.wait() pattern stop_event.wait(timeout=300.0) response_time = time.time() - start thread_exited.set() thread = threading.Thread(target=wait_loop, daemon=True) thread.start() # Let thread start waiting time.sleep(0.05) # Signal stop stop_event.set() # Wait for thread to respond thread_exited.wait(timeout=1.0) # Response should be immediate (< 100ms vs old 500ms polling interval) assert response_time is not None assert response_time < 0.15, f"Response time {response_time}s should be < 0.15s" def test_poll_interval_is_300_seconds(self): """Test that stale poll interval is 300 seconds (5 minutes).""" # The implementation uses: # if self.stop_event.wait(timeout=300.0): # Verify the constant value (we can't easily test 5 min wait in unit test) EXPECTED_INTERVAL = 300.0 # This would be tested by reading the actual code value # For now, we simulate what the implementation should do stop_event = threading.Event() poll_count = 0 def poll_loop(): nonlocal poll_count while not stop_event.is_set(): # In real code, this is 300.0 if stop_event.wait(timeout=0.01): # Short for test break poll_count += 1 if poll_count >= 5: # Limit iterations for test break thread = threading.Thread(target=poll_loop, daemon=True) thread.start() time.sleep(0.1) stop_event.set() thread.join(timeout=1.0) # Thread should have exited cleanly assert not thread.is_alive() def test_single_wait_call_per_interval(self): """Test that each interval uses single wait() call, not 600 iterations.""" stop_event = threading.Event() wait_call_count = 0 original_wait = threading.Event.wait def counting_wait(self, timeout=None): nonlocal wait_call_count wait_call_count += 1 return original_wait(self, timeout=0.01 if timeout else timeout) with patch.object(threading.Event, 'wait', counting_wait): # Simulate one "poll cycle" stop_event.wait(timeout=300.0) # This is the new pattern # Should be just 1 wait call, not 600+ like the old busy-loop assert wait_call_count == 1 def test_no_busy_loop_iterations(self): """Test that the old busy-loop pattern is not used.""" stop_event = threading.Event() sleep_count = 0 # Old pattern would have 600 sleep calls per 5-minute interval: # for _ in range(600): # if self.stop_event.is_set(): # break # time.sleep(0.5) # New pattern has zero sleep calls: # if self.stop_event.wait(timeout=300.0): # break def new_poll_pattern(): nonlocal sleep_count # New pattern - no sleep calls if stop_event.wait(timeout=0.01): return True # No time.sleep() here! return False new_poll_pattern() # No explicit sleep calls in new pattern assert sleep_count == 0 class TestStopShutdownBehavior: """Test stop() method shutdown behavior.""" @pytest.fixture def mock_gatt_server(self): """Create mock GATT server for stop() testing.""" server = Mock() server.stop_event = threading.Event() server.running = True server._log = Mock() server._monitor_loop = None server._async_stop_event = None server.server_thread = None server.disconnect_monitor_thread = None server.stale_poll_thread = None server.ble_agent = None server.connected_centrals = {} server.centrals_lock = threading.RLock() return server def test_stop_sets_stop_event(self, mock_gatt_server): """Test that stop() sets the stop_event.""" assert not mock_gatt_server.stop_event.is_set() # Simulate stop() behavior mock_gatt_server.stop_event.set() assert mock_gatt_server.stop_event.is_set() def test_stop_signals_async_event(self): """Test that stop() signals async event via call_soon_threadsafe.""" async def async_test(): async_stop = asyncio.Event() loop = asyncio.get_running_loop() # Simulate stop() calling call_soon_threadsafe loop.call_soon_threadsafe(async_stop.set) # Give event loop a chance to process await asyncio.sleep(0.01) assert async_stop.is_set() asyncio.run(async_test()) def test_stop_handles_runtime_error_gracefully(self): """Test that stop() handles RuntimeError when loop is already stopped.""" # Create a mock loop that raises RuntimeError mock_loop = Mock() mock_loop.call_soon_threadsafe = Mock(side_effect=RuntimeError("Loop is closed")) mock_async_stop = Mock() # Simulate the stop() error handling code _monitor_loop = mock_loop _async_stop_event = mock_async_stop try: _monitor_loop.call_soon_threadsafe(_async_stop_event.set) except RuntimeError: pass # Should be caught and ignored # Test passes if no exception is raised def test_stop_checks_for_none_references(self): """Test that stop() checks for None before calling call_soon_threadsafe.""" _monitor_loop = None _async_stop_event = None # Simulate the stop() check if _monitor_loop and _async_stop_event: # This should NOT be reached pytest.fail("Should not call when references are None") # Test passes - no error when refs are None def test_shutdown_latency_improvement(self): """Test that shutdown responds immediately (not up to 5s delay).""" stop_event = threading.Event() async_stop_triggered = threading.Event() thread_exited = threading.Event() stored_loop = None stored_async_stop = None async def mock_monitor_loop(): nonlocal stored_loop, stored_async_stop async_stop = asyncio.Event() loop = asyncio.get_running_loop() # Store refs for "stop()" to access stored_loop = loop stored_async_stop = async_stop async_stop_triggered.set() # Signal refs are ready # Wait for stop signal await async_stop.wait() def run_async(): try: asyncio.run(mock_monitor_loop()) except Exception: pass thread_exited.set() # Start monitor thread thread = threading.Thread(target=run_async, daemon=True) thread.start() # Wait for async loop to be ready async_stop_triggered.wait(timeout=2.0) assert stored_loop is not None # Measure shutdown time start = time.time() # Simulate stop() calling call_soon_threadsafe stop_event.set() stored_loop.call_soon_threadsafe(stored_async_stop.set) # Wait for thread to exit thread_exited.wait(timeout=2.0) shutdown_time = time.time() - start # Shutdown should be fast (< 500ms vs old 5000ms max) assert shutdown_time < 0.5, f"Shutdown took {shutdown_time}s, should be < 0.5s" def test_stop_waits_for_threads_with_timeout(self, mock_gatt_server): """Test that stop() waits for threads with reasonable timeouts.""" # Create mock threads mock_server_thread = Mock() mock_server_thread.is_alive = Mock(return_value=True) mock_server_thread.join = Mock() mock_monitor_thread = Mock() mock_monitor_thread.is_alive = Mock(return_value=True) mock_monitor_thread.join = Mock() mock_poll_thread = Mock() mock_poll_thread.is_alive = Mock(return_value=True) mock_poll_thread.join = Mock() # Simulate stop() thread joins mock_gatt_server.server_thread = mock_server_thread mock_gatt_server.disconnect_monitor_thread = mock_monitor_thread mock_gatt_server.stale_poll_thread = mock_poll_thread # Verify join is called with appropriate timeouts if mock_gatt_server.server_thread and mock_gatt_server.server_thread.is_alive(): mock_gatt_server.server_thread.join(timeout=5.0) if mock_gatt_server.disconnect_monitor_thread and mock_gatt_server.disconnect_monitor_thread.is_alive(): mock_gatt_server.disconnect_monitor_thread.join(timeout=2.0) if mock_gatt_server.stale_poll_thread and mock_gatt_server.stale_poll_thread.is_alive(): mock_gatt_server.stale_poll_thread.join(timeout=2.0) # Verify joins were called with timeouts mock_server_thread.join.assert_called_once_with(timeout=5.0) mock_monitor_thread.join.assert_called_once_with(timeout=2.0) mock_poll_thread.join.assert_called_once_with(timeout=2.0) class TestIntegrationScenarios: """Integration tests for HCI error fix scenarios.""" def test_full_lifecycle_start_to_stop(self): """Test complete lifecycle with event-driven monitoring.""" stop_event = threading.Event() monitor_started = threading.Event() monitor_stopped = threading.Event() stored_loop = None stored_async_stop = None async def mock_monitor(): nonlocal stored_loop, stored_async_stop async_stop = asyncio.Event() loop = asyncio.get_running_loop() stored_loop = loop stored_async_stop = async_stop monitor_started.set() # Event-driven wait (not polling) await async_stop.wait() def run_monitor(): try: asyncio.run(mock_monitor()) except Exception: pass monitor_stopped.set() # Start monitoring thread = threading.Thread(target=run_monitor, daemon=True) thread.start() # Wait for start monitor_started.wait(timeout=2.0) assert stored_loop is not None assert stored_async_stop is not None # Simulate stop() stop_event.set() stored_loop.call_soon_threadsafe(stored_async_stop.set) # Wait for clean shutdown monitor_stopped.wait(timeout=2.0) thread.join(timeout=1.0) assert not thread.is_alive() assert monitor_stopped.is_set() def test_multiple_stop_calls_safe(self): """Test that multiple stop() calls don't cause issues.""" stop_event = threading.Event() # First stop stop_event.set() assert stop_event.is_set() # Second stop (should be safe) stop_event.set() assert stop_event.is_set() # Clear and set again (simulating restart + stop) stop_event.clear() assert not stop_event.is_set() stop_event.set() assert stop_event.is_set() def test_dbus_signals_still_processed_during_wait(self): """Test that D-Bus signals are processed while waiting for stop.""" async def async_test(): async_stop = asyncio.Event() signal_received = False def handle_signal(): nonlocal signal_received signal_received = True # Start wait task wait_task = asyncio.create_task(async_stop.wait()) # Simulate D-Bus signal (would be scheduled via event loop) await asyncio.sleep(0.01) handle_signal() # Signal handler runs # Verify signal was processed assert signal_received # Stop wait async_stop.set() await wait_task asyncio.run(async_test()) class TestNoPollingVerification: """Verify that no polling patterns exist in the fixes.""" def test_no_05_second_sleep_in_monitor(self): """Verify the old 0.5s sleep pattern is not used in D-Bus monitor.""" # The old pattern was: # await asyncio.sleep(0.5) # BAD # New pattern: # await async_stop.wait() # GOOD # This test verifies the concept async def no_polling_pattern(): async_stop = asyncio.Event() # No periodic sleep! async_stop.set() # Immediately set for test await async_stop.wait() # Should complete without any sleep delays start = time.time() asyncio.run(no_polling_pattern()) elapsed = time.time() - start # Should complete in < 100ms (no 500ms sleeps) assert elapsed < 0.1 def test_no_busy_loop_in_stale_poll(self): """Verify the old busy-loop pattern is not used in stale poll.""" stop_event = threading.Event() iterations = 0 # Old pattern (BAD): # for _ in range(240): # 240 * 0.5s = 120s # if self.stop_event.is_set(): # break # time.sleep(0.5) # New pattern (GOOD): def new_pattern(): nonlocal iterations if stop_event.wait(timeout=0.01): # Short timeout for test return True iterations += 1 return False # Run the new pattern new_pattern() # Should only have 1 iteration (the wait call itself) assert iterations == 1 class TestCodeVerification: """Tests that verify the actual implementation has correct patterns.""" def test_verify_poll_interval_in_code(self): """Verify that the actual code uses 300s poll interval.""" import re # Read the actual source file source_path = os.path.join( os.path.dirname(__file__), '../src/ble_reticulum/linux_bluetooth_driver.py' ) with open(source_path, 'r') as f: source = f.read() # Find the poll_stale_connections method and verify it uses 300.0 timeout # Pattern: stop_event.wait(timeout=300.0) poll_pattern = r'self\.stop_event\.wait\(timeout=(\d+\.?\d*)\)' matches = re.findall(poll_pattern, source) assert '300.0' in matches, f"Expected 300.0 second timeout, found: {matches}" def test_verify_event_driven_wait_in_code(self): """Verify that the actual code uses async_stop.wait() not polling.""" import re source_path = os.path.join( os.path.dirname(__file__), '../src/ble_reticulum/linux_bluetooth_driver.py' ) with open(source_path, 'r') as f: source = f.read() # The code should have: await async_stop.wait() assert 'await async_stop.wait()' in source, "Should use event-driven wait" # The code should NOT have the old polling pattern in the monitor loop # Look for the monitor_loop function and check it doesn't have asyncio.sleep polling monitor_section_match = re.search( r'async def monitor_loop\(\):(.*?)(?=\n def |\n async def |\Z)', source, re.DOTALL ) if monitor_section_match: monitor_section = monitor_section_match.group(1) # Should not have: while not stop_event... asyncio.sleep pattern polling_pattern = r'while.*stop_event.*\n.*asyncio\.sleep\(0\.5\)' assert not re.search(polling_pattern, monitor_section), \ "Monitor loop should not use 0.5s polling pattern" def test_verify_call_soon_threadsafe_in_stop(self): """Verify that stop() uses call_soon_threadsafe.""" source_path = os.path.join( os.path.dirname(__file__), '../src/ble_reticulum/linux_bluetooth_driver.py' ) with open(source_path, 'r') as f: source = f.read() # The stop() method should use call_soon_threadsafe assert 'call_soon_threadsafe' in source, "stop() should use call_soon_threadsafe" assert '_async_stop_event.set' in source, "Should signal async stop event" if __name__ == "__main__": pytest.main([__file__, "-v"])