I am trying to implement graceful termination of my experiment, but calls to check_pause()
are causing the moninj to crash. Below, I paste a minimal working example:
from artiq.experiment import *
class Trigger_only(EnvExperiment):
    """Minimal example for testing the stability of check_pause() calls.

    The kernel polls the scheduler in a loop and prints "pause" once
    check_pause() returns a true value.
    """

    def build(self):
        """Request the core device and the scheduler."""
        self.setattr_device("core")
        self.setattr_device("scheduler")

    @kernel
    def run(self):
        """Poll check_pause() until it reports a pause request, then print."""
        self.core.reset()
        # NOTE(review): check_pause() is presumably executed on the host, so
        # every loop iteration would be a kernel-to-host RPC -- confirm
        # against the ARTIQ scheduler documentation.
        while not self.scheduler.check_pause():
            self.core.break_realtime()
            # NOTE(review): delay() presumably only advances the timeline
            # cursor and does not block in wall-clock time, so a larger value
            # here may not lower the polling rate -- confirm.
            delay(100*us)
        print("pause")
This example is mostly taken from this GitHub issue, but I don't really understand the specifics of the discussion in that thread.
The traceback of the error that occurs is as follows:
artiq.coredevice.comm_moninj:Moninj connection terminating with exception
Traceback (most recent call last):
File "/nix/store/7gf107724halsxf3d9kqaij3g5yx7wap-python3-3.11.9-env/lib/python3.11/site-packages/artiq/coredevice/comm_moninj.py", line 84, in _receive_cr
ty = await self._reader.read(1)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/nix/store/5w07wfs288qpmnvjywk24f3ak5k1np7r-python3-3.11.9/lib/python3.11/asyncio/streams.py", line 711, in read
await self._wait_for_data('read')
File "/nix/store/5w07wfs288qpmnvjywk24f3ak5k1np7r-python3-3.11.9/lib/python3.11/asyncio/streams.py", line 543, in _wait_for_data
await self._waiter
File "/nix/store/5w07wfs288qpmnvjywk24f3ak5k1np7r-python3-3.11.9/lib/python3.11/asyncio/selector_events.py", line 999, in _read_ready__data_received
data = self._sock.recv(self.max_size)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TimeoutError: [Errno 110] Connection timed out
This is followed by repeats of the following, until moninj starts up again.
artiq.dashboard.moninj:lost connection to moninj
artiq.dashboard.moninj:failed to connect to moninj. Is aqctl_moninj_proxy running?
Traceback (most recent call last):
File "/nix/store/7gf107724halsxf3d9kqaij3g5yx7wap-python3-3.11.9-env/lib/python3.11/site-packages/artiq/dashboard/moninj.py", line 725, in mi_connector
await new_mi_connection.connect(self.mi_addr, self.mi_port)
File "/nix/store/7gf107724halsxf3d9kqaij3g5yx7wap-python3-3.11.9-env/lib/python3.11/site-packages/artiq/coredevice/comm_moninj.py", line 32, in connect
self._reader, self._writer = await async_open_connection(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/nix/store/7gf107724halsxf3d9kqaij3g5yx7wap-python3-3.11.9-env/lib/python3.11/site-packages/sipyco/keepalive.py", line 80, in async_open_connection
reader, writer = await asyncio.open_connection(host, port, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/nix/store/5w07wfs288qpmnvjywk24f3ak5k1np7r-python3-3.11.9/lib/python3.11/asyncio/streams.py", line 48, in open_connection
transport, _ = await loop.create_connection(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/nix/store/5w07wfs288qpmnvjywk24f3ak5k1np7r-python3-3.11.9/lib/python3.11/asyncio/base_events.py", line 1086, in create_connection
raise exceptions[0]
File "/nix/store/5w07wfs288qpmnvjywk24f3ak5k1np7r-python3-3.11.9/lib/python3.11/asyncio/base_events.py", line 1070, in create_connection
sock = await self._connect_sock(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/nix/store/5w07wfs288qpmnvjywk24f3ak5k1np7r-python3-3.11.9/lib/python3.11/asyncio/base_events.py", line 974, in _connect_sock
await self.sock_connect(sock, address)
File "/nix/store/5w07wfs288qpmnvjywk24f3ak5k1np7r-python3-3.11.9/lib/python3.11/asyncio/selector_events.py", line 638, in sock_connect
return await fut
^^^^^^^^^
File "/nix/store/5w07wfs288qpmnvjywk24f3ak5k1np7r-python3-3.11.9/lib/python3.11/asyncio/selector_events.py", line 678, in _sock_connect_cb
raise OSError(err, f'Connect call failed {address}')
ConnectionRefusedError: [Errno 111] Connect call failed ('::1', 1383, 0, 0)
artiq_comtools.ctlmgr:Controller core_moninj exited
artiq_comtools.ctlmgr:Restarting in 5.0 seconds
artiq.frontend.aqctl_corelog:Logging connection terminating with exception
Traceback (most recent call last):
File "/nix/store/7gf107724halsxf3d9kqaij3g5yx7wap-python3-3.11.9-env/lib/python3.11/site-packages/artiq/frontend/aqctl_corelog.py", line 63, in get_logs
length, = struct.unpack(endian + "l", await reader.readexactly(4))
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/nix/store/5w07wfs288qpmnvjywk24f3ak5k1np7r-python3-3.11.9/lib/python3.11/asyncio/streams.py", line 750, in readexactly
await self._wait_for_data('readexactly')
File "/nix/store/5w07wfs288qpmnvjywk24f3ak5k1np7r-python3-3.11.9/lib/python3.11/asyncio/streams.py", line 543, in _wait_for_data
await self._waiter
File "/nix/store/5w07wfs288qpmnvjywk24f3ak5k1np7r-python3-3.11.9/lib/python3.11/asyncio/selector_events.py", line 999, in _read_ready__data_received
data = self._sock.recv(self.max_size)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TimeoutError: [Errno 110] Connection timed out
Going to much longer delays, running self.scheduler.check_pause() on the host instead of in the kernel,
restarting everything, changing the networking cables/switch, and a number of other desperate attempts to fix the problem have all been futile. I'd really appreciate any help on this!