Description
Description
Multimaster is not working with the TCP transport.
Setup
Dockerfile
# Base image: the SaltStack CI CentOS 8 image; the steps below rely on it
# shipping yum and python3 with pip.
# NOTE(review): the image is untagged and therefore resolves to :latest; pin a
# tag or digest once a known-good one is identified.
FROM saltstack/ci-centos-8

# tmux hosts the interactive multi-window session launched by the entrypoint.
# Clean the yum cache in the same layer so it does not persist in the image.
RUN yum install -y tmux \
    && yum clean all

# Pin Salt to the release listed in this issue's Versions Report (3002.2) so
# the reproduction does not drift as new releases are published.
RUN python3 -m pip install --no-cache-dir salt==3002.2

# Create the conf directory of every master and minion with ONE mkdir call.
# The original chained four "mkdir -p" invocations with line continuations but
# no "&&", so the shell saw a single mkdir command in which the word "mkdir"
# was itself an operand, leaving a stray "mkdir" directory in the build's
# working directory.
RUN mkdir -p \
      /tmp/salt-tests-tmpdir/mm-master-1/conf \
      /tmp/salt-tests-tmpdir/mm-master-2/conf \
      /tmp/salt-tests-tmpdir/mm-minion-1/conf \
      /tmp/salt-tests-tmpdir/mm-minion-2/conf
# Write the salt-master configuration for mm-master-1. "echo -e" expands the
# "\n" escapes so every setting lands on its own line of the YAML file. All
# relative paths in the config are resolved by Salt against root_dir.
# The transport starts as zeromq; switch-transport.sh rewrites that line later.
RUN echo -en "api_logfile: logs/api.log\napi_pidfile: run/api.pid\ncachedir: cache\nenable_legacy_startup_events: false\nid: mm-master-1\ninterface: 127.0.0.1\nkey_logfile: logs/key.log\nlog_file: logs/master.log\nlog_fmt_console: '%(asctime)s,%(msecs)03.0f [%(name)-17s:%(lineno)-4d][%(levelname)-8s][%(processName)18s(%(process)d)]\n %(message)s'\nlog_fmt_logfile: '[%(asctime)s,%(msecs)03.0f][%(name)-17s:%(lineno)-4d][%(levelname)-8s][%(processName)18s(%(process)d)]\n %(message)s'\nlog_level_logfile: debug\nmax_open_files: 10240\nopen_mode: true\norder_masters: false\npidfile: run/master.pid\npillar_opts: false\npki_dir: pki\npublish_port: 55225\nret_port: 41309\nroot_dir: /tmp/salt-tests-tmpdir/mm-master-1\nsock_dir: run/master\ntcp_master_pub_port: 39653\ntcp_master_publish_pull: 33521\ntcp_master_pull_port: 48867\ntcp_master_workers: 52909\ntransport: zeromq\n" > /tmp/salt-tests-tmpdir/mm-master-1/conf/master
# Same layout for mm-master-2, with its own id, root_dir and port numbers
# (ret_port 54151, publish_port 46579) so both masters can share 127.0.0.1.
RUN echo -en "api_logfile: logs/api.log\napi_pidfile: run/api.pid\ncachedir: cache\nenable_legacy_startup_events: false\nid: mm-master-2\ninterface: 127.0.0.1\nkey_logfile: logs/key.log\nlog_file: logs/master.log\nlog_fmt_console: '%(asctime)s,%(msecs)03.0f [%(name)-17s:%(lineno)-4d][%(levelname)-8s][%(processName)18s(%(process)d)]\n %(message)s'\nlog_fmt_logfile: '[%(asctime)s,%(msecs)03.0f][%(name)-17s:%(lineno)-4d][%(levelname)-8s][%(processName)18s(%(process)d)]\n %(message)s'\nlog_level_logfile: debug\nmax_open_files: 10240\nopen_mode: true\norder_masters: false\npidfile: run/master.pid\npillar_opts: false\npki_dir: pki\npublish_port: 46579\nret_port: 54151\nroot_dir: /tmp/salt-tests-tmpdir/mm-master-2\nsock_dir: run/master\ntcp_master_pub_port: 33155\ntcp_master_publish_pull: 44571\ntcp_master_pull_port: 35349\ntcp_master_workers: 56335\ntransport: zeromq\n" > /tmp/salt-tests-tmpdir/mm-master-2/conf/master
# salt-minion configuration for mm-minion-1. The "master:" list names both
# masters by their ret_port (41309 for mm-master-1, 54151 for mm-master-2);
# master_port is set to mm-master-1's ret_port.
RUN echo -en "cachedir: cache\nid: mm-minion-1\ninterface: 127.0.0.1\nlog_file: logs/minion.log\nlog_fmt_console: '%(asctime)s,%(msecs)03.0f [%(name)-17s:%(lineno)-4d][%(levelname)-8s][%(processName)18s(%(process)d)]\n %(message)s'\nlog_fmt_logfile: '[%(asctime)s,%(msecs)03.0f][%(name)-17s:%(lineno)-4d][%(levelname)-8s][%(processName)18s(%(process)d)]\n %(message)s'\nlog_level_logfile: debug\nloop_interval: 0.05\nmaster:\n- 127.0.0.1:41309\n- 127.0.0.1:54151\nmaster_port: 41309\npidfile: run/minion.pid\npki_dir: pki\nroot_dir: /tmp/salt-tests-tmpdir/mm-minion-1\nsock_dir: run/minion\ntcp_pub_port: 41831\ntcp_pull_port: 56859\ntransport: zeromq\n" > /tmp/salt-tests-tmpdir/mm-minion-1/conf/minion
# salt-minion configuration for mm-minion-2: same master list, but master_port
# is mm-master-2's ret_port (54151) and the local tcp ports differ.
RUN echo -en "cachedir: cache\nid: mm-minion-2\ninterface: 127.0.0.1\nlog_file: logs/minion.log\nlog_fmt_console: '%(asctime)s,%(msecs)03.0f [%(name)-17s:%(lineno)-4d][%(levelname)-8s][%(processName)18s(%(process)d)]\n %(message)s'\nlog_fmt_logfile: '[%(asctime)s,%(msecs)03.0f][%(name)-17s:%(lineno)-4d][%(levelname)-8s][%(processName)18s(%(process)d)]\n %(message)s'\nlog_level_logfile: debug\nloop_interval: 0.05\nmaster:\n- 127.0.0.1:41309\n- 127.0.0.1:54151\nmaster_port: 54151\npidfile: run/minion.pid\npki_dir: pki\nroot_dir: /tmp/salt-tests-tmpdir/mm-minion-2\nsock_dir: run/minion\ntcp_pub_port: 48801\ntcp_pull_port: 42185\ntransport: zeromq\n" > /tmp/salt-tests-tmpdir/mm-minion-2/conf/minion
# Directory holding the helper scripts; start-tmux.sh prepends it to PATH.
RUN mkdir -p /tmp/salt-tests-tmpdir/scripts
# Wrapper: start salt-master with mm-master-1's config dir, forwarding any
# extra command-line arguments ("${@:1}").
RUN echo -e '#!/bin/bash\nsalt-master -c /tmp/salt-tests-tmpdir/mm-master-1/conf "${@:1}"' > /tmp/salt-tests-tmpdir/scripts/mm-master-1.sh
# Wrapper: run the "salt" CLI against mm-master-1's config dir.
RUN echo -e '#!/bin/bash\nsalt -c /tmp/salt-tests-tmpdir/mm-master-1/conf "${@:1}"' > /tmp/salt-tests-tmpdir/scripts/mm-salt-1.sh
# Wrapper: copy mm-master-1's pki/master.* files into mm-master-2's pki dir,
# then start salt-master with mm-master-2's config dir. The copied files only
# exist once mm-master-1 has been started, so mm-master-1 must be run first.
RUN echo -e '#!/bin/bash\nmkdir -p /tmp/salt-tests-tmpdir/mm-master-2/pki\ncp /tmp/salt-tests-tmpdir/mm-master-1/pki/master.* /tmp/salt-tests-tmpdir/mm-master-2/pki/\nsalt-master -c /tmp/salt-tests-tmpdir/mm-master-2/conf "${@:1}"' > /tmp/salt-tests-tmpdir/scripts/mm-master-2.sh
# Wrapper: run the "salt" CLI against mm-master-2's config dir.
RUN echo -e '#!/bin/bash\nsalt -c /tmp/salt-tests-tmpdir/mm-master-2/conf "${@:1}"' > /tmp/salt-tests-tmpdir/scripts/mm-salt-2.sh
# Wrappers: start salt-minion with the config dir of mm-minion-1 / mm-minion-2.
RUN echo -e '#!/bin/bash\nsalt-minion -c /tmp/salt-tests-tmpdir/mm-minion-1/conf "${@:1}"' > /tmp/salt-tests-tmpdir/scripts/mm-minion-1.sh
RUN echo -e '#!/bin/bash\nsalt-minion -c /tmp/salt-tests-tmpdir/mm-minion-2/conf "${@:1}"' > /tmp/salt-tests-tmpdir/scripts/mm-minion-2.sh
# start-tmux.sh: puts the scripts dir on PATH, creates a detached tmux session
# "e" with one "bash -i" window per daemon plus a "shell-1" window, selects
# the shell window and attaches. "\$" keeps $PATH literal at build time so it
# is expanded when the script runs.
RUN echo -en "#!/bin/sh\nPATH=\"/tmp/salt-tests-tmpdir/scripts:\$PATH\"\nexport PATH\n\ntmux new-session -s e -d -n mm-master-1 'bash -i'\ntmux new-window -t e:1 -n mm-master-2 'bash -i'\ntmux new-window -t e:2 -n mm-minion-1 'bash -i'\ntmux new-window -t e:3 -n mm-minion-2 'bash -i'\ntmux new-window -t e:4 -n shell-1 'bash -i'\n\ntmux select-window -t e:4\ntmux -2 attach-session -t e" > /tmp/salt-tests-tmpdir/scripts/start-tmux.sh
# switch-transport.sh <transport>: uses sed to rewrite the "transport:" line
# in every master and minion config to the value given as the first argument.
# The argument is not validated.
RUN echo -en "#!/bin/bash\nTRANSPORT=\$1\necho Setting transport to: \$TRANSPORT\nsed -i \"s/^transport: \\(.*\\)$/transport: \$TRANSPORT/g\" /tmp/salt-tests-tmpdir/mm-*/conf/{master,minion}" > /tmp/salt-tests-tmpdir/scripts/switch-transport.sh
# Make every generated helper script executable.
RUN chmod +x /tmp/salt-tests-tmpdir/scripts/*.sh
# Launch the tmux session when the container starts. Exec (JSON-array) form
# runs the script directly instead of wrapping it in "/bin/sh -c", so signals
# from "docker stop" reach it. The script carries its own shebang and was made
# executable above. Run the container with a TTY (docker run -it) so tmux can
# attach.
ENTRYPOINT ["/tmp/salt-tests-tmpdir/scripts/start-tmux.sh"]
To simplify, you can use the `s0undt3ch/multimaster-tcp` Docker container.
Steps to Reproduce the behavior
Once you start the container, you'll have a tmux session open.
- On window `mm-master-1`, run `mm-master-1.sh`
- On window `mm-master-2`, run `mm-master-2.sh`
- On window `mm-minion-1`, run `mm-minion-1.sh`
- On window `mm-minion-2`, run `mm-minion-2.sh`
Now, on window `shell-1`, to verify it's working, you can run `mm-salt-1.sh \* test.ping`:
mm-minion-2:
True
mm-minion-1:
True
If you now stop `mm-master-2`, the previous command should still work:
mm-minion-2:
True
mm-minion-1:
True
Now, to expose the problem, stop all daemons, run `switch-transport.sh tcp`, and then start all daemons again.
On window `shell-1`, to verify it's working, run `mm-salt-1.sh \* test.ping`; both minions should return `True` to the ping:
mm-minion-2:
True
mm-minion-1:
True
However, if you stop `mm-master-2` and run `mm-salt-1.sh \* test.ping`, no minions will respond:
mm-minion-2:
Minion did not return. [No response]
The minions may not have all finished running and any remaining minions will return upon completion. To look up the return data for this job later, run the following command:
salt-run jobs.lookup_jid 20201201064144142153
mm-minion-1:
Minion did not return. [No response]
The minions may not have all finished running and any remaining minions will return upon completion. To look up the return data for this job later, run the following command:
salt-run jobs.lookup_jid 20201201064144142153
ERROR: Minions returned with non-zero exit code
Expected behavior
Both minions should still respond when one of the masters is down, just like what happens when the `zeromq` transport is set.
Screenshots
If applicable, add screenshots to help explain your problem.
Versions Report
salt --versions-report
(Provided by running `salt --versions-report`)
Salt Version:
Salt: 3002.2
Dependency Versions:
cffi: Not Installed
cherrypy: Not Installed
dateutil: Not Installed
docker-py: Not Installed
gitdb: Not Installed
gitpython: Not Installed
Jinja2: 2.11.2
libgit2: Not Installed
M2Crypto: Not Installed
Mako: Not Installed
msgpack: 1.0.0
msgpack-pure: Not Installed
mysql-python: Not Installed
pycparser: Not Installed
pycrypto: Not Installed
pycryptodome: 3.9.9
pygit2: Not Installed
Python: 3.6.8 (default, Nov 21 2019, 19:31:34)
python-gnupg: Not Installed
PyYAML: 5.3.1
PyZMQ: 20.0.0
smmap: Not Installed
timelib: Not Installed
Tornado: 4.5.3
ZMQ: 4.3.3
System Versions:
dist: centos 8 Core
locale: UTF-8
machine: x86_64
release: 5.9.10-arch1-1
system: Linux
version: CentOS Linux 8 Core