Skip to content

[🐛 Bug]: nodes spawned until system is out of resources. No Sessions started #2411

@therealdjryan

Description

@therealdjryan

What happened?

After the grid is up, I start one test, but the test never runs. `podman ps` shows the number of nodes growing until the system runs out of resources. The last version of Selenium that worked for me was 4.20.0.

Command used to start Selenium Grid with Docker (or Kubernetes)

# Starts the selenium-hub container (the io.podman.compose.* labels indicate
# this invocation was produced by podman-compose 1.0.6).
# Multi-word values (SE_OPTS, --healthcheck-command) are quoted so each is
# passed as a single argument, and the stray "-" that followed
# SE_VIDEO_INTERNAL_UPLOAD=false has been dropped.
podman run --name=selenium-hub -d \
  --security-opt label:disable \
  --label io.podman.compose.config-hash=3db1e053aaef1b931176814434d519f08cc6bb687f3a4805f665d67b92687cb5 \
  --label io.podman.compose.project=grid \
  --label io.podman.compose.version=1.0.6 \
  --label PODMAN_SYSTEMD_UNIT=podman-compose@grid.service \
  --label com.docker.compose.project=grid \
  --label com.docker.compose.project.working_dir=/stuff/src/test/resources/grid \
  --label com.docker.compose.project.config_files=container-compose.yml \
  --label com.docker.compose.container-number=1 \
  --label com.docker.compose.service=selenium-hub \
  -e GRID_MAX_SESSION=6 \
  -e SE_NODE_MAX_INSTANCES=1 \
  -e SE_VIDEO_INTERNAL_UPLOAD=false \
  -e SE_ENABLE_TRACING=false \
  -e SE_NODE_DOCKER_CONFIG_FILENAME=config.toml \
  -e "SE_OPTS=--registration-secret xxxxxx --log-level FINE --tracing false" \
  -e SE_START_NO_VNC=true \
  -e SE_VNC_NO_PASSWORD=1 \
  -e SE_VNC_VIEW_ONLY=1 \
  -e SE_NODE_GRID_URL=http://gpuigrid:4444 \
  -e CONTAINER_HOST=tcp://10.200.96.126:23750 \
  -e DOCKER_HOST=tcp://10.200.96.126:23750 \
  -e TZ=US/Eastern \
  -v /stuff/src/test/resources/grid/config.toml:/opt/selenium/config.toml \
  --net grid_default --network-alias selenium-hub \
  -p 4442:4442 -p 4443:4443 -p 4444:4444 \
  --shm-size 2gb --privileged --restart unless-stopped \
  --healthcheck-command "/bin/sh -c /opt/bin/check-grid.sh" \
  --healthcheck-interval 10s --healthcheck-timeout 5s --healthcheck-retries 3 \
  selenium/hub:4.25.0-20240922


# Compose file declaring the Selenium Grid services: node-docker and selenium-hub.
version: "3.9"

services:
  # Node service built from the selenium/node-docker image. Its event-bus
  # settings point at the selenium-hub service and it starts after the hub.
  node-docker:
    container_name: node-docker
    image: selenium/node-docker:4.25.0-20240922
    restart: unless-stopped
    depends_on:
      - selenium-hub
    privileged: true
    shm_size: 2gb
    security_opt:
      - label:disable
    volumes:
      # The same ./config.toml is also mounted into the selenium-hub service.
      - ./config.toml:/opt/selenium/config.toml
    ports:
      - "5555:5555"
    healthcheck:
      # Exec-form check: runs check-grid.sh with "--host selenium-hub".
      test:
        - "CMD"
        - "/opt/bin/check-grid.sh"
        - "--host"
        - "selenium-hub"
      interval: 10s
      timeout: 5s
      retries: 4
    environment:
      # Video upload, tracing and the config file name.
      - SE_VIDEO_INTERNAL_UPLOAD=false
      - SE_ENABLE_TRACING=false
      - SE_NODE_DOCKER_CONFIG_FILENAME=config.toml
      # Event bus host and ports (the hub publishes 4442 and 4443).
      - SE_EVENT_BUS_HOST=selenium-hub
      - SE_EVENT_BUS_PUBLISH_PORT=4442
      - SE_EVENT_BUS_SUBSCRIBE_PORT=4443
      # Grid URL and the TCP endpoint of the container engine.
      - SE_NODE_GRID_URL=http://gpuigrid:4444
      - CONTAINER_HOST=tcp://somehost:23750
      - DOCKER_HOST=tcp://somehost:23750
      # Screen geometry and VNC behaviour.
      - SE_SCREEN_WIDTH=1550
      - SE_SCREEN_HEIGHT=880
      - SE_START_NO_VNC=true
      - SE_VNC_NO_PASSWORD=1
      - SE_VNC_VIEW_ONLY=1
      # Session timeout, extra server options, timezone and instance limit.
      - SE_NODE_SESSION_TIMEOUT=900
      - SE_OPTS=--registration-secret xxxxxxx --log-level FINE --tracing false --enable-managed-downloads true
      - TZ=US/Eastern
      - SE_NODE_MAX_INSTANCES=1
  # Grid hub service built from the selenium/hub image. Publishes ports
  # 4442, 4443 and 4444 and is health-checked with check-grid.sh.
  selenium-hub:
    image: selenium/hub:4.25.0-20240922
    container_name: selenium-hub
    privileged: true
    shm_size: 2gb
    volumes:
      # The same ./config.toml is also mounted into the node-docker service.
      - ./config.toml:/opt/selenium/config.toml
    security_opt:
      - label:disable
    environment:
      # NOTE(review): several entries below (SE_NODE_*, SE_VNC_*, CONTAINER_HOST,
      # DOCKER_HOST) look like node-side settings; confirm the hub image
      # actually reads them, otherwise they have no effect here.
      - GRID_MAX_SESSION=6  # Limit sessions per node
      - SE_NODE_MAX_INSTANCES=1  # Limit instances per node
      # Fixed: a stray trailing "-" made this value "false  -" instead of "false".
      - SE_VIDEO_INTERNAL_UPLOAD=false
      - SE_ENABLE_TRACING=false
      - SE_NODE_DOCKER_CONFIG_FILENAME=config.toml
      # NOTE(review): the registration secret is redacted; verify that it is
      # identical to the one configured for node-docker.
      - SE_OPTS=--registration-secret xxxxxxxx --log-level FINE --tracing false
      - SE_START_NO_VNC=true
      - SE_VNC_NO_PASSWORD=1
      - SE_VNC_VIEW_ONLY=1
      - SE_NODE_GRID_URL=http://gpuigrid:4444
      - CONTAINER_HOST=tcp://somehost:23750
      - DOCKER_HOST=tcp://somehost:23750
      - TZ=US/Eastern
    ports:
      # Quoted so the host:container pairs are always read as strings.
      - "4442:4442"
      - "4443:4443"
      - "4444:4444"
    healthcheck:
      # Exec-form check: runs check-grid.sh with no arguments.
      test: ["CMD", "/opt/bin/check-grid.sh"]
      interval: 10s
      timeout: 5s
      retries: 3
    restart: unless-stopped

config.toml

[docker]
# Where to reach the docker daemon.
#   - host.docker.internal works on macOS and Windows.
#   - On Linux, either pass --net=host to `docker run` or use 172.17.0.1 in the URI.
#   - To expose Docker over tcp on macOS, install socat and run:
#       socat -4 TCP-LISTEN:2375,fork UNIX-CONNECT:/var/run/docker.sock
url = "http://somehost:23750"
# url = "unix://var/run/docker.sock"

# Pairs of (Docker image, capabilities): a container is started from the image
# when a request matches the capabilities that follow it.
configs = [
    "selenium/standalone-chrome:4.25.0-20240922",
    "{\"browserName\": \"chrome\", \"platformName\": \"linux\"}"
]

# Image used for video recording.
video-image = "selenium/video:ffmpeg-7.0.2-20240922"

host-config-keys = [
    "Dns",
    "DnsOptions",
    "DnsSearch",
    "ExtraHosts",
    "Binds"
]
# [server] settings. The original note said to enable this section (and fill in
# its placeholders) when the node runs on a separate VM. It is enabled here
# because registration-secret is a [server] option, not a [node] option; under
# [node] the key is not picked up from this file.
[server]
registration-secret = "xxxxxxx"

# Node-level settings.
[node]
enable-managed-downloads = true

Relevant log output

fca1e9ad7ae1 13:05:00.686 DEBUG [MultiExchange.getExceptionalCF] - [JdkHttpClient-1-0] [142s 498ms] MultiExchange ConnectException (async): already retried once.
fca1e9ad7ae1 java.net.ConnectException
fca1e9ad7ae1 	at java.net.http/jdk.internal.net.http.common.Utils.toConnectException(Utils.java:1055)
fca1e9ad7ae1 	at java.net.http/jdk.internal.net.http.PlainHttpConnection.connectAsync(PlainHttpConnection.java:198)
fca1e9ad7ae1 	at java.net.http/jdk.internal.net.http.PlainHttpConnection.checkRetryConnect(PlainHttpConnection.java:230)
fca1e9ad7ae1 	at java.net.http/jdk.internal.net.http.PlainHttpConnection.lambda$connectAsync$1(PlainHttpConnection.java:206)
fca1e9ad7ae1 	at java.base/java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:934)
fca1e9ad7ae1 	at java.base/java.util.concurrent.CompletableFuture$UniHandle.tryFire(CompletableFuture.java:911)
fca1e9ad7ae1 	at java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:510)
fca1e9ad7ae1 	at java.base/java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1773)
fca1e9ad7ae1 	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
fca1e9ad7ae1 	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
fca1e9ad7ae1 	at java.base/java.lang.Thread.run(Thread.java:840)
fca1e9ad7ae1 Caused by: java.nio.channels.ClosedChannelException
fca1e9ad7ae1 	at java.base/sun.nio.ch.SocketChannelImpl.ensureOpen(SocketChannelImpl.java:195)
fca1e9ad7ae1 	at java.base/sun.nio.ch.SocketChannelImpl.beginConnect(SocketChannelImpl.java:760)
fca1e9ad7ae1 	at java.base/sun.nio.ch.SocketChannelImpl.connect(SocketChannelImpl.java:848)
fca1e9ad7ae1 	at java.net.http/jdk.internal.net.http.PlainHttpConnection.lambda$connectAsync$0(PlainHttpConnection.java:183)
fca1e9ad7ae1 	at java.base/java.security.AccessController.doPrivileged(AccessController.java:569)
fca1e9ad7ae1 	at java.net.http/jdk.internal.net.http.PlainHttpConnection.connectAsync(PlainHttpConnection.java:185)
fca1e9ad7ae1 	... 9 more

Operating System

Red Hat Enterprise Linux 8.4 (Ootpa)

Docker Selenium version (image tag)

4.25.0-20240922

Selenium Grid chart version (chart version)

N/A

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions