Partial Analysis of CVE-2025-38618

This vulnerability is a logic bug in vsock, and the corresponding patch commit can be found here.

There is a related vulnerability from earlier this year, CVE-2025-21756, which was also exploited in kernelCTF.

1. Root Cause Analysis

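The vsock subsystem maintains socket tables for unbound and bound vsock objects, which can be accessed via the macros vsock_bound_sockets() and vsock_unbound_sockets. Both live in the same vsock_bind_table array: bound sockets are hashed by port, and the unbound list occupies the extra slot at index VSOCK_HASH_SIZE.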

#define VSOCK_HASH(addr)        ((addr)->svm_port % VSOCK_HASH_SIZE)
#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)])
#define vsock_unbound_sockets     (&vsock_bind_table[VSOCK_HASH_SIZE])

The vsock object is inserted into the unbound table [1] during creation.

static int vsock_create(struct net *net, struct socket *sock,
            int protocol, int kern)
{
    // [...]
    sk = __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern);
    vsk = vsock_sk(sk);

    vsock_insert_unbound(vsk); // [1]
    // [...]
}

static void vsock_insert_unbound(struct vsock_sock *vsk)
{
    spin_lock_bh(&vsock_table_lock);
    __vsock_insert_bound(vsock_unbound_sockets, vsk);
    spin_unlock_bh(&vsock_table_lock);
}

When binding to an address or attempting to connect to a remote server, the function __vsock_bind() is called internally. This function then calls __vsock_bind_connectible() [2] if the vsock type is SOCK_STREAM or SOCK_SEQPACKET.

static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
{
    struct vsock_sock *vsk = vsock_sk(sk);
    int retval;

    if (vsock_addr_bound(&vsk->local_addr))
        return -EINVAL;

    // [...]
    if (addr->svm_cid != VMADDR_CID_ANY && !vsock_find_cid(addr->svm_cid))
        return -EADDRNOTAVAIL;

    switch (sk->sk_socket->type) {
    case SOCK_STREAM:
    case SOCK_SEQPACKET:
        spin_lock_bh(&vsock_table_lock);
        retval = __vsock_bind_connectible(vsk, addr); // [2]
        spin_unlock_bh(&vsock_table_lock);
        break;
    }
}

__vsock_bind_connectible() first seeds its static port counter with a random value if the counter is still zero [3]. If the caller passes VMADDR_PORT_ANY (i.e., no specific port), it takes the current counter value as the candidate port, increments the counter [4], and checks whether the candidate is already in use [5]. If the port is available, the vsock is bound to it, removed from the original unbound table [6], and inserted into the corresponding bound table [7].

static int __vsock_bind_connectible(struct vsock_sock *vsk,
                    struct sockaddr_vm *addr)
{
    static u32 port;
    struct sockaddr_vm new_addr;

    if (!port)
        port = get_random_u32_above(LAST_RESERVED_PORT); // [3]

    vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port);

    if (addr->svm_port == VMADDR_PORT_ANY) {
        bool found = false;
        unsigned int i;

        for (i = 0; i < MAX_PORT_RETRIES; i++) {
            if (port <= LAST_RESERVED_PORT)
                port = LAST_RESERVED_PORT + 1;

            new_addr.svm_port = port++; // [4]

            if (!__vsock_find_bound_socket(&new_addr)) { // [5]
                found = true;
                break;
            }
        }

        if (!found)
            return -EADDRNOTAVAIL;
    }
    // [...]
    vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port);
    // [...]
    __vsock_remove_bound(vsk); // [6]
    __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk); // [7]
}

However, VMADDR_PORT_ANY, the value used to indicate an unspecified port, is defined as -1 (0xffffffff). Nothing in the loop excludes it, so after enough increments the port counter can reach this value, and it will then be assigned to the vsock. This assignment is clearly incorrect and is likely to cause side effects.

2. Exploit Path

According to the commit message, when connecting to a server listening on port VMADDR_PORT_ANY, the function virtio_transport_recv_listen() is invoked internally. This function creates a new child vsock [1], and its local address is initialized from the destination CID and port in the packet header, i.e., the address the client connected to [2].

static int
virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
                 struct virtio_transport *t)
{
    struct vsock_sock *vsk = vsock_sk(sk);
    
    // [...]
    child = vsock_create_connected(sk); // [1]

    // [...]
    vchild = vsock_sk(child);
    vsock_addr_init(&vchild->local_addr, le64_to_cpu(hdr->dst_cid), // [2]
            le32_to_cpu(hdr->dst_port));
    
    // [...]
}

Moreover, the newly created vsock object is not inserted into the bound table, which allows us to leverage the bind handler vsock_bind().

static int
vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
    int err;
    struct sock *sk;
    struct sockaddr_vm *vm_addr;

    vsock_addr_cast(addr, addr_len, &vm_addr);
    sk = sock->sk;
    // [...]
    err = __vsock_bind(sk, vm_addr); // <--------------
    // [...]
}

The address check can be bypassed because the child's local port is VMADDR_PORT_ANY, which vsock_addr_bound() treats as "not bound" [3].

static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
{
    // [...]
    if (vsock_addr_bound(&vsk->local_addr)) // [3]
        return -EINVAL;
    
    switch (sk->sk_socket->type) {
    case SOCK_STREAM:
    case SOCK_SEQPACKET:
        spin_lock_bh(&vsock_table_lock);
        retval = __vsock_bind_connectible(vsk, addr); // <--------------
        spin_unlock_bh(&vsock_table_lock);
        break;
    // [...]
    }
    // [...]
}

Finally, a situation similar to CVE-2025-21756 occurs again: a socket is not in the bound table, yet __vsock_remove_bound() is called to remove it and decrement its reference count, which results in a use-after-free.

static int __vsock_bind_connectible(struct vsock_sock *vsk,
                    struct sockaddr_vm *addr)
{
    // [...]
    __vsock_remove_bound(vsk);
    __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk);
}

3. Problem

When trying to reproduce this vulnerability, I found that the vsock listen handler performs a check on the local address [1]. Even if VMADDR_PORT_ANY can be assigned as the local port, the socket then looks unbound, so this check rejects the listen request with -EINVAL, preventing the server socket from accepting connections.

static int vsock_listen(struct socket *sock, int backlog)
{
    int err;
    struct sock *sk;
    struct vsock_sock *vsk;

    sk = sock->sk;
    
    // [...]
    if (sock->state != SS_UNCONNECTED) {
        err = -EINVAL;
        goto out;
    }

    vsk = vsock_sk(sk);

    if (!vsock_addr_bound(&vsk->local_addr)) { // [1]
        err = -EINVAL;
        goto out;
    }

    sk->sk_max_ack_backlog = backlog;
    sk->sk_state = TCP_LISTEN;

    // [...]
    return err;
}

After reviewing the code, I still cannot find a way to bypass this check. I might update this post once I figure something out, or after the author submits a PR to the kernelCTF repo.

Solution (Updated on 2025/10/16)

According to the post by SSD, LINUX VSOCK’S VMADDR_PORT_ANY: DYNAMIC PORT ASSIGNMENT SIMPLIFIES VM‑HOST COMMUNICATION, if we can achieve the race condition below, the accepted child socket can end up with VMADDR_PORT_ANY as its local port.

[Thread-1]                           [Thread-2]
sys_bind(server_fd)
 ...
  __vsock_bind_connectible()
   vsk->local_addr = VMADDR_PORT_ANY
                                     sys_connect(client_fd, dst.svm_port=VMADDR_PORT_ANY)
                                      ...
                                       virtio_transport_recv_pkt(skb)
                                        sk = vsock_find_bound_socket(&dst)

sys_bind(server_fd)
 ...
  __vsock_bind_connectible()
   vsk->local_addr = RANDOM_PORT

sys_listen(server_fd, 1)
 sk->sk_state = TCP_LISTEN

                                        virtio_transport_recv_listen(sk, skb)
                                         hdr = virtio_vsock_hdr(skb)
                                         (hdr->dst_port == VMADDR_PORT_ANY)
                                         child = vsock_create_connected(sk)
                                         vchild = vsock_sk(child)
                                         vchild->local_addr.svm_port = hdr->dst_port

This works because the local port assigned to the accepted child socket is taken from the packet header, while the listening check is performed against the fields of the vsock object.

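The PoC code is here. To make it easy to reproduce, it relies on the following kernel diff, which starts the port counter just below VMADDR_PORT_ANY so that the problematic value is reached after a few automatic binds.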

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock_patch.c
index f8f1a49..ac92417 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock_patch.c
@@ -645,7 +645,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
        struct sockaddr_vm new_addr;

        if (!port)
-               port = get_random_u32_above(LAST_RESERVED_PORT);
+               // port = get_random_u32_above(LAST_RESERVED_PORT);
+               port = VMADDR_PORT_ANY - 2;

        vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port);

To widen the race window, a delay is added in virtio_transport_recv_pkt() before the socket is locked:

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common_patch.c
index c57fe7d..28c4521 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common_patch.c
@@ -1437,6 +1437,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
                goto free_pkt;
        }

+       mdelay(100);
        vsk = vsock_sk(sk);

        lock_sock(sk);