Skip to content

NN

hyperbench.nn

HyperedgeAggregator

Pool node embeddings into hyperedge embeddings using the incidence structure.

Each node-hyperedge incidence selects one node embedding row, then reduces those rows per hyperedge with the requested scatter aggregation.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Hyperedge incidence in COO format of size (2, num_incidences).

required
node_embeddings Tensor

Node embedding matrix of size (num_nodes, num_channels).

required
num_hyperedges int | None

Optional explicit hyperedge count. When provided, the pooled output preserves empty hyperedges that do not appear in hyperedge_index.

None
Source code in hyperbench/nn/aggregator.py
class HyperedgeAggregator:
    """
    Reduce node embeddings to one embedding row per hyperedge.

    For every node-hyperedge incidence the matching node embedding row is gathered;
    the gathered rows are then scatter-reduced per hyperedge with the requested aggregation.

    Args:
        hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``.
        node_embeddings: Node embedding matrix of size ``(num_nodes, num_channels)``.
        num_hyperedges: Optional explicit hyperedge count.
            When provided, the pooled output preserves empty hyperedges that do not appear in ``hyperedge_index``.
    """

    def __init__(
        self,
        hyperedge_index: Tensor,
        node_embeddings: Tensor,
        num_hyperedges: int | None = None,
    ):
        # The wrapper exposes the node-id row, the hyperedge-id row and the inferred counts.
        self.hyperedge_index_wrapper = HyperedgeIndex(hyperedge_index)
        self.node_embeddings = node_embeddings
        self.num_hyperedges = num_hyperedges

    def pool(self, aggregation: Literal["maxmin", "max", "min", "mean", "mul", "sum"]) -> Tensor:
        """
        Pool node embeddings into hyperedge embeddings.

        ``hyperedge_index`` is the COO encoding of the nonzero entries of the binary
        incidence matrix ``H`` of shape ``(num_nodes, num_hyperedges)``:
        ``hyperedge_index[0, k] = v`` and ``hyperedge_index[1, k] = e`` means ``H[v, e] = 1`` for incidence ``k``.

        With ``X`` the node embedding matrix of shape ``(num_nodes, num_channels)``:

        - ``aggregation="sum"`` computes the equivalent of the standard sparse matrix product ``H^T X``.
        - ``aggregation="mean"`` computes ``D_e^{-1} H^T X``, where ``D_e[e, e] = sum_v H[v, e]`` is the hyperedge cardinality matrix.
        - ``aggregation in {"max", "min", "mul"}`` uses the same sparsity pattern as ``H^T X``,
          but replaces the summation over incident nodes with a channel-wise ``max``, ``min``, or product reduction.
        - ``aggregation="maxmin"`` computes the channel-wise range ``max - min`` for each hyperedge.

        Examples:
            >>> hyperedge_index = [[0, 1, 2, 2, 3],
            ...                    [0, 0, 0, 1, 1]]
            >>> node_embeddings = [[1, 10], [2, 20], [3, 30], [4, 40]]
            >>> HyperedgeAggregator(hyperedge_index, node_embeddings).pool("mean")
            ... [[2, 20], [3.5, 35]]
            >>> HyperedgeAggregator(hyperedge_index, node_embeddings).pool("sum")
            ... [[6, 60], [7, 70]]
            >>> HyperedgeAggregator(hyperedge_index, node_embeddings).pool("max")
            ... [[3, 30], [4, 40]]
            >>> HyperedgeAggregator(hyperedge_index, node_embeddings).pool("maxmin")
            ... [[2, 20], [1, 10]]

        Args:
            aggregation: Reduction applied across the nodes belonging to each hyperedge.

        Returns:
            A hyperedge embedding matrix of shape ``(num_hyperedges, num_channels)``.
        """
        index = self.hyperedge_index_wrapper

        # Step 1: gather one embedding row per incidence; a node that belongs to
        # several hyperedges is repeated once per membership.
        #   node_embeddings = [[1, 10], [2, 20], [3, 30], [4, 40]]
        #   all_node_ids    = [0, 1, 2, 2, 3]
        #   gathered rows   = [[1, 10], [2, 20], [3, 30], [3, 30], [4, 40]]
        #   shape: (num_incidences, num_channels)
        per_incidence_rows = self.node_embeddings[index.all_node_ids]

        # Step 2: choose the number of output rows. An explicit count takes
        # precedence over the count reported by the index wrapper.
        if self.num_hyperedges is None:
            output_rows = index.num_hyperedges
        else:
            output_rows = self.num_hyperedges

        # Step 3: reduce the gathered rows per hyperedge id along dim 0.
        #   "sum": [[1+2+3, 10+20+30],               # hyperedge 0 = nodes 0, 1, 2
        #           [3+4, 30+40]]                    # hyperedge 1 = nodes 2, 3
        #   "max": [[max(1, 2, 3), max(10, 20, 30)],
        #           [max(3, 4), max(30, 40)]]
        #   shape: (num_hyperedges, num_channels)
        scatter_args = {
            "src": per_incidence_rows,
            "index": index.all_hyperedge_ids,
            "dim": 0,  # scatter along the hyperedge dimension
            "dim_size": output_rows,
        }

        if aggregation != "maxmin":
            return scatter(**scatter_args, reduce=aggregation)

        # "maxmin" is handled by its own helper rather than by a ``reduce`` mode.
        return maxmin_scatter(**scatter_args)

pool(aggregation)

Aggregate node embeddings for each hyperedge.

hyperedge_index is the COO encoding of the nonzero entries of H, so hyperedge_index[0, k] = v and hyperedge_index[1, k] = e means H[v, e] = 1 for incidence k.

Let H be the binary incidence matrix of shape (num_nodes, num_hyperedges) and let X be the node embedding matrix of shape (num_nodes, num_channels). This method pools node features into hyperedge features using the incidence pattern in H:

- aggregation="sum" computes the equivalent of the standard sparse matrix product H^T X.
- aggregation="mean" computes D_e^{-1} H^T X, where D_e[e, e] = sum_v H[v, e] is the hyperedge cardinality matrix.
- aggregation in {"max", "min", "mul"} uses the same sparsity pattern as H^T X, but replaces the summation over incident nodes with a channel-wise max, min, or product reduction.
- aggregation="maxmin" computes the channel-wise range max - min for each hyperedge.

Examples:

>>> hyperedge_index = [[0, 1, 2, 2, 3],
...                    [0, 0, 0, 1, 1]]
>>> node_embeddings = [[1, 10], [2, 20], [3, 30], [4, 40]]
>>> HyperedgeAggregator(hyperedge_index, node_embeddings).pool("mean")
... [[2, 20], [3.5, 35]]
>>> HyperedgeAggregator(hyperedge_index, node_embeddings).pool("sum")
... [[6, 60], [7, 70]]
>>> HyperedgeAggregator(hyperedge_index, node_embeddings).pool("max")
... [[3, 30], [4, 40]]
>>> HyperedgeAggregator(hyperedge_index, node_embeddings).pool("maxmin")
... [[2, 20], [1, 10]]

Parameters:

Name Type Description Default
aggregation Literal['maxmin', 'max', 'min', 'mean', 'mul', 'sum']

Reduction applied across the nodes belonging to each hyperedge.

required

Returns:

Type Description
Tensor

A hyperedge embedding matrix of shape (num_hyperedges, num_channels).

Source code in hyperbench/nn/aggregator.py
def pool(self, aggregation: Literal["maxmin", "max", "min", "mean", "mul", "sum"]) -> Tensor:
    """
    Pool node embeddings into hyperedge embeddings.

    ``hyperedge_index`` is the COO encoding of the nonzero entries of the binary
    incidence matrix ``H`` of shape ``(num_nodes, num_hyperedges)``:
    ``hyperedge_index[0, k] = v`` and ``hyperedge_index[1, k] = e`` means ``H[v, e] = 1`` for incidence ``k``.

    With ``X`` the node embedding matrix of shape ``(num_nodes, num_channels)``:

    - ``aggregation="sum"`` computes the equivalent of the standard sparse matrix product ``H^T X``.
    - ``aggregation="mean"`` computes ``D_e^{-1} H^T X``, where ``D_e[e, e] = sum_v H[v, e]`` is the hyperedge cardinality matrix.
    - ``aggregation in {"max", "min", "mul"}`` uses the same sparsity pattern as ``H^T X``,
      but replaces the summation over incident nodes with a channel-wise ``max``, ``min``, or product reduction.
    - ``aggregation="maxmin"`` computes the channel-wise range ``max - min`` for each hyperedge.

    Examples:
        >>> hyperedge_index = [[0, 1, 2, 2, 3],
        ...                    [0, 0, 0, 1, 1]]
        >>> node_embeddings = [[1, 10], [2, 20], [3, 30], [4, 40]]
        >>> HyperedgeAggregator(hyperedge_index, node_embeddings).pool("mean")
        ... [[2, 20], [3.5, 35]]
        >>> HyperedgeAggregator(hyperedge_index, node_embeddings).pool("sum")
        ... [[6, 60], [7, 70]]
        >>> HyperedgeAggregator(hyperedge_index, node_embeddings).pool("max")
        ... [[3, 30], [4, 40]]
        >>> HyperedgeAggregator(hyperedge_index, node_embeddings).pool("maxmin")
        ... [[2, 20], [1, 10]]

    Args:
        aggregation: Reduction applied across the nodes belonging to each hyperedge.

    Returns:
        A hyperedge embedding matrix of shape ``(num_hyperedges, num_channels)``.
    """
    index = self.hyperedge_index_wrapper

    # Step 1: gather one embedding row per incidence; a node that belongs to
    # several hyperedges is repeated once per membership.
    #   node_embeddings = [[1, 10], [2, 20], [3, 30], [4, 40]]
    #   all_node_ids    = [0, 1, 2, 2, 3]
    #   gathered rows   = [[1, 10], [2, 20], [3, 30], [3, 30], [4, 40]]
    #   shape: (num_incidences, num_channels)
    per_incidence_rows = self.node_embeddings[index.all_node_ids]

    # Step 2: choose the number of output rows. An explicit count takes
    # precedence over the count reported by the index wrapper.
    if self.num_hyperedges is None:
        output_rows = index.num_hyperedges
    else:
        output_rows = self.num_hyperedges

    # Step 3: reduce the gathered rows per hyperedge id along dim 0.
    #   "sum": [[1+2+3, 10+20+30],               # hyperedge 0 = nodes 0, 1, 2
    #           [3+4, 30+40]]                    # hyperedge 1 = nodes 2, 3
    #   "max": [[max(1, 2, 3), max(10, 20, 30)],
    #           [max(3, 4), max(30, 40)]]
    #   shape: (num_hyperedges, num_channels)
    scatter_args = {
        "src": per_incidence_rows,
        "index": index.all_hyperedge_ids,
        "dim": 0,  # scatter along the hyperedge dimension
        "dim_size": output_rows,
    }

    if aggregation != "maxmin":
        return scatter(**scatter_args, reduce=aggregation)

    # "maxmin" is handled by its own helper rather than by a ``reduce`` mode.
    return maxmin_scatter(**scatter_args)

NodeAggregator

Pool hyperedge embeddings into node embeddings using the incidence structure.

Each node-hyperedge incidence selects one hyperedge embedding row, then reduces those rows per node with the requested scatter aggregation.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Hyperedge incidence in COO format of size (2, num_incidences).

required
hyperedge_embeddings Tensor

Hyperedge embedding matrix of size (num_hyperedges, num_channels).

required
num_nodes int | None

Optional explicit node count. When provided, the pooled output preserves isolated nodes that do not appear in hyperedge_index.

None
Source code in hyperbench/nn/aggregator.py
class NodeAggregator:
    """
    Pool hyperedge embeddings into node embeddings using the incidence structure.

    Each node-hyperedge incidence selects one hyperedge embedding row, then
    reduces those rows per node with the requested scatter aggregation.

    Args:
        hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``.
        hyperedge_embeddings: Hyperedge embedding matrix of size ``(num_hyperedges, num_channels)``.
        num_nodes: Optional explicit node count. When provided, the pooled output preserves isolated nodes that do not appear in ``hyperedge_index``.
    """

    def __init__(
        self,
        hyperedge_index: Tensor,
        hyperedge_embeddings: Tensor,
        num_nodes: int | None = None,
    ):
        self.hyperedge_index_wrapper = HyperedgeIndex(hyperedge_index)
        self.hyperedge_embeddings = hyperedge_embeddings
        self.num_nodes = num_nodes

    def pool(self, aggregation: Literal["maxmin", "max", "min", "mean", "mul", "sum"]) -> Tensor:
        """
        Aggregate hyperedge embeddings for each node.

        ``hyperedge_index`` is the COO encoding of the nonzero entries of ``H``,
        so ``hyperedge_index[0, k] = v`` and ``hyperedge_index[1, k] = e`` means ``H[v, e] = 1`` for incidence ``k``.

        Let ``H`` be the incidence matrix of shape ``(num_nodes, num_hyperedges)``
        and let ``E`` be the hyperedge embedding matrix of shape ``(num_hyperedges, num_channels)``.
        This method pools hyperedge features into node features using the incidence pattern in ``H``:

        - ``aggregation="sum"`` computes the equivalent of the standard sparse matrix product ``H E``.
        - ``aggregation="mean"`` computes ``D_v^{-1} H E``, where ``D_v[v, v] = sum_e H[v, e]`` is the node degree matrix.
        - ``aggregation in {"max", "min", "mul"}`` uses the same sparsity pattern as ``H E``,
          but replaces the summation over incident hyperedges with a channel-wise ``max``, ``min``, or product reduction.
        - ``aggregation="maxmin"`` is dispatched to ``maxmin_scatter``, the same helper ``HyperedgeAggregator`` uses
          for its channel-wise range ``max - min``, here taken over the hyperedges incident to each node.

        Examples:
            >>> hyperedge_index = [[0, 1, 1, 2],
            ...                    [0, 0, 1, 1]]
            >>> hyperedge_embeddings = [[10, 100], [20, 200]]
            >>> NodeAggregator(hyperedge_index, hyperedge_embeddings).pool("mean")
            ... [[10, 100], [15, 150], [20, 200]]
            >>> NodeAggregator(hyperedge_index, hyperedge_embeddings).pool("sum")
            ... [[10, 100], [30, 300], [20, 200]]
            >>> NodeAggregator(hyperedge_index, hyperedge_embeddings).pool("max")
            ... [[10, 100], [20, 200], [20, 200]]
            >>> NodeAggregator(hyperedge_index, hyperedge_embeddings).pool("maxmin")
            ... [[0, 0], [10, 100], [0, 0]]

        Args:
            aggregation: Reduction applied across the hyperedges incident to each node.

        Returns:
            A node embedding matrix of shape ``(num_nodes, num_channels)``.
        """
        index = self.hyperedge_index_wrapper

        # Gather the embeddings for each incidence.
        # A hyperedge appearing in multiple node incidences is repeated, once per incidence.
        # Example: hyperedge_embeddings = [[10, 100],  # hyperedge 0
        #                                  [20, 200]]  # hyperedge 1
        #          -> all_hyperedge_ids = [0, 0, 1, 1]
        #          -> incidence_hyperedge_embeddings = [[10, 100],   # hyperedge 0 for node 0
        #                                               [10, 100],   # hyperedge 0 for node 1
        #                                               [20, 200],   # hyperedge 1 for node 1
        #                                               [20, 200]]   # hyperedge 1 for node 2
        #             shape: (num_incidences, num_channels)
        incidence_hyperedge_embeddings = self.hyperedge_embeddings[index.all_hyperedge_ids]

        # An explicit node count takes precedence over the count reported by the index wrapper.
        num_nodes = self.num_nodes if self.num_nodes is not None else index.num_nodes

        # Scatter-aggregate hyperedge embeddings into node embeddings.
        # Example: with aggregation="sum":
        #          [[10, 100],         # node 0 belongs to hyperedge 0
        #           [10+20, 100+200],  # node 1 belongs to hyperedge 0 and 1
        #           [20, 200]]         # node 2 belongs to hyperedge 1
        #          shape: (num_nodes, num_channels)
        #          with aggregation="max":
        #          [[10, 100],                     # node 0 belongs to hyperedge 0
        #           [max(10, 20), max(100, 200)],  # node 1 belongs to hyperedge 0 and 1
        #           [20, 200]]                     # node 2 belongs to hyperedge 1
        #          shape: (num_nodes, num_channels)
        if aggregation == "maxmin":
            # "maxmin" is handled by its own helper rather than by a ``reduce`` mode.
            return maxmin_scatter(
                src=incidence_hyperedge_embeddings,
                index=index.all_node_ids,
                dim=0,  # scatter along the node dimension
                dim_size=num_nodes,
            )

        return scatter(
            src=incidence_hyperedge_embeddings,
            index=index.all_node_ids,
            dim=0,  # scatter along the node dimension
            dim_size=num_nodes,
            reduce=aggregation,
        )

pool(aggregation)

Aggregate hyperedge embeddings for each node.

hyperedge_index is the COO encoding of the nonzero entries of H, so hyperedge_index[0, k] = v and hyperedge_index[1, k] = e means H[v, e] = 1 for incidence k.

Let H be the incidence matrix of shape (num_nodes, num_hyperedges) and let E be the hyperedge embedding matrix of shape (num_hyperedges, num_channels). This method pools hyperedge features into node features using the incidence pattern in H:

- aggregation="sum" computes the equivalent of the standard sparse matrix product H E.
- aggregation="mean" computes D_v^{-1} H E, where D_v[v, v] = sum_e H[v, e] is the node degree matrix.
- aggregation in {"max", "min", "mul"} uses the same sparsity pattern as H E, but replaces the summation over incident hyperedges with a channel-wise max, min, or product reduction.
- aggregation="maxmin" is also accepted and is dispatched to maxmin_scatter, the same helper HyperedgeAggregator uses for its channel-wise range max - min, here taken over the hyperedges incident to each node.

Examples:

>>> hyperedge_index = [[0, 1, 1, 2],
...                    [0, 0, 1, 1]]
>>> hyperedge_embeddings = [[10, 100], [20, 200]]
>>> NodeAggregator(hyperedge_index, hyperedge_embeddings).pool("mean")
... [[10, 100], [15, 150], [20, 200]]
>>> NodeAggregator(hyperedge_index, hyperedge_embeddings).pool("sum")
... [[10, 100], [30, 300], [20, 200]]
>>> NodeAggregator(hyperedge_index, hyperedge_embeddings).pool("max")
... [[10, 100], [20, 200], [20, 200]]

Parameters:

Name Type Description Default
aggregation Literal['maxmin', 'max', 'min', 'mean', 'mul', 'sum']

Reduction applied across the hyperedges incident to each node.

required

Returns:

Type Description
Tensor

A node embedding matrix of shape (num_nodes, num_channels).

Source code in hyperbench/nn/aggregator.py
def pool(self, aggregation: Literal["maxmin", "max", "min", "mean", "mul", "sum"]) -> Tensor:
    """
    Aggregate hyperedge embeddings for each node.

    ``hyperedge_index`` is the COO encoding of the nonzero entries of ``H``,
    so ``hyperedge_index[0, k] = v`` and ``hyperedge_index[1, k] = e`` means ``H[v, e] = 1`` for incidence ``k``.

    Let ``H`` be the incidence matrix of shape ``(num_nodes, num_hyperedges)``
    and let ``E`` be the hyperedge embedding matrix of shape ``(num_hyperedges, num_channels)``.
    This method pools hyperedge features into node features using the incidence pattern in ``H``:

    - ``aggregation="sum"`` computes the equivalent of the standard sparse matrix product ``H E``.
    - ``aggregation="mean"`` computes ``D_v^{-1} H E``, where ``D_v[v, v] = sum_e H[v, e]`` is the node degree matrix.
    - ``aggregation in {"max", "min", "mul"}`` uses the same sparsity pattern as ``H E``,
      but replaces the summation over incident hyperedges with a channel-wise ``max``, ``min``, or product reduction.
    - ``aggregation="maxmin"`` is dispatched to ``maxmin_scatter``, the same helper ``HyperedgeAggregator`` uses
      for its channel-wise range ``max - min``, here taken over the hyperedges incident to each node.

    Examples:
        >>> hyperedge_index = [[0, 1, 1, 2],
        ...                    [0, 0, 1, 1]]
        >>> hyperedge_embeddings = [[10, 100], [20, 200]]
        >>> NodeAggregator(hyperedge_index, hyperedge_embeddings).pool("mean")
        ... [[10, 100], [15, 150], [20, 200]]
        >>> NodeAggregator(hyperedge_index, hyperedge_embeddings).pool("sum")
        ... [[10, 100], [30, 300], [20, 200]]
        >>> NodeAggregator(hyperedge_index, hyperedge_embeddings).pool("max")
        ... [[10, 100], [20, 200], [20, 200]]
        >>> NodeAggregator(hyperedge_index, hyperedge_embeddings).pool("maxmin")
        ... [[0, 0], [10, 100], [0, 0]]

    Args:
        aggregation: Reduction applied across the hyperedges incident to each node.

    Returns:
        A node embedding matrix of shape ``(num_nodes, num_channels)``.
    """
    index = self.hyperedge_index_wrapper

    # Gather the embeddings for each incidence.
    # A hyperedge appearing in multiple node incidences is repeated, once per incidence.
    # Example: hyperedge_embeddings = [[10, 100],  # hyperedge 0
    #                                  [20, 200]]  # hyperedge 1
    #          -> all_hyperedge_ids = [0, 0, 1, 1]
    #          -> incidence_hyperedge_embeddings = [[10, 100],   # hyperedge 0 for node 0
    #                                               [10, 100],   # hyperedge 0 for node 1
    #                                               [20, 200],   # hyperedge 1 for node 1
    #                                               [20, 200]]   # hyperedge 1 for node 2
    #             shape: (num_incidences, num_channels)
    incidence_hyperedge_embeddings = self.hyperedge_embeddings[index.all_hyperedge_ids]

    # An explicit node count takes precedence over the count reported by the index wrapper.
    num_nodes = self.num_nodes if self.num_nodes is not None else index.num_nodes

    # Scatter-aggregate hyperedge embeddings into node embeddings.
    # Example: with aggregation="sum":
    #          [[10, 100],         # node 0 belongs to hyperedge 0
    #           [10+20, 100+200],  # node 1 belongs to hyperedge 0 and 1
    #           [20, 200]]         # node 2 belongs to hyperedge 1
    #          shape: (num_nodes, num_channels)
    #          with aggregation="max":
    #          [[10, 100],                     # node 0 belongs to hyperedge 0
    #           [max(10, 20), max(100, 200)],  # node 1 belongs to hyperedge 0 and 1
    #           [20, 200]]                     # node 2 belongs to hyperedge 1
    #          shape: (num_nodes, num_channels)
    if aggregation == "maxmin":
        # "maxmin" is handled by its own helper rather than by a ``reduce`` mode.
        return maxmin_scatter(
            src=incidence_hyperedge_embeddings,
            index=index.all_node_ids,
            dim=0,  # scatter along the node dimension
            dim_size=num_nodes,
        )

    return scatter(
        src=incidence_hyperedge_embeddings,
        index=index.all_node_ids,
        dim=0,  # scatter along the node dimension
        dim_size=num_nodes,
        reduce=aggregation,
    )

HGNNConv

Bases: Module

The HGNNConv layer proposed in the [Hypergraph Neural Networks](https://arxiv.org/pdf/1809.09401) paper (AAAI 2019). Reference implementation: [source](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/nn/convs/hypergraphs/hgnn_conv.html#HGNNConv).

Each layer performs: X' = sigma(L_HGNN X Theta) where L_HGNN = D_n^{-1/2} H D_e^{-1} H^T D_n^{-1/2} is the hypergraph Laplacian computed from the incidence matrix H. This smooths node features through the hypergraph structure (nodes -> hyperedges -> nodes) without reducing to a pairwise graph.

Unlike HyperGCNConv, which uses a GCN Laplacian on a graph reduced from the hypergraph, HGNNConv operates entirely in hypergraph space and preserves all higher-order relationships.

Parameters:

Name Type Description Default
in_channels int

The number of input channels.

required
out_channels int

The number of output channels.

required
bias bool

If set to False, the layer will not learn the bias parameter. Defaults to True.

True
use_batch_normalization bool

If set to True, the layer will use batch normalization. Defaults to False.

False
drop_rate float

If set to a positive number, the layer will use dropout. Defaults to 0.5.

0.5
is_last bool

If set to True, the layer will not apply the final activation and dropout functions. Defaults to False.

False
Source code in hyperbench/nn/conv.py
class HGNNConv(nn.Module):
    """
    The HGNNConv layer proposed in `Hypergraph Neural Networks <https://arxiv.org/pdf/1809.09401>`_ paper (AAAI 2019).
    Reference implementation: `source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/nn/convs/hypergraphs/hgnn_conv.html#HGNNConv>`_.

    Each layer performs: ``X' = sigma(L_HGNN X Theta)`` where ``L_HGNN = D_n^{-1/2} H D_e^{-1} H^T D_n^{-1/2}``
    is the hypergraph Laplacian computed from the incidence matrix H. This smooths node features through
    the hypergraph structure (nodes -> hyperedges -> nodes) without reducing to a pairwise graph.

    Unlike ``HyperGCNConv``, which uses a GCN Laplacian on a graph reduced from the hypergraph,
    ``HGNNConv`` operates entirely in hypergraph space and preserves all higher-order relationships.

    Args:
        in_channels: The number of input channels.
        out_channels: The number of output channels.
        bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
        use_batch_normalization: If set to ``True``, the layer will use batch normalization. Defaults to ``False``.
        drop_rate: If set to a positive number, the layer will use dropout. Defaults to ``0.5``.
        is_last: If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        bias: bool = True,
        use_batch_normalization: bool = False,
        drop_rate: float = 0.5,
        is_last: bool = False,
    ):
        super().__init__()
        self.is_last = is_last
        # ``None`` (rather than an identity module) marks batch normalization as disabled.
        self.batch_norm_1d = nn.BatchNorm1d(out_channels) if use_batch_normalization else None
        # In-place ReLU: the smoothed tensor is overwritten instead of copied.
        self.activation_fn = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(drop_rate)
        # Theta: the learnable projection applied before smoothing.
        self.theta = nn.Linear(in_channels, out_channels, bias=bias)

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Apply one HGNN convolution layer: project features, smooth via hypergraph Laplacian,
        then apply activation, batch norm, and dropout (unless this is the last layer).

        The full per-layer formula is:
            ``X' = sigma( D_n^{-1/2} H D_e^{-1} H^T D_n^{-1/2} (X Theta) )``

        where the smoothing matrix ``L = D_n^{-1/2} H D_e^{-1} H^T D_n^{-1/2}`` is rebuilt from
        ``hyperedge_index`` on every call, with the node count taken from the number of rows of ``x``.
        This method has no parameter for supplying a precomputed matrix.

        Args:
            x: Input node feature matrix of size ``(num_nodes, in_channels)``.
            hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``.

        Returns:
            The output node feature matrix of size ``(num_nodes, out_channels)``.
        """
        # Project first (X Theta), so smoothing acts on the projected features.
        x = self.theta(x)

        # num_nodes comes from x, not from the index, so the matrix matches x's row count.
        smoothing_matrix = HyperedgeIndex(hyperedge_index).get_sparse_hgnn_smoothing_matrix(
            num_nodes=x.size(0),
        )
        x = Hypergraph.smoothing_with_matrix(x, smoothing_matrix)

        # The last layer returns the smoothed projection untouched.
        if not self.is_last:
            x = self.activation_fn(x)
            if self.batch_norm_1d is not None:
                x = self.batch_norm_1d(x)
            x = self.dropout(x)

        return x

forward(x, hyperedge_index)

Apply one HGNN convolution layer: project features, smooth via hypergraph Laplacian, then apply activation, batch norm, and dropout (unless this is the last layer).

The full per-layer formula is

X' = sigma( D_n^{-1/2} H D_e^{-1} H^T D_n^{-1/2} (X Theta) )

where the smoothing matrix L = D_n^{-1/2} H D_e^{-1} H^T D_n^{-1/2} is rebuilt from hyperedge_index on every call, with the node count taken from the number of rows of x. This method has no parameter for supplying a precomputed matrix.

Parameters:

Name Type Description Default
x Tensor

Input node feature matrix of size (num_nodes, in_channels).

required
hyperedge_index Tensor

Hyperedge incidence in COO format of size (2, num_incidences).

required

Returns:

Type Description
Tensor

The output node feature matrix of size (num_nodes, out_channels).

Source code in hyperbench/nn/conv.py
def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Apply one HGNN convolution layer: project features, smooth via hypergraph Laplacian,
    then apply activation, batch norm, and dropout (unless this is the last layer).

    The full per-layer formula is:
        ``X' = sigma( D_n^{-1/2} H D_e^{-1} H^T D_n^{-1/2} (X Theta) )``

    where the smoothing matrix ``L = D_n^{-1/2} H D_e^{-1} H^T D_n^{-1/2}`` is rebuilt from
    ``hyperedge_index`` on every call, with the node count taken from the number of rows of ``x``.
    This method has no parameter for supplying a precomputed matrix.

    Args:
        x: Input node feature matrix of size ``(num_nodes, in_channels)``.
        hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``.

    Returns:
        The output node feature matrix of size ``(num_nodes, out_channels)``.
    """
    # Project first (X Theta), so smoothing acts on the projected features.
    x = self.theta(x)

    # num_nodes comes from x, not from the index, so the matrix matches x's row count.
    smoothing_matrix = HyperedgeIndex(hyperedge_index).get_sparse_hgnn_smoothing_matrix(
        num_nodes=x.size(0),
    )
    x = Hypergraph.smoothing_with_matrix(x, smoothing_matrix)

    # The last layer returns the smoothed projection untouched.
    if not self.is_last:
        x = self.activation_fn(x)
        if self.batch_norm_1d is not None:
            x = self.batch_norm_1d(x)
        x = self.dropout(x)

    return x

HGNNPConv

Bases: Module

The HGNNPConv layer proposed in the [HGNN+: General Hypergraph Neural Networks](https://ieeexplore.ieee.org/document/9795251) paper (IEEE T-PAMI 2022). Reference implementation: [source](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/nn/convs/hypergraphs/hgnnp_conv.html#HGNNPConv).

Each layer performs: X' = sigma(M_HGNN+ X Theta) where M_HGNN+ = D_v^{-1} H D_e^{-1} H^T is the HGNN+ smoothing matrix.

Unlike HGNNConv, which uses symmetric D_v^{-1/2} normalization for a spectral Laplacian, HGNNPConv uses plain inverse degrees and performs two-stage mean aggregation: nodes -> hyperedges -> nodes.

Parameters:

Name Type Description Default
in_channels int

The number of input channels.

required
out_channels int

The number of output channels.

required
bias bool

If set to False, the layer will not learn the bias parameter. Defaults to True.

True
use_batch_normalization bool

If set to True, the layer will use batch normalization. Defaults to False.

False
drop_rate float

If set to a positive number, the layer will use dropout. Defaults to 0.5.

0.5
is_last bool

If set to True, the layer will not apply the final activation and dropout functions. Defaults to False.

False
Source code in hyperbench/nn/conv.py
class HGNNPConv(nn.Module):
    """
    HGNN+ convolution layer from `HGNN+: General Hypergraph Neural Networks <https://ieeexplore.ieee.org/document/9795251>`_ (IEEE T-PAMI 2022).
    Reference implementation: `source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/nn/convs/hypergraphs/hgnnp_conv.html#HGNNPConv>`_.

    A single layer computes ``X' = sigma(M_HGNN+ X Theta)``, with the HGNN+ smoothing
    matrix ``M_HGNN+ = D_v^{-1} H D_e^{-1} H^T``.

    ``HGNNConv`` normalizes symmetrically with ``D_v^{-1/2}`` (spectral Laplacian);
    ``HGNNPConv`` instead divides by plain degrees, i.e. a two-stage mean
    aggregation: nodes -> hyperedges -> nodes.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        bias: Whether the linear projection learns a bias term. Defaults to ``True``.
        use_batch_normalization: Whether a ``BatchNorm1d`` over ``out_channels`` is applied. Defaults to ``False``.
        drop_rate: Dropout probability. Defaults to ``0.5``.
        is_last: When ``True``, the activation, optional batch normalization, and dropout are skipped. Defaults to ``False``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        bias: bool = True,
        use_batch_normalization: bool = False,
        drop_rate: float = 0.5,
        is_last: bool = False,
    ):
        super().__init__()
        self.is_last = is_last

        # Submodules are assigned in a fixed order so their registration order stays stable.
        if use_batch_normalization:
            self.batch_norm_1d = nn.BatchNorm1d(out_channels)
        else:
            self.batch_norm_1d = None
        self.activation_fn = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(drop_rate)
        self.theta = nn.Linear(in_channels, out_channels, bias=bias)

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Run one HGNN+ convolution: linear projection followed by hypergraph smoothing.

        Per-layer formula:
            ``X' = sigma( D_v^{-1} H D_e^{-1} H^T (X Theta) )``

        Args:
            x: Input node feature matrix of size ``(num_nodes, in_channels)``.
            hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``.

        Returns:
            The output node feature matrix of size ``(num_nodes, out_channels)``.
        """
        projected = self.theta(x)

        # The smoothing matrix is rebuilt from the incidence structure on every call.
        incidence = HyperedgeIndex(hyperedge_index)
        smoothing_matrix = incidence.get_sparse_hgnnp_smoothing_matrix(
            num_nodes=projected.size(0),
        )
        smoothed = Hypergraph.smoothing_with_matrix(projected, smoothing_matrix)

        # The last layer returns the smoothed features untouched.
        if self.is_last:
            return smoothed

        smoothed = self.activation_fn(smoothed)
        if self.batch_norm_1d is not None:
            smoothed = self.batch_norm_1d(smoothed)
        return self.dropout(smoothed)

forward(x, hyperedge_index)

Apply one HGNN+ convolution layer using row-stochastic hypergraph smoothing.

The full per-layer formula is

X' = sigma( D_v^{-1} H D_e^{-1} H^T (X Theta) )

Parameters:

Name Type Description Default
x Tensor

Input node feature matrix of size (num_nodes, in_channels).

required
hyperedge_index Tensor

Hyperedge incidence in COO format of size (2, num_incidences).

required

Returns:

Type Description
Tensor

The output node feature matrix of size (num_nodes, out_channels).

Source code in hyperbench/nn/conv.py
def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Run one HGNN+ convolution: linear projection followed by hypergraph smoothing.

    Per-layer formula:
        ``X' = sigma( D_v^{-1} H D_e^{-1} H^T (X Theta) )``

    Args:
        x: Input node feature matrix of size ``(num_nodes, in_channels)``.
        hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``.

    Returns:
        The output node feature matrix of size ``(num_nodes, out_channels)``.
    """
    projected = self.theta(x)

    # The smoothing matrix is rebuilt from the incidence structure on every call.
    incidence = HyperedgeIndex(hyperedge_index)
    smoothing_matrix = incidence.get_sparse_hgnnp_smoothing_matrix(
        num_nodes=projected.size(0),
    )
    smoothed = Hypergraph.smoothing_with_matrix(projected, smoothing_matrix)

    # The last layer returns the smoothed features untouched.
    if self.is_last:
        return smoothed

    smoothed = self.activation_fn(smoothed)
    if self.batch_norm_1d is not None:
        smoothed = self.batch_norm_1d(smoothed)
    return self.dropout(smoothed)

HNHNConv

Bases: Module

The HNHNConv layer proposed in the paper [HNHN: Hypergraph Networks with Hyperedge Neurons](https://arxiv.org/abs/2006.12278). Reference implementation: [source](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/nn/convs/hypergraphs/hnhn_conv.html#HNHNConv).

Parameters:

Name Type Description Default
in_channels int

The number of input channels.

required
out_channels int

The number of output channels.

required
bias bool

If set to False, the layer will not learn the bias parameter. Defaults to True.

True
use_batch_normalization bool

If set to True, the layer will use batch normalization. Defaults to False.

False
drop_rate float

If set to a positive number, the layer will use dropout. Defaults to 0.5.

0.5
is_last bool

If set to True, the layer will not apply the final activation and dropout functions. Defaults to False.

False
Source code in hyperbench/nn/conv.py
class HNHNConv(nn.Module):
    """
    HNHN convolution layer from `HNHN: Hypergraph Networks with Hyperedge Neurons <https://arxiv.org/abs/2006.12278>`_.
    Reference implementation: `source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/nn/convs/hypergraphs/hnhn_conv.html#HNHNConv>`_.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        bias: Whether both linear projections learn a bias term. Defaults to ``True``.
        use_batch_normalization: Whether a ``BatchNorm1d`` over ``out_channels`` is applied. Defaults to ``False``.
        drop_rate: Dropout probability. Defaults to ``0.5``.
        is_last: When ``True``, the final activation, optional batch normalization, and dropout are skipped. Defaults to ``False``.
    """

    # Aggregation mode shared by both pooling stages (node -> hyperedge and hyperedge -> node).
    __AGGREGATION: Literal["mean"] = "mean"

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        bias: bool = True,
        use_batch_normalization: bool = False,
        drop_rate: float = 0.5,
        is_last: bool = False,
    ):
        super().__init__()
        self.is_last = is_last

        # Submodules are assigned in a fixed order so their registration order stays stable.
        if use_batch_normalization:
            self.batch_norm_1d = nn.BatchNorm1d(out_channels)
        else:
            self.batch_norm_1d = None
        self.activation_fn = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(drop_rate)

        # Projection applied before node -> hyperedge pooling.
        self.theta_v2e = nn.Linear(in_channels, out_channels, bias=bias)
        # Projection applied to hyperedge embeddings before hyperedge -> node pooling.
        self.theta_e2v = nn.Linear(out_channels, out_channels, bias=bias)

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Run one HNHN convolution: project nodes, mean-pool them into hyperedges,
        activate and project the hyperedges, then mean-pool them back onto nodes.

        Args:
            x: Input node feature matrix of size ``(num_nodes, in_channels)``.
            hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``.

        Returns:
            The output node feature matrix of size ``(num_nodes, out_channels)``.
        """
        node_features = self.theta_v2e(x)

        # Stage 1: nodes -> hyperedges.
        edge_features = HyperedgeAggregator(hyperedge_index, node_features).pool(
            self.__AGGREGATION
        )
        edge_features = self.theta_e2v(self.activation_fn(edge_features))

        # Stage 2: hyperedges -> nodes; num_nodes is passed explicitly from the projected features.
        node_pooler = NodeAggregator(
            hyperedge_index=hyperedge_index,
            hyperedge_embeddings=edge_features,
            num_nodes=node_features.size(0),
        )
        node_features = node_pooler.pool(self.__AGGREGATION)

        # The last layer returns the pooled features untouched.
        if self.is_last:
            return node_features

        node_features = self.activation_fn(node_features)
        if self.batch_norm_1d is not None:
            node_features = self.batch_norm_1d(node_features)
        return self.dropout(node_features)

forward(x, hyperedge_index)

Apply one HNHN convolution layer using two learned projections around node-to-hyperedge and hyperedge-to-node mean aggregation.

Parameters:

Name Type Description Default
x Tensor

Input node feature matrix of size (num_nodes, in_channels).

required
hyperedge_index Tensor

Hyperedge incidence in COO format of size (2, num_incidences).

required

Returns:

Type Description
Tensor

The output node feature matrix of size (num_nodes, out_channels).

Source code in hyperbench/nn/conv.py
def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Run one HNHN convolution: project nodes, mean-pool them into hyperedges,
    activate and project the hyperedges, then mean-pool them back onto nodes.

    Args:
        x: Input node feature matrix of size ``(num_nodes, in_channels)``.
        hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``.

    Returns:
        The output node feature matrix of size ``(num_nodes, out_channels)``.
    """
    node_features = self.theta_v2e(x)

    # Stage 1: nodes -> hyperedges.
    edge_features = HyperedgeAggregator(hyperedge_index, node_features).pool(
        self.__AGGREGATION
    )
    edge_features = self.theta_e2v(self.activation_fn(edge_features))

    # Stage 2: hyperedges -> nodes; num_nodes is passed explicitly from the projected features.
    node_pooler = NodeAggregator(
        hyperedge_index=hyperedge_index,
        hyperedge_embeddings=edge_features,
        num_nodes=node_features.size(0),
    )
    node_features = node_pooler.pool(self.__AGGREGATION)

    # The last layer returns the pooled features untouched.
    if self.is_last:
        return node_features

    node_features = self.activation_fn(node_features)
    if self.batch_norm_1d is not None:
        node_features = self.batch_norm_1d(node_features)
    return self.dropout(node_features)

HyperGCNConv

Bases: Module

The HyperGCNConv layer proposed in the paper [HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs](https://dl.acm.org/doi/10.5555/3454287.3454422) (NeurIPS 2019). Reference implementation: [source](https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/nn/convs/hypergraphs/hypergcn_conv.html#HyperGCNConv).

Parameters:

Name Type Description Default
in_channels int

The number of input channels.

required
out_channels int

The number of output channels.

required
bias bool

If set to False, the layer will not learn the bias parameter. Defaults to True.

True
use_batch_normalization bool

If set to True, the layer will use batch normalization. Defaults to False.

False
drop_rate float

If set to a positive number, the layer will use dropout. Defaults to 0.5.

0.5
use_mediator bool

Whether to use mediator to transform the hyperedges to edges in the graph. Defaults to False.

False
is_last bool

If set to True, the layer will not apply the final activation and dropout functions. Defaults to False.

False
Source code in hyperbench/nn/conv.py
class HyperGCNConv(nn.Module):
    """
    The HyperGCNConv layer proposed in `HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs <https://dl.acm.org/doi/10.5555/3454287.3454422>`_ paper (NeurIPS 2019).
    Reference implementation: `source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/nn/convs/hypergraphs/hypergcn_conv.html#HyperGCNConv>`_.

    Args:
        in_channels: The number of input channels.
        out_channels: The number of output channels.
        bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
        use_batch_normalization: If set to ``True``, the layer will use batch normalization. Defaults to ``False``.
        drop_rate: If set to a positive number, the layer will use dropout. Defaults to ``0.5``.
        use_mediator: Whether to use mediator to transform the hyperedges to edges in the graph. Defaults to ``False``.
        is_last: If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        bias: bool = True,
        use_batch_normalization: bool = False,
        drop_rate: float = 0.5,
        use_mediator: bool = False,
        is_last: bool = False,
    ):
        super().__init__()
        self.is_last = is_last
        self.use_mediator = use_mediator
        self.batch_norm_1d = nn.BatchNorm1d(out_channels) if use_batch_normalization else None
        self.activation_fn = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(drop_rate)

        # θ is the learnable weight matrix (as in the HyperGCN paper),
        # it projects node features from in_channels to out_channels and learns how to mix feature channels
        self.theta = nn.Linear(in_channels, out_channels, bias=bias)

    def forward(
        self,
        x: Tensor,
        hyperedge_index: Tensor,
        gcn_laplacian_matrix: Tensor | None = None,
    ) -> Tensor:
        """
        The forward function.

        Args:
            x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
            hyperedge_index: Hyperedge indices representing the hypergraph structure. Size ``(2, num_hyperedges)``.
            gcn_laplacian_matrix: Optional precomputed normalized GCN Laplacian matrix. Size ``(num_nodes, num_nodes)``. Defaults to ``None``.
                If provided, it will be used directly for smoothing, so we can skip computing it from edge_index.

        Returns:
            The output node feature matrix. Size ``(num_nodes, out_channels)``.
        """
        x = self.theta(x)

        if gcn_laplacian_matrix is not None:
            x = Graph.smoothing_with_laplacian_matrix(x, gcn_laplacian_matrix)
        else:
            edge_index, edge_weights = HyperedgeIndex(
                hyperedge_index
            ).reduce_to_edge_index_on_random_direction(
                x=x,
                with_mediators=self.use_mediator,
                return_weights=True,
            )

            normalized_gcn_laplacian_matrix = EdgeIndex(
                edge_index=edge_index,
                edge_weights=edge_weights,
            ).get_sparse_normalized_gcn_laplacian(num_nodes=x.size(0))

            x = Graph.smoothing_with_laplacian_matrix(x, normalized_gcn_laplacian_matrix)

        if not self.is_last:
            x = self.activation_fn(x)
            if self.batch_norm_1d is not None:
                x = self.batch_norm_1d(x)
            x = self.dropout(x)

        return x

forward(x, hyperedge_index, gcn_laplacian_matrix=None)

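Apply one HyperGCN convolution layer: project the node features with theta, then smooth them with a normalized GCN Laplacian (the precomputed one if given, otherwise one built from hyperedge_index).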

Parameters:

Name Type Description Default
x Tensor

Input node feature matrix. Size (num_nodes, in_channels).

required
hyperedge_index Tensor

Hyperedge incidence in COO format representing the hypergraph structure. Size (2, num_incidences).

required
gcn_laplacian_matrix Tensor | None

Optional precomputed normalized GCN Laplacian matrix. Size (num_nodes, num_nodes). Defaults to None. If provided, it will be used directly for smoothing, so we can skip computing it from edge_index.

None

Returns:

Type Description
Tensor

The output node feature matrix. Size (num_nodes, out_channels).

Source code in hyperbench/nn/conv.py
def forward(
    self,
    x: Tensor,
    hyperedge_index: Tensor,
    gcn_laplacian_matrix: Tensor | None = None,
) -> Tensor:
    """
    Run one HyperGCN convolution: project the node features, then smooth them
    with a normalized GCN Laplacian.

    Args:
        x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
        hyperedge_index: Hyperedge incidence in COO format. Size ``(2, num_incidences)``.
        gcn_laplacian_matrix: Optional precomputed normalized GCN Laplacian matrix. Size ``(num_nodes, num_nodes)``. Defaults to ``None``.
            When given it is used as-is and ``hyperedge_index`` is not reduced to a graph.

    Returns:
        The output node feature matrix. Size ``(num_nodes, out_channels)``.
    """
    projected = self.theta(x)

    laplacian = gcn_laplacian_matrix
    if laplacian is None:
        # No precomputed Laplacian: reduce the hypergraph to a weighted graph
        # (the reduction receives the projected features), then normalize it.
        reducer = HyperedgeIndex(hyperedge_index)
        edge_index, edge_weights = reducer.reduce_to_edge_index_on_random_direction(
            x=projected,
            with_mediators=self.use_mediator,
            return_weights=True,
        )
        graph_edges = EdgeIndex(edge_index=edge_index, edge_weights=edge_weights)
        laplacian = graph_edges.get_sparse_normalized_gcn_laplacian(
            num_nodes=projected.size(0)
        )

    smoothed = Graph.smoothing_with_laplacian_matrix(projected, laplacian)

    # The last layer returns the smoothed features untouched.
    if self.is_last:
        return smoothed

    smoothed = self.activation_fn(smoothed)
    if self.batch_norm_1d is not None:
        smoothed = self.batch_norm_1d(smoothed)
    return self.dropout(smoothed)

NodeEnricher

Bases: Enricher, ABC

Base class for node enrichers.

Source code in hyperbench/nn/enricher.py
class NodeEnricher(Enricher, ABC):
    """
    Abstract marker base for node enrichers.

    Declares no members of its own; everything is inherited from ``Enricher``.
    """

HyperedgeEnricher

Bases: Enricher, ABC

Base class for hyperedge enrichers.

Source code in hyperbench/nn/enricher.py
class HyperedgeEnricher(Enricher, ABC):
    """
    Abstract marker base for hyperedge enrichers.

    Declares no members of its own; everything is inherited from ``Enricher``.
    """

FillValueHyperedgeAttrsEnricher

Bases: HyperedgeAttrsEnricher

Generates simple hyperedge attributes by filling them with a constant value.

Parameters:

Name Type Description Default
cache_dir str | None

Directory for saving/loading cached features. If None, caching is disabled.

None
fill_value float

The constant value to fill the hyperedge attributes with. Defaults to 1.0.

1.0
Source code in hyperbench/nn/enricher.py
class FillValueHyperedgeAttrsEnricher(HyperedgeAttrsEnricher):
    """
    Produce a one-column hyperedge attribute matrix filled with a single constant.

    Args:
        cache_dir: Directory for saving/loading cached features. If ``None``, caching is disabled.
        fill_value: Constant written into every hyperedge attribute. Defaults to ``1.0``.
    """

    def __init__(
        self,
        cache_dir: str | None = None,
        fill_value: float = 1.0,
    ):
        super().__init__(cache_dir=cache_dir)
        self.fill_value = fill_value

    def enrich(self, hyperedge_index: Tensor) -> Tensor:
        """
        Build the constant hyperedge attributes.

        Args:
            hyperedge_index: Hyperedge incidence tensor of shape ``(2, num_incidences)``.
                Only the hyperedge count reported by ``HyperedgeIndex`` and the tensor's device are used.

        Returns:
            Tensor of shape ``(num_hyperedges, 1)`` holding ``fill_value`` for each hyperedge,
            allocated on the device of ``hyperedge_index``.
        """
        hyperedge_count = HyperedgeIndex(hyperedge_index).num_hyperedges
        attrs_shape = (hyperedge_count, 1)
        return torch.full(attrs_shape, self.fill_value, device=hyperedge_index.device)

enrich(hyperedge_index)

Generate hyperedge attributes.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Hyperedge index tensor of shape (2, num_incidences).

required

Returns:

Type Description
Tensor

Tensor of shape (num_hyperedges, 1) containing the generated attribute for each hyperedge.

Source code in hyperbench/nn/enricher.py
def enrich(self, hyperedge_index: Tensor) -> Tensor:
    """
    Build the constant hyperedge attributes.

    Args:
        hyperedge_index: Hyperedge incidence tensor of shape ``(2, num_incidences)``.
            Only the hyperedge count reported by ``HyperedgeIndex`` and the tensor's device are used.

    Returns:
        Tensor of shape ``(num_hyperedges, 1)`` holding ``fill_value`` for each hyperedge,
        allocated on the device of ``hyperedge_index``.
    """
    hyperedge_count = HyperedgeIndex(hyperedge_index).num_hyperedges
    attrs_shape = (hyperedge_count, 1)
    return torch.full(attrs_shape, self.fill_value, device=hyperedge_index.device)

ABHyperedgeWeightsEnricher

Bases: HyperedgeWeightsEnricher

Generates hyperedge weights based on the number of nodes in each hyperedge.

Parameters:

Name Type Description Default
cache_dir str | None

Directory for saving/loading cached features. If None, caching is disabled.

None
alpha float

Scaling factor for the random component added to weights. Must be between 0.0 and 1.0.

1.0
beta float | None

If provided, a random offset u * beta, with u drawn uniformly from [0, alpha], is added to every weight. If None, no random component is added.

None
Source code in hyperbench/nn/enricher.py
class ABHyperedgeWeightsEnricher(HyperedgeWeightsEnricher):
    """
    Generates hyperedge weights based on the number of nodes in each hyperedge.

    Args:
        cache_dir: Directory for saving/loading cached features. If ``None``, caching is disabled.
        alpha: Upper bound of the uniform random factor used for the random component. Must be between 0.0 and 1.0.
        beta: If provided, ``u * beta`` is added to every weight, where ``u`` is drawn uniformly
            from ``[0, alpha]`` once per call. If ``None``, no random component is added.

    Raises:
        ValueError: If ``alpha`` is below 0.0 or above 1.0.
    """

    def __init__(
        self,
        cache_dir: str | None = None,
        alpha: float = 1.0,
        beta: float | None = None,
    ):
        super().__init__(cache_dir=cache_dir)
        if alpha < 0.0 or alpha > 1.0:
            raise ValueError("Alpha must be between 0.0 and 1.0.")

        self.alpha = alpha
        self.beta = beta

    def enrich(self, hyperedge_index: Tensor) -> Tensor:
        """
        Compute one weight per hyperedge: its node count plus an optional random offset.

        Args:
            hyperedge_index: Hyperedge incidence tensor of shape ``(2, num_incidences)``;
                row 1 holds the hyperedge id of each incidence.

        Returns:
            Float tensor of shape ``(num_hyperedges,)``, where ``num_hyperedges`` is the largest
            hyperedge id plus one, or ``0`` when there are no incidences.
        """
        hyperedge_ids = hyperedge_index[1]

        # ``max()`` raises on an empty tensor, so an index without incidences maps to zero hyperedges.
        if hyperedge_ids.numel() > 0:
            num_hyperedges = int(hyperedge_ids.max().item()) + 1
        else:
            num_hyperedges = 0

        # Count the number of nodes in each hyperedge by counting occurrences of each hyperedge id.
        # Example: if hyperedge_index[1] = [0, 0, 1, 1, 1], hyperedge 0 has 2 nodes and hyperedge 1 has 3.
        weights = torch.bincount(hyperedge_ids, minlength=num_hyperedges).float()

        # NOTE: the draw happens on every call, even when beta is None, so the global
        # ``random`` state advances the same way regardless of beta.
        random_alpha = random.uniform(0, self.alpha)
        if self.beta is not None:
            weights += random_alpha * self.beta
        return weights

enrich(hyperedge_index)

Compute edge weights as the number of nodes in each hyperedge.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Hyperedge index tensor of shape (2, num_incidences).

required

Returns:

Type Description
Tensor

Tensor of shape (num_hyperedges,) containing the weight of each hyperedge.

Source code in hyperbench/nn/enricher.py
def enrich(self, hyperedge_index: Tensor) -> Tensor:
    """
    Compute one weight per hyperedge: its node count plus an optional random offset.

    Args:
        hyperedge_index: Hyperedge incidence tensor of shape ``(2, num_incidences)``;
            row 1 holds the hyperedge id of each incidence.

    Returns:
        Float tensor of shape ``(num_hyperedges,)``, where ``num_hyperedges`` is the largest
        hyperedge id plus one, or ``0`` when there are no incidences.
    """
    hyperedge_ids = hyperedge_index[1]

    # ``max()`` raises on an empty tensor, so an index without incidences maps to zero hyperedges.
    if hyperedge_ids.numel() > 0:
        num_hyperedges = int(hyperedge_ids.max().item()) + 1
    else:
        num_hyperedges = 0

    # Count the number of nodes in each hyperedge by counting occurrences of each hyperedge id.
    # Example: if hyperedge_index[1] = [0, 0, 1, 1, 1], hyperedge 0 has 2 nodes and hyperedge 1 has 3.
    weights = torch.bincount(hyperedge_ids, minlength=num_hyperedges).float()

    # NOTE: the draw happens on every call, even when beta is None, so the global
    # ``random`` state advances the same way regardless of beta.
    random_alpha = random.uniform(0, self.alpha)
    if self.beta is not None:
        weights += random_alpha * self.beta
    return weights

LaplacianPositionalEncodingEnricher

Bases: NodeEnricher

Enrich node features with Laplacian Positional Encodings computed from the symmetric normalized Laplacian of the clique expansion of the hypergraph.

Parameters:

Name Type Description Default
num_features int

Number of positional encoding features to generate for each node.

required
num_nodes int

Total number of nodes in the graph. If not provided, it will be inferred from the hyperedge_index. This is only needed if the hyperedge_index does not include all nodes (e.g., some isolated nodes are missing). Another instance is when the setting is transductive and the hyperedge index contains some hyperedges that do not contain all the nodes in the node space.

0
cache_dir str | None

Optional directory to cache computed features. If None, caching is disabled.

None
Source code in hyperbench/nn/enricher.py
class LaplacianPositionalEncodingEnricher(NodeEnricher):
    """
    Produce Laplacian Positional Encodings for nodes, taken from the eigenvectors of the
    symmetric normalized Laplacian of the hypergraph's clique expansion.

    Args:
        num_features: Number of positional encoding features to generate for each node.
        num_nodes: Total number of nodes in the graph. Values ``<= 0`` (the default ``0``) mean
            "not provided": ``None`` is forwarded to the Laplacian builder, which is then expected
            to infer the count from the edges.
            Provide it when the hyperedge_index does not include all nodes (e.g., isolated nodes are
            missing), or in a transductive setting where the given hyperedges do not cover the
            whole node space.
        cache_dir: Optional directory to cache computed features. If ``None``, caching is disabled.
    """

    def __init__(
        self,
        num_features: int,
        num_nodes: int = 0,
        cache_dir: str | None = None,
    ):
        super().__init__(cache_dir=cache_dir)
        self.num_features = num_features
        self.num_nodes = num_nodes

    def enrich(self, hyperedge_index: Tensor) -> Tensor:
        """
        Compute Laplacian Positional Encodings from the symmetric normalized Laplacian
        ``L = I - D^{-1/2} A D^{-1/2}`` of the clique expansion.

        The eigenvector of the smallest eigenvalue (~0) is treated as trivial and skipped;
        the following ``num_features`` eigenvectors become the positional features.
        When fewer are available, the remaining columns are zero.

        Args:
            hyperedge_index: Hyperedge incidence tensor of shape ``(2, num_incidences)``.

        Returns:
            Tensor of shape ``(num_nodes, num_features)``.
        """
        clique_edge_index = HyperedgeIndex(hyperedge_index).reduce_to_edge_index_on_clique_expansion()

        # A non-positive configured node count means "let the Laplacian builder decide".
        requested_num_nodes = self.num_nodes if self.num_nodes > 0 else None
        laplacian = EdgeIndex(clique_edge_index).get_sparse_normalized_laplacian(
            num_nodes=requested_num_nodes
        )

        # torch.linalg.eigh needs a dense matrix and returns eigenpairs sorted by ascending
        # eigenvalue; each COLUMN of the second result is one eigenvector, so the result has
        # shape (num_nodes, num_nodes). Only the eigenvectors are kept.
        with torch.no_grad():
            eigenvectors = torch.linalg.eigh(laplacian.to_dense())[1]

        # Column 0 is skipped, leaving at most num_nodes - 1 usable eigenvectors.
        # Example: 3 nodes -> 2 usable eigenvectors.
        total_nodes = int(eigenvectors.size(0))
        usable = total_nodes - 1
        wanted = self.num_features

        if usable < wanted:
            # Not enough eigenvectors: zero-pad on the right.
            # Example: usable = 2, wanted = 5 -> shape (3, 5), columns 0-1 filled, 2-4 zeros.
            padded = torch.zeros(size=(total_nodes, wanted), device=clique_edge_index.device)
            padded[:, :usable] = eigenvectors[:, 1:]
            return padded

        # Enough eigenvectors: take columns 1..wanted (inclusive), one row per node.
        # Example: wanted = 2, eigenvectors.shape = (3, 3) -> columns 1 and 2, shape (3, 2).
        return eigenvectors[:, 1 : wanted + 1]

enrich(hyperedge_index)

Compute Laplacian Positional Encoding: the k smallest non-trivial eigenvectors of the symmetric normalized Laplacian L = I - D^{-1/2} A D^{-1/2}.

The first eigenvector (constant, eigenvalue ~0) is skipped. The next num_features eigenvectors are returned as positional features.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Hyperedge index tensor of shape (2, num_incidences).

required

Returns:

Type Description
Tensor

Tensor of shape (num_nodes, num_features).

Source code in hyperbench/nn/enricher.py
def enrich(self, hyperedge_index: Tensor) -> Tensor:
    """
    Compute Laplacian Positional Encodings from the symmetric normalized Laplacian
    ``L = I - D^{-1/2} A D^{-1/2}`` of the clique expansion.

    The eigenvector of the smallest eigenvalue (~0) is treated as trivial and skipped;
    the following ``num_features`` eigenvectors become the positional features.
    When fewer are available, the remaining columns are zero.

    Args:
        hyperedge_index: Hyperedge incidence tensor of shape ``(2, num_incidences)``.

    Returns:
        Tensor of shape ``(num_nodes, num_features)``.
    """
    clique_edge_index = HyperedgeIndex(hyperedge_index).reduce_to_edge_index_on_clique_expansion()

    # A non-positive configured node count means "let the Laplacian builder decide".
    requested_num_nodes = self.num_nodes if self.num_nodes > 0 else None
    laplacian = EdgeIndex(clique_edge_index).get_sparse_normalized_laplacian(
        num_nodes=requested_num_nodes
    )

    # torch.linalg.eigh needs a dense matrix and returns eigenpairs sorted by ascending
    # eigenvalue; each COLUMN of the second result is one eigenvector, so the result has
    # shape (num_nodes, num_nodes). Only the eigenvectors are kept.
    with torch.no_grad():
        eigenvectors = torch.linalg.eigh(laplacian.to_dense())[1]

    # Column 0 is skipped, leaving at most num_nodes - 1 usable eigenvectors.
    # Example: 3 nodes -> 2 usable eigenvectors.
    total_nodes = int(eigenvectors.size(0))
    usable = total_nodes - 1
    wanted = self.num_features

    if usable < wanted:
        # Not enough eigenvectors: zero-pad on the right.
        # Example: usable = 2, wanted = 5 -> shape (3, 5), columns 0-1 filled, 2-4 zeros.
        padded = torch.zeros(size=(total_nodes, wanted), device=clique_edge_index.device)
        padded[:, :usable] = eigenvectors[:, 1:]
        return padded

    # Enough eigenvectors: take columns 1..wanted (inclusive), one row per node.
    # Example: wanted = 2, eigenvectors.shape = (3, 3) -> columns 1 and 2, shape (3, 2).
    return eigenvectors[:, 1 : wanted + 1]

Node2VecEnricher

Bases: NodeEnricher

Enrich node features using Node2Vec embeddings computed from the clique expansion of the hypergraph.

Parameters:

Name Type Description Default
num_features int

Dimensionality of the node embeddings to generate.

required
walk_length int

Length of each random walk.

20
context_size int

Window size for the skip-gram model (number of neighbors in the walk considered as context). For example, if context_size=2 and walk_length=5, then for a random walk [v0, v1, v2, v3, v4], the context for v2 would be [v0, v1, v3, v4] as we take neighbors within distance 2 in the walk. The pairs generated by skip-gram would be [(v2, v0), (v2, v1), (v2, v3), (v2, v4)]. Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20). Defaults to 10.

10
num_walks_per_node int

Number of random walks to start at each node.

10
p float

Return hyperparameter for Node2Vec. Default is 1.0 (unbiased). This controls the probability of stepping back to the node visited in the previous step. Lower values of p make immediate backtracking more likely, which keeps walks closer to the local neighborhood. Higher values of p discourage returning to the previous node, so walks are less likely to bounce back and forth across the same edge.

1.0
q float

In-out hyperparameter for Node2Vec. Default is 1.0 (unbiased). This controls whether walks stay near the source node or explore further outward. Lower values of q bias the walk toward outward exploration, behaving more like DFS and emphasizing structural roles. Higher values of q bias the walk toward nearby nodes, behaving more like BFS and emphasizing community structure and homophily.

1.0
num_negative_samples int

Number of negative samples to use for training the skip-gram model. If set to X, then for each positive pair (u, v) generated from the random walks, X negative pairs (u, v_neg) will be generated, where v_neg is a node sampled uniformly at random from all nodes in the graph. Defaults to 1, meaning one negative sample per positive pair.

1
num_nodes int

Total number of nodes in the graph. If not provided, it will be inferred from the hyperedge_index. This is only needed if the hyperedge_index does not include all nodes (e.g., some isolated nodes are missing).

0
graph_reduction_strategy Literal['clique_expansion']

Strategy for reducing the hyperedge graph. Defaults to clique_expansion.

'clique_expansion'
num_epochs int

Number of epochs used to optimize Node2Vec embeddings. Defaults to 5.

5
learning_rate float

Learning rate for embedding optimization. Defaults to 0.01.

0.01
batch_size int

Batch size used by the random-walk loader. Defaults to 128.

128
sparse bool

Whether Node2Vec embeddings should use sparse gradients.

True
cache_dir str | None

Optional directory to cache computed embeddings. If None, caching is disabled.

None
verbose bool

Whether to print verbose output during training. Defaults to False.

False
Source code in hyperbench/nn/enricher.py
class Node2VecEnricher(NodeEnricher):
    """
    Node enricher that learns Node2Vec embeddings on a graph reduction of the hypergraph.

    Args:
        num_features: Size of each generated node embedding.
        walk_length: Length of each random walk.
        context_size: Skip-gram window size forwarded to the underlying Node2Vec model.
            Must not exceed ``walk_length`` (a ``ValueError`` is raised otherwise).
            Rule of thumb: graphs with strong local structure (5-10),
            graphs with communities/long-range patterns (10-20).
            Defaults to ``10``.
        num_walks_per_node: How many random walks are started from every node.
        p: Node2Vec return hyperparameter. Defaults to ``1.0`` (unbiased).
            It governs how likely a walk is to step straight back to the node it just left:
            small ``p`` favours immediate backtracking and keeps walks local, large ``p``
            discourages bouncing back and forth across the same edge.
        q: Node2Vec in-out hyperparameter. Defaults to ``1.0`` (unbiased).
            It governs whether walks stay close to the source or move outward:
            small ``q`` favours outward, DFS-like exploration (structural roles), large ``q``
            favours nearby, BFS-like exploration (community structure and homophily).
        num_negative_samples: Negative samples drawn for the skip-gram objective.
            With a value of ``X``, every positive pair ``(u, v)`` taken from the walks is matched
            with ``X`` negative pairs ``(u, v_neg)``, where ``v_neg`` is sampled uniformly at random
            from all nodes. Defaults to ``1``.
        num_nodes: Total number of nodes in the graph. If not provided, it is inferred from the
            ``hyperedge_index``. Only needed when the ``hyperedge_index`` does not mention every node
            (e.g., isolated nodes are missing). Defaults to ``0``.
        graph_reduction_strategy: How the hypergraph is reduced to a graph. Defaults to ``clique_expansion``.
        num_epochs: Number of optimization epochs for the Node2Vec embeddings. Defaults to ``5``.
        learning_rate: Optimizer learning rate. Defaults to ``0.01``.
        batch_size: Batch size of the random-walk loader. Defaults to ``128``.
        sparse: Whether the Node2Vec embeddings use sparse gradients. Defaults to ``True``.
        cache_dir: Optional directory used to cache computed embeddings. ``None`` disables caching.
        verbose: Whether progress messages are printed during training. Defaults to ``False``.

    Raises:
        ValueError: If ``walk_length`` is smaller than ``context_size``.
    """

    def __init__(
        self,
        num_features: int,
        walk_length: int = 20,
        context_size: int = 10,
        num_walks_per_node: int = 10,
        p: float = 1.0,
        q: float = 1.0,
        num_negative_samples: int = 1,
        num_nodes: int = 0,
        graph_reduction_strategy: Literal["clique_expansion"] = "clique_expansion",
        num_epochs: int = 5,
        learning_rate: float = 0.01,
        batch_size: int = 128,
        sparse: bool = True,
        cache_dir: str | None = None,
        verbose: bool = False,
    ):
        super().__init__(cache_dir=cache_dir)

        # A skip-gram window cannot be wider than the walk it slides over.
        if walk_length < context_size:
            message = (
                "Expected walk_length >= context_size, got "
                f"walk_length={walk_length}, context_size={context_size}."
            )
            raise ValueError(message)

        # Random-walk configuration.
        self.walk_length = walk_length
        self.context_size = context_size
        self.num_walks_per_node = num_walks_per_node
        self.p = p
        self.q = q

        # Embedding / graph configuration.
        self.embedding_dim = num_features
        self.num_negative_samples = num_negative_samples
        self.num_nodes = num_nodes
        self.graph_reduction_strategy = graph_reduction_strategy
        self.sparse = sparse

        # Optimization configuration.
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.verbose = verbose

    def enrich(self, hyperedge_index: Tensor) -> Tensor:
        """
        Learn Node2Vec embeddings on the graph obtained by reducing the hypergraph.

        The hypergraph is reduced to a pairwise graph with ``self.graph_reduction_strategy``
        (clique expansion: each hyperedge of size k contributes a k x k block of edges between
        its member nodes). Self-loops are dropped, and a Node2Vec model is then trained on the
        remaining ``edge_index`` using random walks and the skip-gram objective.

        Args:
            hyperedge_index: Hyperedge incidence tensor of shape ``(2, num_incidences)``.

        Returns:
            Tensor of shape ``(num_nodes, embedding_dim)`` with one Node2Vec embedding per node,
            placed on the device of ``hyperedge_index``. An empty ``(0, embedding_dim)`` tensor is
            returned (with a ``UserWarning``) when there are no nodes, and an all-zero tensor
            (with a ``UserWarning``) when no non-self-loop edges remain after the reduction.
        """
        target_device = hyperedge_index.device

        def report(message: str) -> None:
            # Progress output is opt-in through ``verbose``.
            if self.verbose:
                print(message)

        report(f"Reducing hypergraph to graph via {self.graph_reduction_strategy}...")

        incidence = HyperedgeIndex(hyperedge_index)
        node_count = incidence.num_nodes_if_isolated_exist(self.num_nodes)
        if node_count == 0:
            warnings.warn(
                "Found no nodes. Returning empty node features.",
                category=UserWarning,
                stacklevel=2,
            )
            return torch.empty((0, self.embedding_dim), device=target_device)

        graph = EdgeIndex(incidence.reduce(self.graph_reduction_strategy)).remove_selfloops()
        if graph.num_edges == 0:
            # Without edges there is nothing to walk on, so training is skipped entirely.
            warnings.warn(
                "Clique expansion produced no non-self-loop edges. Returning zero node features.",
                category=UserWarning,
                stacklevel=2,
            )
            return torch.zeros((node_count, self.embedding_dim), device=target_device)

        node2vec = PyGNode2Vec(
            edge_index=graph.item.to(target_device),
            embedding_dim=self.embedding_dim,
            walk_length=self.walk_length,
            context_size=self.context_size,
            walks_per_node=self.num_walks_per_node,
            p=self.p,
            q=self.q,
            num_negative_samples=self.num_negative_samples,
            num_nodes=node_count,
            sparse=self.sparse,
        ).to(target_device)
        walk_loader = node2vec.loader(batch_size=self.batch_size, shuffle=True)

        # Sparse embedding gradients are paired with SparseAdam, dense ones with Adam.
        optimizer_cls = optim.SparseAdam if self.sparse else optim.Adam
        optimizer = optimizer_cls(node2vec.parameters(), lr=self.learning_rate)

        report(f"Training Node2Vec model for {self.num_epochs} epochs...")

        node2vec.train()
        for epoch_number in range(1, self.num_epochs + 1):
            report(f"Epoch {epoch_number}/{self.num_epochs}")

            # Each batch holds positive and negative random walks.
            for positive_walks, negative_walks in walk_loader:
                positive_walks = positive_walks.to(target_device)
                negative_walks = negative_walks.to(target_device)

                optimizer.zero_grad()
                batch_loss = node2vec.loss(positive_walks, negative_walks)
                batch_loss.backward()
                optimizer.step()

        report("Training complete. Generating node embeddings...")

        node2vec.eval()
        with torch.no_grad():
            embeddings: Tensor = node2vec()  # shape (num_nodes, num_features)

        # Hand back embeddings that are detached from the computation graph.
        return embeddings.detach().to(target_device)

enrich(hyperedge_index)

Compute Node2Vec embeddings from the clique expansion of the hypergraph.

The hypergraph is converted to a regular graph via clique expansion, where each hyperedge of size k contributes a k x k block of edges between its member nodes. The resulting edge_index is then used to train a Node2Vec model using random walks and the skip-gram objective.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Hyperedge index tensor of shape (2, num_incidences).

required

Returns:

Type Description
Tensor

Tensor of shape (num_nodes, embedding_dim) containing the Node2Vec embeddings for each node.

Source code in hyperbench/nn/enricher.py
def enrich(self, hyperedge_index: Tensor) -> Tensor:
    """
    Learn Node2Vec embeddings on the graph obtained by reducing the hypergraph.

    The hypergraph is reduced to a pairwise graph with ``self.graph_reduction_strategy``
    (clique expansion: each hyperedge of size k contributes a k x k block of edges between
    its member nodes). Self-loops are dropped, and a Node2Vec model is then trained on the
    remaining ``edge_index`` using random walks and the skip-gram objective.

    Args:
        hyperedge_index: Hyperedge incidence tensor of shape ``(2, num_incidences)``.

    Returns:
        Tensor of shape ``(num_nodes, embedding_dim)`` with one Node2Vec embedding per node,
        placed on the device of ``hyperedge_index``. An empty ``(0, embedding_dim)`` tensor is
        returned (with a ``UserWarning``) when there are no nodes, and an all-zero tensor
        (with a ``UserWarning``) when no non-self-loop edges remain after the reduction.
    """
    target_device = hyperedge_index.device

    def report(message: str) -> None:
        # Progress output is opt-in through ``verbose``.
        if self.verbose:
            print(message)

    report(f"Reducing hypergraph to graph via {self.graph_reduction_strategy}...")

    incidence = HyperedgeIndex(hyperedge_index)
    node_count = incidence.num_nodes_if_isolated_exist(self.num_nodes)
    if node_count == 0:
        warnings.warn(
            "Found no nodes. Returning empty node features.",
            category=UserWarning,
            stacklevel=2,
        )
        return torch.empty((0, self.embedding_dim), device=target_device)

    graph = EdgeIndex(incidence.reduce(self.graph_reduction_strategy)).remove_selfloops()
    if graph.num_edges == 0:
        # Without edges there is nothing to walk on, so training is skipped entirely.
        warnings.warn(
            "Clique expansion produced no non-self-loop edges. Returning zero node features.",
            category=UserWarning,
            stacklevel=2,
        )
        return torch.zeros((node_count, self.embedding_dim), device=target_device)

    node2vec = PyGNode2Vec(
        edge_index=graph.item.to(target_device),
        embedding_dim=self.embedding_dim,
        walk_length=self.walk_length,
        context_size=self.context_size,
        walks_per_node=self.num_walks_per_node,
        p=self.p,
        q=self.q,
        num_negative_samples=self.num_negative_samples,
        num_nodes=node_count,
        sparse=self.sparse,
    ).to(target_device)
    walk_loader = node2vec.loader(batch_size=self.batch_size, shuffle=True)

    # Sparse embedding gradients are paired with SparseAdam, dense ones with Adam.
    optimizer_cls = optim.SparseAdam if self.sparse else optim.Adam
    optimizer = optimizer_cls(node2vec.parameters(), lr=self.learning_rate)

    report(f"Training Node2Vec model for {self.num_epochs} epochs...")

    node2vec.train()
    for epoch_number in range(1, self.num_epochs + 1):
        report(f"Epoch {epoch_number}/{self.num_epochs}")

        # Each batch holds positive and negative random walks.
        for positive_walks, negative_walks in walk_loader:
            positive_walks = positive_walks.to(target_device)
            negative_walks = negative_walks.to(target_device)

            optimizer.zero_grad()
            batch_loss = node2vec.loss(positive_walks, negative_walks)
            batch_loss.backward()
            optimizer.step()

    report("Training complete. Generating node embeddings...")

    node2vec.eval()
    with torch.no_grad():
        embeddings: Tensor = node2vec()  # shape (num_nodes, num_features)

    # Hand back embeddings that are detached from the computation graph.
    return embeddings.detach().to(target_device)

VilLainHyperedgeAttrsEnricher

Bases: _VilLainTrainer, HyperedgeAttrsEnricher

Enrich hyperedge attributes with VilLain embeddings learned from hypergraph topology.

Parameters:

Name Type Description Default
num_features int

Dimensionality of the hyperedge embeddings to generate.

required
num_nodes int

Total number of nodes, including isolated nodes that do not appear in hyperedge_index.

0
num_hyperedges int

Total number of hyperedges, including empty hyperedges that do not appear in hyperedge_index.

0
labels_per_subspace int

Number of virtual labels per subspace. Defaults to 2.

2
training_steps int

Propagation steps used for VilLain self-supervised loss. Defaults to 4.

4
generation_steps int

Propagation steps averaged for final embeddings. Defaults to 100.

100
tau float

Gumbel-Softmax temperature. Defaults to 1.0.

1.0
eps float

Numerical stability constant. Defaults to 1e-10.

1e-10
num_epochs int

Number of epochs used to optimize VilLain embeddings. Defaults to 5.

5
learning_rate float

Learning rate for embedding optimization. Defaults to 0.01.

0.01
weight_decay float

Weight decay for the optimizer. Defaults to 0.0.

0.0
cache_dir str | None

Optional directory to cache computed features. If None, caching is disabled.

None
verbose bool

Whether to print verbose output during training. Defaults to False.

False
Source code in hyperbench/nn/enricher.py
class VilLainHyperedgeAttrsEnricher(_VilLainTrainer, HyperedgeAttrsEnricher):
    """
    Hyperedge-attribute enricher backed by VilLain embeddings learned from hypergraph topology.

    Args:
        num_features: Size of each generated hyperedge embedding.
        num_nodes: Total number of nodes, counting isolated nodes absent from ``hyperedge_index``.
            Defaults to ``0``.
        num_hyperedges: Total number of hyperedges, counting empty hyperedges absent from
            ``hyperedge_index``. Defaults to ``0``.
        labels_per_subspace: Number of virtual labels per subspace. Defaults to ``2``.
        training_steps: Propagation steps used for the VilLain self-supervised loss. Defaults to ``4``.
        generation_steps: Propagation steps averaged to build the final embeddings. Defaults to ``100``.
        tau: Gumbel-Softmax temperature. Defaults to ``1.0``.
        eps: Numerical stability constant. Defaults to ``1e-10``.
        num_epochs: Number of optimization epochs for the VilLain embeddings. Defaults to ``5``.
        learning_rate: Optimizer learning rate. Defaults to ``0.01``.
        weight_decay: Optimizer weight decay. Defaults to ``0.0``.
        cache_dir: Optional directory used to cache computed features. ``None`` disables caching.
        verbose: Whether progress messages are printed during training. Defaults to ``False``.
    """

    def __init__(
        self,
        num_features: int,
        num_nodes: int = 0,
        num_hyperedges: int = 0,
        labels_per_subspace: int = 2,
        training_steps: int = 4,
        generation_steps: int = 100,
        tau: float = 1.0,
        eps: float = 1e-10,
        num_epochs: int = 5,
        learning_rate: float = 0.01,
        weight_decay: float = 0.0,
        cache_dir: str | None = None,
        verbose: bool = False,
    ):
        # Both bases are initialized explicitly: the enricher base receives the cache
        # setting, the trainer base receives every VilLain hyperparameter.
        HyperedgeAttrsEnricher.__init__(self, cache_dir=cache_dir)

        trainer_options = {
            "num_features": num_features,
            "num_nodes": num_nodes,
            "num_hyperedges": num_hyperedges,
            "labels_per_subspace": labels_per_subspace,
            "training_steps": training_steps,
            "generation_steps": generation_steps,
            "tau": tau,
            "eps": eps,
            "num_epochs": num_epochs,
            "learning_rate": learning_rate,
            "weight_decay": weight_decay,
            "verbose": verbose,
        }
        _VilLainTrainer.__init__(self, **trainer_options)

    def enrich(self, hyperedge_index: Tensor) -> Tensor:
        """
        Fit VilLain on the hypergraph and produce one embedding per hyperedge.

        Args:
            hyperedge_index: Hyperedge incidence tensor of shape ``(2, num_incidences)``.

        Returns:
            Tensor of shape ``(num_hyperedges, num_features)`` holding the VilLain hyperedge
            embeddings, on the device of ``hyperedge_index``. When no hyperedges are found, a
            ``UserWarning`` is emitted and the result of ``self._empty_features`` is returned.
        """
        hyperedge_count = self._num_hyperedges(hyperedge_index)
        if hyperedge_count == 0:
            warnings.warn(
                "Found no hyperedges. Returning empty hyperedge attributes.",
                category=UserWarning,
                stacklevel=2,
            )
            return self._empty_features(hyperedge_index)

        trained_model = self._train(hyperedge_index)
        trained_model.eval()
        with torch.no_grad():
            embeddings = trained_model.hyperedge_embeddings(
                hyperedge_index=hyperedge_index,
                num_hyperedges=hyperedge_count,
            )

        output_device = hyperedge_index.device
        return embeddings.detach().to(output_device)

enrich(hyperedge_index)

Train VilLain on the hypergraph and return hyperedge embeddings.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Hyperedge index tensor of shape (2, num_incidences).

required

Returns:

Type Description
Tensor

Tensor of shape (num_hyperedges, num_features) containing VilLain hyperedge embeddings.

Source code in hyperbench/nn/enricher.py
def enrich(self, hyperedge_index: Tensor) -> Tensor:
    """
    Fit VilLain on the hypergraph and produce one embedding per hyperedge.

    Args:
        hyperedge_index: Hyperedge incidence tensor of shape ``(2, num_incidences)``.

    Returns:
        Tensor of shape ``(num_hyperedges, num_features)`` holding the VilLain hyperedge
        embeddings, on the device of ``hyperedge_index``. When no hyperedges are found, a
        ``UserWarning`` is emitted and the result of ``self._empty_features`` is returned.
    """
    hyperedge_count = self._num_hyperedges(hyperedge_index)
    if hyperedge_count == 0:
        warnings.warn(
            "Found no hyperedges. Returning empty hyperedge attributes.",
            category=UserWarning,
            stacklevel=2,
        )
        return self._empty_features(hyperedge_index)

    trained_model = self._train(hyperedge_index)
    trained_model.eval()
    with torch.no_grad():
        embeddings = trained_model.hyperedge_embeddings(
            hyperedge_index=hyperedge_index,
            num_hyperedges=hyperedge_count,
        )

    output_device = hyperedge_index.device
    return embeddings.detach().to(output_device)

VilLainEnricher

Bases: _VilLainTrainer, NodeEnricher

Enrich node features with VilLain embeddings learned from hypergraph topology.

Parameters:

Name Type Description Default
num_features int

Dimensionality of the node embeddings to generate.

required
num_nodes int

Total number of nodes, including isolated nodes that do not appear in hyperedge_index.

0
num_hyperedges int

Total number of hyperedges, including empty hyperedges that do not appear in hyperedge_index.

0
labels_per_subspace int

Number of virtual labels per subspace. Defaults to 2.

2
training_steps int

Propagation steps used for VilLain self-supervised loss. Defaults to 4.

4
generation_steps int

Propagation steps averaged for final embeddings. Defaults to 100.

100
tau float

Gumbel-Softmax temperature. Defaults to 1.0.

1.0
eps float

Numerical stability constant. Defaults to 1e-10.

1e-10
num_epochs int

Number of epochs used to optimize VilLain embeddings. Defaults to 5.

5
learning_rate float

Learning rate for embedding optimization. Defaults to 0.01.

0.01
weight_decay float

Weight decay for the optimizer. Defaults to 0.0.

0.0
cache_dir str | None

Optional directory to cache computed features. If None, caching is disabled.

None
verbose bool

Whether to print verbose output during training. Defaults to False.

False
Source code in hyperbench/nn/enricher.py
class VilLainEnricher(_VilLainTrainer, NodeEnricher):
    """
    Node-feature enricher backed by VilLain embeddings learned from hypergraph topology.

    Args:
        num_features: Size of each generated node embedding.
        num_nodes: Total number of nodes, counting isolated nodes absent from ``hyperedge_index``.
            Defaults to ``0``.
        num_hyperedges: Total number of hyperedges, counting empty hyperedges absent from
            ``hyperedge_index``. Defaults to ``0``.
        labels_per_subspace: Number of virtual labels per subspace. Defaults to ``2``.
        training_steps: Propagation steps used for the VilLain self-supervised loss. Defaults to ``4``.
        generation_steps: Propagation steps averaged to build the final embeddings. Defaults to ``100``.
        tau: Gumbel-Softmax temperature. Defaults to ``1.0``.
        eps: Numerical stability constant. Defaults to ``1e-10``.
        num_epochs: Number of optimization epochs for the VilLain embeddings. Defaults to ``5``.
        learning_rate: Optimizer learning rate. Defaults to ``0.01``.
        weight_decay: Optimizer weight decay. Defaults to ``0.0``.
        cache_dir: Optional directory used to cache computed features. ``None`` disables caching.
        verbose: Whether progress messages are printed during training. Defaults to ``False``.
    """

    def __init__(
        self,
        num_features: int,
        num_nodes: int = 0,
        num_hyperedges: int = 0,
        labels_per_subspace: int = 2,
        training_steps: int = 4,
        generation_steps: int = 100,
        tau: float = 1.0,
        eps: float = 1e-10,
        num_epochs: int = 5,
        learning_rate: float = 0.01,
        weight_decay: float = 0.0,
        cache_dir: str | None = None,
        verbose: bool = False,
    ):
        # Both bases are initialized explicitly: the enricher base receives the cache
        # setting, the trainer base receives every VilLain hyperparameter.
        NodeEnricher.__init__(self, cache_dir=cache_dir)

        trainer_options = {
            "num_features": num_features,
            "num_nodes": num_nodes,
            "num_hyperedges": num_hyperedges,
            "labels_per_subspace": labels_per_subspace,
            "training_steps": training_steps,
            "generation_steps": generation_steps,
            "tau": tau,
            "eps": eps,
            "num_epochs": num_epochs,
            "learning_rate": learning_rate,
            "weight_decay": weight_decay,
            "verbose": verbose,
        }
        _VilLainTrainer.__init__(self, **trainer_options)

    def enrich(self, hyperedge_index: Tensor) -> Tensor:
        """
        Fit VilLain on the hypergraph and produce one embedding per node.

        Args:
            hyperedge_index: Hyperedge incidence tensor of shape ``(2, num_incidences)``.

        Returns:
            Tensor of shape ``(num_nodes, num_features)`` holding the VilLain node embeddings,
            on the device of ``hyperedge_index``. When no nodes are found, a ``UserWarning``
            is emitted and the result of ``self._empty_features`` is returned.
        """
        node_count = self._num_nodes(hyperedge_index)
        if node_count == 0:
            warnings.warn(
                "Found no nodes. Returning empty node features.",
                category=UserWarning,
                stacklevel=2,
            )
            return self._empty_features(hyperedge_index)

        trained_model = self._train(hyperedge_index)
        trained_model.eval()
        with torch.no_grad():
            hyperedge_count = self._num_hyperedges(hyperedge_index)
            embeddings = trained_model.node_embeddings(
                hyperedge_index=hyperedge_index,
                num_hyperedges=hyperedge_count,
            )

        output_device = hyperedge_index.device
        return embeddings.detach().to(output_device)

enrich(hyperedge_index)

Train VilLain on the hypergraph and return node embeddings.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Hyperedge index tensor of shape (2, num_incidences).

required

Returns:

Type Description
Tensor

Tensor of shape (num_nodes, num_features) containing VilLain node embeddings.

Source code in hyperbench/nn/enricher.py
def enrich(self, hyperedge_index: Tensor) -> Tensor:
    """
    Fit VilLain on the hypergraph and produce one embedding per node.

    Args:
        hyperedge_index: Hyperedge incidence tensor of shape ``(2, num_incidences)``.

    Returns:
        Tensor of shape ``(num_nodes, num_features)`` holding the VilLain node embeddings,
        on the device of ``hyperedge_index``. When no nodes are found, a ``UserWarning``
        is emitted and the result of ``self._empty_features`` is returned.
    """
    node_count = self._num_nodes(hyperedge_index)
    if node_count == 0:
        warnings.warn(
            "Found no nodes. Returning empty node features.",
            category=UserWarning,
            stacklevel=2,
        )
        return self._empty_features(hyperedge_index)

    trained_model = self._train(hyperedge_index)
    trained_model.eval()
    with torch.no_grad():
        hyperedge_count = self._num_hyperedges(hyperedge_index)
        embeddings = trained_model.node_embeddings(
            hyperedge_index=hyperedge_index,
            num_hyperedges=hyperedge_count,
        )

    output_device = hyperedge_index.device
    return embeddings.detach().to(output_device)

NHPRankingLoss

Bases: Module

Ranking loss that pushes positive hyperedges above sampled negatives.

Examples:

>>> logits = torch.tensor([2.0, 1.0, -1.0])
>>> labels = torch.tensor([1.0, 1.0, 0.0])
>>> loss_fn = NHPRankingLoss()
>>> loss = loss_fn(logits, labels)
>>> loss.ndim
0
Source code in hyperbench/nn/loss.py
class NHPRankingLoss(nn.Module):
    """
    Ranking loss that pushes positive hyperedges above sampled negatives.

    Examples:
        >>> logits = torch.tensor([2.0, 1.0, -1.0])
        >>> labels = torch.tensor([1.0, 1.0, 0.0])
        >>> loss_fn = NHPRankingLoss()
        >>> loss = loss_fn(logits, labels)
        >>> loss.ndim
        0
    """

    def forward(self, logits: Tensor, labels: Tensor) -> Tensor:
        """
        Compute the ranking loss.

        Args:
            logits: Logit scores for each candidate hyperedge, of shape ``(num_hyperedges,)``.
            labels: Binary labels marking positive (1) and negative (0) hyperedges, of shape ``(num_hyperedges,)``.

        Returns:
            Scalar loss value.

        Raises:
            ValueError: If there is no positive or no negative hyperedge among the candidates.
        """
        # Partition the candidates by label, since positives are ranked against negatives.
        # Example: logits = [2.0, 1.0, -1.0], labels = [1.0, 1.0, 0.0]
        #          -> positives come from [2.0, 1.0], negatives from [-1.0]
        is_positive = labels == 1
        is_negative = labels == 0

        pos_scores = torch.sigmoid(logits[is_positive])
        neg_scores = torch.sigmoid(logits[is_negative])
        if min(pos_scores.numel(), neg_scores.numel()) == 0:
            raise ValueError("NHPRankingLoss requires both positive and negative hyperedges.")

        # Every positive score is compared against the average negative score:
        #   margin_i = mean(negative scores) - pos_i
        # margin_i < 0 means the positive already ranks above the average negative (desirable),
        # margin_i >= 0 means the ranking is violated.
        mean_negative = neg_scores.mean()
        margins = mean_negative - pos_scores

        # softplus is ~0 for strongly negative margins and grows smoothly for positive ones,
        # so only violations are penalized noticeably. The loss averages over the positives.
        penalties = F.softplus(margins)
        return penalties.mean()

forward(logits, labels)

Compute the ranking loss.

Parameters:

Name Type Description Default
logits Tensor

Logit scores for each candidate hyperedge, of shape (num_hyperedges,).

required
labels Tensor

Binary labels indicating positive (1) and negative (0) hyperedges, of shape (num_hyperedges,).

required

Returns:

Type Description
Tensor

Scalar loss value.

Source code in hyperbench/nn/loss.py
def forward(self, logits: Tensor, labels: Tensor) -> Tensor:
    """
    Compute the ranking loss.

    Args:
        logits: Logit scores for each candidate hyperedge, of shape ``(num_hyperedges,)``.
        labels: Binary labels marking positive (1) and negative (0) hyperedges, of shape ``(num_hyperedges,)``.

    Returns:
        Scalar loss value.

    Raises:
        ValueError: If there is no positive or no negative hyperedge among the candidates.
    """
    # Partition the candidates by label, since positives are ranked against negatives.
    # Example: logits = [2.0, 1.0, -1.0], labels = [1.0, 1.0, 0.0]
    #          -> positives come from [2.0, 1.0], negatives from [-1.0]
    is_positive = labels == 1
    is_negative = labels == 0

    pos_scores = torch.sigmoid(logits[is_positive])
    neg_scores = torch.sigmoid(logits[is_negative])
    if min(pos_scores.numel(), neg_scores.numel()) == 0:
        raise ValueError("NHPRankingLoss requires both positive and negative hyperedges.")

    # Every positive score is compared against the average negative score:
    #   margin_i = mean(negative scores) - pos_i
    # margin_i < 0 means the positive already ranks above the average negative (desirable),
    # margin_i >= 0 means the ranking is violated.
    mean_negative = neg_scores.mean()
    margins = mean_negative - pos_scores

    # softplus is ~0 for strongly negative margins and grows smoothly for positive ones,
    # so only violations are penalized noticeably. The loss averages over the positives.
    penalties = F.softplus(margins)
    return penalties.mean()

VilLainLoss

VilLain self-supervised loss formulas.

This class is intentionally stateless with respect to propagation. The VilLain model owns message passing and accumulation over steps, while this class owns the per-step formulas for the local and global losses.

Parameters:

Name Type Description Default
num_subspaces int

Number of virtual-label subspaces in each embedding.

required
labels_per_subspace int

Number of virtual labels in each subspace.

required
eps float

Numerical stability constant used in logarithms and cosine similarity.

1e-12
Source code in hyperbench/nn/loss.py
class VilLainLoss:
    """
    VilLain self-supervised loss formulas.

    This class is intentionally stateless with respect to propagation.
    The VilLain model owns message passing and accumulation over steps,
    and this class owns the per-step formulas for local and global loss.

    Every non-empty input to ``entropy_loss``, ``balance_loss`` and ``distinctiveness_loss``
    must have a last dimension equal to ``num_subspaces * labels_per_subspace``;
    otherwise a ``ValueError`` is raised instead of silently regrouping rows.

    Args:
        num_subspaces: Number of virtual-label subspaces in each embedding.
        labels_per_subspace: Number of virtual labels in each subspace.
        eps: Numerical stability constant used in logarithms and cosine similarity.
    """

    def __init__(
        self,
        num_subspaces: int,
        labels_per_subspace: int,
        eps: float = 1e-12,
    ) -> None:
        super().__init__()
        self.num_subspaces = num_subspaces
        self.labels_per_subspace = labels_per_subspace
        self.eps = eps

    def local_loss(self, node_embeddings: Tensor, hyperedge_embeddings: Tensor) -> Tensor:
        """
        Compute the local entropy loss for one propagation step.

        Local loss is minimized to encourage propagated node and hyperedge distributions
        to become confident within each virtual-label subspace.

        Args:
            node_embeddings: Propagated node states of shape ``(num_nodes, num_subspaces * labels_per_subspace)``.
            hyperedge_embeddings: Propagated hyperedge states with the same channel dimension as ``node_embeddings``.

        Returns:
            Scalar tensor containing node plus hyperedge entropy losses.
        """
        return self.entropy_loss(node_embeddings) + self.entropy_loss(hyperedge_embeddings)

    def global_loss(self, node_embeddings: Tensor, hyperedge_embeddings: Tensor) -> Tensor:
        """
        Compute global anti-collapse losses for one propagation step.

        Global loss combines negative global entropy, which encourages balanced label usage,
        with a distinctiveness term that separates label columns inside each subspace.

        Args:
            node_embeddings: Propagated node states of shape ``(num_nodes, num_subspaces * labels_per_subspace)``.
            hyperedge_embeddings: Propagated hyperedge states with the same channel dimension as ``node_embeddings``.

        Returns:
            Scalar tensor containing node plus hyperedge global losses.
        """
        return (
            self.balance_loss(node_embeddings)
            + self.distinctiveness_loss(node_embeddings)
            + self.balance_loss(hyperedge_embeddings)
            + self.distinctiveness_loss(hyperedge_embeddings)
        )

    def total_loss(self, local_loss: Tensor, global_loss: Tensor) -> Tensor:
        """
        Combine accumulated local and global VilLain losses.

        Args:
            local_loss: Accumulated local entropy loss.
            global_loss: Accumulated balance plus distinctiveness loss.

        Returns:
            Scalar tensor to minimize.
        """
        return local_loss + global_loss

    def _subspace_probs(self, x: Tensor) -> Tensor:
        """
        Reshape flattened virtual-label probabilities into one distribution per subspace.

        Args:
            x: Tensor whose last dimension must equal ``num_subspaces * labels_per_subspace``.

        Returns:
            A view of ``x`` with shape ``(-1, num_subspaces, labels_per_subspace)``.

        Raises:
            ValueError: If the last dimension of ``x`` does not match the configured layout.
        """
        expected_channels = self.num_subspaces * self.labels_per_subspace
        # ``view(-1, ...)`` infers the leading dimension, so a wrong channel width whose
        # element count happens to divide evenly would regroup rows without any error.
        if x.size(-1) != expected_channels:
            raise ValueError(
                f"Expected last dimension of size {expected_channels} "
                f"(num_subspaces={self.num_subspaces} * labels_per_subspace={self.labels_per_subspace}), "
                f"got {x.size(-1)}."
            )
        return x.view(-1, self.num_subspaces, self.labels_per_subspace)

    def entropy_loss(self, x: Tensor) -> Tensor:
        """
        Compute mean entropy within each virtual-label subspace.

        Args:
            x: Flattened virtual-label probabilities of shape ``(num_items, num_subspaces * labels_per_subspace)``.

        Returns:
            Scalar entropy loss. An input with zero rows yields a zero that stays attached to ``x``.

        Raises:
            ValueError: If ``x`` is non-empty and its last dimension is not ``num_subspaces * labels_per_subspace``.
        """
        if x.size(0) == 0:
            return x.sum() * 0.0

        # Example: x.shape = (num_nodes, 8) with num_subspaces=4, labels_per_subspace=2
        #          -> probs.shape = (num_nodes, 4, 2)
        #          probs[0, 0] = [0.12, 0.88] is item 0's virtual-label distribution in subspace 0.
        probs = self._subspace_probs(x)

        # One entropy value per item and subspace: entropy.shape = (num_nodes, 4).
        # Minimizing it pushes each per-subspace distribution towards a confident assignment.
        entropy = -(probs * torch.log(probs + self.eps)).sum(dim=2)
        return entropy.mean()

    def balance_loss(self, x: Tensor) -> Tensor:
        """
        Compute negative entropy of global virtual-label usage.

        This term is minimized, so the negative sign makes optimization maximize entropy of average label usage
        and reduces collapse to one virtual label.

        Args:
            x: Flattened virtual-label probabilities of shape ``(num_items, num_subspaces * labels_per_subspace)``.

        Returns:
            Scalar balance loss. An input with zero rows yields a zero that stays attached to ``x``.

        Raises:
            ValueError: If ``x`` is non-empty and its last dimension is not ``num_subspaces * labels_per_subspace``.
        """
        if x.size(0) == 0:
            return x.sum() * 0.0

        # Example: with num_subspaces=4, labels_per_subspace=2:
        #          x.shape = (num_nodes, 8)
        #          -> probs.shape = (num_nodes, 4, 2)
        #          -> mean_probs.shape = (4, 2)
        #          mean_probs[0] = average usage of the two labels in subspace 0 across all items.
        probs = self._subspace_probs(x)
        mean_probs = probs.mean(dim=0)

        # Negated entropy: mean_probs[0] = [0.50, 0.50] has higher entropy (lower loss)
        # than mean_probs[0] = [0.99, 0.01].
        entropy = -(mean_probs * torch.log(mean_probs + self.eps)).sum(dim=1)
        return -entropy.mean()

    def distinctiveness_loss(self, x: Tensor) -> Tensor:
        """
        Penalize similar virtual-label columns inside each subspace.

        For every subspace, this compares all label columns across items with cosine similarity and applies a diagonal classification objective.
        The diagonal target encourages each label column to be most similar to itself and less similar to other labels.

        Args:
            x: Flattened virtual-label probabilities of shape ``(num_items, num_subspaces * labels_per_subspace)``.

        Returns:
            Scalar distinctiveness loss. An input with zero rows yields a zero that stays attached to ``x``.

        Raises:
            ValueError: If ``x`` is non-empty and its last dimension is not ``num_subspaces * labels_per_subspace``.
        """
        if x.size(0) == 0:
            return x.sum() * 0.0

        # Example: with num_subspaces=4, labels_per_subspace=2:
        #          x.shape = (num_nodes, 8) -> probs.shape = (num_nodes, 4, 2)
        probs = self._subspace_probs(x)

        # Enumerate every ordered pair (i, j) of label columns inside a subspace.
        # Example with labels_per_subspace=2:
        #         idx_i = [0, 1, 0, 1]
        #         idx_j = [0, 0, 1, 1]
        #         -> pairs (0,0), (1,0), (0,1), (1,1)
        idx_i = torch.arange(self.labels_per_subspace, device=x.device).repeat(
            self.labels_per_subspace
        )
        idx_j = torch.arange(self.labels_per_subspace, device=x.device).repeat_interleave(
            self.labels_per_subspace
        )

        # Gather both sides of every pair; each gathered tensor has shape
        # (num_items, num_subspaces, labels_per_subspace ** 2).
        # Cosine similarity along dim=0 compares two label columns across all items, giving
        # shape (num_subspaces, labels_per_subspace ** 2), which is then folded back into one
        # (labels_per_subspace x labels_per_subspace) similarity matrix per subspace.
        # Two different labels in the same subspace should not describe the same pattern of items.
        similarity = F.cosine_similarity(
            probs[:, :, idx_i],
            probs[:, :, idx_j],
            dim=0,
            eps=self.eps,
        ).view(-1, self.labels_per_subspace, self.labels_per_subspace)

        # Softmax turns each similarity row into a distribution over labels; the diagonal holds
        # the probability that a label column "matches itself".
        # Minimizing -log(diagonal) makes each column most similar to itself and
        # less similar to the other columns of its subspace.
        assignment_probs = torch.softmax(similarity, dim=2)
        diagonal_probs = torch.diagonal(assignment_probs, dim1=1, dim2=2)
        return torch.mean(-torch.log(diagonal_probs + self.eps))

local_loss(node_embeddings, hyperedge_embeddings)

Compute the local entropy loss for one propagation step.

Local loss is minimized to encourage propagated node and hyperedge distributions to become confident within each virtual-label subspace.

Parameters:

Name Type Description Default
node_embeddings Tensor

Propagated node states of shape (num_nodes, num_subspaces * labels_per_subspace).

required
hyperedge_embeddings Tensor

Propagated hyperedge states with the same channel dimension as node_embeddings.

required

Returns:

Type Description
Tensor

Scalar tensor containing node plus hyperedge entropy losses.

Source code in hyperbench/nn/loss.py
def local_loss(self, node_embeddings: Tensor, hyperedge_embeddings: Tensor) -> Tensor:
    """
    Return the local (per-item) entropy loss for a single propagation step.

    The entropy loss is evaluated separately on the node states and on the hyperedge states,
    and the two results are added. Minimizing it drives both sets of distributions
    towards confident assignments inside every virtual-label subspace.

    Args:
        node_embeddings: Propagated node states of shape ``(num_nodes, num_subspaces * labels_per_subspace)``.
        hyperedge_embeddings: Propagated hyperedge states with the same channel dimension as ``node_embeddings``.

    Returns:
        Scalar tensor: node entropy loss plus hyperedge entropy loss.
    """
    node_entropy = self.entropy_loss(node_embeddings)
    hyperedge_entropy = self.entropy_loss(hyperedge_embeddings)
    return node_entropy + hyperedge_entropy

global_loss(node_embeddings, hyperedge_embeddings)

Compute global anti-collapse losses for one propagation step.

Global loss combines negative global entropy, which encourages balanced label usage, with a distinctiveness term that separates label columns inside each subspace.

Parameters:

Name Type Description Default
node_embeddings Tensor

Propagated node states of shape (num_nodes, num_subspaces * labels_per_subspace).

required
hyperedge_embeddings Tensor

Propagated hyperedge states with the same channel dimension as node_embeddings.

required

Returns:

Type Description
Tensor

Scalar tensor containing node plus hyperedge global losses.

Source code in hyperbench/nn/loss.py
def global_loss(self, node_embeddings: Tensor, hyperedge_embeddings: Tensor) -> Tensor:
    """
    Return the global anti-collapse loss for a single propagation step.

    Two terms are evaluated on the node states and again on the hyperedge states:
    the balance loss (negative entropy of average label usage) and the
    distinctiveness loss (separation of label columns inside each subspace).
    All four values are summed.

    Args:
        node_embeddings: Propagated node states of shape ``(num_nodes, num_subspaces * labels_per_subspace)``.
        hyperedge_embeddings: Propagated hyperedge states with the same channel dimension as ``node_embeddings``.

    Returns:
        Scalar tensor: node global loss plus hyperedge global loss.
    """
    # Terms are evaluated and accumulated in the order node-balance, node-distinctiveness,
    # hyperedge-balance, hyperedge-distinctiveness.
    node_term = self.balance_loss(node_embeddings) + self.distinctiveness_loss(node_embeddings)
    hyperedge_balance = self.balance_loss(hyperedge_embeddings)
    hyperedge_distinctiveness = self.distinctiveness_loss(hyperedge_embeddings)
    return node_term + hyperedge_balance + hyperedge_distinctiveness

total_loss(local_loss, global_loss)

Combine accumulated local and global VilLain losses.

Parameters:

Name Type Description Default
local_loss Tensor

Accumulated local entropy loss.

required
global_loss Tensor

Accumulated balance plus distinctiveness loss.

required

Returns:

Type Description
Tensor

Scalar tensor to minimize.

Source code in hyperbench/nn/loss.py
def total_loss(self, local_loss: Tensor, global_loss: Tensor) -> Tensor:
    """
    Add the accumulated local and global VilLain losses into the training objective.

    Args:
        local_loss: Accumulated local entropy loss.
        global_loss: Accumulated balance plus distinctiveness loss.

    Returns:
        Scalar tensor to minimize (the unweighted sum of both inputs).
    """
    combined = local_loss + global_loss
    return combined

entropy_loss(x)

Compute mean entropy within each virtual-label subspace.

Parameters:

Name Type Description Default
x Tensor

Flattened virtual-label probabilities of shape (num_items, num_subspaces * labels_per_subspace).

required

Returns:

Type Description
Tensor

Scalar entropy loss.

Source code in hyperbench/nn/loss.py
def entropy_loss(self, x: Tensor) -> Tensor:
    """
    Compute mean entropy within each virtual-label subspace.

    Args:
        x: Flattened virtual-label probabilities of shape ``(num_items, num_subspaces * labels_per_subspace)``.

    Returns:
        Scalar entropy loss. An input with zero rows yields a zero that stays attached to ``x``.

    Raises:
        ValueError: If ``x`` is non-empty and its last dimension is not ``num_subspaces * labels_per_subspace``.
    """
    if x.size(0) == 0:
        return x.sum() * 0.0

    # ``view(-1, ...)`` infers the leading dimension, so a wrong channel width whose element
    # count happens to divide evenly would regroup rows without any error. Reject it up front.
    expected_channels = self.num_subspaces * self.labels_per_subspace
    if x.size(-1) != expected_channels:
        raise ValueError(
            f"Expected last dimension of size {expected_channels} "
            f"(num_subspaces={self.num_subspaces} * labels_per_subspace={self.labels_per_subspace}), "
            f"got {x.size(-1)}."
        )

    # Example: x.shape = (num_nodes, 8) with num_subspaces=4, labels_per_subspace=2
    #          -> probs.shape = (num_nodes, 4, 2)
    #          probs[0, 0] = [0.12, 0.88] is item 0's virtual-label distribution in subspace 0.
    probs = x.view(-1, self.num_subspaces, self.labels_per_subspace)

    # One entropy value per item and subspace: entropy.shape = (num_nodes, 4).
    # Minimizing it pushes each per-subspace distribution towards a confident assignment.
    entropy = -(probs * torch.log(probs + self.eps)).sum(dim=2)
    return entropy.mean()

balance_loss(x)

Compute negative entropy of global virtual-label usage.

This term is minimized, so the negative sign makes optimization maximize entropy of average label usage and reduces collapse to one virtual label.

Parameters:

Name Type Description Default
x Tensor

Flattened virtual-label probabilities of shape (num_items, num_subspaces * labels_per_subspace).

required

Returns:

Type Description
Tensor

Scalar balance loss.

Source code in hyperbench/nn/loss.py
def balance_loss(self, x: Tensor) -> Tensor:
    """
    Compute negative entropy of global virtual-label usage.

    This term is minimized, so the negative sign makes optimization maximize entropy of average label usage
    and reduces collapse to one virtual label.

    Args:
        x: Flattened virtual-label probabilities of shape ``(num_items, num_subspaces * labels_per_subspace)``.

    Returns:
        Scalar balance loss. An input with zero rows yields a zero that stays attached to ``x``.

    Raises:
        ValueError: If ``x`` is non-empty and its last dimension is not ``num_subspaces * labels_per_subspace``.
    """
    if x.size(0) == 0:
        return x.sum() * 0.0

    # ``view(-1, ...)`` infers the leading dimension, so a wrong channel width whose element
    # count happens to divide evenly would regroup rows without any error. Reject it up front.
    expected_channels = self.num_subspaces * self.labels_per_subspace
    if x.size(-1) != expected_channels:
        raise ValueError(
            f"Expected last dimension of size {expected_channels} "
            f"(num_subspaces={self.num_subspaces} * labels_per_subspace={self.labels_per_subspace}), "
            f"got {x.size(-1)}."
        )

    # Example: with num_subspaces=4, labels_per_subspace=2:
    #          x.shape = (num_nodes, 8)
    #          -> probs.shape = (num_nodes, 4, 2)
    #          -> mean_probs.shape = (4, 2)
    #          mean_probs[0] = average usage of the two labels in subspace 0 across all items.
    probs = x.view(-1, self.num_subspaces, self.labels_per_subspace)
    mean_probs = probs.mean(dim=0)

    # Negated entropy: mean_probs[0] = [0.50, 0.50] has higher entropy (lower loss)
    # than mean_probs[0] = [0.99, 0.01].
    entropy = -(mean_probs * torch.log(mean_probs + self.eps)).sum(dim=1)
    return -entropy.mean()

distinctiveness_loss(x)

Penalize similar virtual-label columns inside each subspace.

For every subspace, this compares all label columns across items with cosine similarity and applies a diagonal classification objective. The diagonal target encourages each label column to be most similar to itself and less similar to other labels.

Parameters:

Name Type Description Default
x Tensor

Flattened virtual-label probabilities of shape (num_items, num_subspaces * labels_per_subspace).

required

Returns:

Type Description
Tensor

Scalar distinctiveness loss.

Source code in hyperbench/nn/loss.py
def distinctiveness_loss(self, x: Tensor) -> Tensor:
    """
    Penalize similar virtual-label columns inside each subspace.

    For every subspace, this compares all label columns across items with cosine similarity and applies a diagonal classification objective.
    The diagonal target encourages each label column to be most similar to itself and less similar to other labels.

    Args:
        x: Flattened virtual-label probabilities of shape ``(num_items, num_subspaces * labels_per_subspace)``.

    Returns:
        Scalar distinctiveness loss. An input with zero rows yields a zero that stays attached to ``x``.

    Raises:
        ValueError: If ``x`` is non-empty and its last dimension is not ``num_subspaces * labels_per_subspace``.
    """
    if x.size(0) == 0:
        return x.sum() * 0.0

    # ``view(-1, ...)`` infers the leading dimension, so a wrong channel width whose element
    # count happens to divide evenly would regroup rows without any error. Reject it up front.
    expected_channels = self.num_subspaces * self.labels_per_subspace
    if x.size(-1) != expected_channels:
        raise ValueError(
            f"Expected last dimension of size {expected_channels} "
            f"(num_subspaces={self.num_subspaces} * labels_per_subspace={self.labels_per_subspace}), "
            f"got {x.size(-1)}."
        )

    # Example: with num_subspaces=4, labels_per_subspace=2:
    #          x.shape = (num_nodes, 8) -> probs.shape = (num_nodes, 4, 2)
    probs = x.view(-1, self.num_subspaces, self.labels_per_subspace)

    # Enumerate every ordered pair (i, j) of label columns inside a subspace.
    # Example with labels_per_subspace=2:
    #         idx_i = [0, 1, 0, 1]
    #         idx_j = [0, 0, 1, 1]
    #         -> pairs (0,0), (1,0), (0,1), (1,1)
    idx_i = torch.arange(self.labels_per_subspace, device=x.device).repeat(
        self.labels_per_subspace
    )
    idx_j = torch.arange(self.labels_per_subspace, device=x.device).repeat_interleave(
        self.labels_per_subspace
    )

    # Gather both sides of every pair; each gathered tensor has shape
    # (num_items, num_subspaces, labels_per_subspace ** 2).
    # Cosine similarity along dim=0 compares two label columns across all items, giving
    # shape (num_subspaces, labels_per_subspace ** 2), which is then folded back into one
    # (labels_per_subspace x labels_per_subspace) similarity matrix per subspace.
    # Two different labels in the same subspace should not describe the same pattern of items.
    similarity = F.cosine_similarity(
        probs[:, :, idx_i],
        probs[:, :, idx_j],
        dim=0,
        eps=self.eps,
    ).view(-1, self.labels_per_subspace, self.labels_per_subspace)

    # Softmax turns each similarity row into a distribution over labels; the diagonal holds
    # the probability that a label column "matches itself".
    # Minimizing -log(diagonal) makes each column most similar to itself and
    # less similar to the other columns of its subspace.
    assignment_probs = torch.softmax(similarity, dim=2)
    diagonal_probs = torch.diagonal(assignment_probs, dim1=1, dim2=2)
    return torch.mean(-torch.log(diagonal_probs + self.eps))

VilLainLossParts

Bases: TypedDict

Named VilLain self-supervised loss parts returned by VilLain.loss.

Parameters:

Name Type Description Default
local_loss

Sum of node and hyperedge local entropy losses over all training propagation steps.

required
global_loss

Sum of balance and distinctiveness losses over all training propagation steps.

required
Source code in hyperbench/nn/loss.py
class VilLainLossParts(TypedDict):
    """
    Typed mapping holding the separate VilLain self-supervised loss terms returned by ``VilLain.loss``.

    Args:
        local_loss: Node plus hyperedge local entropy losses, summed over all training propagation steps.
        global_loss: Balance plus distinctiveness losses, summed over all training propagation steps.
    """

    # Local entropy term accumulated over the training propagation steps.
    local_loss: Tensor
    # Global (balance + distinctiveness) term accumulated over the training propagation steps.
    global_loss: Tensor

CommonNeighborsScorer

Bases: NeighborScorer

Source code in hyperbench/nn/scorer.py
class CommonNeighborsScorer(NeighborScorer):
    __DEFAULT_SCORE = 0.0

    def __init__(self, aggregation: Literal["mean", "min", "sum"]) -> None:
        self.aggregation = aggregation

    def score(
        self,
        candidate_nodes: list[int],
        candidate_to_neighbors: dict[int, Neighborhood],
    ) -> float:
        """
        Compute the CN score for a single candidate hyperedge.

        Args:
            candidate_nodes: List of node IDs forming the candidate hyperedge.
                If less than 2 nodes are provided, the function returns a default score of ``0.0``.
            candidate_to_neighbors: Mapping from node IDs to their set of neighbors.

        Returns:
            The aggregated common neighbors score.
        """
        if len(candidate_nodes) < 2:
            return self.__DEFAULT_SCORE

        pairwise_counts: list[int] = []
        candidates_tensor = torch.tensor(candidate_nodes)

        # Example: candidate_nodes = [1, 2, 3]
        #          -> compute common neighbors for pairs (1, 2), (1, 3), and (2, 3)
        for u, v in torch.combinations(candidates_tensor, 2):
            neighbors_u: Neighborhood = candidate_to_neighbors.get(u.item(), set())
            neighbors_v: Neighborhood = candidate_to_neighbors.get(v.item(), set())

            common_neighbors = neighbors_u & neighbors_v
            pairwise_counts.append(len(common_neighbors))

        return self.__to_score_by_aggregation(pairwise_counts)

    def score_batch(
        self,
        hyperedge_index: Tensor,
        node_to_neighbors: dict[int, Neighborhood] | None = None,
    ) -> Tensor:
        """
        Score all hyperedges in a hyperedge index tensor.

        Args:
            hyperedge_index: Tensor of shape ``(2, |E|)``.
            node_to_neighbors: Optional precomputed node to neighborhood mapping. If None, it will be computed from ``hyperedge_index``.

        Returns:
            A 1-D tensor of shape ``(num_hyperedges,)`` with the CN score for each hyperedge.
        """
        if node_to_neighbors is None:
            node_to_neighbors = Hypergraph.from_hyperedge_index(hyperedge_index).neighbors_of_all()

        scores: list[float] = []
        hyperedge_index_wrapper = HyperedgeIndex(hyperedge_index)
        for hyperedge_id in range(hyperedge_index_wrapper.num_hyperedges):
            node_ids = hyperedge_index_wrapper.nodes_in(hyperedge_id)
            hyperedge_score = self.score(node_ids, node_to_neighbors)
            scores.append(hyperedge_score)

        return torch.tensor(scores, dtype=torch.float32, device=hyperedge_index.device)

    def __to_score_by_aggregation(self, pairwise_counts: list[int]) -> float:
        score = self.__DEFAULT_SCORE
        if len(pairwise_counts) < 1:
            return score

        match self.aggregation:
            case "mean":
                score = sum(pairwise_counts) / len(pairwise_counts)
            case "min":
                score = float(min(pairwise_counts))
            case "sum":
                score = float(sum(pairwise_counts))

        return score

score(candidate_nodes, candidate_to_neighbors)

Compute the CN score for a single candidate hyperedge.

Parameters:

Name Type Description Default
candidate_nodes list[int]

List of node IDs forming the candidate hyperedge. If fewer than 2 nodes are provided, the function returns a default score of 0.0.

required
candidate_to_neighbors dict[int, Neighborhood]

Mapping from node IDs to their set of neighbors.

required

Returns:

Type Description
float

The aggregated common neighbors score.

Source code in hyperbench/nn/scorer.py
def score(
    self,
    candidate_nodes: list[int],
    candidate_to_neighbors: dict[int, Neighborhood],
) -> float:
    """
    Compute the CN score for a single candidate hyperedge.

    Args:
        candidate_nodes: List of node IDs forming the candidate hyperedge.
            If fewer than 2 nodes are provided, the function returns a default score of ``0.0``.
        candidate_to_neighbors: Mapping from node IDs to their set of neighbors.
            Nodes missing from the mapping are treated as having no neighbors.

    Returns:
        The aggregated common neighbors score.
    """
    if len(candidate_nodes) < 2:
        return self.__DEFAULT_SCORE

    # Local import: pairs of plain Python ints do not need a tensor round-trip.
    from itertools import combinations

    # Example: candidate_nodes = [1, 2, 3]
    #          -> count common neighbors for pairs (1, 2), (1, 3), and (2, 3), in that order
    pairwise_counts: list[int] = [
        len(candidate_to_neighbors.get(u, set()) & candidate_to_neighbors.get(v, set()))
        for u, v in combinations(candidate_nodes, 2)
    ]

    return self.__to_score_by_aggregation(pairwise_counts)

score_batch(hyperedge_index, node_to_neighbors=None)

Score all hyperedges in a hyperedge index tensor.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Tensor of shape (2, |E|).

required
node_to_neighbors dict[int, Neighborhood] | None

Optional precomputed node to neighborhood mapping. If None, it will be computed from hyperedge_index.

None

Returns:

Type Description
Tensor

A 1-D tensor of shape (num_hyperedges,) with the CN score for each hyperedge.

Source code in hyperbench/nn/scorer.py
def score_batch(
    self,
    hyperedge_index: Tensor,
    node_to_neighbors: dict[int, Neighborhood] | None = None,
) -> Tensor:
    """
    Compute the CN score of every hyperedge described by a hyperedge index tensor.

    Args:
        hyperedge_index: Tensor of shape ``(2, |E|)``.
        node_to_neighbors: Optional precomputed node to neighborhood mapping.
            When it is None, the mapping is built from ``hyperedge_index``.

    Returns:
        A 1-D ``float32`` tensor of shape ``(num_hyperedges,)`` holding one CN score per hyperedge,
        placed on the same device as ``hyperedge_index``.
    """
    neighborhoods = node_to_neighbors
    if neighborhoods is None:
        neighborhoods = Hypergraph.from_hyperedge_index(hyperedge_index).neighbors_of_all()

    index = HyperedgeIndex(hyperedge_index)
    # Hyperedge ids are visited as 0 .. num_hyperedges - 1, so entry k belongs to hyperedge k.
    per_hyperedge_scores: list[float] = [
        self.score(index.nodes_in(edge_id), neighborhoods)
        for edge_id in range(index.num_hyperedges)
    ]

    return torch.tensor(
        per_hyperedge_scores,
        dtype=torch.float32,
        device=hyperedge_index.device,
    )