Models¶

`hyperbench.models` ¶

`CommonNeighbors` ¶

Bases: Module

Source code in hyperbench/models/common_neighbors.py

class CommonNeighbors(nn.Module):
    """
    Score hyperedges with a common-neighbors heuristic and return ``log1p`` of the scores.

    Args:
        aggregation: Aggregation mode handed to the default ``CommonNeighborsScorer``.
            It is only used when ``scorer`` is ``None``.
        scorer: Optional custom scorer. When given, it replaces the default scorer
            and ``aggregation`` is ignored.
    """

    def __init__(
        self,
        aggregation: Literal["mean", "min", "sum"],
        scorer: NeighborScorer | None = None,
    ) -> None:
        super().__init__()
        self.scorer = scorer if scorer is not None else CommonNeighborsScorer(aggregation)

    def forward(
        self,
        hyperedge_index: Tensor,
        node_to_neighbors: dict[int, Neighborhood] | None = None,
    ) -> Tensor:
        """
        Compute CN scores for all hyperedges in the batch.

        Args:
            hyperedge_index: Tensor containing the hyperedge indices.
            node_to_neighbors: Optional mapping from nodes to their neighborhoods.

        Returns:
            A 1-D tensor of shape (num_hyperedges,) with ``log1p``-transformed CN scores.
        """
        scores = self.scorer.score_batch(hyperedge_index, node_to_neighbors)
        # Out-of-place on purpose: writing the floating-point result back into
        # ``scores`` fails for integer-typed scores and would also mutate a
        # tensor that belongs to the scorer.
        return torch.log1p(scores)

`forward(hyperedge_index, node_to_neighbors=None)` ¶

Compute CN scores for all hyperedges in the batch.

Parameters:

Name	Type	Description	Default
`hyperedge_index`	`Tensor`	Tensor containing the hyperedge indices.	required
`node_to_neighbors`	`dict[int, Neighborhood] \| None`	Optional mapping from nodes to their neighborhoods.	`None`

Returns:

Type	Description
`Tensor`	A 1-D tensor of shape (num_hyperedges,) with CN scores.

Source code in hyperbench/models/common_neighbors.py

def forward(
    self,
    hyperedge_index: Tensor,
    node_to_neighbors: dict[int, Neighborhood] | None = None,
) -> Tensor:
    """
    Compute CN scores for all hyperedges in the batch.

    Args:
        hyperedge_index: Tensor containing the hyperedge indices.
        node_to_neighbors: Optional mapping from nodes to their neighborhoods.

    Returns:
        A 1-D tensor of shape (num_hyperedges,) with ``log1p``-transformed CN scores.
    """
    scores = self.scorer.score_batch(hyperedge_index, node_to_neighbors)
    # Out-of-place on purpose: writing the floating-point result back into
    # ``scores`` fails for integer-typed scores and would also mutate a
    # tensor that belongs to the scorer.
    return torch.log1p(scores)

`GCN` ¶

Bases: Module

A reusable multi-layer GCN stack built from torch_geometric.nn.GCNConv.

Parameters:

Name	Type	Description	Default
`in_channels`	`int`	Dimension of the input node embeddings to the GCN layers.	required
`out_channels`	`int`	Dimension of the output node embeddings from the GCN layers.	required
`hidden_channels`	`int \| None`	Dimension of the hidden node embeddings in the GCN layers. Defaults to `in_channels`.	`None`
`num_layers`	`int`	Number of GCN layers. Must be at least 1. Defaults to `2`.	`2`
`drop_rate`	`float`	Dropout rate applied after each GCN layer except the last one.	`0.0`
`bias`	`bool`	Whether to include a bias term in the GCN layers.	`True`
`activation_fn`	`ActivationFn \| None`	Activation function to use after each hidden layer. Defaults to `nn.ReLU`.	`None`
`activation_fn_kwargs`	`dict \| None`	Keyword arguments for the activation function. Defaults to empty dict.	`None`
`improved`	`bool`	Whether to use the improved version of `GCNConv`.	`False`
`add_self_loops`	`bool`	Whether to add self-loops to the input graph.	`True`
`normalize`	`bool`	Whether to symmetrically normalize the adjacency matrix in `GCNConv`.	`True`
`cached`	`bool`	Whether to cache the normalized adjacency matrix in `GCNConv`.	`False`

Source code in hyperbench/models/gcn.py

class GCN(nn.Module):
    """
    A reusable multi-layer GCN stack built from ``torch_geometric.nn.GCNConv``.

    Args:
        in_channels: Dimension of the input node embeddings to the GCN layers.
        out_channels: Dimension of the output node embeddings from the GCN layers.
        hidden_channels: Dimension of the hidden node embeddings in the GCN layers.
            Defaults to ``in_channels`` when ``None``.
        num_layers: Number of GCN layers. Must be at least 1. Defaults to ``2``.
        drop_rate: Dropout rate applied after each GCN layer except the last one.
        bias: Whether to include a bias term in the GCN layers.
        activation_fn: Activation function to use after each hidden layer. Defaults to ``nn.ReLU``.
        activation_fn_kwargs: Keyword arguments for the activation function. Defaults to empty dict.
        improved: Whether to use the improved version of ``GCNConv``.
        add_self_loops: Whether to add self-loops to the input graph.
        normalize: Whether to symmetrically normalize the adjacency matrix in ``GCNConv``.
        cached: Whether to cache the normalized adjacency matrix in ``GCNConv``.

    Raises:
        ValueError: If ``num_layers < 1``, or if more than one layer is requested
            and the resolved ``hidden_channels`` is not positive.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        hidden_channels: int | None = None,
        num_layers: int = 2,
        drop_rate: float = 0.0,
        bias: bool = True,
        activation_fn: ActivationFn | None = None,
        activation_fn_kwargs: dict | None = None,
        improved: bool = False,
        add_self_loops: bool = True,
        normalize: bool = True,
        cached: bool = False,
    ):
        super().__init__()
        activation_fn = activation_fn if activation_fn is not None else nn.ReLU
        activation_fn_kwargs = activation_fn_kwargs if activation_fn_kwargs is not None else {}

        self.dropout = nn.Dropout(drop_rate)
        self.activation = activation_fn(**activation_fn_kwargs)
        self.layers = self.__build_layers(
            in_channels=in_channels,
            out_channels=out_channels,
            hidden_channels=hidden_channels,
            num_layers=num_layers,
            bias=bias,
            improved=improved,
            add_self_loops=add_self_loops,
            normalize=normalize,
            cached=cached,
        )

    def forward(self, x: Tensor, edge_index: Tensor) -> Tensor:
        """
        Run the node features through every GCN layer in order.

        Activation and dropout are applied after every layer except the last one.

        Args:
            x: Input node features (presumably ``(num_nodes, in_channels)`` - confirm against callers).
            edge_index: Graph connectivity forwarded unchanged to each ``GCNConv`` layer.

        Returns:
            The output of the final GCN layer.
        """
        num_layers = len(self.layers)
        for idx, layer in enumerate(self.layers):
            x = layer(x, edge_index)

            is_not_last_layer = not is_layer(idx, num_layers - 1)
            if is_not_last_layer:
                x = self.activation(x)
                x = self.dropout(x)

        return x

    def __build_layers(
        self,
        in_channels: int,
        out_channels: int,
        hidden_channels: int | None,
        num_layers: int,
        bias: bool,
        improved: bool,
        add_self_loops: bool,
        normalize: bool,
        cached: bool,
    ) -> nn.ModuleList:
        """
        Build the ``GCNConv`` stack: ``in -> hidden -> ... -> hidden -> out``.

        A single-layer stack maps ``in_channels`` directly to ``out_channels``
        and never uses ``hidden_channels``.

        Raises:
            ValueError: If ``num_layers < 1``, or if ``num_layers > 1`` and the
                resolved ``hidden_channels`` is not positive.
        """
        if num_layers < 1:
            raise ValueError(f"Expected num_layers >= 1 for GCN, got {num_layers}.")

        # Honour the documented default: an omitted hidden size falls back to
        # the input size instead of an invalid 0 that would always be rejected.
        hidden_channels = hidden_channels if hidden_channels is not None else in_channels
        if num_layers > 1 and hidden_channels <= 0:
            raise ValueError(
                f"Expected positive hidden_channels for GCN with multiple layers, got {hidden_channels}."
            )

        common_kwargs: dict[str, bool] = {
            "bias": bias,
            "improved": improved,
            "add_self_loops": add_self_loops,
            "normalize": normalize,
            "cached": cached,
        }

        if num_layers == 1:
            return nn.ModuleList([GCNConv(in_channels, out_channels, **common_kwargs)])

        layers = [GCNConv(in_channels, hidden_channels, **common_kwargs)]
        layers.extend(
            GCNConv(hidden_channels, hidden_channels, **common_kwargs)
            for _ in range(num_layers - 2)
        )
        layers.append(GCNConv(hidden_channels, out_channels, **common_kwargs))

        return nn.ModuleList(layers)

`GCNConfig` ¶

Bases: TypedDict

Configuration for the GCN model.

Parameters:

Name	Description	Default
`in_channels`	Dimension of the input node embeddings to the GCN layers.	required
`out_channels`	Dimension of the output node embeddings from the GCN layers.	required
`hidden_channels`	Dimension of the hidden node embeddings in the GCN layers.	required
`num_layers`	Number of GCN layers. Must be at least 1. Defaults to `2`.	required
`drop_rate`	Dropout rate applied after each GCN layer (except the last one). Defaults to `0.0` (no dropout).	required
`activation_fn`	Activation function to use after each hidden layer. Defaults to `nn.ReLU`.	required
`activation_fn_kwargs`	Keyword arguments for the activation function. Defaults to empty dict.	required
`bias`	Whether to include a bias term in the GCN layers. Defaults to `True`.	required
`improved`	Whether to use the improved version of GCNConv. Defaults to `False`.	required
`add_self_loops`	Whether to add self-loops to the input graph. Defaults to `True`.	required
`normalize`	Whether to symmetrically normalize the adjacency matrix in GCNConv. Defaults to `True`.	required
`cached`	Whether to cache the normalized adjacency matrix in GCNConv. Only applicable if the graph structure does not change between epochs. Defaults to `False`.	required

Source code in hyperbench/models/gcn.py

class GCNConfig(TypedDict):
    """
    Configuration for the GCN model.

    Args:
        in_channels: Dimension of the input node embeddings to the GCN layers.
        out_channels: Dimension of the output node embeddings from the GCN layers.
        hidden_channels: Dimension of the hidden node embeddings in the GCN layers.
        num_layers: Number of GCN layers. Must be at least 1. Defaults to ``2``.
        drop_rate: Dropout rate applied after each GCN layer (except the last one). Defaults to ``0.0`` (no dropout).
        activation_fn: Activation function to use after each hidden layer. Defaults to ``nn.ReLU``.
        activation_fn_kwargs: Keyword arguments for the activation function. Defaults to empty dict.
        bias: Whether to include a bias term in the GCN layers. Defaults to ``True``.
        improved: Whether to use the improved version of GCNConv. Defaults to ``False``.
        add_self_loops: Whether to add self-loops to the input graph. Defaults to ``True``.
        normalize: Whether to symmetrically normalize the adjacency matrix in GCNConv. Defaults to ``True``.
        cached: Whether to cache the normalized adjacency matrix in GCNConv.
            Only applicable if the graph structure does not change between epochs. Defaults to ``False``.
    """

    in_channels: int
    out_channels: int
    hidden_channels: NotRequired[int]
    num_layers: NotRequired[int]
    drop_rate: NotRequired[float]
    bias: NotRequired[bool]
    activation_fn: NotRequired[ActivationFn]
    activation_fn_kwargs: NotRequired[dict]
    improved: NotRequired[bool]
    add_self_loops: NotRequired[bool]
    normalize: NotRequired[bool]
    cached: NotRequired[bool]

`HGNN` ¶

Bases: Module

HGNN performs spectral convolution directly on the hypergraph structure using the node-hyperedge incidence matrix, without any reduction to a pairwise graph. Unlike HyperGCN (which approximates each hyperedge by selecting representative pairwise edges via random projection), HGNN preserves all higher-order relationships by passing messages through the full incidence structure: nodes -> hyperedges -> nodes. - Proposed in Hypergraph Neural Networks <https://arxiv.org/pdf/1809.09401>_ paper (AAAI 2019). - Reference implementation: source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hgnn.html#HGNN>_.

Parameters:

Name	Type	Description	Default
`in_channels`	`int`	The number of input channels.	required
`hidden_channels`	`int`	The number of hidden channels.	required
`num_classes`	`int`	The number of output channels.	required
`bias`	`bool`	If set to `False`, the layer will not learn the bias parameter. Defaults to `True`.	`True`
`use_batch_normalization`	`bool`	If set to `True`, layers will use batch normalization. Defaults to `False`.	`False`
`drop_rate`	`float`	Dropout ratio. Defaults to `0.5`.	`0.5`

Source code in hyperbench/models/hgnn.py

class HGNN(nn.Module):
    """
    Two-layer HGNN: spectral convolution applied directly to the hypergraph through
    the node-hyperedge incidence matrix, with no reduction to a pairwise graph.
    In contrast to HyperGCN (which approximates every hyperedge with representative
    pairwise edges picked via random projection), HGNN keeps all higher-order
    relationships by propagating through the full incidence structure:
    nodes -> hyperedges -> nodes.
    - Proposed in `Hypergraph Neural Networks <https://arxiv.org/pdf/1809.09401>`_ paper (AAAI 2019).
    - Reference implementation: `source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hgnn.html#HGNN>`_.

    Args:
        in_channels: The number of input channels.
        hidden_channels: The number of hidden channels.
        num_classes: The number of output channels.
        bias: If set to ``False``, the layers will not learn the bias parameter. Defaults to ``True``.
        use_batch_normalization: If set to ``True``, layers will use batch normalization. Defaults to ``False``.
        drop_rate: Dropout ratio, passed to the first layer only. Defaults to ``0.5``.
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: int,
        num_classes: int,
        bias: bool = True,
        use_batch_normalization: bool = False,
        drop_rate: float = 0.5,
    ):
        super().__init__()

        # Options that both convolutions have in common.
        shared_options = {
            "bias": bias,
            "use_batch_normalization": use_batch_normalization,
        }
        hidden_conv = HGNNConv(
            in_channels=in_channels,
            out_channels=hidden_channels,
            drop_rate=drop_rate,
            **shared_options,
        )
        # The output convolution is flagged as last and gets no explicit drop rate.
        output_conv = HGNNConv(
            in_channels=hidden_channels,
            out_channels=num_classes,
            is_last=True,
            **shared_options,
        )
        self.layers = nn.ModuleList([hidden_conv, output_conv])

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Run the two stacked ``HGNNConv`` layers to obtain node embeddings.

        The first layer applies ReLU + dropout and maps ``in_channels -> hidden_channels``.
        The second layer is the output layer (no activation/dropout) and maps
        ``hidden_channels -> num_classes``.

        Args:
            x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
            hyperedge_index: Hyperedge incidence in COO format. Size ``(2, num_incidences)``,
                where row 0 contains node IDs and row 1 contains hyperedge IDs.

        Returns:
            The output node feature matrix. Size ``(num_nodes, num_classes)``.
        """
        embeddings = x
        for conv in self.layers:
            embeddings = conv(embeddings, hyperedge_index)
        return embeddings

`forward(x, hyperedge_index)` ¶

Apply two stacked HGNNConv layers to produce node embeddings.

The first layer applies ReLU + dropout and maps in_channels -> hidden_channels. The second layer is the output layer (no activation/dropout) and maps hidden_channels -> num_classes.

Parameters:

Name	Type	Description	Default
`x`	`Tensor`	Input node feature matrix. Size `(num_nodes, in_channels)`.	required
`hyperedge_index`	`Tensor`	Hyperedge incidence in COO format. Size `(2, num_incidences)`, where row 0 contains node IDs and row 1 contains hyperedge IDs.	required

Returns:

Type	Description
`Tensor`	The output node feature matrix. Size `(num_nodes, num_classes)`.

Source code in hyperbench/models/hgnn.py

def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Run the two stacked ``HGNNConv`` layers to obtain node embeddings.

    The first layer applies ReLU + dropout and maps ``in_channels -> hidden_channels``.
    The second layer is the output layer (no activation/dropout) and maps
    ``hidden_channels -> num_classes``.

    Args:
        x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
        hyperedge_index: Hyperedge incidence in COO format. Size ``(2, num_incidences)``,
            where row 0 contains node IDs and row 1 contains hyperedge IDs.

    Returns:
        The output node feature matrix. Size ``(num_nodes, num_classes)``.
    """
    embeddings = x
    for conv in self.layers:
        embeddings = conv(embeddings, hyperedge_index)
    return embeddings

`HNHN` ¶

Bases: Module

HNHN performs incidence-based hypergraph convolution with explicit hyperedge embeddings between the node -> hyperedge -> node propagation steps. - Proposed in HNHN: Hypergraph Networks with Hyperedge Neurons <https://arxiv.org/abs/2006.12278>_ paper. - Reference implementation: source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hnhn.html#HNHN>_.

Parameters:

Name	Type	Description	Default
`in_channels`	`int`	The number of input channels.	required
`hidden_channels`	`int`	The number of hidden channels.	required
`num_classes`	`int`	The number of output channels.	required
`bias`	`bool`	If set to `False`, the layer will not learn the bias parameter. Defaults to `True`.	`True`
`use_batch_normalization`	`bool`	If set to `True`, layers will use batch normalization. Defaults to `False`.	`False`
`drop_rate`	`float`	Dropout ratio. Defaults to `0.5`.	`0.5`

Source code in hyperbench/models/hnhn.py

class HNHN(nn.Module):
    """
    Two-layer HNHN: incidence-based hypergraph convolution that keeps explicit
    hyperedge embeddings between the node -> hyperedge -> node propagation steps.
    - Proposed in `HNHN: Hypergraph Networks with Hyperedge Neurons <https://arxiv.org/abs/2006.12278>`_ paper.
    - Reference implementation: `source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hnhn.html#HNHN>`_.

    Args:
        in_channels: The number of input channels.
        hidden_channels: The number of hidden channels.
        num_classes: The number of output channels.
        bias: If set to ``False``, the layers will not learn the bias parameter. Defaults to ``True``.
        use_batch_normalization: If set to ``True``, layers will use batch normalization. Defaults to ``False``.
        drop_rate: Dropout ratio, passed to the first layer only. Defaults to ``0.5``.
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: int,
        num_classes: int,
        bias: bool = True,
        use_batch_normalization: bool = False,
        drop_rate: float = 0.5,
    ):
        super().__init__()

        # Options that both convolutions have in common.
        shared_options = {
            "bias": bias,
            "use_batch_normalization": use_batch_normalization,
        }
        hidden_conv = HNHNConv(
            in_channels=in_channels,
            out_channels=hidden_channels,
            drop_rate=drop_rate,
            **shared_options,
        )
        # The output convolution is flagged as last and gets no explicit drop rate.
        output_conv = HNHNConv(
            in_channels=hidden_channels,
            out_channels=num_classes,
            is_last=True,
            **shared_options,
        )
        self.layers = nn.ModuleList([hidden_conv, output_conv])

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Run the two stacked ``HNHNConv`` layers to obtain node embeddings.

        Args:
            x: Input node feature matrix of size ``(num_nodes, in_channels)``.
            hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``.

        Returns:
            The output node feature matrix of size ``(num_nodes, num_classes)``.
        """
        embeddings = x
        for conv in self.layers:
            embeddings = conv(embeddings, hyperedge_index)
        return embeddings

`forward(x, hyperedge_index)` ¶

Apply two stacked HNHNConv layers to produce node embeddings.

Parameters:

Name	Type	Description	Default
`x`	`Tensor`	Input node feature matrix of size `(num_nodes, in_channels)`.	required
`hyperedge_index`	`Tensor`	Hyperedge incidence in COO format of size `(2, num_incidences)`.	required

Returns:

Type	Description
`Tensor`	The output node feature matrix of size `(num_nodes, num_classes)`.

Source code in hyperbench/models/hnhn.py

def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Run the two stacked ``HNHNConv`` layers to obtain node embeddings.

    Args:
        x: Input node feature matrix of size ``(num_nodes, in_channels)``.
        hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``.

    Returns:
        The output node feature matrix of size ``(num_nodes, num_classes)``.
    """
    embeddings = x
    for conv in self.layers:
        embeddings = conv(embeddings, hyperedge_index)
    return embeddings

`HGNNP` ¶

Bases: Module

HGNN+ performs hypergraph convolution with two-stage mean aggregation using the incidence structure directly: nodes -> hyperedges -> nodes. - Proposed in HGNN+: General Hypergraph Neural Networks <https://ieeexplore.ieee.org/document/9795251>_ paper (IEEE T-PAMI 2022). - Reference implementation: source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hgnnp.html#HGNNP>_.

Parameters:

Name	Type	Description	Default
`in_channels`	`int`	The number of input channels.	required
`hidden_channels`	`int`	The number of hidden channels.	required
`num_classes`	`int`	The number of output channels.	required
`bias`	`bool`	If set to `False`, the layer will not learn the bias parameter. Defaults to `True`.	`True`
`use_batch_normalization`	`bool`	If set to `True`, layers will use batch normalization. Defaults to `False`.	`False`
`drop_rate`	`float`	Dropout ratio. Defaults to `0.5`.	`0.5`

Source code in hyperbench/models/hgnnp.py

class HGNNP(nn.Module):
    """
    Two-layer HGNN+: hypergraph convolution with two-stage mean aggregation that
    works directly on the incidence structure: nodes -> hyperedges -> nodes.
    - Proposed in `HGNN+: General Hypergraph Neural Networks <https://ieeexplore.ieee.org/document/9795251>`_ paper (IEEE T-PAMI 2022).
    - Reference implementation: `source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hgnnp.html#HGNNP>`_.

    Args:
        in_channels: The number of input channels.
        hidden_channels: The number of hidden channels.
        num_classes: The number of output channels.
        bias: If set to ``False``, the layers will not learn the bias parameter. Defaults to ``True``.
        use_batch_normalization: If set to ``True``, layers will use batch normalization. Defaults to ``False``.
        drop_rate: Dropout ratio, passed to the first layer only. Defaults to ``0.5``.
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: int,
        num_classes: int,
        bias: bool = True,
        use_batch_normalization: bool = False,
        drop_rate: float = 0.5,
    ):
        super().__init__()

        # Options that both convolutions have in common.
        shared_options = {
            "bias": bias,
            "use_batch_normalization": use_batch_normalization,
        }
        hidden_conv = HGNNPConv(
            in_channels=in_channels,
            out_channels=hidden_channels,
            drop_rate=drop_rate,
            **shared_options,
        )
        # The output convolution is flagged as last and gets no explicit drop rate.
        output_conv = HGNNPConv(
            in_channels=hidden_channels,
            out_channels=num_classes,
            is_last=True,
            **shared_options,
        )
        self.layers = nn.ModuleList([hidden_conv, output_conv])

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Run the two stacked ``HGNNPConv`` layers to obtain node embeddings.

        Args:
            x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
            hyperedge_index: Hyperedge incidence in COO format. Size ``(2, num_incidences)``,
                where row 0 contains node IDs and row 1 contains hyperedge IDs.

        Returns:
            The output node feature matrix. Size ``(num_nodes, num_classes)``.
        """
        embeddings = x
        for conv in self.layers:
            embeddings = conv(embeddings, hyperedge_index)
        return embeddings

`forward(x, hyperedge_index)` ¶

Apply two stacked HGNNPConv layers to produce node embeddings.

Parameters:

Name	Type	Description	Default
`x`	`Tensor`	Input node feature matrix. Size `(num_nodes, in_channels)`.	required
`hyperedge_index`	`Tensor`	Hyperedge incidence in COO format. Size `(2, num_incidences)`, where row 0 contains node IDs and row 1 contains hyperedge IDs.	required

Returns:

Type	Description
`Tensor`	The output node feature matrix. Size `(num_nodes, num_classes)`.

Source code in hyperbench/models/hgnnp.py

def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Run the two stacked ``HGNNPConv`` layers to obtain node embeddings.

    Args:
        x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
        hyperedge_index: Hyperedge incidence in COO format. Size ``(2, num_incidences)``,
            where row 0 contains node IDs and row 1 contains hyperedge IDs.

    Returns:
        The output node feature matrix. Size ``(num_nodes, num_classes)``.
    """
    embeddings = x
    for conv in self.layers:
        embeddings = conv(embeddings, hyperedge_index)
    return embeddings

`HyperGCN` ¶

Bases: Module

HyperGCN approximates each hyperedge of the hypergraph by a set of pairwise edges connecting the vertices of the hyperedge and treats the learning problem as a graph learning problem on the approximation. - Proposed in HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs <https://dl.acm.org/doi/10.5555/3454287.3454422>_ paper (NeurIPS 2019). - Code of the paper: source <https://github.com/malllabiisc/HyperGCN>. - Reference implementation: source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hypergcn.html#HyperGCN>.

Parameters:

Name	Type	Description	Default
`in_channels`	`int`	The number of input channels.	required
`hidden_channels`	`int`	The number of hidden channels.	required
`num_classes`	`int`	The number of classes of the classification task, as HyperGCN is a node classification model.	required
`bias`	`bool`	If set to `False`, the layer will not learn the bias parameter. Defaults to `True`.	`True`
`use_batch_normalization`	`bool`	If set to `True`, layers will use batch normalization. Defaults to `False`.	`False`
`drop_rate`	`float`	Dropout ratio. Defaults to `0.5`.	`0.5`
`use_mediator`	`bool`	Whether to use mediator to transform the hyperedges to edges in the graph. Defaults to `False`.	`False`
`fast`	`bool`	If set to `True`, the transformed graph structure will be computed once from the input hypergraph and vertex features, and cached for future use. Defaults to `True`.	`True`

Source code in hyperbench/models/hypergcn.py

class HyperGCN(nn.Module):
    """
    HyperGCN approximates every hyperedge of the hypergraph with a set of pairwise edges
    between the hyperedge's vertices, then treats learning as a graph learning problem on
    that approximation.
    - Proposed in `HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs <https://dl.acm.org/doi/10.5555/3454287.3454422>`_ paper (NeurIPS 2019).
    - Code of the paper: `source <https://github.com/malllabiisc/HyperGCN>`_.
    - Reference implementation: `source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hypergcn.html#HyperGCN>`_.

    Args:
        in_channels: The number of input channels.
        hidden_channels: The number of hidden channels.
        num_classes: The number of classes of the classification task, as HyperGCN is a node classification model.
        bias: If set to ``False``, the layers will not learn the bias parameter. Defaults to ``True``.
        use_batch_normalization: If set to ``True``, layers will use batch normalization. Defaults to ``False``.
        drop_rate: Dropout ratio, passed to the first layer only. Defaults to ``0.5``.
        use_mediator: Whether to use mediator to transform the hyperedges to edges in the graph. Defaults to ``False``.
        fast: If set to ``True``, the transformed graph structure will be computed once from the input hypergraph
            and vertex features, and cached for future use. Defaults to ``True``.
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: int,
        num_classes: int,
        bias: bool = True,
        use_batch_normalization: bool = False,
        drop_rate: float = 0.5,
        use_mediator: bool = False,
        fast: bool = True,
    ):
        super().__init__()
        self.fast = fast
        self.use_mediator = use_mediator
        # Filled lazily by ``forward`` when ``fast`` is enabled.
        self.cached_gcn_laplacian_matrix: Tensor | None = None

        # Options that both convolutions have in common.
        shared_options = {
            "bias": bias,
            "use_batch_normalization": use_batch_normalization,
            "use_mediator": use_mediator,
        }
        hidden_conv = HyperGCNConv(
            in_channels=in_channels,
            out_channels=hidden_channels,
            drop_rate=drop_rate,
            **shared_options,
        )
        # The output convolution is flagged as last and gets no explicit drop rate.
        output_conv = HyperGCNConv(
            in_channels=hidden_channels,
            out_channels=num_classes,
            is_last=True,
            **shared_options,
        )
        self.layers = nn.ModuleList([hidden_conv, output_conv])

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Run both ``HyperGCNConv`` layers, reusing a cached GCN Laplacian when ``fast`` is set.

        Args:
            x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
            hyperedge_index: The hyperedge indices of the hypergraph. Size ``(2, num_hyperedges)``.

        Returns:
            The output node feature matrix. Size ``(num_nodes, num_classes)``.
        """
        if self.fast:
            laplacian = self.cached_gcn_laplacian_matrix

            # Rebuild when nothing is cached yet, or when the number of nodes differs
            # from the cached matrix. That can happen, for example, after adding new
            # negative samples or with validation/test sets that have different node features.
            # NOTE(review): the cache is keyed on node count only; a different
            # hyperedge_index with the same node count reuses the cached matrix.
            if laplacian is None or laplacian.size(0) != x.size(0):
                reducer = HyperedgeIndex(hyperedge_index)
                edge_index, edge_weights = reducer.reduce_to_edge_index_on_random_direction(
                    x=x,
                    with_mediators=self.use_mediator,
                    return_weights=True,
                )
                graph = EdgeIndex(edge_index=edge_index, edge_weights=edge_weights)
                laplacian = graph.get_sparse_normalized_gcn_laplacian(num_nodes=x.size(0))
                self.cached_gcn_laplacian_matrix = laplacian

            for conv in self.layers:
                x = conv(x, hyperedge_index, gcn_laplacian_matrix=laplacian)
            return x

        # Slow path: each layer receives only the hyperedge index.
        for conv in self.layers:
            x = conv(x, hyperedge_index)
        return x

`forward(x, hyperedge_index)` ¶

The forward function.

Parameters:

Name	Type	Description	Default
`x`	`Tensor`	Input node feature matrix. Size `(num_nodes, in_channels)`.	required
`hyperedge_index`	`Tensor`	The hyperedge indices of the hypergraph. Size `(2, num_hyperedges)`.	required

Returns:

Type	Description
`Tensor`	The output node feature matrix. Size `(num_nodes, num_classes)`.

Source code in hyperbench/models/hypergcn.py

def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Run the node features through every layer of the model.

    When ``self.fast`` is false, each layer is simply called with ``(x, hyperedge_index)``.
    When ``self.fast`` is true, a GCN Laplacian is computed once, stored in
    ``self.cached_gcn_laplacian_matrix`` and handed to every layer; it is recomputed only
    when no matrix is cached yet or when its first dimension no longer matches ``x.size(0)``.

    Args:
        x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
        hyperedge_index: The hyperedge indices of the hypergraph. Size ``(2, num_hyperedges)``.

    Returns:
        The output node feature matrix. Size ``(num_nodes, num_classes)``.
    """
    if self.fast:
        # A cached Laplacian is only reusable while the number of nodes is unchanged.
        # The node count can change, for example, when new negative samples are added
        # or when validation/test sets come with different node features.
        cached_laplacian = self.cached_gcn_laplacian_matrix
        can_reuse_cache = cached_laplacian is not None and cached_laplacian.size(0) == x.size(0)

        if not can_reuse_cache:
            # Reduce the hypergraph to a weighted graph, then build and cache its Laplacian
            reducer = HyperedgeIndex(hyperedge_index)
            edge_index, edge_weights = reducer.reduce_to_edge_index_on_random_direction(
                x=x,
                with_mediators=self.use_mediator,
                return_weights=True,
            )
            weighted_graph = EdgeIndex(edge_index=edge_index, edge_weights=edge_weights)
            self.cached_gcn_laplacian_matrix = (
                weighted_graph.get_sparse_normalized_gcn_laplacian(num_nodes=x.size(0))
            )

        laplacian = self.cached_gcn_laplacian_matrix
        for layer in self.layers:
            x = layer(x, hyperedge_index, gcn_laplacian_matrix=laplacian)
        return x

    # Slow path: no Laplacian is passed, each layer receives only the features and the index
    for layer in self.layers:
        x = layer(x, hyperedge_index)
    return x

`MLP` ¶

Bases: Module

A simple multi-layer perceptron (MLP) with configurable number of layers, hidden channels, activation functions, normalization, and dropout.

Examples:

>>> mlp = MLP(in_channels=16, out_channels=1, hidden_channels=32, num_layers=3)
>>> x = torch.randn(10, 16)  # 10 samples, 16 features
>>> output = mlp(x)
>>> output.shape
... torch.Size([10, 1])

With custom activation, normalization, and dropout:

>>> mlp = MLP(
...     in_channels=16,
...     out_channels=1,
...     hidden_channels=32,
...     num_layers=3,
...     activation_fn=nn.ReLU,                   # nn.LeakyReLU, nn.Tanh, etc.
...     activation_fn_kwargs={"inplace": True},  # must be accepted by activation_fn (nn.Tanh takes no kwargs)
...     normalization_fn=nn.BatchNorm1d,         # nn.LayerNorm, etc.
...     normalization_fn_kwargs={"eps": 1e-5},
...     drop_rate=0.5,
... )
>>> x = torch.randn(10, 16)
>>> output = mlp(x)
>>> output.shape
... torch.Size([10, 1])

Parameters:

Name	Type	Description	Default
`in_channels`	`int`	Number of input features.	required
`out_channels`	`int`	Number of output features.	required
`hidden_channels`	`int \| None`	Number of hidden units in each hidden layer. Required if num_layers > 1.	`None`
`num_layers`	`int`	Total number of layers (including output layer). Must be at least 1. Defaults to 1.	`1`
`activation_fn`	`ActivationFn \| None`	Activation function to use after each hidden layer. Defaults to `nn.ReLU`.	`None`
`activation_fn_kwargs`	`dict \| None`	Keyword arguments for the activation function. Defaults to empty dict.	`None`
`normalization_fn`	`NormalizationFn \| None`	Normalization function to use after each hidden layer (before activation). If `None`, no normalization is applied. Defaults to `None`.	`None`
`normalization_fn_kwargs`	`dict \| None`	Keyword arguments for the normalization function. Defaults to empty dict.	`None`
`bias`	`bool`	Whether to include bias terms in the linear layers. Defaults to `True`.	`True`
`drop_rate`	`float`	Dropout rate to apply after each hidden layer (after activation). If 0.0, no dropout is applied. Defaults to 0.0.	`0.0`

Source code in hyperbench/models/mlp.py

class MLP(nn.Module):
    """
    Feed-forward network made of ``num_layers`` linear layers chained in an ``nn.Sequential``.

    Every layer except the output layer is followed, in this order, by an optional normalization module,
    an activation module, and an optional dropout module. The output layer is a bare linear projection.

    Examples:
        >>> mlp = MLP(in_channels=16, out_channels=1, hidden_channels=32, num_layers=3)
        >>> x = torch.randn(10, 16)  # 10 samples, 16 features
        >>> output = mlp(x)
        >>> output.shape
        ... torch.Size([10, 1])

        With custom activation, normalization, and dropout:
        >>> mlp = MLP(
        ...     in_channels=16,
        ...     out_channels=1,
        ...     hidden_channels=32,
        ...     num_layers=3,
        ...     activation_fn=nn.ReLU,                   # nn.LeakyReLU, nn.Tanh, etc.
        ...     activation_fn_kwargs={"inplace": True},  # must be accepted by activation_fn
        ...     normalization_fn=nn.BatchNorm1d,         # nn.LayerNorm, etc.
        ...     normalization_fn_kwargs={"eps": 1e-5},
        ...     drop_rate=0.5,
        ... )
        >>> x = torch.randn(10, 16)
        >>> output = mlp(x)
        >>> output.shape
        ... torch.Size([10, 1])

    Args:
        in_channels: Number of input features.
        out_channels: Number of output features.
        hidden_channels: Number of hidden units in each hidden layer. Required if num_layers > 1.
        num_layers: Total number of layers (including output layer). Must be at least 1. Defaults to 1.
        activation_fn: Activation function to use after each hidden layer. Defaults to ``nn.ReLU``.
        activation_fn_kwargs: Keyword arguments forwarded to ``activation_fn``. Defaults to empty dict.
        normalization_fn: Normalization function to use after each hidden layer (before activation).
            It is called with ``hidden_channels`` as first argument. If ``None``, no normalization is applied.
            Defaults to ``None``.
        normalization_fn_kwargs: Keyword arguments forwarded to ``normalization_fn``. Defaults to empty dict.
        bias: Whether to include bias terms in the linear layers. Defaults to ``True``.
        drop_rate: Dropout rate to apply after each hidden layer (after activation).
            Dropout is only added when the rate is greater than 0.0. Defaults to 0.0.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1, or if ``num_layers`` is greater than 1
            and ``hidden_channels`` is ``None``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        hidden_channels: int | None = None,
        num_layers: int = 1,
        activation_fn: ActivationFn | None = None,
        activation_fn_kwargs: dict | None = None,
        normalization_fn: NormalizationFn | None = None,
        normalization_fn_kwargs: dict | None = None,
        bias: bool = True,
        drop_rate: float = 0.0,
    ):
        super().__init__()
        self.__validate_num_layers(num_layers, hidden_channels)

        # Replace every argument left as None with its documented default
        if hidden_channels is None:
            # The validation above guarantees this only happens for a single-layer MLP
            hidden_channels = 0
        if activation_fn is None:
            activation_fn = nn.ReLU
        if activation_fn_kwargs is None:
            activation_fn_kwargs = {}
        if normalization_fn_kwargs is None:
            normalization_fn_kwargs = {}

        last_layer_idx = num_layers - 1
        modules = []
        for layer_idx in range(num_layers):
            is_output_layer = is_layer(layer_idx, last_layer_idx)

            num_inputs = in_channels if is_input_layer(layer_idx) else hidden_channels
            num_outputs = out_channels if is_output_layer else hidden_channels
            modules.append(nn.Linear(in_features=num_inputs, out_features=num_outputs, bias=bias))

            # The output layer stays a plain linear projection
            if is_output_layer:
                continue

            # Hidden layer tail: normalization -> activation -> dropout
            if normalization_fn is not None:
                modules.append(normalization_fn(hidden_channels, **normalization_fn_kwargs))
            modules.append(activation_fn(**activation_fn_kwargs))
            if drop_rate > 0.0:
                modules.append(nn.Dropout(drop_rate))

        self.layers = nn.Sequential(*modules)

    def forward(self, x) -> Tensor:
        """Apply the stacked layers to ``x`` and return the result."""
        output = self.layers(x)
        return output

    def __validate_num_layers(self, num_layers: int, hidden_channels: int | None) -> None:
        """Raise ``ValueError`` when the layer count and hidden size cannot describe a valid MLP."""
        if num_layers < 1:
            raise ValueError("At least one layer is required for MLP.")

        has_hidden_layers = num_layers > 1
        if has_hidden_layers and hidden_channels is None:
            raise ValueError("hidden_channels must be specified for MLP with more than 1 layer.")

`SLP` ¶

Bases: MLP

A single-layer perceptron (SLP) which is a special case of MLP with exactly one layer and no hidden units.

Examples:

>>> slp = SLP(in_channels=16, out_channels=1)
>>> x = torch.randn(10, 16)  # 10 samples, 16 features
>>> output = slp(x)
>>> output.shape
... torch.Size([10, 1])

Parameters:

Name	Type	Description	Default
`in_channels`	`int`	Number of input features.	required
`out_channels`	`int`	Number of output features.	required

Source code in hyperbench/models/mlp.py

class SLP(MLP):
    """
    Single-layer perceptron (SLP): an ``MLP`` fixed to exactly one layer, hence without hidden units.

    Examples:
        >>> slp = SLP(in_channels=16, out_channels=1)
        >>> x = torch.randn(10, 16)  # 10 samples, 16 features
        >>> output = slp(x)
        >>> output.shape
        ... torch.Size([10, 1])

    Args:
        in_channels: Number of input features.
        out_channels: Number of output features.
    """

    def __init__(self, in_channels: int, out_channels: int):
        # All remaining MLP options keep their defaults; only the depth is pinned to 1
        super().__init__(in_channels, out_channels, num_layers=1)

`NHP` ¶

Bases: Module

Neural Hyperlink Predictor (NHP) for undirected hyperedge link prediction.

- Proposed in the paper [NHP: Neural Hypergraph Link Prediction](https://dl.acm.org/doi/10.1145/3340531.3411870) (CIKM 2020).
- Reference implementation: [source](https://github.com/cyixiao/NHP-reproduce/).

NHP scores each candidate hyperedge by building candidate-specific node embeddings. A node that appears in multiple candidate hyperedges can receive a different incidence embedding in each one, because its update depends on the other nodes in that candidate hyperedge.

Examples:

>>> x = torch.tensor([
...     [1., 0.],  # node 0
...     [0., 1.],  # node 1
...     [1., 1.],  # node 2
...     [1., 0.],  # node 3
... ])
>>> hyperedge_index = torch.tensor([
...     [0, 1, 1, 2, 3],  # node IDs
...     [0, 0, 1, 1, 1],  # hyperedge IDs
... ])
>>> # hyperedge 0 = {node 0, node 1}
>>> # hyperedge 1 = {node 1, node 2, node 3}
>>> model = NHP(in_channels=2, hidden_channels=8, aggregation="maxmin")
>>> scores = model(x, hyperedge_index)
>>> scores.shape
... torch.Size([2])

Parameters:

Name	Type	Description	Default
`in_channels`	`int`	Number of input features per node.	required
`hidden_channels`	`int`	Number of hidden units in the node embeddings.	required
`activation_fn`	`ActivationFn \| None`	Activation function to use after the linear transformations. Defaults to `nn.ReLU`.	`None`
`activation_fn_kwargs`	`dict \| None`	Keyword arguments for the activation function. Defaults to empty dict.	`None`
`aggregation`	`Literal['mean', 'maxmin']`	Method to aggregate the incidence embeddings into a hyperedge embedding. Must be either "maxmin" or "mean". Defaults to "maxmin".	`'maxmin'`
`bias`	`bool`	Whether to include bias terms in the linear layers. Defaults to `True`.	`True`

Source code in hyperbench/models/nhp.py

class NHP(nn.Module):
    """
    Neural Hyperlink Predictor (NHP) for undirected hyperedge link prediction.

    - Proposed in `NHP: Neural Hypergraph Link Prediction <https://dl.acm.org/doi/10.1145/3340531.3411870>`_ paper (CIKM 2020).
    - Reference implementation: `source <https://github.com/cyixiao/NHP-reproduce/>`_.

    NHP scores each candidate hyperedge by building candidate-specific node embeddings.
    A node that appears in multiple candidate hyperedges can receive a different incidence embedding in each one,
    because its update depends on the other nodes in that candidate hyperedge.

    Examples:
        >>> x = torch.tensor([
        ...     [1., 0.],  # node 0
        ...     [0., 1.],  # node 1
        ...     [1., 1.],  # node 2
        ...     [1., 0.],  # node 3
        ... ])
        >>> hyperedge_index = torch.tensor([
        ...     [0, 1, 1, 2, 3],  # node IDs
        ...     [0, 0, 1, 1, 1],  # hyperedge IDs
        ... ])
        >>> # hyperedge 0 = {node 0, node 1}
        >>> # hyperedge 1 = {node 1, node 2, node 3}
        >>> model = NHP(in_channels=2, hidden_channels=8, aggregation="maxmin")
        >>> scores = model(x, hyperedge_index)
        >>> scores.shape
        ... torch.Size([2])

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Number of hidden units in the node embeddings.
        activation_fn: Activation function to use after the linear transformations. Defaults to ``nn.ReLU``.
        activation_fn_kwargs: Keyword arguments for the activation function. Defaults to empty dict.
        aggregation: Method to aggregate the incidence embeddings into a hyperedge embedding. Must be either "maxmin" or "mean". Defaults to "maxmin".
        bias: Whether to include bias terms in the linear layers. Defaults to ``True``.

    Raises:
        ValueError: If ``aggregation`` is neither "maxmin" nor "mean".
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: int,
        activation_fn: ActivationFn | None = None,
        activation_fn_kwargs: dict | None = None,
        aggregation: Literal["mean", "maxmin"] = "maxmin",
        bias: bool = True,
    ):
        super().__init__()

        # Fail fast on unsupported values: forward() treats anything that is not "maxmin"
        # as mean pooling, so a typo would otherwise silently change the model.
        if aggregation not in ("maxmin", "mean"):
            raise ValueError(
                f"aggregation must be either 'maxmin' or 'mean', got {aggregation!r}."
            )

        activation_fn = activation_fn if activation_fn is not None else nn.ReLU
        activation_fn_kwargs = activation_fn_kwargs if activation_fn_kwargs is not None else {}

        self.aggregation = aggregation

        self.self_loop = nn.Linear(in_channels, hidden_channels, bias=bias)
        # GCN message passing is implemented through neighbor sum computation,
        # so one projection is enough for the hyperedge-aware term
        self.hyperedge_aware = nn.Linear(in_channels, hidden_channels, bias=bias)
        self.activation_fn = activation_fn(**activation_fn_kwargs)

        self.hyperedge_score = nn.Linear(hidden_channels, 1, bias=bias)

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Score each candidate hyperedge.

        Args:
            x: Node feature matrix of shape ``(num_nodes, in_channels)``.
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
                Row 0 holds node IDs and row 1 holds hyperedge IDs.

        Returns:
            Scores of shape ``(num_hyperedges,)``. An empty tensor of shape ``(0,)`` is returned
            when ``hyperedge_index`` contains no elements.
        """
        if hyperedge_index.numel() == 0:
            return x.new_empty((0,))

        # Example: hyperedge_index = [[0, 1, 1, 2, 3],  == node_ids
        #                             [0, 0, 1, 1, 1]]  == hyperedge_ids
        node_ids = hyperedge_index[0]
        hyperedge_ids = hyperedge_index[1]

        # Gather the node features for each incidence
        # Example: x = [[1, 0],  # node 0
        #               [0, 1],  # node 1
        #               [1, 1],  # node 2
        #               [1, 0]]  # node 3
        #          node_ids = [0, 1, 1, 2, 3]
        #          -> incidence_node_features = [[1, 0],  # node 0 in hyperedge 0
        #                                        [0, 1],  # node 1 in hyperedge 0
        #                                        [0, 1],  # node 1 in hyperedge 1
        #                                        [1, 1],  # node 2 in hyperedge 1
        #                                        [1, 0]]  # node 3 in hyperedge 1
        #             shape: (num_incidences, in_channels)
        incidence_node_features = x[node_ids]

        # Do one local message-passing step: sum the original node features per hyperedge
        # to get hyperedge features that are aware of all nodes in the candidate hyperedge.
        # Example: hyperedge 0 contains nodes (0, 1)    -> [1, 0] + [0, 1] = [1, 1]
        #          hyperedge 1 contains nodes (1, 2, 3) -> [0, 1] + [1, 1] + [1, 0] = [2, 2]
        #          -> hyperedge_features = [[1, 1],  # sum for hyperedge 0
        #                                   [2, 2]]  # sum for hyperedge 1
        #             shape: (num_hyperedges, in_channels)
        hyperedge_features = HyperedgeAggregator(
            hyperedge_index=hyperedge_index,
            node_embeddings=x,
        ).pool("sum")

        # Broadcast hyperedge features back to each of their incidences,
        # and remove the current node feature to give to each incidence
        # the features of its neighboring nodes in the candidate hyperedge.
        # Example: hyperedge_features[hyperedge_ids] = [[1, 1],  # hyperedge 0 for node 0
        #                                               [1, 1],  # hyperedge 0 for node 1
        #                                               [2, 2],  # hyperedge 1 for node 1
        #                                               [2, 2],  # hyperedge 1 for node 2
        #                                               [2, 2]]  # hyperedge 1 for node 3
        #                                              shape: (num_incidences, in_channels)
        #          -> neighbor_features_per_incidence = [[0, 1],  # node 0 sees node 1
        #                                                [1, 0],  # node 1 sees node 0
        #                                                [2, 1],  # node 1 sees node 2 and node 3
        #                                                [1, 1],  # node 2 sees node 1 and node 3
        #                                                [1, 2]]  # node 3 sees node 1 and node 2
        #                                               shape: (num_incidences, in_channels)
        neighbor_features_per_incidence = (
            hyperedge_features[hyperedge_ids] - incidence_node_features
        )

        # shape (num_incidences, hidden_channels)
        neighbor_aware_hyperedge_embeddings = self.hyperedge_aware(neighbor_features_per_incidence)
        # shape (num_incidences, hidden_channels)
        selfloop_embeddings = self.self_loop(incidence_node_features)

        # incidence_embeddings[0] = activation_fn(selfloop_embeddings[0] + neighbor_aware_hyperedge_embeddings[0])
        # is the embedding of the first incidence (i.e., node 0 in hyperedge 0)
        # after one local message-passing step inside that candidate hyperedge.
        incidence_embeddings = self.activation_fn(
            selfloop_embeddings + neighbor_aware_hyperedge_embeddings
        )  # shape (num_incidences, hidden_channels)

        # Treat each incidence embedding as a separately aggregatable set of features.
        # This is required because incidence embeddings are not global node embeddings:
        # node 1 may appear twice with two different embeddings as it participates in two different candidate hyperedges.
        # Example: incidence_ids = [0, 1, 2, 3, 4],
        #          hyperedge_ids = [0, 0, 1, 1, 1]
        #          -> incidence_hyperedge_index = [[0, 1, 2, 3, 4],
        #                                          [0, 0, 1, 1, 1]]
        num_incidences = incidence_embeddings.size(0)
        incidence_ids = torch.arange(num_incidences, device=hyperedge_index.device)
        incidence_hyperedge_index = torch.stack([incidence_ids, hyperedge_ids], dim=0)

        # Example: incidence_embeddings = [[1, 2],  # features 0, node 0 in hyperedge 0
        #                                  [3, 4],  # features 1, node 1 in hyperedge 0
        #                                  [5, 6],  # features 2, node 1 in hyperedge 1
        #                                  [7, 8],  # features 3, node 2 in hyperedge 1
        #                                  [9, 10]] # features 4, node 3 in hyperedge 1
        #          -> incidence_aggregator pools features (0, 1) for hyperedge 0 and features (2, 3, 4) for hyperedge 1
        #          if aggregation == "maxmin":
        #          -> hyperedge_embeddings = [[max(1, 3) - min(1, 3), max(2, 4) - min(2, 4)],                # hyperedge 0
        #                                     [max(5, 7, 9) - min(5, 7, 9), max(6, 8, 10) - min(6, 8, 10)]]  # hyperedge 1
        #                                    shape: (num_hyperedges, hidden_channels)
        #          if aggregation == "mean":
        #          -> hyperedge_embeddings = [[mean(1, 3), mean(2, 4)],         # hyperedge 0
        #                                     [mean(5, 7, 9), mean(6, 8, 10)]]  # hyperedge 1
        #                                    shape: (num_hyperedges, hidden_channels)
        incidence_aggregator = HyperedgeAggregator(
            hyperedge_index=incidence_hyperedge_index,
            node_embeddings=incidence_embeddings,
        )

        if self.aggregation == "maxmin":
            # Per-feature spread of the incidence embeddings inside each hyperedge
            max_embeddings = incidence_aggregator.pool("max")
            min_embeddings = incidence_aggregator.pool("min")
            hyperedge_embeddings = max_embeddings - min_embeddings
        else:
            # "mean" is the only other value accepted by the constructor
            hyperedge_embeddings = incidence_aggregator.pool("mean")

        # Decode: linear projection to scalar score per hyperedge
        # shape: (num_hyperedges, 1) -> squeeze -> (num_hyperedges,)
        return self.hyperedge_score(hyperedge_embeddings).squeeze(-1)

`forward(x, hyperedge_index)` ¶

Score each candidate hyperedge.

Parameters:

Name	Type	Description	Default
`x`	`Tensor`	Node feature matrix of shape `(num_nodes, in_channels)`.	required
`hyperedge_index`	`Tensor`	Incidence tensor of shape `(2, num_incidences)`.	required

Returns:

Type	Description
`Tensor`	Scores of shape `(num_hyperedges,)`.

Source code in hyperbench/models/nhp.py

def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Compute one score per candidate hyperedge.

    Args:
        x: Node feature matrix of shape ``(num_nodes, in_channels)``.
        hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            Row 0 holds node IDs and row 1 holds hyperedge IDs.

    Returns:
        Scores of shape ``(num_hyperedges,)``. An empty tensor of shape ``(0,)`` is returned
        when ``hyperedge_index`` contains no elements.
    """
    # Nothing to score: hand back an empty tensor with the dtype/device of x
    if hyperedge_index.numel() == 0:
        return x.new_empty((0,))

    # Running example used in the comments below:
    #   x = [[1, 0],   # node 0          hyperedge_index = [[0, 1, 1, 2, 3],   # node IDs
    #        [0, 1],   # node 1                             [0, 0, 1, 1, 1]]   # hyperedge IDs
    #        [1, 1],   # node 2          hyperedge 0 = {0, 1}
    #        [1, 0]]   # node 3          hyperedge 1 = {1, 2, 3}
    node_ids, hyperedge_ids = hyperedge_index[0], hyperedge_index[1]

    # One row of node features per incidence, shape (num_incidences, in_channels):
    #   [[1, 0],  # node 0 in hyperedge 0
    #    [0, 1],  # node 1 in hyperedge 0
    #    [0, 1],  # node 1 in hyperedge 1
    #    [1, 1],  # node 2 in hyperedge 1
    #    [1, 0]]  # node 3 in hyperedge 1
    incidence_node_features = x[node_ids]

    # Local message-passing step: summing the member node features makes each hyperedge
    # feature aware of every node of the candidate, shape (num_hyperedges, in_channels):
    #   hyperedge 0 -> [1, 0] + [0, 1]          = [1, 1]
    #   hyperedge 1 -> [0, 1] + [1, 1] + [1, 0] = [2, 2]
    sum_pooler = HyperedgeAggregator(hyperedge_index=hyperedge_index, node_embeddings=x)
    hyperedge_features = sum_pooler.pool("sum")

    # Send every hyperedge sum back to its incidences and subtract the incidence's own
    # features, leaving the summed features of the other nodes of that hyperedge,
    # shape (num_incidences, in_channels):
    #   [1, 1] - [1, 0] = [0, 1]  # node 0 sees node 1
    #   [1, 1] - [0, 1] = [1, 0]  # node 1 sees node 0
    #   [2, 2] - [0, 1] = [2, 1]  # node 1 sees node 2 and node 3
    #   [2, 2] - [1, 1] = [1, 1]  # node 2 sees node 1 and node 3
    #   [2, 2] - [1, 0] = [1, 2]  # node 3 sees node 1 and node 2
    broadcast_hyperedge_features = hyperedge_features[hyperedge_ids]
    neighbor_features_per_incidence = broadcast_hyperedge_features - incidence_node_features

    # Both projections have shape (num_incidences, hidden_channels)
    neighbor_aware_hyperedge_embeddings = self.hyperedge_aware(neighbor_features_per_incidence)
    selfloop_embeddings = self.self_loop(incidence_node_features)

    # Row i is the embedding of incidence i after the local message-passing step,
    # e.g. row 0 describes node 0 inside hyperedge 0. Shape (num_incidences, hidden_channels)
    pre_activation = selfloop_embeddings + neighbor_aware_hyperedge_embeddings
    incidence_embeddings = self.activation_fn(pre_activation)

    # Incidence embeddings are not global node embeddings (node 1 owns two different rows,
    # one per candidate hyperedge), so each row gets its own ID before pooling:
    #   incidence_hyperedge_index = [[0, 1, 2, 3, 4],   # incidence IDs
    #                                [0, 0, 1, 1, 1]]   # hyperedge IDs
    incidence_ids = torch.arange(incidence_embeddings.shape[0], device=hyperedge_index.device)
    incidence_hyperedge_index = torch.stack((incidence_ids, hyperedge_ids))

    # Pool rows (0, 1) into hyperedge 0 and rows (2, 3, 4) into hyperedge 1.
    # With incidence_embeddings = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]:
    #   "maxmin" -> [[3 - 1, 4 - 2], [9 - 5, 10 - 6]]
    #   "mean"   -> [[mean(1, 3), mean(2, 4)], [mean(5, 7, 9), mean(6, 8, 10)]]
    # Result shape: (num_hyperedges, hidden_channels)
    pooler = HyperedgeAggregator(
        hyperedge_index=incidence_hyperedge_index,
        node_embeddings=incidence_embeddings,
    )
    if self.aggregation == "maxmin":
        hyperedge_embeddings = pooler.pool("max") - pooler.pool("min")
    else:
        hyperedge_embeddings = pooler.pool("mean")

    # Decode each hyperedge embedding into a scalar: (num_hyperedges, 1) -> (num_hyperedges,)
    scores = self.hyperedge_score(hyperedge_embeddings)
    return scores.squeeze(-1)

`Node2Vec` ¶

Bases: Module

Node2Vec implementation based on torch_geometric.nn.Node2Vec.

Parameters:

Name	Type	Description	Default
`edge_index`	`Tensor`	Edge index representing the graph structure. Size `(2, num_edges)`.	required
`embedding_dim`	`int`	Dimension of the node embeddings to learn.	required
`walk_length`	`int`	Length of each random walk.	`20`
`context_size`	`int`	Window size for the skip-gram model (number of neighbors in the walk considered as context). For example, if `context_size=2` and `walk_length=5`, then for a random walk `[v0, v1, v2, v3, v4]`, the context for `v2` would be `[v0, v1, v3, v4]` as we take neighbors within distance 2 in the walk. The pairs generated by skip-gram would be `[(v2, v0), (v2, v1), (v2, v3), (v2, v4)]`. Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20). Defaults to `10`.	`10`
`num_walks_per_node`	`int`	Number of random walks to start at each node.	`10`
`p`	`float`	Return hyperparameter for Node2Vec. Default is `1.0` (unbiased). This controls the probability of stepping back to the node visited in the previous step. Lower values of `p` make immediate backtracking more likely, which keeps walks closer to the local neighborhood. Higher values of `p` discourage returning to the previous node, so walks are less likely to bounce back and forth across the same edge.	`1.0`
`q`	`float`	In-out hyperparameter for Node2Vec. Default is `1.0` (unbiased). This controls whether walks stay near the source node or explore further outward. Lower values of `q` bias the walk toward outward exploration, behaving more like DFS and emphasizing structural roles. Higher values of `q` bias the walk toward nearby nodes, behaving more like BFS and emphasizing community structure and homophily.	`1.0`
`num_negative_samples`	`int`	Number of negative samples to use for training the skip-gram model. If set to `X`, then for each positive pair `(u, v)` generated from the random walks, `X` negative pairs `(u, v_neg)` will be generated, where `v_neg` is a node sampled uniformly at random from all nodes in the graph. Defaults to `1`, meaning one negative sample per positive pair.	`1`
`num_nodes`	`int \| None`	Total number of nodes in the graph. If not provided, it will be inferred from `edge_index`. This is only needed if `edge_index` does not include all nodes (e.g., some isolated nodes are missing).	`None`
`sparse`	`bool`	Whether Node2Vec embeddings should use sparse gradients.	`True`

Source code in hyperbench/models/node2vec.py

class Node2Vec(nn.Module):
    """
    Node2Vec implementation based on ``torch_geometric.nn.Node2Vec``.

    Args:
        edge_index: Edge index representing the graph structure. Size ``(2, num_edges)``.
        embedding_dim: Dimension of the node embeddings to learn.
        walk_length: Length of each random walk.
        context_size: Window size for the skip-gram model (number of neighbors in the walk considered as context).
            For example, if ``context_size=2`` and ``walk_length=5``, then for a random walk ``[v0, v1, v2, v3, v4]``,
            the context for ``v2`` would be ``[v0, v1, v3, v4]`` as we take neighbors within distance 2 in the walk.
            The pairs generated by skip-gram would be ``[(v2, v0), (v2, v1), (v2, v3), (v2, v4)]``.
            Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20).
            Defaults to ``10``.
        num_walks_per_node: Number of random walks to start at each node.
        p: Return hyperparameter for Node2Vec. Default is ``1.0`` (unbiased).
            This controls the probability of stepping back to the node visited in the previous step.
            Lower values of ``p`` make immediate backtracking more likely, which keeps walks closer to the
            local neighborhood. Higher values of ``p`` discourage returning to the previous node, so walks
            are less likely to bounce back and forth across the same edge.
        q: In-out hyperparameter for Node2Vec. Default is ``1.0`` (unbiased).
            This controls whether walks stay near the source node or explore further outward.
            Lower values of ``q`` bias the walk toward outward exploration, behaving more like DFS and
            emphasizing structural roles. Higher values of ``q`` bias the walk toward nearby nodes,
            behaving more like BFS and emphasizing community structure and homophily.
        num_negative_samples: Number of negative samples to use for training the skip-gram model.
            If set to ``X``, then for each positive pair ``(u, v)`` generated from the random walks, ``X`` negative pairs ``(u, v_neg)`` will be generated,
            where ``v_neg`` is a node sampled uniformly at random from all nodes in the graph.
            Defaults to ``1``, meaning one negative sample per positive pair.
        num_nodes: Total number of nodes in the graph. If not provided, it will be inferred from the hyperedge_index.
            This is only needed if the hyperedge_index does not include all nodes (e.g., some isolated nodes are missing).
        sparse: Whether Node2Vec embeddings should use sparse gradients.
    """

    def __init__(
        self,
        edge_index: Tensor,
        embedding_dim: int,
        walk_length: int = 20,
        context_size: int = 10,
        num_walks_per_node: int = 10,
        p: float = 1.0,
        q: float = 1.0,
        num_negative_samples: int = 1,
        num_nodes: int | None = None,
        sparse: bool = True,
    ):
        super().__init__()
        if walk_length < context_size:
            raise ValueError(
                f"Expected walk_length >= context_size, got "
                f"walk_length={walk_length}, context_size={context_size}."
            )

        self.model = PyGNode2Vec(
            edge_index=edge_index,
            embedding_dim=embedding_dim,
            walk_length=walk_length,
            context_size=context_size,
            walks_per_node=num_walks_per_node,
            p=p,
            q=q,
            num_negative_samples=num_negative_samples,
            num_nodes=num_nodes,
            sparse=sparse,
        )

    def forward(self, batch: Tensor | None = None) -> Tensor:
        return self.model(batch)

    @property
    def num_embeddings(self) -> int:
        return int(self.model.embedding.num_embeddings)

    def loss(self, pos_rw: Tensor, neg_rw: Tensor) -> Tensor:
        return self.model.loss(pos_rw, neg_rw)

    def loader(self, batch_size: int = 128, shuffle: bool = True):
        return self.model.loader(batch_size=batch_size, shuffle=shuffle)

`Node2VecConfig` ¶

Bases: TypedDict

Configuration for the Node2Vec model.

Parameters:

Name	Description	Default
`edge_index`	Edge index representing the graph structure. Size `(2, num_edges)`.	required
`embedding_dim`	Dimension of the node embeddings to learn.	required
`walk_length`	Length of each random walk.	required
`context_size`	Window size for the skip-gram model (number of neighbors in the walk considered as context). For example, if `context_size=2` and `walk_length=5`, then for a random walk `[v0, v1, v2, v3, v4]`, the context for `v2` would be `[v0, v1, v3, v4]` as we take neighbors within distance 2 in the walk. The pairs generated by skip-gram would be `[(v2, v0), (v2, v1), (v2, v3), (v2, v4)]`. Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20). Defaults to `10`.	required
`num_walks_per_node`	Number of random walks to start at each node.	required
`p`	Return hyperparameter for Node2Vec. Default is `1.0` (unbiased). This controls the probability of stepping back to the node visited in the previous step. Lower values of `p` make immediate backtracking more likely, which keeps walks closer to the local neighborhood. Higher values of `p` discourage returning to the previous node, so walks are less likely to bounce back and forth across the same edge.	required
`q`	In-out hyperparameter for Node2Vec. Default is `1.0` (unbiased). This controls whether walks stay near the source node or explore further outward. Lower values of `q` bias the walk toward outward exploration, behaving more like DFS and emphasizing structural roles. Higher values of `q` bias the walk toward nearby nodes, behaving more like BFS and emphasizing community structure and homophily.	required
`num_negative_samples`	Number of negative samples to use for training the skip-gram model. If set to `X`, then for each positive pair `(u, v)` generated from the random walks, `X` negative pairs `(u, v_neg)` will be generated, where `v_neg` is a node sampled uniformly at random from all nodes in the graph. Defaults to `1`, meaning one negative sample per positive pair.	required
`num_nodes`	Total number of nodes in the graph. If not provided, it will be inferred from `edge_index`. This is only needed if `edge_index` does not include all nodes (e.g., some isolated nodes are missing).	required
`sparse`	Whether Node2Vec embeddings should use sparse gradients.	required

Source code in hyperbench/models/node2vec.py

class Node2VecConfig(TypedDict):
    """
    Configuration for the Node2Vec model.

    Args:
        edge_index: Edge index representing the graph structure. Size ``(2, num_edges)``.
        embedding_dim: Dimension of the node embeddings to learn.
        walk_length: Length of each random walk.
        context_size: Window size for the skip-gram model (number of neighbors in the walk considered as context).
            For example, if ``context_size=2`` and ``walk_length=5``, then for a random walk ``[v0, v1, v2, v3, v4]``,
            the context for ``v2`` would be ``[v0, v1, v3, v4]`` as we take neighbors within distance 2 in the walk.
            The pairs generated by skip-gram would be ``[(v2, v0), (v2, v1), (v2, v3), (v2, v4)]``.
            Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20).
            Defaults to ``10``.
        num_walks_per_node: Number of random walks to start at each node.
        p: Return hyperparameter for Node2Vec. Default is ``1.0`` (unbiased).
            This controls the probability of stepping back to the node visited in the previous step.
            Lower values of ``p`` make immediate backtracking more likely, which keeps walks closer to the
            local neighborhood. Higher values of ``p`` discourage returning to the previous node, so walks
            are less likely to bounce back and forth across the same edge.
        q: In-out hyperparameter for Node2Vec. Default is ``1.0`` (unbiased).
            This controls whether walks stay near the source node or explore further outward.
            Lower values of ``q`` bias the walk toward outward exploration, behaving more like DFS and
            emphasizing structural roles. Higher values of ``q`` bias the walk toward nearby nodes,
            behaving more like BFS and emphasizing community structure and homophily.
        num_negative_samples: Number of negative samples to use for training the skip-gram model.
            If set to ``X``, then for each positive pair ``(u, v)`` generated from the random walks, ``X`` negative pairs ``(u, v_neg)`` will be generated,
            where ``v_neg`` is a node sampled uniformly at random from all nodes in the graph.
            Defaults to ``1``, meaning one negative sample per positive pair.
        num_nodes: Total number of nodes in the graph. If not provided, it will be inferred from the hyperedge_index.
            This is only needed if the hyperedge_index does not include all nodes (e.g., some isolated nodes are missing).
        sparse: Whether Node2Vec embeddings should use sparse gradients.
    """

    edge_index: Tensor
    embedding_dim: int
    context_size: NotRequired[int]
    walk_length: NotRequired[int]
    num_walks_per_node: NotRequired[int]
    p: NotRequired[float]
    q: NotRequired[float]
    num_negative_samples: NotRequired[int]
    num_nodes: NotRequired[int]
    sparse: NotRequired[bool]

`Node2VecGCN` ¶

Bases: Module

A joint encoder that first learns Node2Vec embeddings and then refines them with GCN layers.

Parameters:

Name	Type	Description	Default
`node2vec_config`	`Node2VecConfig`	Model-side configuration for the internal `Node2Vec` encoder.	required
`gcn_config`	`GCNConfig`	Model-side configuration for the GCN stack applied to the Node2Vec embeddings.	required

Source code in hyperbench/models/node2vec.py

class Node2VecGCN(nn.Module):
    """
    Two-stage encoder: Node2Vec produces node embeddings, a GCN stack then refines them.

    Args:
        node2vec_config: Keyword arguments used to build the internal ``Node2Vec`` encoder.
        gcn_config: Keyword arguments used to build the ``GCN`` applied on top of the Node2Vec embeddings.
    """

    def __init__(
        self,
        node2vec_config: Node2VecConfig,
        gcn_config: GCNConfig,
    ):
        super().__init__()
        # Both configs are unpacked as keyword arguments of the respective constructors.
        self.node2vec = Node2Vec(**node2vec_config)
        self.gcn = GCN(**gcn_config)

    def forward(
        self,
        batch: Tensor | None = None,
        edge_index: Tensor | None = None,
    ) -> Tensor:
        """
        Embed ``batch`` with Node2Vec and pass the result through the GCN.

        Args:
            batch: Forwarded unchanged to the internal ``Node2Vec`` encoder.
            edge_index: Graph connectivity handed to the GCN. Although it defaults to ``None``,
                it must be provided.

        Returns:
            The output of the GCN applied to the Node2Vec embeddings.

        Raises:
            ValueError: If ``edge_index`` is ``None``.
        """
        if edge_index is not None:
            base_embeddings = self.node2vec(batch)
            return self.gcn(base_embeddings, edge_index)

        raise ValueError("Node2VecGCN requires edge_index in forward().")

    @property
    def num_embeddings(self) -> int:
        """Number of embeddings reported by the internal ``Node2Vec`` encoder."""
        encoder = self.node2vec
        return encoder.num_embeddings

    def loss(self, pos_rw: Tensor, neg_rw: Tensor) -> Tensor:
        """Delegate the random-walk loss to the internal ``Node2Vec`` encoder (the GCN is not involved)."""
        encoder = self.node2vec
        return encoder.loss(pos_rw, neg_rw)

    def loader(self, batch_size: int = 128, shuffle: bool = True):
        """Return the internal ``Node2Vec`` encoder's loader configured with ``batch_size`` and ``shuffle``."""
        encoder = self.node2vec
        return encoder.loader(batch_size=batch_size, shuffle=shuffle)

`VilLain` ¶

Bases: Module

VilLain learns node-specific virtual-label logits instead of consuming existing node features. The model is transductive: rows in `node_embedding` correspond to the fixed global node space used during training. Proposed in the paper [VilLain: Self-Supervised Learning on Homogeneous Hypergraphs without Features via Virtual Label Propagation](https://dl.acm.org/doi/pdf/10.1145/3589334.3645454) (WWW 2024). Reference implementation: [source](https://github.com/geon0325/VilLain/).

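Each forward pass: 1. Samples differentiable virtual-label assignments with Gumbel-Softmax. 2. Propagates them over the incidence structure for `training_steps` rounds. 3. Returns the self-supervised loss accumulated over those rounds. Averaged propagated embeddings for inference are produced by `node_embeddings` and `hyperedge_embeddings`, not by the forward pass.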

Parameters:

Name	Type	Description	Default
`num_nodes`	`int`	Total number of trainable nodes.	required
`embedding_dim`	`int`	Returned embedding dimension. Defaults to `128`.	`128`
`labels_per_subspace`	`int`	Number of virtual labels per subspace. Defaults to `2`.	`2`
`training_steps`	`int`	Propagation steps used for self-supervised loss. Defaults to `4`.	`4`
`generation_steps`	`int`	Propagation steps averaged for final embeddings. Defaults to `100`.	`100`
`tau`	`float`	Gumbel-Softmax temperature. Defaults to `1.0`.	`1.0`
`eps`	`float`	Numerical stability constant. Defaults to `1e-10`.	`1e-10`

Source code in hyperbench/models/villain.py

class VilLain(nn.Module):
    """
    VilLain learns node-specific virtual-label logits instead of consuming existing node features.
    The model is transductive: rows in ``node_embedding`` correspond to the fixed global node space used during training.
    - Proposed in `VilLain: Self-Supervised Learning on Homogeneous Hypergraphs without Features via Virtual Label Propagation <https://dl.acm.org/doi/pdf/10.1145/3589334.3645454>`_ paper (WWW 2024).
    - Reference implementation: `source <https://github.com/geon0325/VilLain/>`_.

    Each forward pass (training, see ``forward`` / ``loss``):
    1. Samples differentiable virtual-label assignments with Gumbel-Softmax.
    2. Propagates them over the incidence structure for ``training_steps`` rounds.
    3. Returns the self-supervised loss accumulated over those rounds.

    Final embeddings for inference are produced by ``node_embeddings`` and ``hyperedge_embeddings``,
    which average the propagated states over ``generation_steps`` rounds.


    Args:
        num_nodes: Total number of trainable nodes.
        embedding_dim: Returned embedding dimension. Defaults to ``128``.
        labels_per_subspace: Number of virtual labels per subspace. Defaults to ``2``.
        training_steps: Propagation steps used for self-supervised loss. Defaults to ``4``.
        generation_steps: Propagation steps averaged for final embeddings. Defaults to ``100``.
        tau: Gumbel-Softmax temperature. Defaults to ``1.0``.
        eps: Numerical stability constant. Defaults to ``1e-10``.

    Raises:
        ValueError: If any constructor argument is outside its valid range (see ``__validate_args``).
    """

    def __init__(
        self,
        num_nodes: int,
        embedding_dim: int = 128,
        labels_per_subspace: int = 2,
        training_steps: int = 4,
        generation_steps: int = 100,
        tau: float = 1.0,
        eps: float = 1e-10,
    ) -> None:
        super().__init__()
        # Reject invalid hyperparameters before any state is created.
        self.__validate_args(
            num_nodes=num_nodes,
            embedding_dim=embedding_dim,
            labels_per_subspace=labels_per_subspace,
            training_steps=training_steps,
            generation_steps=generation_steps,
            tau=tau,
            eps=eps,
        )

        self.num_nodes = num_nodes
        self.embedding_dim = embedding_dim
        self.labels_per_subspace = labels_per_subspace
        self.training_steps = training_steps
        self.generation_steps = generation_steps
        self.tau = tau
        self.eps = eps

        # Round the number of subspaces up so that raw_embedding_dim >= embedding_dim.
        # Any surplus channels are dropped when the final embeddings are truncated to embedding_dim.
        self.num_subspaces = math.ceil(embedding_dim / labels_per_subspace)
        self.raw_embedding_dim = self.num_subspaces * labels_per_subspace
        # Trainable virtual-label logits, one row per node; filled in by reset_parameters().
        self.node_embedding = nn.Parameter(torch.empty(size=(num_nodes, self.raw_embedding_dim)))

        self.loss_fn = VilLainLoss(
            num_subspaces=self.num_subspaces,
            labels_per_subspace=self.labels_per_subspace,
            eps=self.eps,
        )

        self.reset_parameters()

    def forward(
        self,
        hyperedge_index: Tensor,
        node_ids: Tensor | None = None,
        num_hyperedges: int | None = None,
    ) -> tuple[Tensor, VilLainLossParts]:
        """
        Compute the self-supervised VilLain objective. This simply delegates to ``loss``.
        Use ``hyperedge_embeddings`` or ``node_embeddings`` to generate final embeddings for inference after training.

        Args:
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            node_ids: Optional global node ids that map the batch's local node ids to rows of the embedding table
                in the transductive setting.
                Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table.
                This is needed as the model keeps an internal embedding table with a row for every node in the global node space.
            num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
                If not provided, the hyperedge count is inferred from ``hyperedge_index``.

        Returns:
            A tuple ``(total_loss, loss_parts)`` where ``loss_parts`` contains ``local_loss`` and ``global_loss`` scalar tensors.
        """
        return self.loss(
            hyperedge_index=hyperedge_index,
            node_ids=node_ids,
            num_hyperedges=num_hyperedges,
        )

    def loss(
        self,
        hyperedge_index: Tensor,
        node_ids: Tensor | None = None,
        num_hyperedges: int | None = None,
    ) -> tuple[Tensor, VilLainLossParts]:
        """
        Compute the self-supervised VilLain objective.

        The local and global losses are evaluated after every one of the ``training_steps``
        message-passing rounds and summed over all rounds.

        Args:
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            node_ids: Optional global node ids that map the batch's local node ids to rows of the embedding table
                in the transductive setting.
                Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table.
                This is needed as the model keeps an internal embedding table with a row for every node in the global node space.
            num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
                If not provided, the hyperedge count is inferred from ``hyperedge_index``.

        Returns:
            A tuple ``(total_loss, loss_parts)`` where ``loss_parts`` contains ``local_loss`` and ``global_loss`` scalar tensors.
        """
        node_embeddings = self.__get_initial_virtual_node_features(node_ids=node_ids)
        actual_num_hyperedges = self.__num_hyperedges(hyperedge_index, num_hyperedges)

        # Zero-dimensional accumulators created from node_embeddings via new_zeros.
        local_loss = node_embeddings.new_zeros(size=())
        global_loss = node_embeddings.new_zeros(size=())
        for _ in range(self.training_steps):
            # Each round feeds the node states produced by the previous round back in.
            node_embeddings, hyperedge_embeddings = self.__message_passing(
                x=node_embeddings,
                hyperedge_index=hyperedge_index,
                num_hyperedges=actual_num_hyperedges,
            )
            local_loss = local_loss + self.loss_fn.local_loss(node_embeddings, hyperedge_embeddings)
            global_loss = global_loss + self.loss_fn.global_loss(
                node_embeddings, hyperedge_embeddings
            )

        return self.loss_fn.total_loss(local_loss, global_loss), {
            "local_loss": local_loss,
            "global_loss": global_loss,
        }

    def hyperedge_embeddings(
        self,
        hyperedge_index: Tensor,
        node_ids: Tensor | None = None,
        num_hyperedges: int | None = None,
    ) -> Tensor:
        """
        Generate hyperedge embeddings by averaging propagated hyperedge states.
        Every generation step computes hyperedge states from the current node states, then updates node states for the next step.

        Args:
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            node_ids: Optional global node ids that map the batch's local node ids to rows of the embedding table
                in the transductive setting.
                Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table.
                This is needed as the model keeps an internal embedding table with a row for every node in the global node space.
            num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
                If not provided, the hyperedge count is inferred from ``hyperedge_index``.

        Returns:
            Hyperedge embeddings of shape ``(num_hyperedges, embedding_dim)``.
        """
        return self.__embeddings(
            hyperedge_index=hyperedge_index,
            node_ids=node_ids,
            num_hyperedges=num_hyperedges,
            mode="hyperedge",
        )

    def node_embeddings(
        self,
        hyperedge_index: Tensor,
        node_ids: Tensor | None = None,
        num_hyperedges: int | None = None,
    ) -> Tensor:
        """
        Generate node embeddings by averaging propagated node states.

        Args:
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            node_ids: Optional global node ids that map the batch's local node ids to rows of the embedding table
                in the transductive setting.
                Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table.
                This is needed as the model keeps an internal embedding table with a row for every node in the global node space.
            num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
                If not provided, the hyperedge count is inferred from ``hyperedge_index``.

        Returns:
            Node embeddings of shape ``(num_local_nodes, embedding_dim)``.
        """
        return self.__embeddings(
            hyperedge_index=hyperedge_index,
            node_ids=node_ids,
            num_hyperedges=num_hyperedges,
            mode="node",
        )

    def reset_parameters(self) -> None:
        """Initialize trainable virtual-label logits near zero (normal distribution, mean ``0.0``, std ``0.1``)."""
        nn.init.normal_(self.node_embedding, mean=0.0, std=0.1)

    def __embeddings(
        self,
        hyperedge_index: Tensor,
        node_ids: Tensor | None,
        num_hyperedges: int | None,
        mode: Literal["node", "hyperedge"] = "node",
    ) -> Tensor:
        """
        Generate final node or hyperedge embeddings for inference.

        The whole computation runs under ``torch.no_grad()``. The initial node features are
        obtained through Gumbel-Softmax, so results presumably vary between calls unless the
        random number generator is seeded.

        Args:
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            node_ids: Optional global node ids that map the batch's local node ids to rows of the embedding table
                in the transductive setting.
            num_hyperedges: Optional explicit hyperedge count to preserve empty hyperedges during propagation.
            mode: Selects whether to accumulate propagated node states or hyperedge states.

        Returns:
            Averaged embeddings truncated to ``embedding_dim``.
        """
        with torch.no_grad():
            x = self.__get_initial_virtual_node_features(node_ids=node_ids)
            actual_num_hyperedges = self.__num_hyperedges(hyperedge_index, num_hyperedges)

            # The accumulator has one row per node or per hyperedge, depending on the mode.
            final_embeddings_size = (
                (x.size(0), self.raw_embedding_dim)
                if mode == "node"
                else (actual_num_hyperedges, self.raw_embedding_dim)
            )
            final_embeddings = x.new_zeros(size=final_embeddings_size)
            for _ in range(self.generation_steps):
                x, hyperedge_embeddings = self.__message_passing(
                    x=x,
                    hyperedge_index=hyperedge_index,
                    num_hyperedges=actual_num_hyperedges,
                )

                # Suppose generation_steps = 100.
                # Average 100 propagated embeddings for each node/hyperedge to get more stable final embeddings.
                # Sum here and divide by generation_steps later to avoid storing all 100 embeddings in memory at once.
                final_embeddings = final_embeddings + (
                    x if mode == "node" else hyperedge_embeddings
                )
            final_embeddings = final_embeddings / self.generation_steps

            # Example: final_embeddings.shape = (num_nodes/num_hyperedges, 8) with raw_embedding_dim=8
            #          -> returned shape = (num_nodes/num_hyperedges, 4) with embedding_dim=4
            #             as it takes the first 4 channels of the raw embedding as the final embedding.
            return final_embeddings[:, : self.embedding_dim]

    def __get_initial_virtual_node_features(self, node_ids: Tensor | None = None) -> Tensor:
        """
        Convert trainable node logits into flattened virtual-label probabilities.

        Args:
            node_ids: Optional global node ids used to select rows of the embedding table
                in the transductive setting.
                If ``None``, all node rows are used.

        Returns:
            A tensor of shape ``(num_selected_nodes, raw_embedding_dim)``.
        """
        logits = self.node_embedding if node_ids is None else self.node_embedding[node_ids]

        # Split flat logits into independent virtual-label subspaces.
        # Example: with raw_embedding_dim=8, num_subspaces=4, labels_per_subspace=2:
        #          logits.shape = (num_nodes, 8)
        #          -> viewed_logits shape = (num_nodes, 4, 2)
        #          viewed_logits[0] = [[l00, l01],  # node 0, subspace 0
        #                              [l02, l03],  # node 0, subspace 1
        #                              [l04, l05],  # node 0, subspace 2
        #                              [l06, l07]]  # node 0, subspace 3
        viewed_logits = logits.view(-1, self.num_subspaces, self.labels_per_subspace)

        # Convert each subspace's logits into a differentiable virtual-label assignment.
        # hard=False keeps the soft (non one-hot) samples.
        # Example: viewed_logits[0, 0] = [0.03, -0.02]
        #          -> probs[0, 0] might be [0.47, 0.53] with tau=1.0
        #          probs.shape remains (num_nodes, 4, 2).
        probs = F.gumbel_softmax(viewed_logits, tau=self.tau, dim=2, hard=False)

        # Flatten subspaces back into a standard node-by-channel node feature matrix.
        # The aggregators expect matrices shaped (num_nodes, num_channels==raw_embedding_dim),
        # so propagation happens on the flattened channel dimension.
        # Example: probs.shape = (num_nodes, 4, 2) -> shape = (num_nodes, 8)
        return probs.reshape(-1, self.raw_embedding_dim)

    def __message_passing(
        self,
        x: Tensor,
        hyperedge_index: Tensor,
        num_hyperedges: int,
    ) -> tuple[Tensor, Tensor]:
        """
        One round of message passing, where nodes send messages to hyperedges and then hyperedges send messages back to nodes.
        Both directions use ``"mean"`` pooling.

        Args:
            x: Virtual node features of shape ``(num_nodes, raw_embedding_dim)``.
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            num_hyperedges: Total number of hyperedges.

        Returns:
            A tuple ``(node_embeddings, hyperedge_embeddings)`` with the updated node and hyperedge
            embeddings after one round of message passing.
        """
        # Node -> hyperedge: pool the node states into hyperedge states.
        hyperedge_embeddings = HyperedgeAggregator(
            hyperedge_index=hyperedge_index,
            node_embeddings=x,
            num_hyperedges=num_hyperedges,
        ).pool("mean")

        # Hyperedge -> node: pool the freshly computed hyperedge states back into node states.
        node_embeddings = NodeAggregator(
            hyperedge_index=hyperedge_index,
            hyperedge_embeddings=hyperedge_embeddings,
            num_nodes=x.size(0),
        ).pool("mean")

        return node_embeddings, hyperedge_embeddings

    def __num_hyperedges(
        self,
        hyperedge_index: Tensor,
        num_hyperedges: int | None,
    ) -> int:
        """
        Return the explicit hyperedge count or infer it from the ``hyperedge_index``, if not provided.
        Explicit counts are required when empty hyperedges must remain in the hypergraph.
        """
        if num_hyperedges is not None:
            return num_hyperedges
        return HyperedgeIndex(hyperedge_index).num_hyperedges

    def __validate_args(
        self,
        num_nodes: int,
        embedding_dim: int,
        labels_per_subspace: int,
        training_steps: int,
        generation_steps: int,
        tau: float,
        eps: float,
    ) -> None:
        """
        Check the constructor hyperparameters.

        Raises:
            ValueError: If ``num_nodes``, ``embedding_dim``, ``training_steps`` or ``generation_steps``
                is smaller than ``1``, if ``labels_per_subspace`` is smaller than ``2``, or if ``tau``
                or ``eps`` is not strictly positive.
        """
        if num_nodes < 1:
            raise ValueError("num_nodes must be positive.")
        if embedding_dim < 1:
            raise ValueError("embedding_dim must be positive.")
        if labels_per_subspace < 2:
            raise ValueError("labels_per_subspace must be at least 2.")
        if training_steps < 1:
            raise ValueError("training_steps must be positive.")
        if generation_steps < 1:
            raise ValueError("generation_steps must be positive.")
        if tau <= 0:
            raise ValueError("tau must be positive.")
        if eps <= 0:
            raise ValueError("eps must be positive.")

`forward(hyperedge_index, node_ids=None, num_hyperedges=None)` ¶

Compute the self-supervised VilLain objective. Use hyperedge_embeddings or node_embeddings to generate final embeddings for inference after training.

Parameters:

Name	Type	Description	Default
`hyperedge_index`	`Tensor`	Incidence tensor of shape `(2, num_incidences)`.	required
`node_ids`	`Tensor \| None`	Optional global node ids that map the batch's local node ids to rows of the embedding table in the transductive setting. Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table. This is needed as the model keeps an internal embedding table with a row for every node in the global node space.	`None`
`num_hyperedges`	`int \| None`	Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges. If not provided, the hyperedge count is inferred from `hyperedge_index`.	`None`

Returns:

Type	Description
`tuple[Tensor, VilLainLossParts]`	A tuple `(total_loss, loss_parts)` where `loss_parts` contains `local_loss` and `global_loss` scalar tensors.

Source code in hyperbench/models/villain.py

def forward(
    self,
    hyperedge_index: Tensor,
    node_ids: Tensor | None = None,
    num_hyperedges: int | None = None,
) -> tuple[Tensor, VilLainLossParts]:
    """
    Run the training forward pass, which is the self-supervised VilLain objective.

    This method only delegates to ``loss``. For inference after training, call
    ``hyperedge_embeddings`` or ``node_embeddings`` to generate final embeddings.

    Args:
        hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
        node_ids: Optional global node ids that map the batch's local node ids to rows of the
            embedding table in the transductive setting.
            Pass it when a batch has rebased local node ids while the learned logits live in the
            full transductive node table (the model keeps one row per node of the global node space).
        num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to
            preserve empty hyperedges. When omitted, the count is inferred from ``hyperedge_index``.

    Returns:
        The value returned by ``loss``: a tuple ``(total_loss, loss_parts)`` where ``loss_parts``
        contains the ``local_loss`` and ``global_loss`` tensors.
    """
    # All three arguments are handed over by keyword, unchanged.
    objective = self.loss(
        hyperedge_index=hyperedge_index,
        node_ids=node_ids,
        num_hyperedges=num_hyperedges,
    )
    return objective

`loss(hyperedge_index, node_ids=None, num_hyperedges=None)` ¶

Compute the self-supervised VilLain objective.

Parameters:

Name	Type	Description	Default
`hyperedge_index`	`Tensor`	Incidence tensor of shape `(2, num_incidences)`.	required
`node_ids`	`Tensor \| None`	Optional global node ids that map the batch's local node ids to rows of the embedding table in the transductive setting. Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table. This is needed as the model keeps an internal embedding table with a row for every node in the global node space.	`None`
`num_hyperedges`	`int \| None`	Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges. If not provided, the hyperedge count is inferred from `hyperedge_index`.	`None`

Returns:

Type	Description
`tuple[Tensor, VilLainLossParts]`	A tuple `(total_loss, loss_parts)` where `loss_parts` contains `local_loss` and `global_loss` scalar tensors.

Source code in hyperbench/models/villain.py

def loss(
    self,
    hyperedge_index: Tensor,
    node_ids: Tensor | None = None,
    num_hyperedges: int | None = None,
) -> tuple[Tensor, VilLainLossParts]:
    """
    Evaluate the self-supervised VilLain training objective.

    Starting from the initial virtual node features, ``training_steps`` rounds of message passing are run.
    After every round the local and global loss terms are evaluated on the propagated node and
    hyperedge states and added to running totals, which are finally combined by ``loss_fn.total_loss``.

    Args:
        hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
        node_ids: Optional global node ids used to select rows of the internal embedding table.
            Pass them when the batch uses rebased local node ids while the learned logits are
            stored in the full transductive node table.
        num_hyperedges: Optional explicit hyperedge count, so that empty hyperedges are preserved
            during node-to-hyperedge pooling. Inferred from ``hyperedge_index`` when omitted.

    Returns:
        A tuple ``(total_loss, loss_parts)`` where ``loss_parts`` holds the accumulated
        ``local_loss`` and ``global_loss`` scalar tensors.
    """
    node_states = self.__get_initial_virtual_node_features(node_ids=node_ids)
    edge_count = self.__num_hyperedges(hyperedge_index, num_hyperedges)

    # Scalar accumulators created from the node features so they share dtype and device.
    local_total = node_states.new_zeros(size=())
    global_total = node_states.new_zeros(size=())

    criterion = self.loss_fn
    for _ in range(self.training_steps):
        # Each round feeds the previous round's node states back in.
        node_states, hyperedge_states = self.__message_passing(
            x=node_states,
            hyperedge_index=hyperedge_index,
            num_hyperedges=edge_count,
        )
        local_total = local_total + criterion.local_loss(node_states, hyperedge_states)
        global_total = global_total + criterion.global_loss(node_states, hyperedge_states)

    combined = criterion.total_loss(local_total, global_total)
    return combined, {
        "local_loss": local_total,
        "global_loss": global_total,
    }

`hyperedge_embeddings(hyperedge_index, node_ids=None, num_hyperedges=None)` ¶

Generate hyperedge embeddings by averaging propagated hyperedge states. Every generation step computes hyperedge states from the current node states, then updates node states for the next step.

Parameters:

Name	Type	Description	Default
`hyperedge_index`	`Tensor`	Incidence tensor of shape `(2, num_incidences)`.	required
`node_ids`	`Tensor \| None`	Optional global node ids that map the batch's local node ids to rows of the embedding table in the transductive setting. Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table. This is needed because the model keeps an internal embedding table with a row for every node in the global node space.	`None`
`num_hyperedges`	`int \| None`	Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges. If not provided, the hyperedge count is inferred from `hyperedge_index`.	`None`

Returns:

Type	Description
`Tensor`	Hyperedge embeddings of shape `(num_hyperedges, embedding_dim)`.

Source code in hyperbench/models/villain.py

def hyperedge_embeddings(
    self,
    hyperedge_index: Tensor,
    node_ids: Tensor | None = None,
    num_hyperedges: int | None = None,
) -> Tensor:
    """
    Generate hyperedge embeddings by averaging propagated hyperedge states.
    Every generation step computes hyperedge states from the current node states, then updates node states for the next step.

    Args:
        hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
        node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting.
            Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table.
            This is needed as the model keeps an internal embedding table with a row for every node in the global node space.
        num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
            If not provided, the hyperedge count is inferred from ``hyperedge_index``.

    Returns:
        Hyperedge embeddings of shape ``(num_hyperedges, embedding_dim)``.
    """
    return self.__embeddings(
        hyperedge_index=hyperedge_index,
        node_ids=node_ids,
        num_hyperedges=num_hyperedges,
        mode="hyperedge",
    )

`node_embeddings(hyperedge_index, node_ids=None, num_hyperedges=None)` ¶

Generate node embeddings by averaging propagated node states.

Parameters:

Name	Type	Description	Default
`hyperedge_index`	`Tensor`	Incidence tensor of shape `(2, num_incidences)`.	required
`node_ids`	`Tensor \| None`	Optional global node ids that map the batch's local node ids to rows of the embedding table in the transductive setting. Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table. This is needed because the model keeps an internal embedding table with a row for every node in the global node space.	`None`
`num_hyperedges`	`int \| None`	Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges. If not provided, the hyperedge count is inferred from `hyperedge_index`.	`None`

Returns:

Type	Description
`Tensor`	Node embeddings of shape `(num_local_nodes, embedding_dim)`.

Source code in hyperbench/models/villain.py

def node_embeddings(
    self,
    hyperedge_index: Tensor,
    node_ids: Tensor | None = None,
    num_hyperedges: int | None = None,
) -> Tensor:
    """
    Generate node embeddings by averaging propagated node states.

    Args:
        hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
        node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting.
            Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table.
            This is needed as the model keeps an internal embedding table with a row for every node in the global node space.
        num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
            If not provided, the hyperedge count is inferred from ``hyperedge_index``.

    Returns:
        Node embeddings of shape ``(num_local_nodes, embedding_dim)``.
    """
    return self.__embeddings(
        hyperedge_index=hyperedge_index,
        node_ids=node_ids,
        num_hyperedges=num_hyperedges,
        mode="node",
    )

`reset_parameters()` ¶

Initialize trainable virtual-label logits near zero.

Source code in hyperbench/models/villain.py

def reset_parameters(self) -> None:
    """
    Re-initialize the trainable node logits in place.

    Values are drawn from a normal distribution with mean ``0.0`` and standard deviation ``0.1``,
    so every virtual-label logit starts close to zero.
    """
    logits = self.node_embedding
    nn.init.normal_(logits, mean=0.0, std=0.1)

`__embeddings(hyperedge_index, node_ids, num_hyperedges, mode='node')` ¶

Generate final node or hyperedge embeddings for inference.

Parameters:

Name	Type	Description	Default
`hyperedge_index`	`Tensor`	Incidence tensor of shape `(2, num_incidences)`.	required
`node_ids`	`Tensor \| None`	Optional global node ids that map the batch's local node ids to rows of the embedding table in the transductive setting.	required
`num_hyperedges`	`int \| None`	Optional explicit hyperedge count to preserve empty hyperedges during propagation.	required
`mode`	`Literal['node', 'hyperedge']`	Selects whether to accumulate propagated node states or hyperedge states.	`'node'`

Returns:

Type	Description
`Tensor`	Averaged embeddings truncated to `embedding_dim`.

Source code in hyperbench/models/villain.py

def __embeddings(
    self,
    hyperedge_index: Tensor,
    node_ids: Tensor | None,
    num_hyperedges: int | None,
    mode: Literal["node", "hyperedge"] = "node",
) -> Tensor:
    """
    Generate final node or hyperedge embeddings for inference.

    Args:
        hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
        node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting.
        num_hyperedges: Optional explicit hyperedge count to preserve empty hyperedges during propagation.
        mode: Selects whether to accumulate propagated node states or hyperedge states.

    Returns:
        Averaged embeddings truncated to ``embedding_dim``.
    """
    with torch.no_grad():
        x = self.__get_initial_virtual_node_features(node_ids=node_ids)
        actual_num_hyperedges = self.__num_hyperedges(hyperedge_index, num_hyperedges)

        final_embeddings_size = (
            (x.size(0), self.raw_embedding_dim)
            if mode == "node"
            else (actual_num_hyperedges, self.raw_embedding_dim)
        )
        final_embeddings = x.new_zeros(size=final_embeddings_size)
        for _ in range(self.generation_steps):
            x, hyperedge_embeddings = self.__message_passing(
                x=x,
                hyperedge_index=hyperedge_index,
                num_hyperedges=actual_num_hyperedges,
            )

            # Suppose generation_steps = 100.
            # Average 100 propagated embeddings for each node/hyperedge to get more stable final embeddings.
            # Sum here and divide by generation_steps later to avoid storing all 100 embeddings in memory at once.
            final_embeddings = final_embeddings + (
                x if mode == "node" else hyperedge_embeddings
            )
        final_embeddings = final_embeddings / self.generation_steps

        # Example: final_embeddings.shape = (num_nodes/num_hyperedges, 8) with raw_embedding_dim=8
        #          -> returned shape = (num_nodes/num_hyperedges, 4) with embedding_dim=4
        #             as it takes the first 4 channels of the raw embedding as the final embedding.
        return final_embeddings[:, : self.embedding_dim]

`__get_initial_virtual_node_features(node_ids=None)` ¶

Convert trainable node logits into flattened virtual-label probabilities.

Parameters:

Name	Type	Description	Default
`node_ids`	`Tensor \| None`	Optional global node ids that map the batch's local node ids to rows of the embedding table in the transductive setting. If `None`, all node rows are used.	`None`

Returns:

Type	Description
`Tensor`	A tensor of shape `(num_selected_nodes, raw_embedding_dim)`.

Source code in hyperbench/models/villain.py

def __get_initial_virtual_node_features(self, node_ids: Tensor | None = None) -> Tensor:
    """
    Convert trainable node logits into flattened virtual-label probabilities.

    Args:
        node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting.
            If ``None``, all node rows are used.

    Returns:
        A tensor of shape ``(num_selected_nodes, raw_embedding_dim)``.
    """
    logits = self.node_embedding if node_ids is None else self.node_embedding[node_ids]

    # Split flat logits into independent virtual-label subspaces.
    # Example: with raw_embedding_dim=8, num_subspaces=4, labels_per_subspace=2:
    #          logits.shape = (num_nodes, 8)
    #          -> viewed_logits shape = (num_nodes, 4, 2)
    #          viewed_logits[0] = [[l00, l01],  # node 0, subspace 0
    #                              [l02, l03],  # node 0, subspace 1
    #                              [l04, l05],  # node 0, subspace 2
    #                              [l06, l07]]  # node 0, subspace 3
    viewed_logits = logits.view(-1, self.num_subspaces, self.labels_per_subspace)

    # Convert each subspace's logits into a differentiable virtual-label assignment.
    # Example: viewed_logits[0, 0] = [0.03, -0.02]
    #          -> probs[0, 0] might be [0.47, 0.53] with tau=1.0
    #          probs.shape remains (num_nodes, 4, 2).
    probs = F.gumbel_softmax(viewed_logits, tau=self.tau, dim=2, hard=False)

    # Flatten subspaces back into a standard node-by-channel node feature matrix.
    # The aggregators expect matrices shaped (num_nodes, num_channels==raw_embedding_dim),
    # so propagation happens on the flattened channel dimension.
    # Example: probs.shape = (num_nodes, 4, 2) -> shape = (num_nodes, 8)
    return probs.reshape(-1, self.raw_embedding_dim)

`__message_passing(x, hyperedge_index, num_hyperedges)` ¶

One round of message passing, where nodes send messages to hyperedges and then hyperedges send messages back to nodes.

Parameters:

Name	Type	Description	Default
`x`	`Tensor`	Virtual node features of shape (num_nodes, raw_embedding_dim).	required
`hyperedge_index`	`Tensor`	Hyperedge index tensor of shape (2, num_edges).	required
`num_hyperedges`	`int`	Total number of hyperedges.	required

Returns:

Type	Description
`tuple[Tensor, Tensor]`	The updated node and hyperedge embeddings after one round of message passing.

Source code in hyperbench/models/villain.py

def __message_passing(
    self,
    x: Tensor,
    hyperedge_index: Tensor,
    num_hyperedges: int,
) -> tuple[Tensor, Tensor]:
    """
    Run a single node -> hyperedge -> node propagation round.

    Node features are first mean-pooled into hyperedge states, and those hyperedge states
    are then mean-pooled back into node states.

    Args:
        x: Virtual node features of shape (num_nodes, raw_embedding_dim).
        hyperedge_index: Hyperedge index tensor of shape (2, num_edges).
        num_hyperedges: Total number of hyperedges.

    Returns:
        A pair ``(node_states, hyperedge_states)`` produced by this round.
    """
    # Nodes -> hyperedges.
    to_hyperedges = HyperedgeAggregator(
        hyperedge_index=hyperedge_index,
        node_embeddings=x,
        num_hyperedges=num_hyperedges,
    )
    hyperedge_states = to_hyperedges.pool("mean")

    # Hyperedges -> nodes; the node count is taken from the incoming feature matrix.
    to_nodes = NodeAggregator(
        hyperedge_index=hyperedge_index,
        hyperedge_embeddings=hyperedge_states,
        num_nodes=x.size(0),
    )
    node_states = to_nodes.pool("mean")

    return node_states, hyperedge_states

`__num_hyperedges(hyperedge_index, num_hyperedges)` ¶

Return the explicit hyperedge count or infer it from the hyperedge_index, if not provided. Explicit counts are required when empty hyperedges must remain in the hypergraph.

Source code in hyperbench/models/villain.py

def __num_hyperedges(
    self,
    hyperedge_index: Tensor,
    num_hyperedges: int | None,
) -> int:
    """
    Return the explicit hyperedge count or infer it from the ``hyperedge_index``, if not provided.
    Explicit counts are required when empty hyperedges must remain in the hypergraph.
    """
    if num_hyperedges is not None:
        return num_hyperedges
    return HyperedgeIndex(hyperedge_index).num_hyperedges

Models¶

hyperbench.models ¶

CommonNeighbors ¶

forward(hyperedge_index, node_to_neighbors=None) ¶

GCN ¶

GCNConfig ¶

HGNN ¶

forward(x, hyperedge_index) ¶

HNHN ¶

forward(x, hyperedge_index) ¶

HGNNP ¶

forward(x, hyperedge_index) ¶

HyperGCN ¶

forward(x, hyperedge_index) ¶

MLP ¶

SLP ¶

NHP ¶

forward(x, hyperedge_index) ¶

Node2Vec ¶

Node2VecConfig ¶

Node2VecGCN ¶

VilLain ¶

forward(hyperedge_index, node_ids=None, num_hyperedges=None) ¶

loss(hyperedge_index, node_ids=None, num_hyperedges=None) ¶

hyperedge_embeddings(hyperedge_index, node_ids=None, num_hyperedges=None) ¶

node_embeddings(hyperedge_index, node_ids=None, num_hyperedges=None) ¶

reset_parameters() ¶

__embeddings(hyperedge_index, node_ids, num_hyperedges, mode='node') ¶

__get_initial_virtual_node_features(node_ids=None) ¶

__message_passing(x, hyperedge_index, num_hyperedges) ¶

__num_hyperedges(hyperedge_index, num_hyperedges) ¶

`hyperbench.models` ¶

`CommonNeighbors` ¶

`forward(hyperedge_index, node_to_neighbors=None)` ¶

`GCN` ¶

`GCNConfig` ¶

`HGNN` ¶

`forward(x, hyperedge_index)` ¶

`HNHN` ¶

`forward(x, hyperedge_index)` ¶

`HGNNP` ¶

`forward(x, hyperedge_index)` ¶

`HyperGCN` ¶

`forward(x, hyperedge_index)` ¶

`MLP` ¶

`SLP` ¶

`NHP` ¶

`forward(x, hyperedge_index)` ¶

`Node2Vec` ¶

`Node2VecConfig` ¶

`Node2VecGCN` ¶

`VilLain` ¶

`forward(hyperedge_index, node_ids=None, num_hyperedges=None)` ¶

`loss(hyperedge_index, node_ids=None, num_hyperedges=None)` ¶

`hyperedge_embeddings(hyperedge_index, node_ids=None, num_hyperedges=None)` ¶

`node_embeddings(hyperedge_index, node_ids=None, num_hyperedges=None)` ¶

`reset_parameters()` ¶

`__embeddings(hyperedge_index, node_ids, num_hyperedges, mode='node')` ¶

`__get_initial_virtual_node_features(node_ids=None)` ¶

`__message_passing(x, hyperedge_index, num_hyperedges)` ¶

`__num_hyperedges(hyperedge_index, num_hyperedges)` ¶