Skip to content

Models

hyperbench.models

CommonNeighbors

Bases: Module

Source code in hyperbench/models/common_neighbors.py
class CommonNeighbors(nn.Module):
    """
    Common-neighbors (CN) hyperedge scorer wrapped as an ``nn.Module``.

    Raw scores come from a ``NeighborScorer``; ``forward`` then applies ``log1p`` to them.

    Args:
        aggregation: Aggregation mode handed to the default ``CommonNeighborsScorer``.
            Ignored when ``scorer`` is provided.
        scorer: Optional scorer used instead of the default ``CommonNeighborsScorer``.
    """

    def __init__(
        self,
        aggregation: Literal["mean", "min", "sum"],
        scorer: NeighborScorer | None = None,
    ) -> None:
        super().__init__()
        # An explicitly supplied scorer wins; ``aggregation`` only configures the default one.
        self.scorer = scorer if scorer is not None else CommonNeighborsScorer(aggregation)

    def forward(
        self,
        hyperedge_index: Tensor,
        node_to_neighbors: dict[int, Neighborhood] | None = None,
    ) -> Tensor:
        """
        Compute ``log1p``-transformed CN scores for all hyperedges in the batch.

        Args:
            hyperedge_index: Tensor containing the hyperedge indices.
            node_to_neighbors: Optional mapping from nodes to their neighborhoods.

        Returns:
            A 1-D tensor of shape (num_hyperedges,) with CN scores.
        """
        scores = self.scorer.score_batch(hyperedge_index, node_to_neighbors)
        # Out-of-place on purpose: ``torch.log1p(scores, out=scores)`` raises when the scorer
        # returns integer-valued scores (the float result cannot be written into an integer
        # tensor) or a tensor that requires grad, and it would overwrite a tensor the scorer
        # may still be holding on to.
        return torch.log1p(scores)

forward(hyperedge_index, node_to_neighbors=None)

Compute CN scores for all hyperedges in the batch.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Tensor containing the hyperedge indices.

required
node_to_neighbors dict[int, Neighborhood] | None

Optional mapping from nodes to their neighborhoods.

None

Returns:

Type Description
Tensor

A 1-D tensor of shape (num_hyperedges,) with CN scores.

Source code in hyperbench/models/common_neighbors.py
def forward(
    self,
    hyperedge_index: Tensor,
    node_to_neighbors: dict[int, Neighborhood] | None = None,
) -> Tensor:
    """
    Compute ``log1p``-transformed CN scores for all hyperedges in the batch.

    Args:
        hyperedge_index: Tensor containing the hyperedge indices.
        node_to_neighbors: Optional mapping from nodes to their neighborhoods.

    Returns:
        A 1-D tensor of shape (num_hyperedges,) with CN scores.
    """
    scores = self.scorer.score_batch(hyperedge_index, node_to_neighbors)
    # Out-of-place on purpose: ``torch.log1p(scores, out=scores)`` raises when the scorer
    # returns integer-valued scores (the float result cannot be written into an integer
    # tensor) or a tensor that requires grad, and it would overwrite a tensor the scorer
    # may still be holding on to.
    return torch.log1p(scores)

GCN

Bases: Module

A reusable multi-layer GCN stack built from torch_geometric.nn.GCNConv.

Parameters:

Name Type Description Default
in_channels int

Dimension of the input node embeddings to the GCN layers.

required
out_channels int

Dimension of the output node embeddings from the GCN layers.

required
hidden_channels int | None

Dimension of the hidden node embeddings in the GCN layers. Defaults to in_channels.

None
num_layers int

Number of GCN layers. Must be at least 1. Defaults to 2.

2
drop_rate float

Dropout rate applied after each GCN layer except the last one.

0.0
bias bool

Whether to include a bias term in the GCN layers.

True
activation_fn ActivationFn | None

Activation function to use after each hidden layer. Defaults to nn.ReLU.

None
activation_fn_kwargs dict | None

Keyword arguments for the activation function. Defaults to empty dict.

None
improved bool

Whether to use the improved version of GCNConv.

False
add_self_loops bool

Whether to add self-loops to the input graph.

True
normalize bool

Whether to symmetrically normalize the adjacency matrix in GCNConv.

True
cached bool

Whether to cache the normalized adjacency matrix in GCNConv.

False
Source code in hyperbench/models/gcn.py
class GCN(nn.Module):
    """
    A reusable multi-layer GCN stack built from ``torch_geometric.nn.GCNConv``.

    Args:
        in_channels: Dimension of the input node embeddings to the GCN layers.
        out_channels: Dimension of the output node embeddings from the GCN layers.
        hidden_channels: Dimension of the hidden node embeddings in the GCN layers.
            Defaults to ``in_channels``.
        num_layers: Number of GCN layers. Must be at least 1. Defaults to ``2``.
        drop_rate: Dropout rate applied after each GCN layer except the last one.
        bias: Whether to include a bias term in the GCN layers.
        activation_fn: Activation function to use after each hidden layer. Defaults to ``nn.ReLU``.
        activation_fn_kwargs: Keyword arguments for the activation function. Defaults to empty dict.
        improved: Whether to use the improved version of ``GCNConv``.
        add_self_loops: Whether to add self-loops to the input graph.
        normalize: Whether to symmetrically normalize the adjacency matrix in ``GCNConv``.
        cached: Whether to cache the normalized adjacency matrix in ``GCNConv``.

    Raises:
        ValueError: If ``num_layers < 1``, or if ``num_layers > 1`` and the resolved
            ``hidden_channels`` is not positive.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        hidden_channels: int | None = None,
        num_layers: int = 2,
        drop_rate: float = 0.0,
        bias: bool = True,
        activation_fn: ActivationFn | None = None,
        activation_fn_kwargs: dict | None = None,
        improved: bool = False,
        add_self_loops: bool = True,
        normalize: bool = True,
        cached: bool = False,
    ):
        super().__init__()
        activation_fn = activation_fn if activation_fn is not None else nn.ReLU
        activation_fn_kwargs = activation_fn_kwargs if activation_fn_kwargs is not None else {}

        # One dropout and one activation instance are shared by every hidden layer.
        self.dropout = nn.Dropout(drop_rate)
        self.activation = activation_fn(**activation_fn_kwargs)
        self.layers = self.__build_layers(
            in_channels=in_channels,
            out_channels=out_channels,
            hidden_channels=hidden_channels,
            num_layers=num_layers,
            bias=bias,
            improved=improved,
            add_self_loops=add_self_loops,
            normalize=normalize,
            cached=cached,
        )

    def forward(self, x: Tensor, edge_index: Tensor) -> Tensor:
        """
        Run the input through every GCN layer in order.

        Activation and dropout are applied after each layer except the last one.

        Args:
            x: Node features fed to the first layer.
            edge_index: Graph connectivity, passed unchanged to every layer.

        Returns:
            The output of the last GCN layer.
        """
        last_idx = len(self.layers) - 1
        for idx, layer in enumerate(self.layers):
            x = layer(x, edge_index)

            # The final layer's output is returned raw (no activation, no dropout).
            if not is_layer(idx, last_idx):
                x = self.activation(x)
                x = self.dropout(x)

        return x

    def __build_layers(
        self,
        in_channels: int,
        out_channels: int,
        hidden_channels: int | None,
        num_layers: int,
        bias: bool,
        improved: bool,
        add_self_loops: bool,
        normalize: bool,
        cached: bool,
    ) -> nn.ModuleList:
        """
        Build the ``GCNConv`` stack: ``in -> hidden -> ... -> hidden -> out``.

        A single-layer stack maps ``in_channels`` directly to ``out_channels`` and does not
        use ``hidden_channels`` at all.

        Raises:
            ValueError: If ``num_layers < 1``, or if ``num_layers > 1`` and the resolved
                ``hidden_channels`` is not positive.
        """
        if num_layers < 1:
            raise ValueError(f"Expected num_layers >= 1 for GCN, got {num_layers}.")

        # Honour the documented default: an omitted hidden size falls back to ``in_channels``.
        # (Previously it fell back to 0, so the default two-layer configuration always raised.)
        hidden_channels = hidden_channels if hidden_channels is not None else in_channels
        if num_layers > 1 and hidden_channels <= 0:
            raise ValueError(
                f"Expected positive hidden_channels for GCN with multiple layers, got {hidden_channels}."
            )

        common_kwargs: dict[str, bool] = {
            "bias": bias,
            "improved": improved,
            "add_self_loops": add_self_loops,
            "normalize": normalize,
            "cached": cached,
        }

        if num_layers == 1:
            return nn.ModuleList([GCNConv(in_channels, out_channels, **common_kwargs)])

        layers = [GCNConv(in_channels, hidden_channels, **common_kwargs)]
        layers.extend(
            GCNConv(hidden_channels, hidden_channels, **common_kwargs)
            for _ in range(num_layers - 2)
        )
        layers.append(GCNConv(hidden_channels, out_channels, **common_kwargs))

        return nn.ModuleList(layers)

GCNConfig

Bases: TypedDict

Configuration for the GCN model.

Parameters:

Name Type Description Default
in_channels

Dimension of the input node embeddings to the GCN layers.

required
out_channels

Dimension of the output node embeddings from the GCN layers.

required
hidden_channels

Dimension of the hidden node embeddings in the GCN layers.

required
num_layers

Number of GCN layers. Must be at least 1. Defaults to 2.

required
drop_rate

Dropout rate applied after each GCN layer (except the last one). Defaults to 0.0 (no dropout).

required
activation_fn

Activation function to use after each hidden layer. Defaults to nn.ReLU.

required
activation_fn_kwargs

Keyword arguments for the activation function. Defaults to empty dict.

required
bias

Whether to include a bias term in the GCN layers. Defaults to True.

required
improved

Whether to use the improved version of GCNConv. Defaults to False.

required
add_self_loops

Whether to add self-loops to the input graph. Defaults to True.

required
normalize

Whether to symmetrically normalize the adjacency matrix in GCNConv. Defaults to True.

required
cached

Whether to cache the normalized adjacency matrix in GCNConv. Only applicable if the graph structure does not change between epochs. Defaults to False.

required
Source code in hyperbench/models/gcn.py
class GCNConfig(TypedDict):
    """
    Configuration for the GCN model.

    Only ``in_channels`` and ``out_channels`` are required keys; every other key is
    declared ``NotRequired`` and may be omitted. The defaults quoted below are those of the
    ``GCN`` constructor parameters with the same names; this TypedDict itself stores no
    default values.

    Args:
        in_channels: Dimension of the input node embeddings to the GCN layers.
        out_channels: Dimension of the output node embeddings from the GCN layers.
        hidden_channels: Dimension of the hidden node embeddings in the GCN layers.
        num_layers: Number of GCN layers. Must be at least 1. Defaults to ``2``.
        drop_rate: Dropout rate applied after each GCN layer (except the last one). Defaults to ``0.0`` (no dropout).
        activation_fn: Activation function to use after each hidden layer. Defaults to ``nn.ReLU``.
        activation_fn_kwargs: Keyword arguments for the activation function. Defaults to empty dict.
        bias: Whether to include a bias term in the GCN layers. Defaults to ``True``.
        improved: Whether to use the improved version of GCNConv. Defaults to ``False``.
        add_self_loops: Whether to add self-loops to the input graph. Defaults to ``True``.
        normalize: Whether to symmetrically normalize the adjacency matrix in GCNConv. Defaults to ``True``.
        cached: Whether to cache the normalized adjacency matrix in GCNConv.
            Only applicable if the graph structure does not change between epochs. Defaults to ``False``.
    """

    # Required keys.
    in_channels: int
    out_channels: int
    # Optional keys; the key names mirror the GCN constructor parameters.
    # NOTE(review): presumably consumed as ``GCN(**config)`` — confirm against the caller.
    hidden_channels: NotRequired[int]
    num_layers: NotRequired[int]
    drop_rate: NotRequired[float]
    bias: NotRequired[bool]
    activation_fn: NotRequired[ActivationFn]
    activation_fn_kwargs: NotRequired[dict]
    improved: NotRequired[bool]
    add_self_loops: NotRequired[bool]
    normalize: NotRequired[bool]
    cached: NotRequired[bool]

HGNN

Bases: Module

HGNN performs spectral convolution directly on the hypergraph structure using the node-hyperedge incidence matrix, without any reduction to a pairwise graph. Unlike HyperGCN (which approximates each hyperedge by selecting representative pairwise edges via random projection), HGNN preserves all higher-order relationships by passing messages through the full incidence structure: nodes -> hyperedges -> nodes. - Proposed in Hypergraph Neural Networks <https://arxiv.org/pdf/1809.09401>_ paper (AAAI 2019). - Reference implementation: source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hgnn.html#HGNN>_.

Parameters:

Name Type Description Default
in_channels int

The number of input channels.

required
hidden_channels int

The number of hidden channels.

required
num_classes int

The number of output channels.

required
bias bool

If set to False, the layer will not learn the bias parameter. Defaults to True.

True
use_batch_normalization bool

If set to True, layers will use batch normalization. Defaults to False.

False
drop_rate float

Dropout ratio. Defaults to 0.5.

0.5
Source code in hyperbench/models/hgnn.py
class HGNN(nn.Module):
    """
    Two-layer HGNN: spectral convolution applied directly to the hypergraph through the
    node-hyperedge incidence matrix, with no reduction to a pairwise graph.
    In contrast to HyperGCN (which approximates each hyperedge with representative pairwise
    edges chosen via random projection), HGNN keeps all higher-order relationships by
    propagating messages over the full incidence structure: nodes -> hyperedges -> nodes.
    - Proposed in `Hypergraph Neural Networks <https://arxiv.org/pdf/1809.09401>`_ paper (AAAI 2019).
    - Reference implementation: `source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hgnn.html#HGNN>`_.

    Args:
        in_channels: The number of input channels.
        hidden_channels: The number of hidden channels.
        num_classes: The number of output channels.
        bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
        use_batch_normalization: If set to ``True``, layers will use batch normalization. Defaults to ``False``.
        drop_rate: Dropout ratio. Defaults to ``0.5``.
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: int,
        num_classes: int,
        bias: bool = True,
        use_batch_normalization: bool = False,
        drop_rate: float = 0.5,
    ):
        super().__init__()

        # Hidden layer: the only one that receives ``drop_rate``.
        hidden_conv = HGNNConv(
            in_channels=in_channels,
            out_channels=hidden_channels,
            bias=bias,
            use_batch_normalization=use_batch_normalization,
            drop_rate=drop_rate,
        )
        # Output layer: flagged with ``is_last=True``.
        output_conv = HGNNConv(
            in_channels=hidden_channels,
            out_channels=num_classes,
            bias=bias,
            use_batch_normalization=use_batch_normalization,
            is_last=True,
        )
        self.layers = nn.ModuleList([hidden_conv, output_conv])

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Run the node features through the two stacked ``HGNNConv`` layers.

        The first layer maps ``in_channels -> hidden_channels`` and applies ReLU + dropout.
        The second layer is the output layer (no activation/dropout) and maps
        ``hidden_channels -> num_classes``.

        Args:
            x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
            hyperedge_index: Hyperedge incidence in COO format. Size ``(2, num_incidences)``,
                where row 0 contains node IDs and row 1 contains hyperedge IDs.

        Returns:
            The output node feature matrix. Size ``(num_nodes, num_classes)``.
        """
        embeddings = x
        for conv in self.layers:
            embeddings = conv(embeddings, hyperedge_index)
        return embeddings

forward(x, hyperedge_index)

Apply two stacked HGNNConv layers to produce node embeddings.

The first layer applies ReLU + dropout and maps in_channels -> hidden_channels. The second layer is the output layer (no activation/dropout) and maps hidden_channels -> num_classes.

Parameters:

Name Type Description Default
x Tensor

Input node feature matrix. Size (num_nodes, in_channels).

required
hyperedge_index Tensor

Hyperedge incidence in COO format. Size (2, num_incidences), where row 0 contains node IDs and row 1 contains hyperedge IDs.

required

Returns:

Type Description
Tensor

The output node feature matrix. Size (num_nodes, num_classes).

Source code in hyperbench/models/hgnn.py
def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Run the node features through the two stacked ``HGNNConv`` layers.

    The first layer maps ``in_channels -> hidden_channels`` and applies ReLU + dropout.
    The second layer is the output layer (no activation/dropout) and maps
    ``hidden_channels -> num_classes``.

    Args:
        x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
        hyperedge_index: Hyperedge incidence in COO format. Size ``(2, num_incidences)``,
            where row 0 contains node IDs and row 1 contains hyperedge IDs.

    Returns:
        The output node feature matrix. Size ``(num_nodes, num_classes)``.
    """
    embeddings = x
    for conv in self.layers:
        embeddings = conv(embeddings, hyperedge_index)
    return embeddings

HNHN

Bases: Module

HNHN performs incidence-based hypergraph convolution with explicit hyperedge embeddings between the node -> hyperedge -> node propagation steps. - Proposed in HNHN: Hypergraph Networks with Hyperedge Neurons <https://arxiv.org/abs/2006.12278>_ paper. - Reference implementation: source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hnhn.html#HNHN>_.

Parameters:

Name Type Description Default
in_channels int

The number of input channels.

required
hidden_channels int

The number of hidden channels.

required
num_classes int

The number of output channels.

required
bias bool

If set to False, the layer will not learn the bias parameter. Defaults to True.

True
use_batch_normalization bool

If set to True, layers will use batch normalization. Defaults to False.

False
drop_rate float

Dropout ratio. Defaults to 0.5.

0.5
Source code in hyperbench/models/hnhn.py
class HNHN(nn.Module):
    """
    Two-layer HNHN: incidence-based hypergraph convolution that keeps explicit hyperedge
    embeddings between the node -> hyperedge -> node propagation steps.
    - Proposed in `HNHN: Hypergraph Networks with Hyperedge Neurons <https://arxiv.org/abs/2006.12278>`_ paper.
    - Reference implementation: `source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hnhn.html#HNHN>`_.

    Args:
        in_channels: The number of input channels.
        hidden_channels: The number of hidden channels.
        num_classes: The number of output channels.
        bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
        use_batch_normalization: If set to ``True``, layers will use batch normalization. Defaults to ``False``.
        drop_rate: Dropout ratio. Defaults to ``0.5``.
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: int,
        num_classes: int,
        bias: bool = True,
        use_batch_normalization: bool = False,
        drop_rate: float = 0.5,
    ):
        super().__init__()

        # Hidden layer: the only one that receives ``drop_rate``.
        hidden_conv = HNHNConv(
            in_channels=in_channels,
            out_channels=hidden_channels,
            bias=bias,
            use_batch_normalization=use_batch_normalization,
            drop_rate=drop_rate,
        )
        # Output layer: flagged with ``is_last=True``.
        output_conv = HNHNConv(
            in_channels=hidden_channels,
            out_channels=num_classes,
            bias=bias,
            use_batch_normalization=use_batch_normalization,
            is_last=True,
        )
        self.layers = nn.ModuleList([hidden_conv, output_conv])

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Run the node features through the two stacked ``HNHNConv`` layers.

        Args:
            x: Input node feature matrix of size ``(num_nodes, in_channels)``.
            hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``.

        Returns:
            The output node feature matrix of size ``(num_nodes, num_classes)``.
        """
        embeddings = x
        for conv in self.layers:
            embeddings = conv(embeddings, hyperedge_index)
        return embeddings

forward(x, hyperedge_index)

Apply two stacked HNHNConv layers to produce node embeddings.

Parameters:

Name Type Description Default
x Tensor

Input node feature matrix of size (num_nodes, in_channels).

required
hyperedge_index Tensor

Hyperedge incidence in COO format of size (2, num_incidences).

required

Returns:

Type Description
Tensor

The output node feature matrix of size (num_nodes, num_classes).

Source code in hyperbench/models/hnhn.py
def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Run the node features through the two stacked ``HNHNConv`` layers.

    Args:
        x: Input node feature matrix of size ``(num_nodes, in_channels)``.
        hyperedge_index: Hyperedge incidence in COO format of size ``(2, num_incidences)``.

    Returns:
        The output node feature matrix of size ``(num_nodes, num_classes)``.
    """
    embeddings = x
    for conv in self.layers:
        embeddings = conv(embeddings, hyperedge_index)
    return embeddings

HGNNP

Bases: Module

HGNN+ performs hypergraph convolution with two-stage mean aggregation using the incidence structure directly: nodes -> hyperedges -> nodes. - Proposed in HGNN+: General Hypergraph Neural Networks <https://ieeexplore.ieee.org/document/9795251>_ paper (IEEE T-PAMI 2022). - Reference implementation: source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hgnnp.html#HGNNP>_.

Parameters:

Name Type Description Default
in_channels int

The number of input channels.

required
hidden_channels int

The number of hidden channels.

required
num_classes int

The number of output channels.

required
bias bool

If set to False, the layer will not learn the bias parameter. Defaults to True.

True
use_batch_normalization bool

If set to True, layers will use batch normalization. Defaults to False.

False
drop_rate float

Dropout ratio. Defaults to 0.5.

0.5
Source code in hyperbench/models/hgnnp.py
class HGNNP(nn.Module):
    """
    Two-layer HGNN+: hypergraph convolution with two-stage mean aggregation that works
    directly on the incidence structure: nodes -> hyperedges -> nodes.
    - Proposed in `HGNN+: General Hypergraph Neural Networks <https://ieeexplore.ieee.org/document/9795251>`_ paper (IEEE T-PAMI 2022).
    - Reference implementation: `source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hgnnp.html#HGNNP>`_.

    Args:
        in_channels: The number of input channels.
        hidden_channels: The number of hidden channels.
        num_classes: The number of output channels.
        bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
        use_batch_normalization: If set to ``True``, layers will use batch normalization. Defaults to ``False``.
        drop_rate: Dropout ratio. Defaults to ``0.5``.
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: int,
        num_classes: int,
        bias: bool = True,
        use_batch_normalization: bool = False,
        drop_rate: float = 0.5,
    ):
        super().__init__()

        # Hidden layer: the only one that receives ``drop_rate``.
        hidden_conv = HGNNPConv(
            in_channels=in_channels,
            out_channels=hidden_channels,
            bias=bias,
            use_batch_normalization=use_batch_normalization,
            drop_rate=drop_rate,
        )
        # Output layer: flagged with ``is_last=True``.
        output_conv = HGNNPConv(
            in_channels=hidden_channels,
            out_channels=num_classes,
            bias=bias,
            use_batch_normalization=use_batch_normalization,
            is_last=True,
        )
        self.layers = nn.ModuleList([hidden_conv, output_conv])

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Run the node features through the two stacked ``HGNNPConv`` layers.

        Args:
            x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
            hyperedge_index: Hyperedge incidence in COO format. Size ``(2, num_incidences)``,
                where row 0 contains node IDs and row 1 contains hyperedge IDs.

        Returns:
            The output node feature matrix. Size ``(num_nodes, num_classes)``.
        """
        embeddings = x
        for conv in self.layers:
            embeddings = conv(embeddings, hyperedge_index)
        return embeddings

forward(x, hyperedge_index)

Apply two stacked HGNNPConv layers to produce node embeddings.

Parameters:

Name Type Description Default
x Tensor

Input node feature matrix. Size (num_nodes, in_channels).

required
hyperedge_index Tensor

Hyperedge incidence in COO format. Size (2, num_incidences), where row 0 contains node IDs and row 1 contains hyperedge IDs.

required

Returns:

Type Description
Tensor

The output node feature matrix. Size (num_nodes, num_classes).

Source code in hyperbench/models/hgnnp.py
def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Run the node features through the two stacked ``HGNNPConv`` layers.

    Args:
        x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
        hyperedge_index: Hyperedge incidence in COO format. Size ``(2, num_incidences)``,
            where row 0 contains node IDs and row 1 contains hyperedge IDs.

    Returns:
        The output node feature matrix. Size ``(num_nodes, num_classes)``.
    """
    embeddings = x
    for conv in self.layers:
        embeddings = conv(embeddings, hyperedge_index)
    return embeddings

HyperGCN

Bases: Module

HyperGCN approximates each hyperedge of the hypergraph by a set of pairwise edges connecting the vertices of the hyperedge and treats the learning problem as a graph learning problem on the approximation. - Proposed in HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs <https://dl.acm.org/doi/10.5555/3454287.3454422>_ paper (NeurIPS 2019). - Code of the paper: source <https://github.com/malllabiisc/HyperGCN>. - Reference implementation: source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hypergcn.html#HyperGCN>.

Parameters:

Name Type Description Default
in_channels int

The number of input channels.

required
hidden_channels int

The number of hidden channels.

required
num_classes int

The number of classes of the classification task, as HyperGCN is a node classification model.

required
bias bool

If set to False, the layer will not learn the bias parameter. Defaults to True.

True
use_batch_normalization bool

If set to True, layers will use batch normalization. Defaults to False.

False
drop_rate float

Dropout ratio. Defaults to 0.5.

0.5
use_mediator bool

Whether to use mediator to transform the hyperedges to edges in the graph. Defaults to False.

False
fast bool

If set to True, the transformed graph structure will be computed once from the input hypergraph and vertex features, and cached for future use. Defaults to True.

True
Source code in hyperbench/models/hypergcn.py
class HyperGCN(nn.Module):
    """
    Two-layer HyperGCN: each hyperedge of the hypergraph is approximated by a set of pairwise
    edges connecting its vertices, and learning is then treated as a graph learning problem
    on that approximation.
    - Proposed in `HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs <https://dl.acm.org/doi/10.5555/3454287.3454422>`_ paper (NeurIPS 2019).
    - Code of the paper: `source <https://github.com/malllabiisc/HyperGCN>`_.
    - Reference implementation: `source <https://deephypergraph.readthedocs.io/en/latest/_modules/dhg/models/hypergraphs/hypergcn.html#HyperGCN>`_.

    Args:
        in_channels: The number of input channels.
        hidden_channels: The number of hidden channels.
        num_classes: The number of classes of the classification task, as HyperGCN is a node classification model.
        bias: If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
        use_batch_normalization: If set to ``True``, layers will use batch normalization. Defaults to ``False``.
        drop_rate: Dropout ratio. Defaults to ``0.5``.
        use_mediator: Whether to use mediator to transform the hyperedges to edges in the graph. Defaults to ``False``.
        fast: If set to ``True``, the transformed graph structure will be computed once from the input hypergraph and vertex features, and cached for future use. Defaults to ``True``.
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: int,
        num_classes: int,
        bias: bool = True,
        use_batch_normalization: bool = False,
        drop_rate: float = 0.5,
        use_mediator: bool = False,
        fast: bool = True,
    ):
        super().__init__()
        self.fast = fast
        self.use_mediator = use_mediator
        # Filled lazily by ``forward`` when ``fast`` is enabled.
        self.cached_gcn_laplacian_matrix: Tensor | None = None

        # Hidden layer: the only one that receives ``drop_rate``.
        hidden_conv = HyperGCNConv(
            in_channels=in_channels,
            out_channels=hidden_channels,
            bias=bias,
            use_batch_normalization=use_batch_normalization,
            drop_rate=drop_rate,
            use_mediator=use_mediator,
        )
        # Output layer: flagged with ``is_last=True``.
        output_conv = HyperGCNConv(
            in_channels=hidden_channels,
            out_channels=num_classes,
            bias=bias,
            use_batch_normalization=use_batch_normalization,
            use_mediator=use_mediator,
            is_last=True,
        )
        self.layers = nn.ModuleList([hidden_conv, output_conv])

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Run the node features through the two stacked ``HyperGCNConv`` layers.

        When ``fast`` is disabled, each layer is simply called with ``hyperedge_index``.
        When ``fast`` is enabled, a GCN Laplacian is built from ``hyperedge_index`` and ``x``,
        stored on the module, and handed to every layer; it is rebuilt only when no matrix is
        cached yet or when its first dimension no longer matches ``x.size(0)``.

        Args:
            x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
            hyperedge_index: The hyperedge indices of the hypergraph. Size ``(2, num_incidences)``.

        Returns:
            The output node feature matrix. Size ``(num_nodes, num_classes)``.
        """
        if self.fast:
            num_nodes = x.size(0)
            cached = self.cached_gcn_laplacian_matrix

            # The cache is validated by node count only. A mismatch can happen, for example,
            # after adding new negative samples or when validation/test sets have different
            # node features.
            cache_is_usable = cached is not None and cached.size(0) == num_nodes
            if not cache_is_usable:
                reducer = HyperedgeIndex(hyperedge_index)
                edge_index, edge_weights = reducer.reduce_to_edge_index_on_random_direction(
                    x=x,
                    with_mediators=self.use_mediator,
                    return_weights=True,
                )
                reduced_graph = EdgeIndex(edge_index=edge_index, edge_weights=edge_weights)
                self.cached_gcn_laplacian_matrix = (
                    reduced_graph.get_sparse_normalized_gcn_laplacian(num_nodes=num_nodes)
                )

            laplacian = self.cached_gcn_laplacian_matrix
            for conv in self.layers:
                x = conv(x, hyperedge_index, gcn_laplacian_matrix=laplacian)
            return x

        # Slow path: no shared Laplacian is passed to the layers.
        for conv in self.layers:
            x = conv(x, hyperedge_index)
        return x

forward(x, hyperedge_index)

The forward function.

Parameters:

Name Type Description Default
x Tensor

Input node feature matrix. Size (num_nodes, in_channels).

required
hyperedge_index Tensor

The hyperedge indices of the hypergraph in COO format. Size (2, num_incidences).

required

Returns:

Type Description
Tensor

The output node feature matrix. Size (num_nodes, num_classes).

Source code in hyperbench/models/hypergcn.py
def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Propagate the node features through every layer of the model.

    In fast mode a single GCN Laplacian is built from the hypergraph, stored on the module,
    and handed to every layer (and reused by later calls while the number of nodes stays the same).
    Otherwise each layer only receives ``x`` and ``hyperedge_index``.

    Args:
        x: Input node feature matrix. Size ``(num_nodes, in_channels)``.
        hyperedge_index: The hyperedge indices of the hypergraph. Size ``(2, num_hyperedges)``.

    Returns:
        The output node feature matrix. Size ``(num_nodes, num_classes)``.
    """
    if self.fast:
        laplacian = self.cached_gcn_laplacian_matrix

        # Rebuild when nothing is cached yet, or when the cached matrix was built for a
        # different number of nodes. The latter can happen, for example, after adding new
        # negative samples or when validation/test sets come with different node features.
        if laplacian is None or laplacian.size(0) != x.size(0):
            reducer = HyperedgeIndex(hyperedge_index)
            edge_index, edge_weights = reducer.reduce_to_edge_index_on_random_direction(
                x=x,
                with_mediators=self.use_mediator,
                return_weights=True,
            )

            weighted_graph = EdgeIndex(edge_index=edge_index, edge_weights=edge_weights)
            self.cached_gcn_laplacian_matrix = (
                weighted_graph.get_sparse_normalized_gcn_laplacian(num_nodes=x.size(0))
            )

        for layer in self.layers:
            x = layer(x, hyperedge_index, gcn_laplacian_matrix=self.cached_gcn_laplacian_matrix)
        return x

    # Slow path: no shared Laplacian is passed to the layers.
    for layer in self.layers:
        x = layer(x, hyperedge_index)
    return x

MLP

Bases: Module

A simple multi-layer perceptron (MLP) with configurable number of layers, hidden channels, activation functions, normalization, and dropout.

Examples:

>>> mlp = MLP(in_channels=16, out_channels=1, hidden_channels=32, num_layers=3)
>>> x = torch.randn(10, 16)  # 10 samples, 16 features
>>> output = mlp(x)
>>> output.shape
... torch.Size([10, 1])

With custom activation, normalization, and dropout:

>>> mlp = MLP(
...     in_channels=16,
...     out_channels=1,
...     hidden_channels=32,
...     num_layers=3,
...     activation_fn=nn.Tanh,                   # nn.ReLU, nn.LeakyReLU, etc.
...     activation_fn_kwargs={"inplace": True},
...     normalization_fn=nn.BatchNorm1d,         # nn.LayerNorm, etc.
...     normalization_fn_kwargs={"eps": 1e-5},
...     drop_rate=0.5,
... )
>>> x = torch.randn(10, 16)
>>> output = mlp(x)
>>> output.shape
... torch.Size([10, 1])

Parameters:

Name Type Description Default
in_channels int

Number of input features.

required
out_channels int

Number of output features.

required
hidden_channels int | None

Number of hidden units in each hidden layer. Required if num_layers > 1.

None
num_layers int

Total number of layers (including output layer). Must be at least 1. Defaults to 1.

1
activation_fn ActivationFn | None

Activation function to use after each hidden layer. Defaults to nn.ReLU.

None
activation_fn_kwargs dict | None

Keyword arguments for the activation function. Defaults to empty dict.

None
normalization_fn NormalizationFn | None

Normalization function to use after each hidden layer (before activation). If None, no normalization is applied. Defaults to None.

None
normalization_fn_kwargs dict | None

Keyword arguments for the normalization function. Defaults to empty dict.

None
bias bool

Whether to include bias terms in the linear layers. Defaults to True.

True
drop_rate float

Dropout rate to apply after each hidden layer (after activation). If 0.0, no dropout is applied. Defaults to 0.0.

0.0
Source code in hyperbench/models/mlp.py
class MLP(nn.Module):
    """
    A simple multi-layer perceptron (MLP) with configurable number of layers, hidden channels, activation functions, normalization, and dropout.

    Each hidden layer is built as ``Linear -> [normalization] -> activation -> [dropout]``;
    the output layer is a bare ``Linear``.

    Examples:
        >>> mlp = MLP(in_channels=16, out_channels=1, hidden_channels=32, num_layers=3)
        >>> x = torch.randn(10, 16)  # 10 samples, 16 features
        >>> output = mlp(x)
        >>> output.shape
        torch.Size([10, 1])

        With custom activation, normalization, and dropout:

        >>> mlp = MLP(
        ...     in_channels=16,
        ...     out_channels=1,
        ...     hidden_channels=32,
        ...     num_layers=3,
        ...     activation_fn=nn.LeakyReLU,              # nn.ReLU, nn.ELU, etc.
        ...     activation_fn_kwargs={"inplace": True},  # must be accepted by activation_fn
        ...     normalization_fn=nn.BatchNorm1d,         # nn.LayerNorm, etc.
        ...     normalization_fn_kwargs={"eps": 1e-5},
        ...     drop_rate=0.5,
        ... )
        >>> x = torch.randn(10, 16)
        >>> output = mlp(x)
        >>> output.shape
        torch.Size([10, 1])

    Args:
        in_channels: Number of input features.
        out_channels: Number of output features.
        hidden_channels: Number of hidden units in each hidden layer. Required if num_layers > 1.
        num_layers: Total number of layers (including output layer). Must be at least 1. Defaults to 1.
        activation_fn: Activation function to use after each hidden layer. Defaults to ``nn.ReLU``.
        activation_fn_kwargs: Keyword arguments for the activation function. They are forwarded as-is
            to ``activation_fn``, so every key must be accepted by its constructor
            (e.g. ``nn.Tanh`` takes no arguments). Defaults to empty dict.
        normalization_fn: Normalization function to use after each hidden layer (before activation).
            It is called with ``hidden_channels`` as its first positional argument.
            If ``None``, no normalization is applied. Defaults to ``None``.
        normalization_fn_kwargs: Keyword arguments for the normalization function. Defaults to empty dict.
        bias: Whether to include bias terms in the linear layers. Defaults to ``True``.
        drop_rate: Dropout rate to apply after each hidden layer (after activation). If 0.0, no dropout is applied. Defaults to 0.0.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1, or if ``num_layers`` is greater than 1
            and ``hidden_channels`` is ``None``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        hidden_channels: int | None = None,
        num_layers: int = 1,
        activation_fn: ActivationFn | None = None,
        activation_fn_kwargs: dict | None = None,
        normalization_fn: NormalizationFn | None = None,
        normalization_fn_kwargs: dict | None = None,
        bias: bool = True,
        drop_rate: float = 0.0,
    ) -> None:
        super().__init__()
        self.__validate_num_layers(num_layers, hidden_channels)

        # Validation guarantees hidden_channels is set whenever num_layers > 1,
        # so 0 is only a placeholder for the single-layer configuration.
        hidden_channels = hidden_channels if hidden_channels is not None else 0
        activation_fn = activation_fn if activation_fn is not None else nn.ReLU
        activation_fn_kwargs = activation_fn_kwargs if activation_fn_kwargs is not None else {}
        normalization_fn_kwargs = (
            normalization_fn_kwargs if normalization_fn_kwargs is not None else {}
        )

        # A plain list is enough here: nn.Sequential registers the modules itself.
        layers: list[nn.Module] = []
        for layer_idx in range(num_layers):
            is_output_layer = is_layer(layer_idx, num_layers - 1)

            linear_layer = nn.Linear(
                in_features=in_channels if is_input_layer(layer_idx) else hidden_channels,
                out_features=out_channels if is_output_layer else hidden_channels,
                bias=bias,
            )
            layers.append(linear_layer)

            # Normalization, activation and dropout are only applied to hidden layers.
            if not is_output_layer:
                if normalization_fn is not None:
                    layers.append(normalization_fn(hidden_channels, **normalization_fn_kwargs))

                layers.append(activation_fn(**activation_fn_kwargs))

                if drop_rate > 0.0:
                    layers.append(nn.Dropout(drop_rate))

        self.layers = nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply all layers of the MLP in order.

        Args:
            x: Input features whose last dimension is ``in_channels``.

        Returns:
            The output of the last linear layer, with last dimension ``out_channels``.
        """
        return self.layers(x)

    def __validate_num_layers(self, num_layers: int, hidden_channels: int | None) -> None:
        """
        Check that the requested depth is valid and that a hidden width is given when needed.

        Raises:
            ValueError: If ``num_layers < 1``, or if ``num_layers > 1`` and ``hidden_channels`` is ``None``.
        """
        if num_layers < 1:
            raise ValueError("At least one layer is required for MLP.")
        if num_layers > 1 and hidden_channels is None:
            raise ValueError("hidden_channels must be specified for MLP with more than 1 layer.")

SLP

Bases: MLP

A single-layer perceptron (SLP) which is a special case of MLP with exactly one layer and no hidden units.

Examples:

>>> slp = SLP(in_channels=16, out_channels=1)
>>> x = torch.randn(10, 16)  # 10 samples, 16 features
>>> output = slp(x)
>>> output.shape
... torch.Size([10, 1])

Parameters:

Name Type Description Default
in_channels int

Number of input features.

required
out_channels int

Number of output features.

required
Source code in hyperbench/models/mlp.py
class SLP(MLP):
    """
    Single-layer perceptron (SLP): the special case of ``MLP`` with exactly one layer and no hidden units.

    Examples:
        >>> slp = SLP(in_channels=16, out_channels=1)
        >>> x = torch.randn(10, 16)  # 10 samples, 16 features
        >>> output = slp(x)
        >>> output.shape
        torch.Size([10, 1])

    Args:
        in_channels: Number of input features.
        out_channels: Number of output features.
    """

    def __init__(self, in_channels: int, out_channels: int):
        # Only the layer count is pinned; every other MLP option keeps its default.
        super().__init__(in_channels, out_channels, num_layers=1)

NHP

Bases: Module

Neural Hyperlink Predictor (NHP) for undirected hyperedge link prediction. Proposed in the paper "NHP: Neural Hypergraph Link Prediction" (CIKM 2020), https://dl.acm.org/doi/10.1145/3340531.3411870. Reference implementation: https://github.com/cyixiao/NHP-reproduce/.

NHP scores each candidate hyperedge by building candidate-specific node embeddings. A node that appears in multiple candidate hyperedges can receive a different incidence embedding in each one, because its update depends on the other nodes in that candidate hyperedge.

Examples:

>>> x = [
...     [1., 0.],  # node 0
...     [0., 1.],  # node 1
...     [1., 1.],  # node 2
...     [1., 0.],  # node 3
... ]
>>> hyperedge_index = [
...     [0, 1, 1, 2, 3],  # node IDs
...     [0, 0, 1, 1, 1],  # hyperedge IDs
... ]
>>> # hyperedge 0 = {node 0, node 1}
>>> # hyperedge 1 = {node 1, node 2, node 3}
>>> model = NHP(in_channels=2, hidden_channels=8, aggregation="maxmin")
>>> scores = model(x, hyperedge_index)
>>> scores.shape
... torch.Size([2])

Parameters:

Name Type Description Default
in_channels int

Number of input features per node.

required
hidden_channels int

Number of hidden units in the node embeddings.

required
activation_fn ActivationFn | None

Activation function to use after the linear transformations. Defaults to nn.ReLU.

None
activation_fn_kwargs dict | None

Keyword arguments for the activation function. Defaults to empty dict.

None
aggregation Literal['mean', 'maxmin']

Method to aggregate the incidence embeddings into a hyperedge embedding. Must be either "maxmin" or "mean". Defaults to "maxmin".

'maxmin'
bias bool

Whether to include bias terms in the linear layers. Defaults to True.

True
Source code in hyperbench/models/nhp.py
class NHP(nn.Module):
    """
    Neural Hyperlink Predictor (NHP) for undirected hyperedge link prediction.
    - Proposed in `NHP: Neural Hypergraph Link Prediction <https://dl.acm.org/doi/10.1145/3340531.3411870>`_ paper (CIKM 2020).
    - Reference implementation: `source <https://github.com/cyixiao/NHP-reproduce/>`_.

    NHP scores each candidate hyperedge by building candidate-specific node embeddings.
    A node that appears in multiple candidate hyperedges can receive a different incidence embedding in each one,
    because its update depends on the other nodes in that candidate hyperedge.

    Examples:
        >>> x = [
        ...     [1., 0.],  # node 0
        ...     [0., 1.],  # node 1
        ...     [1., 1.],  # node 2
        ...     [1., 0.],  # node 3
        ... ]
        >>> hyperedge_index = [
        ...     [0, 1, 1, 2, 3],  # node IDs
        ...     [0, 0, 1, 1, 1],  # hyperedge IDs
        ... ]
        >>> # hyperedge 0 = {node 0, node 1}
        >>> # hyperedge 1 = {node 1, node 2, node 3}
        >>> model = NHP(in_channels=2, hidden_channels=8, aggregation="maxmin")
        >>> scores = model(x, hyperedge_index)
        >>> scores.shape
        ... torch.Size([2])

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Number of hidden units in the node embeddings.
        activation_fn: Activation function to use after the linear transformations. Defaults to ``nn.ReLU``.
        activation_fn_kwargs: Keyword arguments for the activation function. Defaults to empty dict.
        aggregation: Method to aggregate the incidence embeddings into a hyperedge embedding. Must be either "maxmin" or "mean". Defaults to "maxmin".
        bias: Whether to include bias terms in the linear layers. Defaults to ``True``.
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: int,
        activation_fn: ActivationFn | None = None,
        activation_fn_kwargs: dict | None = None,
        aggregation: Literal["mean", "maxmin"] = "maxmin",
        bias: bool = True,
    ):
        super().__init__()

        activation_fn = activation_fn if activation_fn is not None else nn.ReLU
        activation_fn_kwargs = activation_fn_kwargs if activation_fn_kwargs is not None else {}

        self.aggregation = aggregation

        self.self_loop = nn.Linear(in_channels, hidden_channels, bias=bias)
        # GCN message passing is implemented through neighbor sum computation,
        # so one projection is enough for the hyperedge-aware term
        self.hyperedge_aware = nn.Linear(in_channels, hidden_channels, bias=bias)
        self.activation_fn = activation_fn(**activation_fn_kwargs)

        self.hyperedge_score = nn.Linear(hidden_channels, 1, bias=bias)

    def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
        """
        Score each candidate hyperedge.

        Args:
            x: Node feature matrix of shape ``(num_nodes, in_channels)``.
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.

        Returns:
            Scores of shape ``(num_hyperedges,)``.
        """
        if hyperedge_index.numel() == 0:
            return x.new_empty((0,))

        # Example: hyperedge_index = [[0, 1, 1, 2, 3],  == node_ids
        #                             [0, 0, 1, 1, 1]]  == hyperedge_ids
        node_ids = hyperedge_index[0]
        hyperedge_ids = hyperedge_index[1]

        # Gather the node features for each incidence
        # Example: x = [[1, 0],  # node 0
        #               [0, 1],  # node 1
        #               [1, 1],  # node 2
        #               [1, 0]]  # node 3
        #          node_ids = [0, 1, 1, 2, 3]
        #          -> incidence_node_features = [[1, 0],  # node 0 in hyperedge 0
        #                                        [0, 1],  # node 1 in hyperedge 0
        #                                        [0, 1],  # node 1 in hyperedge 1
        #                                        [1, 1],  # node 2 in hyperedge 1
        #                                        [1, 0]]  # node 3 in hyperedge 1
        #             shape: (num_incidences, in_channels)
        incidence_node_features = x[node_ids]

        # Do one local message-passing step to sum original node features per hyperedge to get hyperedge features.
        # that are aware of all nodes in the candidate hyperedge.
        # Example: hyperedge 0 contains nodes (0, 1)    -> [1, 0] + [0, 1] = [1, 1]
        #          hyperedge 1 contains nodes (1, 2, 3) -> [0, 1] + [1, 1] + [1, 0] = [2, 2]
        #          -> hyperedge_features = [[1, 1],  # sum for hyperedge 0
        #                                   [2, 2]]  # sum for hyperedge 1
        #             shape: (num_hyperedges, in_channels)
        hyperedge_features = HyperedgeAggregator(
            hyperedge_index=hyperedge_index,
            node_embeddings=x,
        ).pool("sum")

        # Broadcast hyperedge features back to each of their incidences,
        # and remove the current node feature to give to each incidence
        # the features of its neighboring nodes in the candidate hyperedge.
        # Example: hyperedge_features = [[1, 1],  # sum for hyperedge 0
        #                                [2, 2]]  # sum for hyperedge 1
        #                               shape (num_hyperedges, in_channels),
        #          hyperedge_ids = [0, 0, 1, 1, 1],
        #          incidence_node_features = [[1, 0],  # node 0 in hyperedge 0
        #                                     [0, 1],  # node 1 in hyperedge 0
        #                                     [0, 1],  # node 1 in hyperedge 1
        #                                     [1, 1],  # node 2 in hyperedge 1
        #                                     [1, 0]]  # node 3 in hyperedge 1
        #                                    shape: (num_incidences, in_channels)
        #          -> hyperedge_features[hyperedge_ids] = [[1, 1],  # hyperedge 0 for node 0
        #                                                  [1, 1],  # hyperedge 0 for node 1
        #                                                  [2, 2],  # hyperedge 1 for node 1
        #                                                  [2, 2],  # hyperedge 1 for node 2
        #                                                  [2, 2]]  # hyperedge 1 for node 3
        #                                                 shape: (num_incidences, in_channels)
        #          -> neighbor_features_per_incidence = [[0, 1],  # node 0 sees node 1
        #                                                [1, 0],  # node 1 sees node 0
        #                                                [2, 1],  # node 1 sees node 2 and node 3
        #                                                [1, 1],  # node 2 sees node 1 and node 3
        #                                                [1, 2]]  # node 3 sees node 1 and node 2
        #                                               shape: (num_incidences, in_channels)
        neighbor_features_per_incidence = (
            hyperedge_features[hyperedge_ids] - incidence_node_features
        )

        # shape (num_incidences, hidden_channels)
        neighbor_aware_hyperedge_embeddings = self.hyperedge_aware(neighbor_features_per_incidence)
        # shape (num_incidences, hidden_channels)
        selfloop_embeddings = self.self_loop(incidence_node_features)

        # incidence_embeddings[0] = activation_fn(selfloop_embeddings[0] + neighbor_aware_hyperedge_embeddings[0])
        # is the embedding of the first incidence (i.e., node 0 in hyperedge 0)
        # after one local message-passing step inside that candidate hyperedge.
        incidence_embeddings = self.activation_fn(
            selfloop_embeddings + neighbor_aware_hyperedge_embeddings
        )  # shape (num_incidences, hidden_channels)

        # Treat each incidence embedding as a separately aggregatable set of features.
        # This is required because incidence embeddings are not global node embeddings:
        # node 1 may appear twice with two different embeddings as it participates in two different candidate hyperedges.
        # Example: incidence_ids = [0, 1, 2, 3, 4],
        #          hyperedge_ids = [0, 0, 1, 1, 1]
        #          -> incidence_hyperedge_index = [[0, 1, 2, 3, 4],
        #                                          [0, 0, 1, 1, 1]]
        num_incidences = incidence_embeddings.size(0)
        incidence_ids = torch.arange(num_incidences, device=hyperedge_index.device)
        incidence_hyperedge_index = torch.stack([incidence_ids, hyperedge_ids], dim=0)

        # Example: incidence_embeddings = [[1, 2],  # features 0, node 0 in hyperedge 0
        #                                  [3, 4],  # features 1, node 1 in hyperedge 0
        #                                  [5, 6],  # features 2, node 1 in hyperedge 1
        #                                  [7, 8],  # features 3, node 2 in hyperedge 1
        #                                  [9, 10]] # features 4, node 3 in hyperedge 1
        #          -> incidence_aggregator pools features (0, 1) for hyperedge 0 and features (2, 3, 4) for hyperedge 1
        #          if aggregation == "maxmin":
        #          -> hyperedge_embeddings = [[max(1, 3) - min(1, 3), max(2, 4) - min(2, 4)],                # hyperedge 0
        #                                     [max(5, 7, 9) - min(5, 7, 9), max(6, 8, 10) - min(6, 8, 10)]]  # hyperedge 1
        #                                    shape: (num_hyperedges, hidden_channels)
        #         if aggregation == "mean":
        #         -> hyperedge_embeddings = [[mean(1, 3), mean(2, 4)],         # hyperedge 0
        #                                    [mean(5, 7, 9), mean(6, 8, 10)]]  # hyperedge 1
        #                                   shape: (num_hyperedges, hidden_channels)
        incidence_aggregator = HyperedgeAggregator(
            hyperedge_index=incidence_hyperedge_index,
            node_embeddings=incidence_embeddings,
        )

        match self.aggregation:
            case "maxmin":
                max_embeddings = incidence_aggregator.pool("max")
                min_embeddings = incidence_aggregator.pool("min")
                hyperedge_embeddings = max_embeddings - min_embeddings
            case _:
                hyperedge_embeddings = incidence_aggregator.pool("mean")

        # Decode: linear projection to scalar score per hyperedge
        # shape: (num_hyperedges, 1) -> squeeze -> (num_hyperedges,)
        return self.hyperedge_score(hyperedge_embeddings).squeeze(-1)

forward(x, hyperedge_index)

Score each candidate hyperedge.

Parameters:

Name Type Description Default
x Tensor

Node feature matrix of shape (num_nodes, in_channels).

required
hyperedge_index Tensor

Incidence tensor of shape (2, num_incidences).

required

Returns:

Type Description
Tensor

Scores of shape (num_hyperedges,).

Source code in hyperbench/models/nhp.py
def forward(self, x: Tensor, hyperedge_index: Tensor) -> Tensor:
    """
    Compute one score per candidate hyperedge.

    Every incidence (a node inside one candidate hyperedge) gets its own embedding from a
    self-loop term plus a term built from the other nodes of the same hyperedge. The incidence
    embeddings are then pooled per hyperedge and projected to a scalar.

    Args:
        x: Node feature matrix of shape ``(num_nodes, in_channels)``.
        hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.

    Returns:
        Scores of shape ``(num_hyperedges,)``.
    """
    # No incidences at all -> empty score vector created from ``x``.
    if hyperedge_index.numel() == 0:
        return x.new_empty((0,))

    # Row 0 holds the node ID and row 1 the hyperedge ID of every incidence.
    # Running example: hyperedge_index = [[0, 1, 1, 2, 3],
    #                                     [0, 0, 1, 1, 1]]
    #                  x = [[1, 0], [0, 1], [1, 1], [1, 0]]  (nodes 0..3)
    node_row, hyperedge_row = hyperedge_index[0], hyperedge_index[1]

    # Features of the node behind each incidence, shape (num_incidences, in_channels).
    # Running example: [[1, 0], [0, 1], [0, 1], [1, 1], [1, 0]]
    per_incidence_x = x[node_row]

    # Sum of the member node features of every hyperedge, shape (num_hyperedges, in_channels).
    # Running example: hyperedge 0 = {0, 1}    -> [1, 1]
    #                  hyperedge 1 = {1, 2, 3} -> [2, 2]
    hyperedge_sums = HyperedgeAggregator(
        hyperedge_index=hyperedge_index,
        node_embeddings=x,
    ).pool("sum")

    # Spread each hyperedge sum over its incidences and subtract the incidence's own node,
    # leaving the summed features of the *other* nodes in the same candidate hyperedge.
    # Running example: [[1, 1], [1, 1], [2, 2], [2, 2], [2, 2]] - per_incidence_x
    #                  = [[0, 1],   # node 0 sees node 1
    #                     [1, 0],   # node 1 sees node 0
    #                     [2, 1],   # node 1 sees nodes 2 and 3
    #                     [1, 1],   # node 2 sees nodes 1 and 3
    #                     [1, 2]]   # node 3 sees nodes 1 and 2
    others_sum = hyperedge_sums[hyperedge_row] - per_incidence_x

    # Both projections have shape (num_incidences, hidden_channels).
    neighbor_term = self.hyperedge_aware(others_sum)
    self_term = self.self_loop(per_incidence_x)

    # One local message-passing step inside each candidate hyperedge.
    incidence_embeddings = self.activation_fn(self_term + neighbor_term)

    # Incidence embeddings are not global node embeddings (node 1 above owns two of them),
    # so each incidence is given its own ID and pooled as if it were a distinct node.
    # Running example: pooling_index = [[0, 1, 2, 3, 4],
    #                                   [0, 0, 1, 1, 1]]
    slot_ids = torch.arange(incidence_embeddings.size(0), device=hyperedge_index.device)
    pooling_index = torch.stack([slot_ids, hyperedge_row], dim=0)
    pooler = HyperedgeAggregator(
        hyperedge_index=pooling_index,
        node_embeddings=incidence_embeddings,
    )

    # "maxmin": element-wise (max - min) over the incidences of a hyperedge.
    # Anything else: element-wise mean over the incidences of a hyperedge.
    # Either way the result has shape (num_hyperedges, hidden_channels).
    if self.aggregation == "maxmin":
        hyperedge_embeddings = pooler.pool("max") - pooler.pool("min")
    else:
        hyperedge_embeddings = pooler.pool("mean")

    # Project to one scalar per hyperedge: (num_hyperedges, 1) -> (num_hyperedges,)
    return self.hyperedge_score(hyperedge_embeddings).squeeze(-1)

Node2Vec

Bases: Module

Node2Vec implementation based on torch_geometric.nn.Node2Vec.

Parameters:

Name Type Description Default
edge_index Tensor

Edge index representing the graph structure. Size (2, num_edges).

required
embedding_dim int

Dimension of the node embeddings to learn.

required
walk_length int

Length of each random walk.

20
context_size int

Window size for the skip-gram model (number of neighbors in the walk considered as context). For example, if context_size=2 and walk_length=5, then for a random walk [v0, v1, v2, v3, v4], the context for v2 would be [v0, v1, v3, v4] as we take neighbors within distance 2 in the walk. The pairs generated by skip-gram would be [(v2, v0), (v2, v1), (v2, v3), (v2, v4)]. Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20). Defaults to 10.

10
num_walks_per_node int

Number of random walks to start at each node.

10
p float

Return hyperparameter for Node2Vec. Default is 1.0 (unbiased). This controls the probability of stepping back to the node visited in the previous step. Lower values of p make immediate backtracking more likely, which keeps walks closer to the local neighborhood. Higher values of p discourage returning to the previous node, so walks are less likely to bounce back and forth across the same edge.

1.0
q float

In-out hyperparameter for Node2Vec. Default is 1.0 (unbiased). This controls whether walks stay near the source node or explore further outward. Lower values of q bias the walk toward outward exploration, behaving more like DFS and emphasizing structural roles. Higher values of q bias the walk toward nearby nodes, behaving more like BFS and emphasizing community structure and homophily.

1.0
num_negative_samples int

Number of negative samples to use for training the skip-gram model. If set to X, then for each positive pair (u, v) generated from the random walks, X negative pairs (u, v_neg) will be generated, where v_neg is a node sampled uniformly at random from all nodes in the graph. Defaults to 1, meaning one negative sample per positive pair.

1
num_nodes int | None

Total number of nodes in the graph. If not provided, it will be inferred from the edge_index. This is only needed if the edge_index does not include all nodes (e.g., some isolated nodes are missing).

None
sparse bool

Whether Node2Vec embeddings should use sparse gradients.

True
Source code in hyperbench/models/node2vec.py
class Node2Vec(nn.Module):
    """
    Node2Vec implementation based on ``torch_geometric.nn.Node2Vec``.

    Args:
        edge_index: Edge index representing the graph structure. Size ``(2, num_edges)``.
        embedding_dim: Dimension of the node embeddings to learn.
        walk_length: Length of each random walk.
        context_size: Window size for the skip-gram model (number of neighbors in the walk considered as context).
            For example, if ``context_size=2`` and ``walk_length=5``, then for a random walk ``[v0, v1, v2, v3, v4]``,
            the context for ``v2`` would be ``[v0, v1, v3, v4]`` as we take neighbors within distance 2 in the walk.
            The pairs generated by skip-gram would be ``[(v2, v0), (v2, v1), (v2, v3), (v2, v4)]``.
            Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20).
            Defaults to ``10``.
        num_walks_per_node: Number of random walks to start at each node.
        p: Return hyperparameter for Node2Vec. Default is ``1.0`` (unbiased).
            This controls the probability of stepping back to the node visited in the previous step.
            Lower values of ``p`` make immediate backtracking more likely, which keeps walks closer to the
            local neighborhood. Higher values of ``p`` discourage returning to the previous node, so walks
            are less likely to bounce back and forth across the same edge.
        q: In-out hyperparameter for Node2Vec. Default is ``1.0`` (unbiased).
            This controls whether walks stay near the source node or explore further outward.
            Lower values of ``q`` bias the walk toward outward exploration, behaving more like DFS and
            emphasizing structural roles. Higher values of ``q`` bias the walk toward nearby nodes,
            behaving more like BFS and emphasizing community structure and homophily.
        num_negative_samples: Number of negative samples to use for training the skip-gram model.
            If set to ``X``, then for each positive pair ``(u, v)`` generated from the random walks, ``X`` negative pairs ``(u, v_neg)`` will be generated,
            where ``v_neg`` is a node sampled uniformly at random from all nodes in the graph.
            Defaults to ``1``, meaning one negative sample per positive pair.
        num_nodes: Total number of nodes in the graph. If not provided, it will be inferred from the hyperedge_index.
            This is only needed if the hyperedge_index does not include all nodes (e.g., some isolated nodes are missing).
        sparse: Whether Node2Vec embeddings should use sparse gradients.
    """

    def __init__(
        self,
        edge_index: Tensor,
        embedding_dim: int,
        walk_length: int = 20,
        context_size: int = 10,
        num_walks_per_node: int = 10,
        p: float = 1.0,
        q: float = 1.0,
        num_negative_samples: int = 1,
        num_nodes: int | None = None,
        sparse: bool = True,
    ):
        super().__init__()
        if walk_length < context_size:
            raise ValueError(
                f"Expected walk_length >= context_size, got "
                f"walk_length={walk_length}, context_size={context_size}."
            )

        self.model = PyGNode2Vec(
            edge_index=edge_index,
            embedding_dim=embedding_dim,
            walk_length=walk_length,
            context_size=context_size,
            walks_per_node=num_walks_per_node,
            p=p,
            q=q,
            num_negative_samples=num_negative_samples,
            num_nodes=num_nodes,
            sparse=sparse,
        )

    def forward(self, batch: Tensor | None = None) -> Tensor:
        return self.model(batch)

    @property
    def num_embeddings(self) -> int:
        return int(self.model.embedding.num_embeddings)

    def loss(self, pos_rw: Tensor, neg_rw: Tensor) -> Tensor:
        return self.model.loss(pos_rw, neg_rw)

    def loader(self, batch_size: int = 128, shuffle: bool = True):
        return self.model.loader(batch_size=batch_size, shuffle=shuffle)

Node2VecConfig

Bases: TypedDict

Configuration for the Node2Vec model.

Parameters:

Name Type Description Default
edge_index

Edge index representing the graph structure. Size (2, num_edges).

required
embedding_dim

Dimension of the node embeddings to learn.

required
walk_length

Length of each random walk.

required
context_size

Window size for the skip-gram model (number of neighbors in the walk considered as context). For example, if context_size=2 and walk_length=5, then for a random walk [v0, v1, v2, v3, v4], the context for v2 would be [v0, v1, v3, v4] as we take neighbors within distance 2 in the walk. The pairs generated by skip-gram would be [(v2, v0), (v2, v1), (v2, v3), (v2, v4)]. Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20). Defaults to 10.

required
num_walks_per_node

Number of random walks to start at each node.

required
p

Return hyperparameter for Node2Vec. Default is 1.0 (unbiased). This controls the probability of stepping back to the node visited in the previous step. Lower values of p make immediate backtracking more likely, which keeps walks closer to the local neighborhood. Higher values of p discourage returning to the previous node, so walks are less likely to bounce back and forth across the same edge.

required
q

In-out hyperparameter for Node2Vec. Default is 1.0 (unbiased). This controls whether walks stay near the source node or explore further outward. Lower values of q bias the walk toward outward exploration, behaving more like DFS and emphasizing community structure and homophily. Higher values of q bias the walk toward nearby nodes, behaving more like BFS and emphasizing structural roles.

required
num_negative_samples

Number of negative samples to use for training the skip-gram model. If set to X, then for each positive pair (u, v) generated from the random walks, X negative pairs (u, v_neg) will be generated, where v_neg is a node sampled uniformly at random from all nodes in the graph. Defaults to 1, meaning one negative sample per positive pair.

required
num_nodes

Total number of nodes in the graph. If not provided, it will be inferred from the edge_index. This is only needed if the edge_index does not include all nodes (e.g., some isolated nodes are missing).

required
sparse

Whether Node2Vec embeddings should use sparse gradients.

required
Source code in hyperbench/models/node2vec.py
class Node2VecConfig(TypedDict):
    """
    Configuration for the Node2Vec model.

    Only ``edge_index`` and ``embedding_dim`` are required keys; all other keys are ``NotRequired``.
    The defaults quoted below are those of ``Node2Vec.__init__`` and apply when a key is omitted
    and the config is unpacked as ``Node2Vec(**config)``.

    Args:
        edge_index: Edge index representing the graph structure. Size ``(2, num_edges)``.
        embedding_dim: Dimension of the node embeddings to learn.
        walk_length: Length of each random walk. Defaults to ``20``.
        context_size: Window size for the skip-gram model (number of neighbors in the walk considered as context).
            For example, if ``context_size=2`` and ``walk_length=5``, then for a random walk ``[v0, v1, v2, v3, v4]``,
            the context for ``v2`` would be ``[v0, v1, v3, v4]`` as we take neighbors within distance 2 in the walk.
            The pairs generated by skip-gram would be ``[(v2, v0), (v2, v1), (v2, v3), (v2, v4)]``.
            Rule of thumb: Graphs with strong local structure (5-10), Graphs with communities/long-range patterns (10-20).
            Defaults to ``10``.
        num_walks_per_node: Number of random walks to start at each node. Defaults to ``10``.
        p: Return hyperparameter for Node2Vec. Default is ``1.0`` (unbiased).
            This controls the probability of stepping back to the node visited in the previous step.
            Lower values of ``p`` make immediate backtracking more likely, which keeps walks closer to the
            local neighborhood. Higher values of ``p`` discourage returning to the previous node, so walks
            are less likely to bounce back and forth across the same edge.
        q: In-out hyperparameter for Node2Vec. Default is ``1.0`` (unbiased).
            This controls whether walks stay near the source node or explore further outward.
            Lower values of ``q`` bias the walk toward outward exploration, behaving more like DFS and
            emphasizing community structure and homophily. Higher values of ``q`` bias the walk toward
            nearby nodes, behaving more like BFS and emphasizing structural roles.
        num_negative_samples: Number of negative samples to use for training the skip-gram model.
            If set to ``X``, then for each positive pair ``(u, v)`` generated from the random walks, ``X`` negative pairs ``(u, v_neg)`` will be generated,
            where ``v_neg`` is a node sampled uniformly at random from all nodes in the graph.
            Defaults to ``1``, meaning one negative sample per positive pair.
        num_nodes: Total number of nodes in the graph. If not provided, it will be inferred from the ``edge_index``.
            This is only needed if the ``edge_index`` does not include all nodes (e.g., some isolated nodes are missing).
        sparse: Whether Node2Vec embeddings should use sparse gradients. Defaults to ``True``.
    """

    # Required keys.
    edge_index: Tensor
    embedding_dim: int
    # Optional keys. Their declaration order differs from the ``Node2Vec.__init__`` signature
    # (``context_size`` precedes ``walk_length`` here); the config is unpacked by keyword.
    context_size: NotRequired[int]
    walk_length: NotRequired[int]
    num_walks_per_node: NotRequired[int]
    p: NotRequired[float]
    q: NotRequired[float]
    num_negative_samples: NotRequired[int]
    num_nodes: NotRequired[int]
    sparse: NotRequired[bool]

Node2VecGCN

Bases: Module

A joint encoder that first learns Node2Vec embeddings and then refines them with GCN layers.

Parameters:

Name Type Description Default
node2vec_config Node2VecConfig

Model-side configuration for the internal Node2Vec encoder.

required
gcn_config GCNConfig

Model-side configuration for the GCN stack applied to the Node2Vec embeddings.

required
Source code in hyperbench/models/node2vec.py
class Node2VecGCN(nn.Module):
    """
    Joint encoder: an internal ``Node2Vec`` produces node embeddings, which a ``GCN`` stack then refines.

    Args:
        node2vec_config: Keyword configuration unpacked into the internal ``Node2Vec`` encoder.
        gcn_config: Keyword configuration unpacked into the ``GCN`` applied to the Node2Vec embeddings.
    """

    def __init__(self, node2vec_config: Node2VecConfig, gcn_config: GCNConfig):
        super().__init__()
        # Both sub-modules are built purely from their keyword configurations.
        self.node2vec = Node2Vec(**node2vec_config)
        self.gcn = GCN(**gcn_config)

    def forward(
        self,
        batch: Tensor | None = None,
        edge_index: Tensor | None = None,
    ) -> Tensor:
        """
        Embed ``batch`` with Node2Vec, then pass the embeddings and ``edge_index`` through the GCN.

        Args:
            batch: Optional argument forwarded unchanged to the internal ``Node2Vec``.
            edge_index: Edge index handed to the GCN. Declared optional in the signature, but must be given.

        Returns:
            The output of the GCN.

        Raises:
            ValueError: If ``edge_index`` is ``None``.
        """
        if edge_index is not None:
            walk_embeddings = self.node2vec(batch)
            return self.gcn(walk_embeddings, edge_index)

        raise ValueError("Node2VecGCN requires edge_index in forward().")

    @property
    def num_embeddings(self) -> int:
        """Number of embeddings reported by the internal ``Node2Vec``."""
        encoder = self.node2vec
        return encoder.num_embeddings

    def loss(self, pos_rw: Tensor, neg_rw: Tensor) -> Tensor:
        """Return the internal ``Node2Vec`` loss for the given positive and negative random walks."""
        encoder = self.node2vec
        return encoder.loss(pos_rw, neg_rw)

    def loader(self, batch_size: int = 128, shuffle: bool = True):
        """Return the internal ``Node2Vec`` loader built with ``batch_size`` and ``shuffle``."""
        encoder = self.node2vec
        return encoder.loader(batch_size=batch_size, shuffle=shuffle)

VilLain

Bases: Module

VilLain learns node-specific virtual-label logits instead of consuming existing node features. The model is transductive: rows in node_embedding correspond to the fixed global node space used during training.

- Proposed in the paper "VilLain: Self-Supervised Learning on Homogeneous Hypergraphs without Features via Virtual Label Propagation" (WWW 2024): https://dl.acm.org/doi/pdf/10.1145/3589334.3645454
- Reference implementation: https://github.com/geon0325/VilLain/

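Training (forward / loss) and embedding generation (node_embeddings / hyperedge_embeddings) both:

1. Sample differentiable virtual-label assignments with Gumbel-Softmax.
2. Propagate them over the incidence structure.

Training accumulates the self-supervised loss over training_steps propagation rounds and returns it; embedding generation returns the propagated states averaged over generation_steps rounds.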

Parameters:

Name Type Description Default
num_nodes int

Total number of trainable nodes.

required
embedding_dim int

Returned embedding dimension. Defaults to 128.

128
labels_per_subspace int

Number of virtual labels per subspace. Defaults to 2.

2
training_steps int

Propagation steps used for self-supervised loss. Defaults to 4.

4
generation_steps int

Propagation steps averaged for final embeddings. Defaults to 100.

100
tau float

Gumbel-Softmax temperature. Defaults to 1.0.

1.0
eps float

Numerical stability constant. Defaults to 1e-10.

1e-10
Source code in hyperbench/models/villain.py
class VilLain(nn.Module):
    """
    VilLain learns node-specific virtual-label logits instead of consuming existing node features.
    The model is transductive: rows in ``node_embedding`` correspond to the fixed global node space used during training.
    - Proposed in `VilLain: Self-Supervised Learning on Homogeneous Hypergraphs without Features via Virtual Label Propagation <https://dl.acm.org/doi/pdf/10.1145/3589334.3645454>`_ paper (WWW 2024).
    - Reference implementation: `source <https://github.com/geon0325/VilLain/>`_.

    Training (``forward`` / ``loss``) and embedding generation (``node_embeddings`` / ``hyperedge_embeddings``) both:
    1. Sample differentiable virtual-label assignments with Gumbel-Softmax.
    2. Propagate them over the incidence structure (nodes to hyperedges, then hyperedges back to nodes).

    Training accumulates the self-supervised loss over ``training_steps`` propagation rounds and returns it,
    whereas embedding generation returns the propagated states averaged over ``generation_steps`` rounds.


    Args:
        num_nodes: Total number of trainable nodes.
        embedding_dim: Returned embedding dimension. Defaults to ``128``.
            Internally the width is rounded up to a multiple of ``labels_per_subspace`` (``raw_embedding_dim``)
            and generated embeddings are truncated back to ``embedding_dim`` columns.
        labels_per_subspace: Number of virtual labels per subspace. Defaults to ``2``.
        training_steps: Propagation steps used for self-supervised loss. Defaults to ``4``.
        generation_steps: Propagation steps averaged for final embeddings. Defaults to ``100``.
        tau: Gumbel-Softmax temperature. Defaults to ``1.0``.
        eps: Numerical stability constant. Defaults to ``1e-10``.

    Raises:
        ValueError: If ``num_nodes``, ``embedding_dim``, ``training_steps`` or ``generation_steps`` is below 1,
            if ``labels_per_subspace`` is below 2, or if ``tau`` or ``eps`` is not strictly positive.
    """

    def __init__(
        self,
        num_nodes: int,
        embedding_dim: int = 128,
        labels_per_subspace: int = 2,
        training_steps: int = 4,
        generation_steps: int = 100,
        tau: float = 1.0,
        eps: float = 1e-10,
    ):
        super().__init__()
        # Validate before storing anything so an invalid configuration never yields a usable module.
        self.__validate_args(
            num_nodes=num_nodes,
            embedding_dim=embedding_dim,
            labels_per_subspace=labels_per_subspace,
            training_steps=training_steps,
            generation_steps=generation_steps,
            tau=tau,
            eps=eps,
        )

        self.num_nodes = num_nodes
        self.embedding_dim = embedding_dim
        self.labels_per_subspace = labels_per_subspace
        self.training_steps = training_steps
        self.generation_steps = generation_steps
        self.tau = tau
        self.eps = eps

        # Round the width up to a whole number of subspaces; the extra channels (if any)
        # are dropped again when final embeddings are returned.
        self.num_subspaces = math.ceil(embedding_dim / labels_per_subspace)
        self.raw_embedding_dim = self.num_subspaces * labels_per_subspace
        # Trainable logits, one row per node; uninitialized here and filled by ``reset_parameters``.
        self.node_embedding = nn.Parameter(torch.empty(size=(num_nodes, self.raw_embedding_dim)))

        self.loss_fn = VilLainLoss(
            num_subspaces=self.num_subspaces,
            labels_per_subspace=self.labels_per_subspace,
            eps=self.eps,
        )

        self.reset_parameters()

    def forward(
        self,
        hyperedge_index: Tensor,
        node_ids: Tensor | None = None,
        num_hyperedges: int | None = None,
    ) -> tuple[Tensor, VilLainLossParts]:
        """
        Compute the self-supervised VilLain objective (delegates to ``loss``).
        Use ``hyperedge_embeddings`` or ``node_embeddings`` to generate final embeddings for inference after training.

        Args:
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            node_ids: Optional global node ids mapping local node ids to rows of the embedding table in the transductive setting.
                Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table.
                This is needed as the model keeps an internal embedding table with a row for every node in the global node space.
            num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
                If not provided, the hyperedge count is inferred from ``hyperedge_index``.

        Returns:
            A tuple ``(total_loss, loss_parts)`` where ``loss_parts`` contains ``local_loss`` and ``global_loss`` scalar tensors.
        """
        return self.loss(
            hyperedge_index=hyperedge_index,
            node_ids=node_ids,
            num_hyperedges=num_hyperedges,
        )

    def loss(
        self,
        hyperedge_index: Tensor,
        node_ids: Tensor | None = None,
        num_hyperedges: int | None = None,
    ) -> tuple[Tensor, VilLainLossParts]:
        """
        Compute the self-supervised VilLain objective.

        Args:
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            node_ids: Optional global node ids mapping local node ids to rows of the embedding table in the transductive setting.
                Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table.
                This is needed as the model keeps an internal embedding table with a row for every node in the global node space.
            num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
                If not provided, the hyperedge count is inferred from ``hyperedge_index``.

        Returns:
            A tuple ``(total_loss, loss_parts)`` where ``loss_parts`` contains ``local_loss`` and ``global_loss`` scalar tensors.
        """
        node_embeddings = self.__get_initial_virtual_node_features(node_ids=node_ids)
        actual_num_hyperedges = self.__num_hyperedges(hyperedge_index, num_hyperedges)

        # Scalar accumulators created via ``new_zeros`` so they share dtype/device with the node features.
        local_loss = node_embeddings.new_zeros(size=())
        global_loss = node_embeddings.new_zeros(size=())
        # Each round feeds the propagated node states into the next one; both loss terms are summed over rounds.
        for _ in range(self.training_steps):
            node_embeddings, hyperedge_embeddings = self.__message_passing(
                x=node_embeddings,
                hyperedge_index=hyperedge_index,
                num_hyperedges=actual_num_hyperedges,
            )
            local_loss = local_loss + self.loss_fn.local_loss(node_embeddings, hyperedge_embeddings)
            global_loss = global_loss + self.loss_fn.global_loss(
                node_embeddings, hyperedge_embeddings
            )

        return self.loss_fn.total_loss(local_loss, global_loss), {
            "local_loss": local_loss,
            "global_loss": global_loss,
        }

    def hyperedge_embeddings(
        self,
        hyperedge_index: Tensor,
        node_ids: Tensor | None = None,
        num_hyperedges: int | None = None,
    ) -> Tensor:
        """
        Generate hyperedge embeddings by averaging propagated hyperedge states.
        Every generation step computes hyperedge states from the current node states, then updates node states for the next step.

        Args:
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            node_ids: Optional global node ids mapping local node ids to rows of the embedding table in the transductive setting.
                Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table.
                This is needed as the model keeps an internal embedding table with a row for every node in the global node space.
            num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
                If not provided, the hyperedge count is inferred from ``hyperedge_index``.

        Returns:
            Hyperedge embeddings of shape ``(num_hyperedges, embedding_dim)``.
        """
        return self.__embeddings(
            hyperedge_index=hyperedge_index,
            node_ids=node_ids,
            num_hyperedges=num_hyperedges,
            mode="hyperedge",
        )

    def node_embeddings(
        self,
        hyperedge_index: Tensor,
        node_ids: Tensor | None = None,
        num_hyperedges: int | None = None,
    ) -> Tensor:
        """
        Generate node embeddings by averaging propagated node states.

        Args:
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            node_ids: Optional global node ids mapping local node ids to rows of the embedding table in the transductive setting.
                Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table.
                This is needed as the model keeps an internal embedding table with a row for every node in the global node space.
            num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
                If not provided, the hyperedge count is inferred from ``hyperedge_index``.

        Returns:
            Node embeddings of shape ``(num_local_nodes, embedding_dim)``.
        """
        return self.__embeddings(
            hyperedge_index=hyperedge_index,
            node_ids=node_ids,
            num_hyperedges=num_hyperedges,
            mode="node",
        )

    def reset_parameters(self) -> None:
        """Initialize trainable virtual-label logits near zero (normal distribution, mean 0.0, std 0.1)."""
        nn.init.normal_(self.node_embedding, mean=0.0, std=0.1)

    def __embeddings(
        self,
        hyperedge_index: Tensor,
        node_ids: Tensor | None,
        num_hyperedges: int | None,
        mode: Literal["node", "hyperedge"] = "node",
    ) -> Tensor:
        """
        Generate final node or hyperedge embeddings for inference. Runs entirely under ``torch.no_grad()``.

        Args:
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            node_ids: Optional global node ids mapping local node ids to rows of the embedding table in the transductive setting.
            num_hyperedges: Optional explicit hyperedge count to preserve empty hyperedges during propagation.
            mode: Selects whether to accumulate propagated node states or hyperedge states.
                Any value other than ``"node"`` is treated as ``"hyperedge"``.

        Returns:
            Averaged embeddings truncated to ``embedding_dim``.
        """
        with torch.no_grad():
            # NOTE(review): the initial features come from ``F.gumbel_softmax``, which presumably draws fresh
            # noise on every call, so repeated calls may yield different embeddings -- confirm this is intended.
            x = self.__get_initial_virtual_node_features(node_ids=node_ids)
            actual_num_hyperedges = self.__num_hyperedges(hyperedge_index, num_hyperedges)

            # The accumulator has one row per node or per hyperedge, depending on ``mode``.
            final_embeddings_size = (
                (x.size(0), self.raw_embedding_dim)
                if mode == "node"
                else (actual_num_hyperedges, self.raw_embedding_dim)
            )
            final_embeddings = x.new_zeros(size=final_embeddings_size)
            for _ in range(self.generation_steps):
                x, hyperedge_embeddings = self.__message_passing(
                    x=x,
                    hyperedge_index=hyperedge_index,
                    num_hyperedges=actual_num_hyperedges,
                )

                # Suppose generation_steps = 100.
                # Average 100 propagated embeddings for each node/hyperedge to get more stable final embeddings.
                # Sum here and divide by generation_steps later to avoid storing all 100 embeddings in memory at once.
                final_embeddings = final_embeddings + (
                    x if mode == "node" else hyperedge_embeddings
                )
            final_embeddings = final_embeddings / self.generation_steps

            # Drop the padding channels introduced by rounding the width up to whole subspaces.
            # Example: embedding_dim=7, labels_per_subspace=2 -> raw_embedding_dim=8,
            #          final_embeddings.shape = (num_nodes/num_hyperedges, 8)
            #          -> returned shape = (num_nodes/num_hyperedges, 7), i.e. the first 7 channels.
            return final_embeddings[:, : self.embedding_dim]

    def __get_initial_virtual_node_features(self, node_ids: Tensor | None = None) -> Tensor:
        """
        Convert trainable node logits into flattened virtual-label probabilities.

        Args:
            node_ids: Optional global node ids used to select rows of the embedding table in the transductive setting.
                If ``None``, all node rows are used.

        Returns:
            A tensor of shape ``(num_selected_nodes, raw_embedding_dim)``.
        """
        logits = self.node_embedding if node_ids is None else self.node_embedding[node_ids]

        # Split flat logits into independent virtual-label subspaces.
        # Example: with raw_embedding_dim=8, num_subspaces=4, labels_per_subspace=2:
        #          logits.shape = (num_nodes, 8)
        #          -> viewed_logits shape = (num_nodes, 4, 2)
        #          viewed_logits[0] = [[l00, l01],  # node 0, subspace 0
        #                              [l02, l03],  # node 0, subspace 1
        #                              [l04, l05],  # node 0, subspace 2
        #                              [l06, l07]]  # node 0, subspace 3
        viewed_logits = logits.view(-1, self.num_subspaces, self.labels_per_subspace)

        # Convert each subspace's logits into a differentiable virtual-label assignment.
        # Example: viewed_logits[0, 0] = [0.03, -0.02]
        #          -> probs[0, 0] might be [0.47, 0.53] with tau=1.0
        #          probs.shape remains (num_nodes, 4, 2).
        probs = F.gumbel_softmax(viewed_logits, tau=self.tau, dim=2, hard=False)

        # Flatten subspaces back into a standard node-by-channel node feature matrix.
        # The aggregators expect matrices shaped (num_nodes, num_channels==raw_embedding_dim),
        # so propagation happens on the flattened channel dimension.
        # Example: probs.shape = (num_nodes, 4, 2) -> shape = (num_nodes, 8)
        return probs.reshape(-1, self.raw_embedding_dim)

    def __message_passing(
        self,
        x: Tensor,
        hyperedge_index: Tensor,
        num_hyperedges: int,
    ) -> tuple[Tensor, Tensor]:
        """
        One round of message passing, where nodes send messages to hyperedges and then hyperedges send messages back to nodes.
        Both directions use ``"mean"`` pooling.

        Args:
            x: Virtual node features of shape ``(num_nodes, raw_embedding_dim)``.
            hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
            num_hyperedges: Total number of hyperedges.

        Returns:
            A tuple ``(node_embeddings, hyperedge_embeddings)`` with the updated states after one round of message passing.
        """
        # Nodes -> hyperedges.
        hyperedge_embeddings = HyperedgeAggregator(
            hyperedge_index=hyperedge_index,
            node_embeddings=x,
            num_hyperedges=num_hyperedges,
        ).pool("mean")

        # Hyperedges -> nodes; the node count is taken from ``x`` so the output keeps one row per input node.
        node_embeddings = NodeAggregator(
            hyperedge_index=hyperedge_index,
            hyperedge_embeddings=hyperedge_embeddings,
            num_nodes=x.size(0),
        ).pool("mean")

        return node_embeddings, hyperedge_embeddings

    def __num_hyperedges(
        self,
        hyperedge_index: Tensor,
        num_hyperedges: int | None,
    ) -> int:
        """
        Return the explicit hyperedge count or infer it from the ``hyperedge_index``, if not provided.
        Explicit counts are required when empty hyperedges must remain in the hypergraph.
        """
        if num_hyperedges is not None:
            return num_hyperedges
        return HyperedgeIndex(hyperedge_index).num_hyperedges

    def __validate_args(
        self,
        num_nodes: int,
        embedding_dim: int,
        labels_per_subspace: int,
        training_steps: int,
        generation_steps: int,
        tau: float,
        eps: float,
    ) -> None:
        """
        Check the constructor arguments and raise on the first violation found.

        Raises:
            ValueError: If ``num_nodes``, ``embedding_dim``, ``training_steps`` or ``generation_steps`` is below 1,
                if ``labels_per_subspace`` is below 2, or if ``tau`` or ``eps`` is not strictly positive.
        """
        if num_nodes < 1:
            raise ValueError("num_nodes must be positive.")
        if embedding_dim < 1:
            raise ValueError("embedding_dim must be positive.")
        if labels_per_subspace < 2:
            raise ValueError("labels_per_subspace must be at least 2.")
        if training_steps < 1:
            raise ValueError("training_steps must be positive.")
        if generation_steps < 1:
            raise ValueError("generation_steps must be positive.")
        if tau <= 0:
            raise ValueError("tau must be positive.")
        if eps <= 0:
            raise ValueError("eps must be positive.")

forward(hyperedge_index, node_ids=None, num_hyperedges=None)

Compute the self-supervised VilLain objective. Use hyperedge_embeddings or node_embeddings to generate final embeddings for inference after training.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Incidence tensor of shape (2, num_incidences).

required
node_ids Tensor | None

Optional global node ids mapping local node ids to rows of the embedding table in the transductive setting. Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table. This is needed as the model keeps an internal embedding table with a row for every node in the global node space.

None
num_hyperedges int | None

Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges. If not provided, the hyperedge count is inferred from hyperedge_index.

None

Returns:

Type Description
tuple[Tensor, VilLainLossParts]

A tuple (total_loss, loss_parts) where loss_parts contains local_loss and global_loss scalar tensors.

Source code in hyperbench/models/villain.py
def forward(
    self,
    hyperedge_index: Tensor,
    node_ids: Tensor | None = None,
    num_hyperedges: int | None = None,
) -> tuple[Tensor, VilLainLossParts]:
    """
    Run the training pass, i.e. compute the self-supervised VilLain objective via ``loss``.
    For inference after training, call ``hyperedge_embeddings`` or ``node_embeddings`` instead.

    Args:
        hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
        node_ids: Optional global node ids mapping local node ids to rows of the embedding table in the transductive setting.
            Needed when a batch uses rebased local node ids while the learned logits live in the full
            transductive node table, which holds one row per node of the global node space.
        num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
            When omitted, the count is inferred from ``hyperedge_index``.

    Returns:
        The tuple ``(total_loss, loss_parts)`` returned by ``loss``.
    """
    # Pure delegation: all three arguments are forwarded by keyword, unchanged.
    loss_kwargs = {
        "hyperedge_index": hyperedge_index,
        "node_ids": node_ids,
        "num_hyperedges": num_hyperedges,
    }
    return self.loss(**loss_kwargs)

loss(hyperedge_index, node_ids=None, num_hyperedges=None)

Compute the self-supervised VilLain objective.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Incidence tensor of shape (2, num_incidences).

required
node_ids Tensor | None

Optional global node ids mapping local node ids to rows of the embedding table in the transductive setting. Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table. This is needed as the model keeps an internal embedding table with a row for every node in the global node space.

None
num_hyperedges int | None

Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges. If not provided, the hyperedge count is inferred from hyperedge_index.

None

Returns:

Type Description
tuple[Tensor, VilLainLossParts]

A tuple (total_loss, loss_parts) where loss_parts contains local_loss and global_loss scalar tensors.

Source code in hyperbench/models/villain.py
def loss(
    self,
    hyperedge_index: Tensor,
    node_ids: Tensor | None = None,
    num_hyperedges: int | None = None,
) -> tuple[Tensor, VilLainLossParts]:
    """
    Evaluate the self-supervised VilLain training objective.

    Starting from the initial virtual node features, message passing is repeated ``training_steps`` times
    and the local and global loss terms produced at every step are summed.

    Args:
        hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
        node_ids: Optional global node ids that map each local node id to its row in the embedding table
            in the transductive setting. Pass them when a batch has rebased local node ids while the learned
            logits live in the full transductive node table, which holds one row per node of the global node space.
        num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve
            empty hyperedges. When omitted, the count is inferred from ``hyperedge_index``.

    Returns:
        A tuple ``(total_loss, loss_parts)`` where ``loss_parts`` contains ``local_loss`` and ``global_loss`` scalar tensors.
    """
    x = self.__get_initial_virtual_node_features(node_ids=node_ids)
    hyperedge_count = self.__num_hyperedges(hyperedge_index, num_hyperedges)

    # Scalar accumulators derived from the features via ``new_zeros``.
    local_total = x.new_zeros(size=())
    global_total = x.new_zeros(size=())

    for _ in range(self.training_steps):
        # Each step feeds the updated node states into the next round of propagation.
        x, hyperedge_states = self.__message_passing(
            x=x,
            hyperedge_index=hyperedge_index,
            num_hyperedges=hyperedge_count,
        )
        local_total = local_total + self.loss_fn.local_loss(x, hyperedge_states)
        global_total = global_total + self.loss_fn.global_loss(x, hyperedge_states)

    loss_parts: VilLainLossParts = {
        "local_loss": local_total,
        "global_loss": global_total,
    }
    return self.loss_fn.total_loss(local_total, global_total), loss_parts

hyperedge_embeddings(hyperedge_index, node_ids=None, num_hyperedges=None)

Generate hyperedge embeddings by averaging propagated hyperedge states. Every generation step computes hyperedge states from the current node states, then updates node states for the next step.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Incidence tensor of shape (2, num_incidences).

required
node_ids Tensor | None

Optional global node ids that map each local node id to its row in the embedding table in the transductive setting. Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table. This is needed because the model keeps an internal embedding table with a row for every node in the global node space.

None
num_hyperedges int | None

Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges. If not provided, the hyperedge count is inferred from hyperedge_index.

None

Returns:

Type Description
Tensor

Hyperedge embeddings of shape (num_hyperedges, embedding_dim).

Source code in hyperbench/models/villain.py
def hyperedge_embeddings(
    self,
    hyperedge_index: Tensor,
    node_ids: Tensor | None = None,
    num_hyperedges: int | None = None,
) -> Tensor:
    """
    Generate hyperedge embeddings by averaging propagated hyperedge states.
    Every generation step computes hyperedge states from the current node states, then updates node states for the next step.

    Args:
        hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
        node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting.
            Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table.
            This is needed as the model keeps an internal embedding table with a row for every node in the global node space.
        num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
            If not provided, the hyperedge count is inferred from ``hyperedge_index``.

    Returns:
        Hyperedge embeddings of shape ``(num_hyperedges, embedding_dim)``.
    """
    return self.__embeddings(
        hyperedge_index=hyperedge_index,
        node_ids=node_ids,
        num_hyperedges=num_hyperedges,
        mode="hyperedge",
    )

node_embeddings(hyperedge_index, node_ids=None, num_hyperedges=None)

Generate node embeddings by averaging propagated node states.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Incidence tensor of shape (2, num_incidences).

required
node_ids Tensor | None

Optional global node ids that map each local node id to its row in the embedding table in the transductive setting. Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table. This is needed because the model keeps an internal embedding table with a row for every node in the global node space.

None
num_hyperedges int | None

Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges. If not provided, the hyperedge count is inferred from hyperedge_index.

None

Returns:

Type Description
Tensor

Node embeddings of shape (num_local_nodes, embedding_dim).

Source code in hyperbench/models/villain.py
def node_embeddings(
    self,
    hyperedge_index: Tensor,
    node_ids: Tensor | None = None,
    num_hyperedges: int | None = None,
) -> Tensor:
    """
    Generate node embeddings by averaging propagated node states.

    Args:
        hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
        node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting.
            Use this when a batch has rebased local node ids but the learned logits live in the full transductive node table.
            This is needed as the model keeps an internal embedding table with a row for every node in the global node space.
        num_hyperedges: Optional explicit hyperedge count used during node-to-hyperedge pooling to preserve empty hyperedges.
            If not provided, the hyperedge count is inferred from ``hyperedge_index``.

    Returns:
        Node embeddings of shape ``(num_local_nodes, embedding_dim)``.
    """
    return self.__embeddings(
        hyperedge_index=hyperedge_index,
        node_ids=node_ids,
        num_hyperedges=num_hyperedges,
        mode="node",
    )

reset_parameters()

Initialize trainable virtual-label logits near zero.

Source code in hyperbench/models/villain.py
def reset_parameters(self) -> None:
    """Re-draw the trainable virtual-label logits from a zero-mean normal with standard deviation 0.1."""
    logits = self.node_embedding
    # In-place initializer; the small spread keeps the starting logits close to zero.
    nn.init.normal_(logits, mean=0.0, std=0.1)

__embeddings(hyperedge_index, node_ids, num_hyperedges, mode='node')

Generate final node or hyperedge embeddings for inference.

Parameters:

Name Type Description Default
hyperedge_index Tensor

Incidence tensor of shape (2, num_incidences).

required
node_ids Tensor | None

Optional global node ids that map each local node id to its row in the embedding table in the transductive setting.

required
num_hyperedges int | None

Optional explicit hyperedge count to preserve empty hyperedges during propagation.

required
mode Literal['node', 'hyperedge']

Selects whether to accumulate propagated node states or hyperedge states.

'node'

Returns:

Type Description
Tensor

Averaged embeddings truncated to embedding_dim.

Source code in hyperbench/models/villain.py
def __embeddings(
    self,
    hyperedge_index: Tensor,
    node_ids: Tensor | None,
    num_hyperedges: int | None,
    mode: Literal["node", "hyperedge"] = "node",
) -> Tensor:
    """
    Generate final node or hyperedge embeddings for inference.

    Args:
        hyperedge_index: Incidence tensor of shape ``(2, num_incidences)``.
        node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting.
        num_hyperedges: Optional explicit hyperedge count to preserve empty hyperedges during propagation.
        mode: Selects whether to accumulate propagated node states or hyperedge states.

    Returns:
        Averaged embeddings truncated to ``embedding_dim``.
    """
    with torch.no_grad():
        x = self.__get_initial_virtual_node_features(node_ids=node_ids)
        actual_num_hyperedges = self.__num_hyperedges(hyperedge_index, num_hyperedges)

        final_embeddings_size = (
            (x.size(0), self.raw_embedding_dim)
            if mode == "node"
            else (actual_num_hyperedges, self.raw_embedding_dim)
        )
        final_embeddings = x.new_zeros(size=final_embeddings_size)
        for _ in range(self.generation_steps):
            x, hyperedge_embeddings = self.__message_passing(
                x=x,
                hyperedge_index=hyperedge_index,
                num_hyperedges=actual_num_hyperedges,
            )

            # Suppose generation_steps = 100.
            # Average 100 propagated embeddings for each node/hyperedge to get more stable final embeddings.
            # Sum here and divide by generation_steps later to avoid storing all 100 embeddings in memory at once.
            final_embeddings = final_embeddings + (
                x if mode == "node" else hyperedge_embeddings
            )
        final_embeddings = final_embeddings / self.generation_steps

        # Example: final_embeddings.shape = (num_nodes/num_hyperedges, 8) with raw_embedding_dim=8
        #          -> returned shape = (num_nodes/num_hyperedges, 4) with embedding_dim=4
        #             as it takes the first 4 channels of the raw embedding as the final embedding.
        return final_embeddings[:, : self.embedding_dim]

__get_initial_virtual_node_features(node_ids=None)

Convert trainable node logits into flattened virtual-label probabilities.

Parameters:

Name Type Description Default
node_ids Tensor | None

Optional global node ids that map each local node id to its row in the embedding table in the transductive setting. If None, all node rows are used.

None

Returns:

Type Description
Tensor

A tensor of shape (num_selected_nodes, raw_embedding_dim).

Source code in hyperbench/models/villain.py
def __get_initial_virtual_node_features(self, node_ids: Tensor | None = None) -> Tensor:
    """
    Convert trainable node logits into flattened virtual-label probabilities.

    Args:
        node_ids: Optional global node ids matching local node ids the embedding table in the transductive setting.
            If ``None``, all node rows are used.

    Returns:
        A tensor of shape ``(num_selected_nodes, raw_embedding_dim)``.
    """
    logits = self.node_embedding if node_ids is None else self.node_embedding[node_ids]

    # Split flat logits into independent virtual-label subspaces.
    # Example: with raw_embedding_dim=8, num_subspaces=4, labels_per_subspace=2:
    #          logits.shape = (num_nodes, 8)
    #          -> viewed_logits shape = (num_nodes, 4, 2)
    #          viewed_logits[0] = [[l00, l01],  # node 0, subspace 0
    #                              [l02, l03],  # node 0, subspace 1
    #                              [l04, l05],  # node 0, subspace 2
    #                              [l06, l07]]  # node 0, subspace 3
    viewed_logits = logits.view(-1, self.num_subspaces, self.labels_per_subspace)

    # Convert each subspace's logits into a differentiable virtual-label assignment.
    # Example: viewed_logits[0, 0] = [0.03, -0.02]
    #          -> probs[0, 0] might be [0.47, 0.53] with tau=1.0
    #          probs.shape remains (num_nodes, 4, 2).
    probs = F.gumbel_softmax(viewed_logits, tau=self.tau, dim=2, hard=False)

    # Flatten subspaces back into a standard node-by-channel node feature matrix.
    # The aggregators expect matrices shaped (num_nodes, num_channels==raw_embedding_dim),
    # so propagation happens on the flattened channel dimension.
    # Example: probs.shape = (num_nodes, 4, 2) -> shape = (num_nodes, 8)
    return probs.reshape(-1, self.raw_embedding_dim)

__message_passing(x, hyperedge_index, num_hyperedges)

One round of message passing, where nodes send messages to hyperedges and then hyperedges send messages back to nodes.

Parameters:

Name Type Description Default
x Tensor

Virtual node features of shape (num_nodes, raw_embedding_dim).

required
hyperedge_index Tensor

Incidence tensor of shape (2, num_incidences).

required
num_hyperedges int

Total number of hyperedges.

required

Returns:

Type Description
tuple[Tensor, Tensor]

The updated node and hyperedge embeddings after one round of message passing.

Source code in hyperbench/models/villain.py
def __message_passing(
    self,
    x: Tensor,
    hyperedge_index: Tensor,
    num_hyperedges: int,
) -> tuple[Tensor, Tensor]:
    """
    Run a single propagation round: nodes send messages to hyperedges, then hyperedges send messages back to nodes.

    Args:
        x: Virtual node features of shape (num_nodes, raw_embedding_dim).
        hyperedge_index: Incidence tensor of shape (2, num_incidences).
        num_hyperedges: Total number of hyperedges.

    Returns:
        The updated node and hyperedge embeddings after one round of message passing.
    """
    # Nodes -> hyperedges: pool the node features of every hyperedge with the "mean" reduction.
    node_to_hyperedge = HyperedgeAggregator(
        hyperedge_index=hyperedge_index,
        node_embeddings=x,
        num_hyperedges=num_hyperedges,
    )
    hyperedge_states = node_to_hyperedge.pool("mean")

    # Hyperedges -> nodes: pool the fresh hyperedge states back, keeping the node count of ``x``.
    hyperedge_to_node = NodeAggregator(
        hyperedge_index=hyperedge_index,
        hyperedge_embeddings=hyperedge_states,
        num_nodes=x.size(0),
    )
    node_states = hyperedge_to_node.pool("mean")

    return node_states, hyperedge_states

__num_hyperedges(hyperedge_index, num_hyperedges)

Return the explicit hyperedge count if one is provided; otherwise infer it from hyperedge_index. Explicit counts are required when empty hyperedges must remain in the hypergraph.

Source code in hyperbench/models/villain.py
def __num_hyperedges(
    self,
    hyperedge_index: Tensor,
    num_hyperedges: int | None,
) -> int:
    """
    Return the explicit hyperedge count or infer it from the ``hyperedge_index``, if not provided.
    Explicit counts are required when empty hyperedges must remain in the hypergraph.
    """
    if num_hyperedges is not None:
        return num_hyperedges
    return HyperedgeIndex(hyperedge_index).num_hyperedges