Classification Heads

dnallm.models.head

Classes

BasicCNNHead

BasicCNNHead(
    input_dim,
    num_classes,
    task_type="binary",
    num_filters=128,
    kernel_sizes=None,
    dropout=0.2,
    **kwargs,
)

Bases: Module

A CNN-based head for processing Transformer output sequences. This head applies multiple 1D convolutional layers with different kernel sizes to capture local patterns in the sequence data, followed by a fully connected layer for classification or regression tasks.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| input_dim | int | Dimension of the input features | required |
| num_classes | int | Number of output classes (for classification tasks) | required |
| task_type | str | Type of task - 'binary', 'multiclass', 'multilabel', or 'regression' | 'binary' |
| num_filters | int | Number of filters for each convolutional layer | 128 |
| kernel_sizes | list \| None | List of kernel sizes for the convolutional layers | None |
| dropout | float | Dropout probability | 0.2 |
Source code in dnallm/models/head.py
def __init__(
    self,
    input_dim: int,
    num_classes: int,
    task_type: str = "binary",
    num_filters: int = 128,
    kernel_sizes: list | None = None,
    dropout: float = 0.2,
    **kwargs: Any,
):
    super().__init__()
    self.task_type = task_type
    self.num_classes = num_classes
    if kernel_sizes is None:
        kernel_sizes = [3, 4, 5]

    # Define multiple parallel 1D convolutional layers
    self.convs = nn.ModuleList([
        nn.Conv1d(
            in_channels=input_dim, out_channels=num_filters, kernel_size=k
        )
        for k in kernel_sizes
    ])

    self.dropout = nn.Dropout(dropout)

    # CNN feature dimension is the concatenation of all conv outputs
    cnn_output_dim = num_filters * len(kernel_sizes)

    # Define the final output layer
    self.output_layer = nn.Linear(cnn_output_dim, num_classes)
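
Only the constructor is shown above, so the snippet below is a minimal sketch of the pattern this head implements rather than the library's exact forward pass: embeddings of shape (batch, seq_len, input_dim) are permuted for Conv1d, passed through the parallel convolutions, max-pooled over the sequence, and concatenated into a num_filters * len(kernel_sizes) feature vector. All sizes are illustrative.

```python
import torch
import torch.nn as nn

# Hypothetical sizes for illustration only
batch, seq_len, input_dim, num_filters, num_classes = 4, 128, 768, 128, 2
kernel_sizes = [3, 4, 5]  # matches the default used above

x = torch.randn(batch, seq_len, input_dim)  # transformer output
x = x.permute(0, 2, 1)                      # Conv1d expects (batch, channels, seq)

convs = nn.ModuleList(
    nn.Conv1d(input_dim, num_filters, kernel_size=k) for k in kernel_sizes
)
# One feature vector per kernel size: conv -> ReLU -> global max pool
# (a sketch of the pattern; BasicCNNHead's own forward may differ)
pooled = [torch.relu(conv(x)).max(dim=-1).values for conv in convs]
features = torch.cat(pooled, dim=-1)        # (batch, num_filters * len(kernel_sizes))

logits = nn.Linear(num_filters * len(kernel_sizes), num_classes)(features)
print(features.shape, logits.shape)         # torch.Size([4, 384]) torch.Size([4, 2])
```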

BasicLSTMHead

BasicLSTMHead(
    input_dim,
    num_classes,
    task_type="binary",
    hidden_size=256,
    num_layers=1,
    dropout=0.1,
    bidirectional=True,
    **kwargs,
)

Bases: Module

An LSTM-based head for processing Transformer output sequences. This head applies a multi-layer LSTM to capture sequential dependencies in the sequence data, followed by a fully connected layer for classification or regression tasks.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| input_dim | int | Dimension of the input features | required |
| num_classes | int | Number of output classes (for classification tasks) | required |
| task_type | str | Type of task - 'binary', 'multiclass', 'multilabel', or 'regression' | 'binary' |
| hidden_size | int | Number of features in the hidden state of the LSTM | 256 |
| num_layers | int | Number of recurrent layers in the LSTM | 1 |
| dropout | float | Dropout probability between LSTM layers | 0.1 |
| bidirectional | bool | Whether to use a bidirectional LSTM | True |
Source code in dnallm/models/head.py
def __init__(
    self,
    input_dim: int,
    num_classes: int,
    task_type: str = "binary",
    hidden_size: int = 256,
    num_layers: int = 1,
    dropout: float = 0.1,
    bidirectional: bool = True,
    **kwargs: Any,
):
    super().__init__()
    self.task_type = task_type
    self.num_classes = num_classes

    # Define the LSTM layer
    self.lstm = nn.LSTM(
        input_size=input_dim,
        hidden_size=hidden_size,
        num_layers=num_layers,
        bidirectional=bidirectional,
        dropout=dropout if num_layers > 1 else 0,
        batch_first=True,  # Accepts (batch, seq, feature) shaped inputs
    )

    # LSTM output feature dimension
    lstm_output_dim = hidden_size * 2 if bidirectional else hidden_size

    # Define the final output layer
    self.output_layer = nn.Linear(lstm_output_dim, num_classes)
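
Because the LSTM is bidirectional by default, the classifier input is hidden_size * 2. A quick check on a freshly built head (the sizes are illustrative):

```python
from dnallm.models.head import BasicLSTMHead

# Illustrative sizes; input_dim should match your backbone's hidden size
head = BasicLSTMHead(input_dim=768, num_classes=2, hidden_size=256, bidirectional=True)

# Forward and backward hidden states are concatenated, so 256 * 2 = 512
print(head.output_layer.in_features)   # 512
print(head.lstm.bidirectional)         # True
```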

BasicMLPHead

BasicMLPHead(
    input_dim,
    num_classes=2,
    task_type="binary",
    hidden_dims=None,
    activation_fn="relu",
    use_normalization=True,
    norm_type="layernorm",
    dropout=0.1,
    **kwargs,
)

Bases: Module

A universal and customizable MLP model designed to be appended after the embedding output of models like Transformers to perform various downstream tasks such as classification and regression.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| input_dim | int | Dimension of the input features | required |
| num_classes | int | Number of output classes (for classification tasks) | 2 |
| task_type | str | Type of task - 'binary', 'multiclass', 'multilabel', or 'regression' | 'binary' |
| hidden_dims | list \| None | List of hidden layer dimensions | None |
| activation_fn | str | Activation function to use ('relu', 'gelu', 'silu', 'tanh', 'sigmoid') | 'relu' |
| use_normalization | bool | Whether to use normalization layers | True |
| norm_type | str | Type of normalization - 'batchnorm' or 'layernorm' | 'layernorm' |
| dropout | float | Dropout probability | 0.1 |
Source code in dnallm/models/head.py
def __init__(
    self,
    input_dim: int,
    num_classes: int = 2,
    task_type: str = "binary",
    hidden_dims: list | None = None,
    activation_fn: str = "relu",
    use_normalization: bool = True,
    norm_type: str = "layernorm",
    dropout: float = 0.1,
    **kwargs: Any,
):
    super().__init__()
    if hidden_dims is None:
        hidden_dims = [512]
    if task_type not in [
        "binary",
        "multiclass",
        "multilabel",
        "regression",
    ]:
        raise ValueError(f"Unsupported task_type: {task_type}")
    if norm_type not in ["batchnorm", "layernorm"]:
        raise ValueError(f"Unsupported norm_type: {norm_type}")
    self.input_dim = input_dim
    self.num_classes = num_classes
    self.task_type = task_type
    activations = {
        "relu": nn.ReLU(),
        "gelu": nn.GELU(),
        "silu": nn.SiLU(),
        "tanh": nn.Tanh(),
        "sigmoid": nn.Sigmoid(),
    }
    activation_layer = activations.get(activation_fn.lower())
    if activation_layer is None:
        raise ValueError(f"Unsupported activation_fn: {activation_fn}")
    layers = []
    current_dim = input_dim
    for i, h_dim in enumerate(hidden_dims):
        layers.append((f"linear_{i}", nn.Linear(current_dim, h_dim)))
        if use_normalization:
            layers.append((
                f"norm_{i}",
                nn.LayerNorm(h_dim)
                if norm_type == "layernorm"
                else nn.BatchNorm1d(h_dim),
            ))
        layers.append((f"activation_{i}", activation_layer))
        layers.append((f"dropout_{i}", nn.Dropout(p=dropout)))
        current_dim = h_dim
    self.mlp = nn.Sequential(OrderedDict(layers))
    self.output_layer = nn.Linear(current_dim, num_classes)
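
A quick way to inspect the resulting stack is to build a head and print it; the layer names (linear_0, norm_0, activation_0, dropout_0, ...) come from the OrderedDict above. The sizes below are illustrative:

```python
from dnallm.models.head import BasicMLPHead

head = BasicMLPHead(
    input_dim=768,            # illustrative; match your backbone's hidden size
    num_classes=3,
    task_type="multiclass",
    hidden_dims=[512, 128],
    activation_fn="gelu",
    norm_type="layernorm",
    dropout=0.1,
)

print(head.mlp)           # linear_0 -> norm_0 -> activation_0 -> dropout_0 -> linear_1 -> ...
print(head.output_layer)  # Linear(in_features=128, out_features=3, bias=True)
```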

BasicUNet1DHead

BasicUNet1DHead(
    input_dim,
    num_classes,
    task_type="binary",
    num_layers=2,
    initial_filters=64,
    **kwargs,
)

Bases: Module

A U-Net architecture adapted for 1D sequence data, suitable for classification and regression tasks. This model consists of an encoder-decoder structure with skip connections, allowing it to capture both local and global features in the inputs.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| input_dim | int | The number of input features (channels) in the inputs. | required |
| num_classes | int | The number of output classes for the classification task. | required |
| task_type | str | The type of task - 'binary', 'multiclass', 'multilabel', or 'regression'. | 'binary' |
| num_layers | int | The number of downsampling/upsampling layers in the U-Net. | 2 |
| initial_filters | int | The number of filters in the first convolutional layer. | 64 |
Source code in dnallm/models/head.py
def __init__(
    self,
    input_dim: int,
    num_classes: int,
    task_type: str = "binary",
    num_layers: int = 2,
    initial_filters: int = 64,
    **kwargs: Any,
):
    super().__init__()
    self.task_type = task_type
    self.num_classes = num_classes
    if initial_filters is None or initial_filters <= 0:
        initial_filters = input_dim

    self.downs = nn.ModuleList()
    self.ups = nn.ModuleList()

    # --- Encoder (downsampling path) ---
    in_c = input_dim
    out_c = initial_filters
    for _ in range(num_layers):
        self.downs.append(DoubleConv(in_c, out_c))
        in_c = out_c
        out_c *= 2

    # --- Bottleneck ---
    self.bottleneck = DoubleConv(in_c, out_c)

    # --- Decoder (upsampling path) ---
    in_c = out_c
    out_c //= 2
    for _ in range(num_layers):
        self.ups.append(
            nn.ConvTranspose1d(in_c, out_c, kernel_size=2, stride=2)
        )
        self.ups.append(DoubleConv(in_c, out_c))
        in_c = out_c
        out_c //= 2

    # --- Final output layer ---
    # After U-Net processing,
    # the number of channels becomes initial_filters
    # We perform average pooling on the enhanced sequence
    # and then pass it to the linear layer
    self.output_layer = nn.Linear(initial_filters, num_classes)
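
With num_layers=2 and initial_filters=64, the encoder doubles the channel count at each level, the bottleneck doubles it once more, and the decoder halves it back down to initial_filters, which is what the final linear layer consumes. A small sanity check (input_dim is illustrative):

```python
from dnallm.models.head import BasicUNet1DHead

head = BasicUNet1DHead(input_dim=768, num_classes=2, num_layers=2, initial_filters=64)

# Encoder channels: 768 -> 64 -> 128, bottleneck: 256, decoder: 256 -> 128 -> 64
print(len(head.downs))                # 2  (one DoubleConv per level)
print(len(head.ups))                  # 4  (ConvTranspose1d + DoubleConv per level)
print(head.output_layer.in_features)  # 64 (= initial_filters)
```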

DoubleConv

DoubleConv(in_channels, out_channels)

Bases: Module

(Convolution => [BatchNorm] => ReLU) * 2

Source code in dnallm/models/head.py
def __init__(self, in_channels, out_channels):
    super().__init__()
    self.double_conv = nn.Sequential(
        nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm1d(out_channels),
        nn.ReLU(inplace=True),
        nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm1d(out_channels),
        nn.ReLU(inplace=True),
    )
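
Both convolutions use kernel_size=3 with padding=1, so the sequence length is preserved while the channel count changes. Since only the constructor is shown here, the example calls the inner nn.Sequential directly:

```python
import torch
from dnallm.models.head import DoubleConv

block = DoubleConv(in_channels=4, out_channels=8)
x = torch.randn(2, 4, 100)    # (batch, channels, length)
y = block.double_conv(x)      # padding=1 keeps the length at 100
print(y.shape)                # torch.Size([2, 8, 100])
```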

EVOForSeqClsHead

EVOForSeqClsHead(
    base_model,
    num_classes=2,
    task_type="binary",
    target_layer=None,
    pooling_method="mean",
    dropout_prob=0.1,
    **kwargs,
)

Bases: Module

A classification head tailored for the embedding outputs of the EVO-series model.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| base_model | any | The EVO model instance providing embeddings. | required |
| num_classes | int | Number of output classes for classification. | 2 |
| task_type | str | Type of task - 'binary', 'multiclass', 'multilabel', or 'regression'. | 'binary' |
| target_layer | str \| list[str] \| None | Specific layer(s) from which to extract embeddings. Can be 'all' to average all layers, a list of layer names, or a single layer name. | None |
| pooling_method | str | Method to pool sequence embeddings. | 'mean' |
| dropout_prob | float | Dropout probability for regularization. | 0.1 |
Source code in dnallm/models/head.py
def __init__(
    self,
    base_model: any,
    num_classes: int = 2,
    task_type: str = "binary",
    target_layer: str | list[str] | None = None,
    pooling_method: str = "mean",
    dropout_prob: float = 0.1,
    **kwargs: Any,
):
    super().__init__()
    self.num_classes = num_classes
    self.task_type = task_type
    self.pooling_method = pooling_method

    if target_layer == "all" or target_layer is None:
        self.target_layers = []
        for name, _ in base_model.model.named_parameters():
            if name.startswith("blocks"):
                layer = "blocks." + name.split(".")[1]
                if layer not in self.target_layers:
                    self.target_layers.append(layer)
        if target_layer is None:
            # Find middle layer which performs better than
            # the last layer
            mid_layer = round(len(self.target_layers) * 26 / 32)
            self.target_layers = [self.target_layers[mid_layer]]
            self.use_layer_averaging = False
        else:
            self.use_layer_averaging = True

    elif isinstance(target_layer, list):
        self.target_layers = target_layer
        self.use_layer_averaging = True

    else:
        self.target_layers = [target_layer]
        self.use_layer_averaging = False

    if target_layer != "all":
        print(f"Use layers: {self.target_layers} embeddings.")

    self.dropout = nn.Dropout(dropout_prob)
    self.classifier = nn.Linear(base_model.config.hidden_size, num_classes)
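
The target_layer selection can be illustrated with a hypothetical stand-in backbone; in practice a real EVO model object exposing .model (with blocks.* parameters) and .config.hidden_size is expected:

```python
import torch.nn as nn
from types import SimpleNamespace
from dnallm.models.head import EVOForSeqClsHead

class _TinyBackbone(nn.Module):
    """Hypothetical stand-in with parameters named blocks.0 ... blocks.31."""
    def __init__(self, n_blocks=32, hidden=16):
        super().__init__()
        self.blocks = nn.ModuleList(nn.Linear(hidden, hidden) for _ in range(n_blocks))

base_model = SimpleNamespace(model=_TinyBackbone(), config=SimpleNamespace(hidden_size=16))

# target_layer=None: the head keeps a single mid-stack layer,
# index round(32 * 26 / 32) = 26, i.e. "blocks.26"
head = EVOForSeqClsHead(base_model, num_classes=2)
print(head.target_layers)            # ['blocks.26']

# target_layer="all": embeddings from every block are averaged
head_all = EVOForSeqClsHead(base_model, num_classes=2, target_layer="all")
print(head_all.use_layer_averaging)  # True
```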

MegaDNAMultiScaleHead

MegaDNAMultiScaleHead(
    embedding_dims=None,
    num_classes=2,
    task_type="binary",
    hidden_dims=None,
    dropout=0.2,
    **kwargs,
)

Bases: Module

A classification head tailored for the multi-scale embedding outputs of the MegaDNA model. It takes a list of embedding tensors, pools each tensor, and concatenates the results before passing them to an MLP for classification.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| embedding_dims | list \| None | A list of integers representing the dimensions of the input embeddings. | None |
| num_classes | int | The number of output classes for classification. | 2 |
| task_type | str | The type of task - 'binary', 'multiclass', 'multilabel', or 'regression'. | 'binary' |
| hidden_dims | list \| None | A list of integers representing the sizes of hidden layers in the MLP. | None |
| dropout | float | Dropout probability for regularization. | 0.2 |
Source code in dnallm/models/head.py
def __init__(
    self,
    embedding_dims: list | None = None,
    num_classes: int = 2,
    task_type: str = "binary",
    hidden_dims: list | None = None,
    dropout: float = 0.2,
    **kwargs: Any,
):
    super().__init__()
    self.embedding_dims = embedding_dims
    self.num_classes = num_classes
    self.task_type = task_type
    if hidden_dims is None:
        hidden_dims = [256]

    # Check that embedding_dims has exactly 3 elements
    if len(embedding_dims) != 3:
        raise ValueError(
            "embedding_dims list must contain 3 integers, "
            "corresponding to the outputs of 3 scales."
        )

    concatenated_dim = sum(embedding_dims)

    # --- Create MLP layers ---
    mlp_layers = []
    current_dim = concatenated_dim
    for i, h_dim in enumerate(hidden_dims):
        mlp_layers.append((f"linear_{i}", nn.Linear(current_dim, h_dim)))
        mlp_layers.append((f"norm_{i}", nn.LayerNorm(h_dim)))
        mlp_layers.append((f"activation_{i}", nn.GELU()))
        mlp_layers.append((f"dropout_{i}", nn.Dropout(p=dropout)))
        current_dim = h_dim

    self.mlp = nn.Sequential(OrderedDict(mlp_layers))

    # --- Final output layer ---
    self.output_layer = nn.Linear(current_dim, num_classes)
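
The head requires exactly three embedding dimensions, one per MegaDNA scale, and the MLP consumes their sum. The dimensions below are placeholders rather than MegaDNA's actual sizes:

```python
from dnallm.models.head import MegaDNAMultiScaleHead

# Placeholder scale dimensions; use the real MegaDNA embedding sizes in practice
head = MegaDNAMultiScaleHead(embedding_dims=[196, 256, 512], num_classes=2, hidden_dims=[256])

print(head.mlp.linear_0.in_features)   # 964 (= 196 + 256 + 512)
print(head.output_layer.out_features)  # 2
```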