Skip to content

inference/plot API

dnallm.inference.plot

DNA Language Model Visualization and Plotting Module.

This module provides comprehensive plotting capabilities for DNA language model results, including metrics visualization, attention maps, embeddings, and mutation effects analysis.

Functions

plot_attention_map

plot_attention_map(
    attentions,
    sequences,
    tokenizer,
    seq_idx=0,
    layer=-1,
    head=-1,
    width=800,
    height=800,
    save_path=None,
)

Plot attention map visualization for transformer models.

This function creates a heatmap visualization of attention weights between tokens in a sequence, showing how the model attends to different parts of the input.

Args:
    attentions: Tuple or list containing attention weights from model layers
    sequences: List of input sequences
    tokenizer: Tokenizer object for converting tokens to readable text
    seq_idx: Index of the sequence to plot, default 0
    layer: Layer index to visualize, default -1 (last layer)
    head: Attention head index to visualize, default -1 (last head)
    width: Width of the plot
    height: Height of the plot
    save_path: Path to save the plot. If None, the plot is not saved

Returns:
    Altair chart object showing the attention heatmap
Source code in dnallm/inference/plot.py
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
def plot_attention_map(
    attentions: tuple | list,
    sequences: list[str],
    tokenizer,
    seq_idx: int = 0,
    layer: int = -1,
    head: int = -1,
    width: int = 800,
    height: int = 800,
    save_path: str | None = None,
) -> alt.Chart:
    """Draw a token-by-token heatmap of a single attention head.

    The weights are taken from ``attentions[layer]`` (converted with
    ``np.array``) and indexed as ``[seq_idx][head]``. The axis labels come
    from tokenizing ``sequences[seq_idx]``.

    Args:
        attentions: Tuple or list of per-layer attention weights.
        sequences: List of input sequences.
        tokenizer: Tokenizer used to turn the sequence into token labels.
            ``encode`` + ``convert_ids_to_tokens`` is tried first; on
            ``AttributeError`` or ``TypeError`` the labels come from
            ``decode(...).split()`` instead.
        seq_idx: Index of the sequence to plot, default 0.
        layer: Layer index to visualize, default -1 (last layer).
        head: Attention head index to visualize, default -1 (last head).
        width: Width of the plot.
        height: Height of the plot.
        save_path: Path to save the plot. If falsy, nothing is written.

    Returns:
        Altair chart object showing the attention heatmap.
    """
    weights = np.array(attentions[layer])[seq_idx][head]

    sequence = sequences[seq_idx]
    try:
        tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(sequence))
    except (AttributeError, TypeError):
        # Tokenizer does not support the encode/convert_ids_to_tokens path.
        tokens = tokenizer.decode(sequence).split()

    count = len(tokens)
    pad = len(str(count))

    # Every label gets a zero-padded numeric prefix, which keeps labels of
    # repeated tokens distinct. The x prefix counts up from 0, the y prefix
    # counts down from ``count``. The axis labelExpr below strips the prefix.
    x_labels = [
        f"{str(pos).zfill(pad)}{tok}" for pos, tok in enumerate(tokens)
    ]
    y_labels = [
        f"{str(count - pos).zfill(pad)}{tok}"
        for pos, tok in enumerate(tokens)
    ]

    # Long-form table: one record per (row token, column token) pair.
    col_token1 = []
    col_token2 = []
    col_attn = []
    for row, x_label in enumerate(x_labels):
        for col, y_label in enumerate(y_labels):
            col_token1.append(x_label)
            col_token2.append(y_label)
            col_attn.append(weights[row][col])

    source = pd.DataFrame(
        {"token1": col_token1, "token2": col_token2, "attn": col_attn}
    )

    # NOTE: this switches the Altair data transformer globally.
    alt.data_transformers.enable("vegafusion")

    strip_prefix = f"substring(datum.value, {pad}, 100)"
    x_encoding = alt.X(
        "token1:O",
        axis=alt.Axis(labelExpr=strip_prefix, labelAngle=-45),
    ).title(None)
    y_encoding = alt.Y(
        "token2:O",
        axis=alt.Axis(labelExpr=strip_prefix, labelAngle=0),
    ).title(None)
    color_encoding = alt.Color("attn:Q").scale(scheme="viridis")

    heatmap = alt.Chart(source).mark_rect()
    heatmap = heatmap.encode(x=x_encoding, y=y_encoding, color=color_encoding)
    heatmap = heatmap.properties(width=width, height=height)
    attn_map = heatmap.configure_axis(grid=False)

    if save_path:
        attn_map.save(save_path)
        print(f"Attention map saved to {save_path}")

    return attn_map

plot_bars

plot_bars(
    data,
    show_score=True,
    ncols=3,
    width=200,
    height=50,
    bar_width=30,
    domain=(0.0, 1.0),
    save_path=None,
    separate=False,
)

Plot bar charts for model metrics comparison.

This function creates bar charts to compare different metrics across multiple models. It supports automatic layout with multiple columns and optional score labels on bars.

Args:
    data: Dictionary containing metrics data with 'models' as the first
        key
    show_score: Whether to show the score values on the bars
    ncols: Number of columns to arrange the plots
    width: Width of each individual plot
    height: Height of each individual plot
    bar_width: Width of the bars in the plot
    domain: Y-axis domain range for the plots, default (0.0, 1.0)
            save_path: Path to save the plot. If None,
        plot will be shown interactively
    separate: Whether to return separate plots for each metric

Returns:
            Altair chart object (combined or
        separate plots based on separate parameter)
Source code in dnallm/inference/plot.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
def plot_bars(
    data: dict,
    show_score: bool = True,
    ncols: int = 3,
    width: int = 200,
    height: int = 50,
    bar_width: int = 30,
    domain: tuple[float, float] | list[float] = (0.0, 1.0),
    save_path: str | None = None,
    separate: bool = False,
) -> alt.Chart | dict[str, alt.Chart]:
    """Plot bar charts for model metrics comparison.

    One horizontal bar chart is drawn per metric (every key of ``data``
    except ``'models'``). Charts are laid out left to right, ``ncols`` per
    row, and the rows are stacked vertically.

    Args:
        data: Dictionary of metrics; the ``'models'`` entry provides the
            model names, every other entry is one metric column.
        show_score: Whether to overlay the score (formatted ``.3f``) on
            each bar.
        ncols: Number of charts per row. Must be at least 1.
        width: Width of each individual plot.
        height: Height per model; each plot is ``height * number of
            models`` tall.
        bar_width: Width of the bars in the plot.
        domain: Value-axis domain, default (0.0, 1.0). For the metrics
            ``'mae'`` and ``'mse'`` it is replaced by ``[0, 1.1 * max]``.
        save_path: Path to save the combined plot. If falsy, nothing is
            written.
        separate: Whether to return a dict of per-metric plots instead of
            the combined plot.

    Returns:
        The combined Altair chart, or, when ``separate`` is true, a dict
        mapping each metric name to its own chart.

    Raises:
        ValueError: If ``ncols`` is smaller than 1.
    """
    # Reject invalid grid widths up front. Without this check, ncols == 0
    # fails with ZeroDivisionError and a negative ncols with a KeyError.
    if ncols < 1:
        raise ValueError(f"ncols must be a positive integer, got {ncols!r}")

    dbar = pd.DataFrame(data)
    metrics_list = [key for key in data if key != "models"]

    rows = []  # one horizontally concatenated chart per grid row
    p_separate = {}

    for n, metric in enumerate(metrics_list):
        # Error metrics are unbounded, so scale their axis to the data.
        if metric in ["mae", "mse"]:
            domain_use = [0, dbar[metric].max() * 1.1]
        else:
            domain_use = domain

        bar = (
            alt.Chart(dbar)
            .mark_bar(size=bar_width)
            .encode(
                x=alt.X(f"{metric}:Q").scale(domain=domain_use),
                y=alt.Y("models").title(None),
                color=alt.Color("models").legend(None),
            )
            .properties(width=width, height=height * len(dbar["models"]))
        )

        if show_score:
            # Right-aligned white label just inside the end of each bar.
            text = (
                alt.Chart(dbar)
                .mark_text(
                    dx=-10, color="white", baseline="middle", align="right"
                )
                .encode(
                    x=alt.X(f"{metric}:Q"),
                    y=alt.Y("models").title(None),
                    text=alt.Text(metric, format=".3f"),
                )
            )
            p = bar + text
        else:
            p = bar

        if separate:
            # configure_axis returns a configured copy; the unconfigured
            # ``p`` is what goes into the combined grid below.
            p_separate[metric] = p.configure_axis(grid=False)

        if n % ncols == 0:
            rows.append(p)  # first chart of a new row
        else:
            rows[-1] |= p

    pbars = rows[0] if rows else alt.Chart()
    for row in rows[1:]:
        pbars &= row

    # Top-level configuration is applied once, to the combined chart only.
    pbars = pbars.configure_axis(grid=False)

    if save_path:
        pbars.save(save_path)
        print(f"Metrics bar charts saved to {save_path}")

    if separate:
        return p_separate
    return pbars

plot_curve

plot_curve(
    data,
    show_score=True,
    width=400,
    height=400,
    save_path=None,
    separate=False,
)

Plot ROC and PR curves for classification tasks.

This function creates ROC (Receiver Operating Characteristic) and PR (Precision-Recall) curves to evaluate model performance on classification tasks.

Args:
    data: Dictionary containing ROC and PR curve data with 'ROC' and 'PR' keys
    show_score: Whether to show the score values on the plot (currently not implemented)
    width: Width of each plot
    height: Height of each plot
    save_path: Path to save the plot. If None, the plot is not saved
    separate: Whether to return separate plots for ROC and PR curves

Returns:
            Altair chart object (combined or
        separate plots based on separate parameter)
Source code in dnallm/inference/plot.py
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
def plot_curve(
    data: dict,
    show_score: bool = True,
    width: int = 400,
    height: int = 400,
    save_path: str | None = None,
    separate: bool = False,
) -> alt.Chart | dict[str, alt.Chart]:
    """Plot ROC and PR curves for classification tasks.

    Two line charts are built: ROC (``fpr`` vs. ``tpr``) from
    ``data["ROC"]`` and PR (``recall`` vs. ``precision``) from
    ``data["PR"]``. Both are colored by the ``models`` column and use a
    fixed (0.0, 1.0) domain on both axes.

    Args:
        data: Dictionary containing ROC and PR curve data with 'ROC' and
            'PR' keys.
        show_score: Whether to show the score values on the plot
            (currently not implemented; the argument is unused).
        width: Width of each plot.
        height: Height of each plot.
        save_path: Path to save the combined plot. If falsy, nothing is
            written.
        separate: Whether to return separate plots for ROC and PR curves.

    Returns:
        The side-by-side Altair chart, or, when ``separate`` is true, a
        dict with the keys ``"ROC"`` and ``"PR"``.
    """
    unit_range = (0.0, 1.0)

    def _line_chart(frame, x_field, y_field):
        # One line per model, both axes pinned to [0, 1].
        chart = alt.Chart(frame).mark_line()
        chart = chart.encode(
            x=alt.X(x_field).scale(domain=unit_range),
            y=alt.Y(y_field).scale(domain=unit_range),
            color="models",
        )
        return chart.properties(width=width, height=height)

    roc_chart = _line_chart(pd.DataFrame(data["ROC"]), "fpr", "tpr")
    pr_chart = _line_chart(pd.DataFrame(data["PR"]), "recall", "precision")

    p_separate = {}
    if separate:
        p_separate["ROC"] = roc_chart
        p_separate["PR"] = pr_chart

    # ROC on the left, PR on the right; the grid is disabled on the
    # combined chart only.
    plines = (roc_chart | pr_chart).configure_axis(grid=False)

    if save_path:
        plines.save(save_path)
        print(f"ROC curves saved to {save_path}")

    return p_separate if separate else plines

plot_embeddings

plot_embeddings(
    hidden_states,
    attention_mask,
    reducer="t-SNE",
    labels=None,
    label_names=None,
    ncols=4,
    width=300,
    height=300,
    save_path=None,
    separate=False,
)

Visualize embeddings using dimensionality reduction techniques.

This function creates 2D visualizations of high-dimensional embeddings from different model layers using PCA, t-SNE, or UMAP dimensionality reduction methods.

Args:
    hidden_states: Tuple or list containing hidden states from model layers
    attention_mask: Tuple or list containing attention masks for sequence padding
    reducer: Dimensionality reduction method. Options: 'PCA', 't-SNE', 'UMAP'
    labels: List of labels for the data points
    label_names: List of label names for legend display
    ncols: Number of columns to arrange the plots
    width: Width of each plot
    height: Height of each plot
            save_path: Path to save the plot. If None,
        plot will be shown interactively
    separate: Whether to return separate plots for each layer

Returns:
            Altair chart object (combined or
        separate plots based on separate parameter)

Raises:
    ValueError: If unsupported dimensionality reduction method is
        specified
Source code in dnallm/inference/plot.py
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
def plot_embeddings(
    hidden_states: tuple | list,
    attention_mask: tuple | list,
    reducer: str = "t-SNE",
    labels: tuple | list | None = None,
    label_names: str | list | None = None,
    ncols: int = 4,
    width: int = 300,
    height: int = 300,
    save_path: str | None = None,
    separate: bool = False,
) -> alt.Chart | dict[str, alt.Chart]:
    """Visualize embeddings using dimensionality reduction techniques.

    ``hidden_states`` and ``attention_mask`` are iterated in lockstep. For
    each pair, mean embeddings are computed, reduced with the selected
    reducer, and drawn as one plot. The plots are then arranged in a grid.

    Args:
        hidden_states: Tuple or list containing hidden states from model
            layers.
        attention_mask: Tuple or list containing attention masks for
            sequence padding.
        reducer: Dimensionality reduction method. Options: 'PCA',
            't-SNE', 'UMAP'.
        labels: List of labels for the data points.
        label_names: List of label names for legend display.
        ncols: Number of columns to arrange the plots.
        width: Width of each plot.
        height: Height of each plot.
        save_path: Path to save the combined plot. If falsy, nothing is
            written.
        separate: Whether to return separate plots for each layer.

    Returns:
        The combined Altair chart, or, when ``separate`` is true, a dict
        keyed ``"Layer1"``, ``"Layer2"``, ... with one chart per layer.

    Raises:
        ValueError: If the reducer lookup yields ``None``.
    """
    dim_reducer = _get_dimensionality_reducer(reducer)
    if dim_reducer is None:
        # Defensive guard: the helper is expected to always return a reducer.
        raise ValueError(
            "Dimensionality reducer is None - this should not happen"
        )

    plots = []
    p_separate = {}

    # strict=False: if the two inputs differ in length, zip stops at the
    # shorter one without raising.
    layer_inputs = zip(hidden_states, attention_mask, strict=False)
    for layer_no, (hidden, mask) in enumerate(layer_inputs, start=1):
        pooled = _compute_mean_embeddings(hidden, mask)
        reduced = np.array(dim_reducer.fit_transform(pooled))
        source_df = _prepare_embedding_dataframe(reduced, labels, label_names)

        # The plotting helper takes the zero-based layer index.
        plot = _create_embedding_plot(source_df, layer_no - 1, width, height)
        plots.append(plot)

        if separate:
            p_separate[f"Layer{layer_no}"] = plot.configure_axis(grid=False)

    combined_plot = _arrange_plots(plots, ncols)

    if save_path:
        combined_plot.save(save_path)
        print(f"Embeddings visualization saved to {save_path}")

    if separate:
        return p_separate
    return combined_plot

plot_muts

plot_muts(
    data,
    show_score=False,
    width=None,
    height=100,
    save_path=None,
)

Visualize mutation effects on model predictions.

This function creates comprehensive visualizations of how different mutations affect model predictions, including:

- Heatmap showing mutation effects at each position
- Line plot showing gain/loss of function
- Bar chart showing maximum effect mutations

Args:
    data: Dictionary containing mutation data with 'raw' and mutation keys
    show_score: Whether to show the score values on the plot (currently not implemented)
    width: Width of the plot. If None, automatically calculated based on sequence length
    height: Height of the plot
    save_path: Path to save the plot. If None, the plot is not saved

Returns:
    Altair chart object showing the combined mutation effects
        visualization
Source code in dnallm/inference/plot.py
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
def plot_muts(
    data: dict,
    show_score: bool = False,
    width: int | None = None,
    height: int = 100,
    save_path: str | None = None,
) -> alt.Chart | alt.VConcatChart:
    """Visualize mutation effects on model predictions.

    The heatmap, line and bar datasets are derived from ``data`` by the
    module's helper functions and combined into a single chart.

    Args:
        data: Dictionary containing mutation data with 'raw' and mutation
            keys.
        show_score: Whether to show the score values on the plot
            (currently not implemented; the argument is unused).
        width: Width of the plot. If None and heatmap scores exist, it is
            ``height * len(raw_bases) / number of distinct mutations``,
            truncated to an int; if None and there are no scores, 400.
        height: Height of the plot.
        save_path: Path to save the plot. The file is only written when
            heatmap scores exist.

    Returns:
        Altair chart object showing the combined mutation effects
        visualization.
    """
    extracted = _extract_mutation_data(data)
    _sequence, raw_bases, seqlen, flen, mut_list = extracted

    datasets = _build_mutation_datasets(
        data, raw_bases, mut_list, seqlen, flen
    )
    dheat, dline, dbar = datasets

    if width is None:
        if dheat["score"]:
            # Scale the width with the sequence length.
            distinct_muts = len(set(dheat["mut"]))
            width = int(height * len(raw_bases) / distinct_muts)
        else:
            width = 400  # fallback when there is nothing to scale against

    pmerge = _create_mutation_charts(dheat, dline, dbar, width, height, flen)

    # Nothing is written when there are no scores.
    if save_path and dheat["score"]:
        pmerge.save(save_path)
        print(f"Mutation effects visualization saved to {save_path}")

    return pmerge

plot_scatter

plot_scatter(
    data,
    show_score=True,
    ncols=3,
    width=400,
    height=400,
    save_path=None,
    separate=False,
)

Plot scatter plots for regression task evaluation.

This function creates scatter plots to compare predicted vs. experimental values for regression tasks, with optional R² score display.

Args:
    data: Dictionary containing scatter plot data for each model
    show_score: Whether to show the R² score on the plot
    ncols: Number of columns to arrange the plots
    width: Width of each plot
    height: Height of each plot
            save_path: Path to save the plot. If None,
        plot will be shown interactively
    separate: Whether to return separate plots for each model

Returns:
            Altair chart object (combined or
        separate plots based on separate parameter)
Source code in dnallm/inference/plot.py
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
def plot_scatter(
    data: dict,
    show_score: bool = True,
    ncols: int = 3,
    width: int = 400,
    height: int = 400,
    save_path: str | None = None,
    separate: bool = False,
) -> alt.Chart | dict[str, alt.Chart]:
    """Plot scatter plots for regression task evaluation.

    One predicted-vs-experiment scatter plot is drawn per model in
    ``data``. Plots are laid out left to right, ``ncols`` per row, and the
    rows are stacked vertically.

    Args:
        data: Dictionary mapping each model name to its scatter data. The
            data must provide ``predicted`` and ``experiment`` columns; an
            optional ``r2`` entry (default 0) is used for the score label.
        show_score: Whether to show the R² score on the plot.
        ncols: Number of plots per row. Must be at least 1.
        width: Width of each plot.
        height: Height of each plot.
        save_path: Path to save the combined plot. If falsy, nothing is
            written.
        separate: Whether to return a dict of per-model plots instead of
            the combined plot.

    Returns:
        The combined Altair chart, or, when ``separate`` is true, a dict
        mapping each model name to its own chart.

    Raises:
        ValueError: If ``ncols`` is smaller than 1.
    """
    # Reject invalid grid widths up front. Without this check, ncols == 0
    # fails with ZeroDivisionError and a negative ncols with a KeyError.
    if ncols < 1:
        raise ValueError(f"ncols must be a positive integer, got {ncols!r}")

    rows = []  # one horizontally concatenated chart per grid row
    p_separate = {}

    for n, (model, model_data) in enumerate(data.items()):
        # Shallow copy so that popping "r2" leaves the caller's dict intact.
        scatter_data = dict(model_data)
        r2 = scatter_data.pop("r2", 0)

        ddot = pd.DataFrame(scatter_data)

        dot = (
            alt.Chart(ddot, title=model)
            .mark_point(filled=True)
            .encode(
                x=alt.X("predicted:Q"),
                y=alt.Y("experiment:Q"),
            )
            .properties(width=width, height=height)
        )

        if show_score:
            # Anchor the label near the top-left corner of the data range.
            min_x = ddot["predicted"].min()
            max_y = ddot["experiment"].max()

            text = (
                alt.Chart()
                .mark_text(size=14, align="left", baseline="bottom")
                .encode(
                    x=alt.datum(min_x + 0.5),
                    y=alt.datum(max_y - 0.5),
                    text=alt.datum(f"R²={r2:.3f}"),
                )
            )
            p = dot + text
        else:
            p = dot

        if separate:
            # configure_axis returns a configured copy; the unconfigured
            # ``p`` is what goes into the combined grid below.
            p_separate[model] = p.configure_axis(grid=False)

        if n % ncols == 0:
            rows.append(p)  # first plot of a new row
        else:
            rows[-1] |= p

    pdots = rows[0] if rows else alt.Chart()
    for row in rows[1:]:
        pdots &= row

    # Top-level configuration is applied once, to the combined chart only.
    pdots = pdots.configure_axis(grid=False)

    if save_path:
        pdots.save(save_path)
        print(f"Metrics scatter plots saved to {save_path}")

    if separate:
        return p_separate
    return pdots

prepare_data

prepare_data(metrics, task_type='binary')

Prepare data for plotting various types of visualizations.

This function organizes model metrics data into formats suitable for different plot types:

- Bar charts for classification and regression metrics
- ROC and PR curves for classification tasks
- Scatter plots for regression tasks

Args:
    metrics: Dictionary containing model metrics for different models
    task_type: Type of task ('binary', 'multiclass', 'multilabel', 'token', 'regression')

Returns:
    Tuple containing:
    - bars_data: Data formatted for bar chart visualization
            - curves_data/scatter_data: Data formatted for curve or
        scatter plot visualization

Raises:
    ValueError: If task type is not supported for plotting
Source code in dnallm/inference/plot.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
def prepare_data(
    metrics: dict[str, dict], task_type: str = "binary"
) -> tuple[dict, dict | dict]:
    """Prepare data for plotting various types of visualizations.

    This function organizes model metrics data into formats suitable for
        different plot types:
        - Bar charts for classification and regression metrics
        - ROC and PR curves for classification tasks
        - Scatter plots for regression tasks

        Args:
            metrics: Dictionary containing model metrics for different models
                    task_type: Type of task (
                'binary',
                'multiclass',
                'multilabel',
                'token',
                'regression')

        Returns:
            Tuple containing:
            - bars_data: Data formatted for bar chart visualization
                    - curves_data/scatter_data: Data formatted for curve or
                scatter plot visualization

        Raises:
            ValueError: If task type is not supported for plotting
    """
    if task_type in ["binary", "multiclass", "multilabel", "token"]:
        return _prepare_classification_data(metrics)
    elif task_type == "regression":
        return _prepare_regression_data(metrics)
    else:
        raise ValueError(f"Unsupported task type {task_type} for plotting")