Benchmark Demo

In [ ]:

Copied!

import marimo
import marimo

In [ ]:

Copied!

# marimo version string recorded in the notebook file.
__generated_with = "0.11.17"
# Single application object that every @app.cell below registers with.
app = marimo.App(width="medium")

In [ ]:

Copied!





@app.cell
def __(__file__):
    # Shared imports for the notebook. The returned names are the ones other
    # cells list as parameters (mo, load_config, Benchmark, ...).
    import sys

    # Optional: uncomment to add the directory three levels above this file
    # to sys.path before importing dnallm.
    # from os import path
    # sys.path.append(path.abspath(path.join(path.dirname(__file__), '../../..')))
    import marimo as mo
    import pandas as pd
    from dnallm import load_config, load_model_and_tokenizer, Benchmark

    return sys, pd, mo, load_config, load_model_and_tokenizer, Benchmark

In [ ]:

Copied!





@app.cell
def __(mo):
    # Input form: config file, dataset file, column names, model source and
    # the number of models to compare.
    title = mo.md(
        "<center><h2>Benchmark of multiple DNA models</h2></center>"
    )

    # File inputs.
    config_text = mo.ui.text(
        value="config.yaml",
        placeholder="config.yaml",
        label="Config file (*.yaml)",
        full_width=True,
    )
    datasets_text = mo.ui.text(
        value="test.csv",
        placeholder="local dataset path",
        label="Datasets file",
        full_width=True,
    )

    # Benchmark options. The dropdown offers "2" .. "12" models as strings.
    source_text = mo.ui.dropdown(
        ['local', 'huggingface', 'modelscope'],
        value="modelscope",
        label="Model source",
        full_width=True,
    )
    number_text = mo.ui.dropdown(
        list(map(str, range(2, 13))),
        value="4",
        label="Number of models",
        full_width=True,
    )
    seq_col_text = mo.ui.text(
        value="sequence",
        placeholder="sequence",
        label="Sequence column name",
        full_width=True,
    )
    label_col_text = mo.ui.text(
        value="labels",
        placeholder="labels",
        label="Label column name",
        full_width=True,
    )

    # Layout: one row for the file inputs, one row for the options.
    input_stack = mo.hstack(
        [config_text.style(width="35ch"), datasets_text.style(width="55ch")],
        align='center',
        justify='center',
    )
    option_stack = mo.hstack(
        [
            seq_col_text.style(width="22.5ch"),
            label_col_text.style(width="22.5ch"),
            source_text.style(width="22ch"),
            number_text.style(width="22ch"),
        ],
        align='center',
        justify='center',
    )
    mo.vstack([title, input_stack, option_stack], align='center', justify='center')
    # number_text is included because the model-input cell takes it as a
    # parameter.
    return (
        config_text,
        datasets_text,
        source_text,
        seq_col_text,
        label_col_text,
        number_text,
    )

In [ ]:

Copied!





@app.cell
def __(mo, number_text):
    # One (model path, display name) pair of text inputs per requested model.
    number_of_models = int(number_text.value)

    # [display name, model path] defaults: the first two rows are pre-filled,
    # the remaining ten are blank, giving 12 rows in total (the dropdown's
    # maximum).
    default_models = [["Plant DNABERT", "zhangtaolab/plant-dnabert-BPE-promoter"],
                      ["Plant DNAGPT", "zhangtaolab/plant-dnagpt-BPE-promoter"]] + [["", ""]] * 10

    # Both dictionaries are keyed by the integer model index so that the
    # path and the name of the same model can be matched up later.
    model_texts = mo.ui.dictionary({
        i: mo.ui.text(
            value=default_models[i][1],
            placeholder=default_models[i][1],
            label=f"Model{i+1}",
            full_width=True,
        )
        for i in range(number_of_models)
    })
    name_texts = mo.ui.dictionary({
        i: mo.ui.text(
            value=default_models[i][0],
            placeholder=default_models[i][0],
            label=f"Model{i+1} name",
            full_width=True,
        )
        for i in range(number_of_models)
    })

    # Model paths on the left (twice as wide), display names on the right.
    mo.hstack(
        [
            model_texts.vstack(align='stretch', gap=0.5),
            name_texts.vstack(align='stretch', gap=0.5),
        ],
        widths=[2, 1],
        align='stretch',
        gap=0.5,
    )
    return (number_of_models, model_texts, name_texts,)

In [ ]:

Copied!





@app.cell
def __(config_text, load_config):
    # Load the configuration from the path currently typed into the
    # "Config file" input.
    configs = load_config(config_text.value)
    return configs

In [ ]:

Copied!





@app.cell
def __(configs, datasets_text, seq_col_text, label_col_text,
       Benchmark):
    # Build the Benchmark object from the loaded config and, when a dataset
    # path has been entered, load the dataset through it.
    benchmark = Benchmark(config=configs)
    if datasets_text.value:
        # Column names come from the "Sequence column name" and
        # "Label column name" inputs.
        dataset = benchmark.get_dataset(
            datasets_text.value,
            seq_col=seq_col_text.value,
            label_col=label_col_text.value,
        )
    else:
        # Empty dataset field: nothing to load.
        dataset = None
    return (dataset, benchmark)

In [ ]:

Copied!





@app.cell
def __(model_texts, name_texts):
    # Map display name -> model path. Rows where either field is empty are
    # skipped; if two rows share a display name, the later row wins.
    model_names = {
        name_texts.value[i]: model_texts.value[i]
        for i in range(len(model_texts.value))
        if (model_texts.value[i] and name_texts.value[i])
    }
    return (model_names, )

In [ ]:

Copied!





@app.cell
def __(mo, model_names, source_text, benchmark):
    # "Start Benchmark" button. Clicking it calls benchmark.run on the
    # selected models; the results cell then reads predict_button.value.
    predict_button = mo.ui.button(
        label="Start Benchmark",
        on_click=lambda value: benchmark.run(
            model_names, source=source_text.value
        ),
    )
    mo.hstack([predict_button], align='center', justify='center')
    return (predict_button, )

In [ ]:

Copied!





@app.cell
def __(predict_button):
    # Expose the button's value as `results`; any falsy value (for example
    # before the benchmark has produced anything) is normalised to None.
    results = predict_button.value if predict_button.value else None
    # Bare expression kept as the last statement before the return.
    results
    return (results, )

In [ ]:

Copied!





@app.cell
def __(mo):
    # Numeric input for the figure size, read by the chart cell when sizing
    # the plots: 200-5120 in steps of 10, default 800.
    figure_size = mo.ui.number(
        start=200,
        stop=5120,
        step=10,
        label='Figure size',
        value=800,
    )
    return (figure_size, )

In [ ]:

Copied!





@app.cell
def __(mo, figure_size, results, benchmark):
    # "Plot metrics" button. Clicking it calls benchmark.plot on the current
    # results; the chart cell then reads plot_button.value.
    plot_button = mo.ui.button(
        label="Plot metrics",
        # `results` is None until a benchmark has run. Skip plotting in that
        # case so the button value stays None, which the chart cell already
        # treats as "nothing to show".
        on_click=lambda value: (
            benchmark.plot(results, separate=True)
            if results is not None
            else None
        ),
    )
    mo.hstack([figure_size, plot_button], align='center', justify='center')
    return (plot_button,)

In [ ]:

Copied!





@app.cell
def __(mo, model_names, plot_button, figure_size):
    # Render the plots produced by the "Plot metrics" button as two tab
    # groups. plot_out[0] and plot_out[1] are each iterated and indexed by
    # key below: the first by metric, the second by name.
    plot_out = plot_button.value
    if plot_out:
        num_models = len(model_names)
        # `.properties` is an Altair chart method, so size the chart itself
        # and only then wrap it in the marimo UI element.
        charts1 = mo.ui.tabs(
            {
                metric: mo.ui.altair_chart(
                    plot_out[0][metric].properties(
                        width=figure_size.value,
                        # Height grows with the number of models compared.
                        height=figure_size.value * num_models / 10,
                    )
                )
                for metric in plot_out[0]
            },
        )
        charts2 = mo.ui.tabs(
            {
                name: mo.ui.altair_chart(
                    plot_out[1][name].properties(
                        width=figure_size.value,
                        height=figure_size.value,
                    )
                )
                for name in plot_out[1]
            }
        )
    else:
        # Nothing plotted yet: stack two empty placeholders instead.
        charts1 = ""
        charts2 = ""
    mo.vstack([charts1, charts2], align='center', justify='center')
    return

In [ ]:

Copied!

# Run the app once when the file is executed as a script.
if __name__ == "__main__":
    app.run()