Sequence Generation
In [1]:
Copied!
from dnallm import load_config
from dnallm import load_model_and_tokenizer, DNAInference
from dnallm import load_config
from dnallm import load_model_and_tokenizer, DNAInference
In [2]:
Copied!
# Load the notebook's settings from the YAML file located next to it.
# `configs["task"]` is consumed when the model is loaded, and the whole
# object is later handed to the inference engine.
configs = load_config("./generation_config.yaml")
In [ ]:
Copied!
# Checkpoint used for generation. It is fetched from ModelScope
# (source="modelscope") and loaded once; repeating this statement would
# repeat the model lookup and instantiation for no benefit.
model_name = "zhangtaolab/plant-dnagpt-BPE"
model, tokenizer = load_model_and_tokenizer(
    model_name,
    task_config=configs["task"],
    source="modelscope",
)
Downloading Model from https://www.modelscope.cn to directory: /home/liuguanqing/.cache/modelscope/hub/models/zhangtaolab/plant-dnagpt-BPE 02:16:42 - dnallm.models.model - INFO - Model files are stored in /home/liuguanqing/.cache/modelscope/hub/models/zhangtaolab/plant-dnagpt-BPE 02:16:42 - dnallm.models.model - WARNING - Generation task does not require num_labels, but got 1. Setting to 0. Downloading Model from https://www.modelscope.cn to directory: /home/liuguanqing/.cache/modelscope/hub/models/zhangtaolab/plant-dnagpt-BPE Downloading Model from https://www.modelscope.cn to directory: /home/liuguanqing/.cache/modelscope/hub/models/zhangtaolab/plant-dnagpt-BPE
In [4]:
Copied!
# Create the inference engine: bundles the loaded model, its tokenizer and
# the full configuration into the object used for generation below.
inference_engine = DNAInference(
    model=model,
    tokenizer=tokenizer,
    config=configs,
)
02:16:44 - dnallm.models.model - INFO - Using device: cuda
In [5]:
Copied!
# Generate a continuation for each prompt in the list (here a single 4-base
# seed). The call is made once; a second identical call would only redo the
# 512-token generation and overwrite `output`.
# NOTE(review): temperature/top_p suggest stochastic sampling, so results
# presumably differ between runs unless a seed is fixed — confirm.
output = inference_engine.generate(
    ["ACGT"],
    n_tokens=512,
    temperature=0.8,
    top_p=0.9,
)
Setting `pad_token_id` to `eos_token_id`:8000 for open-end generation.
In [6]:
Copied!
# Show every prompt next to the sequence generated from it. Each element of
# `output` is indexed by the keys 'Prompt' and 'Output'; a blank line
# separates consecutive records.
for seq in output:
    print(f"Input Sequence: {seq['Prompt']}")
    print(f"Generated Sequence: {seq['Output']}")
    print()
Input Sequence: ACGT Generated Sequence: ACGTAGTAAAAAAGAAAGAAGGAAAGGGAAAAAGAGAAAGAGAAGGAAAAGGAAAAAGGAGAAAGGAAAGAAAGGGGAAAGAAGAAAAAAGGAAAGAAGAAAGAAAAAAAAGAAGGAAGAAAAAAAAAGGAGAAGGAGGGGGGAAAAAAAAGAAAGAAAAAAAAAGAAGAAAAAAAAAGAAAAAGAAAAAAAGAGAAAAAAAAAAAAAAAGAGGGAAAAAGAAAAAAGGAAGAGAAAGAGGAAAAAAGAAAAGAAGAAAAGGGAGAAGAGAAAAAAGAAAAAGGAAAGAAGAAAAGGAGAGAAAGGAAAAAAAAAAAGAAGGAAAAAAAAGAAGGAAGAAGAAAAAAAAAAAAAAAGAGAAAAGAAGAAGAAGGAAAAAGGAAAAGGGAAAAAAAGAAAAAGAAGGAAAGAAAAAAAAAAAAAGAAAGAAGAAGGAGAGAGAAAAAAGAGAGAAAGAAGAAAAAGAAAAAGGGAGAAAAAAGGGAAAAGGAAAAAAAAAAGAAAAAAGAGAAAAAGAA
In [ ]:
Copied!