tRNA Inference
In [1]:
Copied!
from dnallm import load_config
from dnallm import load_model_and_tokenizer, DNAInference
from dnallm import load_config
from dnallm import load_model_and_tokenizer, DNAInference
tRNADetector
In [2]:
Copied!
# Build the tRNADetector predictor (runs once; the cell body was previously
# duplicated, which loaded the model and rebuilt the predictor a second time).
#
# The YAML path is relative to the notebook's working directory.
configs = load_config("./inference_model_config_tRNADetector.yaml")
model_name = "zhangtaolab/tRNADetector"

# The model is requested from ModelScope; the 'task' section of the config is
# forwarded to the loader.
model, tokenizer = load_model_and_tokenizer(
    model_name,
    task_config=configs['task'],
    source="modelscope",
)

# `predictor` is reused by the inference cell that follows.
predictor = DNAInference(
    model=model,
    tokenizer=tokenizer,
    config=configs,
)
Downloading Model from https://www.modelscope.cn to directory: /Users/forrest/.cache/modelscope/hub/models/zhangtaolab/tRNADetector Model files are stored in /Users/forrest/.cache/modelscope/hub/models/zhangtaolab/tRNADetector Downloading Model from https://www.modelscope.cn to directory: /Users/forrest/.cache/modelscope/hub/models/zhangtaolab/tRNADetector Downloading Model from https://www.modelscope.cn to directory: /Users/forrest/.cache/modelscope/hub/models/zhangtaolab/tRNADetector
Importing `MambaCache` from `transformers.cache_utils` is deprecated and will be removed in a future version. Please import it from `transformers` or `transformers.models.mamba.cache_mamba` instead. The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)` is None. Falling back to the mamba.py backend. To install follow https://github.com/state-spaces/mamba/#installation and https://github.com/Dao-AILab/causal-conv1d
15:04:25 - dnallm.inference.inference - INFO - Using device: cpu
In [3]:
Copied!
# Two example DNA sequences to classify with the tRNADetector predictor.
# (The cell body was previously duplicated, which ran inference twice.)
seq = [
    'AAGAAAGCTCAAATAGTATACGAAGAACTCGAAGCTAAGCAACTGTGAAGAGAAATTAAGTAGCTACAATTAGGTTATAAATAATTTGATTTCTACTCTAACTGTGACGTGGGGATGTAGCTCAGATGGTAGAGCGCTCGCTTAGCATGCGAGAGGTACGGGGATCGATACCCCGCATCTCCATTTTTTTATTTTTTTTTAGAATTCTACTTTTTCTAAAATTGACCCTTTAATTTTGTATTTATATTTCTTTTATAATGTATATGCATTCTGCATTTTATTTTTCCTTTACATTTTTTCTTATATAATGTAAGTTATGCATTCTGCATTTTCTTTTGTCTTTTTTTTTTCTTATAAGTGGTTGG',
    'AAAACCCCAACTAGCTAGCATCGATCGAGCTAGCATGCATCGATCGATCGATCGATCGATCGATCGATCGAACACCCCGCGCGTAGCTACGGCTCAGAGCATCGATGCGCAGTCGAGCCGGGGGGGACATCGATCGATCGATCGATCGAGTCGACGATCGATCGAGCATATAATCGAGTCGACTGATCGATCGAGCGTACGATCGATCGATCGATGCATCCCCGATCGATCGATCGATCTTATAACACACACACACACACACGGAAAA',
]

# NOTE(review): `infer_file` is handed an in-memory list rather than a file
# path — confirm this is the intended entry point for raw sequences.
# evaluate=False: no evaluation is requested, only predictions.
results = predictor.infer_file(seq, evaluate=False)
Encoding inputs: 0%| | 0/2 [00:00<?, ? examples/s]
Inferring: 100%|██████████| 1/1 [00:09<00:00, 9.68s/it]
In [4]:
Copied!
# Print the predicted label and its score for every input sequence.
# The loop body is indented (it was flattened, which is a syntax error) and
# the report runs once per result (the loop was previously duplicated).
for i in results:
    sequence = results[i]['sequence']
    label = results[i]['label']
    # 'scores' is indexed by the predicted label to get that label's score.
    score = results[i]['scores'][label]
    # The pieces are passed as separate print() arguments, so the default
    # single-space separator follows each '\n' in the output.
    print(f'input sequence:{sequence}\n',
          f'predict label:{label}\n',
          f'predict score:{score}\n',
          '*'*20)
input sequence:AAGAAAGCTCAAATAGTATACGAAGAACTCGAAGCTAAGCAACTGTGAAGAGAAATTAAGTAGCTACAATTAGGTTATAAATAATTTGATTTCTACTCTAACTGTGACGTGGGGATGTAGCTCAGATGGTAGAGCGCTCGCTTAGCATGCGAGAGGTACGGGGATCGATACCCCGCATCTCCATTTTTTTATTTTTTTTTAGAATTCTACTTTTTCTAAAATTGACCCTTTAATTTTGTATTTATATTTCTTTTATAATGTATATGCATTCTGCATTTTATTTTTCCTTTACATTTTTTCTTATATAATGTAAGTTATGCATTCTGCATTTTCTTTTGTCTTTTTTTTTTCTTATAAGTGGTTGG predict label:tRNA predict score:0.9999682903289795 ******************** input sequence:AAAACCCCAACTAGCTAGCATCGATCGAGCTAGCATGCATCGATCGATCGATCGATCGATCGATCGATCGAACACCCCGCGCGTAGCTACGGCTCAGAGCATCGATGCGCAGTCGAGCCGGGGGGGACATCGATCGATCGATCGATCGAGTCGACGATCGATCGAGCATATAATCGAGTCGACTGATCGATCGAGCGTACGATCGATCGATCGATGCATCCCCGATCGATCGATCGATCTTATAACACACACACACACACACGGAAAA predict label:Partial tRNA predict score:1.0 ********************
In [ ]:
Copied!
tRNAPointer
In [ ]:
Copied!
# Build the tRNAPointer predictor (runs once; the cell body was previously
# duplicated, which loaded the model and rebuilt the predictor a second time).
#
# This rebinds `configs`, `model`, `tokenizer` and `predictor`, replacing the
# tRNADetector objects created earlier in the notebook.
# The YAML path is relative to the notebook's working directory.
configs = load_config("./inference_model_config_tRNAPointer.yaml")
model_name = "zhangtaolab/tRNAPointer"

# The model is requested from ModelScope; the 'task' section of the config is
# forwarded to the loader.
model, tokenizer = load_model_and_tokenizer(
    model_name,
    task_config=configs['task'],
    source="modelscope",
)

predictor = DNAInference(
    model=model,
    tokenizer=tokenizer,
    config=configs,
)
Downloading Model from https://www.modelscope.cn to directory: /Users/forrest/.cache/modelscope/hub/models/zhangtaolab/tRNAPointer Model files are stored in /Users/forrest/.cache/modelscope/hub/models/zhangtaolab/tRNAPointer Downloading Model from https://www.modelscope.cn to directory: /Users/forrest/.cache/modelscope/hub/models/zhangtaolab/tRNAPointer Downloading Model from https://www.modelscope.cn to directory: /Users/forrest/.cache/modelscope/hub/models/zhangtaolab/tRNAPointer Downloading Model from https://www.modelscope.cn to directory: /Users/forrest/.cache/modelscope/hub/models/zhangtaolab/tRNAPointer 15:04:44 - dnallm.inference.inference - INFO - Using device: cpu
In [ ]:
Copied!
# Two example DNA sequences in which to locate a tRNA with tRNAPointer.
# (The cell body was previously duplicated, which ran inference twice.)
seq = [
    'AAGAAAGCTCAAATAGTATACGAAGAACTCGAAGCTAAGCAACTGTGAAGAGAAATTAAGTAGCTACAATTAGGTTATAAATAATTTGATTTCTACTCTAACTGTGACGTGGGGATGTAGCTCAGATGGTAGAGCGCTCGCTTAGCATGCGAGAGGTACGGGGATCGATACCCCGCATCTCCATTTTTTTATTTTTTTTTAGAATTCTACTTTTTCTAAAATTGACCCTTTAATTTTGTATTTATATTTCTTTTATAATGTATATGCATTCTGCATTTTATTTTTCCTTTACATTTTTTCTTATATAATGTAAGTTATGCATTCTGCATTTTCTTTTGTCTTTTTTTTTTCTTATAAGTGGTTGG',
    'AAAACCCCAACTAGCTAGCATCGATCGAGCTAGCATGCATCGATCGATCGATCGATCGATCGATCGATCGAACACCCCGCGCGTAGCTACGGCTCAGAGCATCGATGCGCAGTCGAGCCGGGGGGGACATCGATCGATCGATCGATCGAGTCGACGATCGATCGAGCATATAATCGAGTCGACTGATCGATCGAGCGTACGATCGATCGATCGATGCATCCCCGATCGATCGATCGATCTTATAACACACACACACACACACGGAAAA',
]

# Split every sequence into a list of single-character bases, so the
# predictor receives one list of bases per sequence.
seq_token = [list(s) for s in seq]

# NOTE(review): `infer_file` is handed in-memory lists rather than a file
# path — confirm this is the intended entry point for pre-split sequences.
results = predictor.infer_file(seq_token, evaluate=False)
Encoding inputs: 0%| | 0/2 [00:00<?, ? examples/s]
Inferring: 100%|██████████| 1/1 [00:08<00:00, 8.74s/it]
In [7]:
Copied!
# Report where the predicted tRNA lies inside each input sequence.
#
# For each result, the per-base 'sequence' entries are joined back into one
# string, and 'label' is searched for the first "B-tRNA" tag and the last
# "I-tRNA" tag. Presumably `label` holds one tag per base, so tag positions
# double as sequence indices — TODO confirm against the model's output format.
for i in results:
    sequence = ''.join(results[i]['sequence'])
    label = results[i]['label']
    try:
        # First position tagged as the beginning of a tRNA.
        start = label.index("B-tRNA")
        # Last position tagged as inside a tRNA: search the reversed labels
        # and map the hit back to a forward index.
        end = len(label) - 1 - label[::-1].index("I-tRNA")
    except ValueError:
        # `.index` raises ValueError when a tag is absent. Only that case
        # means "no tRNA"; any other error (missing key, interrupt, ...) now
        # propagates instead of being silently reported as "No tRNA found".
        print(f'input sequence:{sequence}\n',
              'No tRNA found\n',
              '*'*20)
        continue

    # `end` is inclusive, hence the +1 in the slice.
    tRNA_sequence = sequence[start:end+1]
    print(f'input sequence:{sequence}\n',
          f'tRNA start index in sequence:{start}\n',
          f'tRNA end index in sequence:{end}\n',
          f'tRNA sequence:{tRNA_sequence}\n',
          '*'*20)
input sequence:AAGAAAGCTCAAATAGTATACGAAGAACTCGAAGCTAAGCAACTGTGAAGAGAAATTAAGTAGCTACAATTAGGTTATAAATAATTTGATTTCTACTCTAACTGTGACGTGGGGATGTAGCTCAGATGGTAGAGCGCTCGCTTAGCATGCGAGAGGTACGGGGATCGATACCCCGCATCTCCATTTTTTTATTTTTTTTTAGAATTCTACTTTTTCTAAAATTGACCCTTTAATTTTGTATTTATATTTCTTTTATAATGTATATGCATTCTGCATTTTATTTTTCCTTTACATTTTTTCTTATATAATGTAAGTTATGCATTCTGCATTTTCTTTTGTCTTTTTTTTTTCTTATAAGTGGTTGG tRNA start index in sequence:111 tRNA end index in sequence:183 tRNA sequence:GGGATGTAGCTCAGATGGTAGAGCGCTCGCTTAGCATGCGAGAGGTACGGGGATCGATACCCCGCATCTCCAT ******************** input sequence:AAAACCCCAACTAGCTAGCATCGATCGAGCTAGCATGCATCGATCGATCGATCGATCGATCGATCGATCGAACACCCCGCGCGTAGCTACGGCTCAGAGCATCGATGCGCAGTCGAGCCGGGGGGGACATCGATCGATCGATCGATCGAGTCGACGATCGATCGAGCATATAATCGAGTCGACTGATCGATCGAGCGTACGATCGATCGATCGATGCATCCCCGATCGATCGATCGATCTTATAACACACACACACACACACGGAAAA No tRNA found ********************
In [ ]:
Copied!