extra slow chat
This commit is contained in:
parent
4c2996d654
commit
fe6169cb0f
|
@ -0,0 +1,29 @@
|
|||
import logging
|
||||
import torch
|
||||
import sys
|
||||
import os
|
||||
|
||||
from loader import load_frozen
|
||||
from utils import Tokenizer, greedy_gen2
|
||||
from conf_fp16 import *
|
||||
|
||||
# Root logger: timestamp + message, showing only WARN and above.
logging.basicConfig(level=logging.WARN, format='%(asctime)s %(message)s')

# The first command-line argument, when given, is a weights file that is
# loaded on top of the frozen model below.
lora_weights = sys.argv[1] if sys.argv[1:] else None

# The tokenizer file is expected inside the frozen model directory.
tokenizer_path = os.path.join(frozen_model_path, 'tokenizer.model')
tokenizer = Tokenizer(tokenizer_path)

# frozen_model_path, frozen_dtype, compute_dtype and device are not defined
# in this script; presumably they come from the `conf_fp16` star import.
model = load_frozen(
    frozen_model_path,
    dropout=0.0,
    lora_rank=4,
    frozen_dtype=frozen_dtype,
    compute_dtype=compute_dtype,
)
model = model.to(device)

if lora_weights is not None:
    # strict=False: keys missing from / unexpected in the checkpoint are
    # tolerated; the load report is only logged at DEBUG level.
    logging.debug(
        model.load_state_dict(torch.load(lora_weights), strict=False)
    )

print(f'Model {frozen_model_path} loaded')
|
||||
|
||||
# Interactive chat loop: read one prompt, stream the generated continuation
# to stdout piece by piece, then ask for the next prompt.
#
# The previous version wrapped the generation in a second `while True:`,
# which never terminated: greedy_gen2 is deterministic for a fixed prompt,
# so the same 100 tokens were streamed over and over and the user was never
# prompted again.
while True:
    try:
        prompt = input("> ")
    except EOFError:
        # stdin closed (e.g. Ctrl-D): leave the loop instead of crashing.
        break

    # `piece` rather than `next`, to avoid shadowing the builtin.
    for piece in greedy_gen2(model, tokenizer, device, prompt, max_new_tokens=100):
        sys.stdout.write(piece)
        # Flush per piece so the text appears as soon as it is generated.
        sys.stdout.flush()

    # Finish the reply so the next "> " starts on its own line.
    sys.stdout.write("\n")
    sys.stdout.flush()
|
||||
|
11
utils.py
11
utils.py
|
@ -61,6 +61,17 @@ def greedy_gen(model, tokenizer, device, prompt, max_new_tokens=50):
|
|||
for i, output in enumerate(tokens):
|
||||
logging.info(f'{i} - {tokenizer.decode(output.tolist())}')
|
||||
|
||||
def greedy_gen2(model, tokenizer, device, prompt, max_new_tokens=50):
    """Greedily generate text for *prompt*, yielding one decoded token at a time.

    Args:
        model: Callable module; ``model(tokens)`` must return logits that can
            be indexed as ``[:, -1, :]`` (last position of each sequence).
        tokenizer: Object with ``encode(prompt, True, False)`` returning a
            list of token ids (presumably bos=True, eos=False - confirm
            against the Tokenizer class) and ``decode`` accepting a nested
            list of ids and returning a list of strings.
        device: Device the token tensor is moved to.
        prompt: Text to continue.
        max_new_tokens: Maximum number of tokens to generate.

    Yields:
        The decoded text of each newly selected token, in generation order.

    Note:
        The model is switched to eval mode and left in it. The whole sequence
        is fed through the model again on every step.
    """
    # Single-sequence batch: shape (1, prompt_length).
    tokens = torch.tensor(tokenizer.encode(prompt, True, False)).view(1, -1).to(device)
    model.eval()

    for _ in range(max_new_tokens):
        # Inference only: do not build an autograd graph. The context is
        # closed before the yield so grad mode is never left disabled in the
        # caller while the generator is suspended.
        with torch.no_grad():
            logits = model(tokens)
            last_logits = logits[:, -1, :]
            _, next_token = torch.topk(last_logits, k=1, dim=-1)

        # Decode once and reuse the result for both logging and the yield.
        decoded = tokenizer.decode(next_token.tolist())
        # Lazy %-formatting: nothing is formatted unless INFO is enabled.
        logging.info('next token: %s %s', next_token, decoded)
        yield decoded[0]

        # Append the chosen token so it is part of the next step's input.
        tokens = torch.cat((tokens, next_token), dim=1)
|
||||
|
||||
def cleanup_cache(device='cpu'):
|
||||
if device.startswith('mps'):
|
||||
import torch.mps
|
||||
|
|
Loading…
Reference in New Issue