slowllama: comments in conf.py

Oleksandr Kuvshynov 2023-10-31 18:10:44 -04:00
parent 048669061b
commit cffad8c619
4 changed files with 29 additions and 7 deletions

conf.py

@@ -1,30 +1,54 @@
import logging
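# where to offload the frozen model weights during finetuning: 'disk' or 'ram'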
offload_to = 'disk'
# which device to use for finetuning
# 'cpu', 'mps' (for Apple devices) or 'cuda'
device = 'mps'
# random seed to use. Makes runs reproducible.
seed = 54321
# learning rate
lr = 1e-4
# logging gradient and weight distributions to the log file
# useful for debugging, but makes the logs noisier
log_lora_grad = False
log_lora_weight = False
# how wide should the LoRA layers be? Weights are (N x lora_rank) and (lora_rank x M).
# A larger rank means larger layers and more capacity.
lora_rank = 4
log_level = logging.DEBUG
# training settings
# total number of iterations to run. No microbatching so far
iters = 20
# how long should each training sequence be?
# we pick seq_len tokens and try to predict token [seq_len + 1]
seq_len = 128
# how large should the batch size be?
batch_size = 16
# the current script doesn't have a validation set at all.
# instead, we run prompt completion every eval_period iterations
# and check what the completion looks like
eval_before_training = False
eval_period = 20
# how many tokens to generate for this test completion
gen_tokens = 32
# what prompt to use for test completion
prompt = 'Cubestat reports the following metrics: '
# where to save LoRA snapshots
snapshots_path = 'out'
# plaintext input file which will be tokenized and used for training
finetune_file = './test_data/cubestat.txt'
# which model to use - path to raw model
llama2_model_path = '/Volumes/LLAMAS/llama-2-7b'
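
A rough sketch of how a training script might consume these settings (the plain "from conf import ..." style, the random token stream and get_batch below are illustrative stand-ins, not slowllama's actual data loading):

import torch
from conf import seq_len, batch_size, iters, eval_period, seed

torch.random.manual_seed(seed)
tokens = torch.randint(0, 32000, (100_000,))  # placeholder token stream

def get_batch():
    # sample batch_size windows of seq_len tokens; targets are the same
    # windows shifted by one, so the model learns to predict token [seq_len + 1]
    idx = torch.randint(0, tokens.numel() - seq_len - 1, (batch_size,)).tolist()
    x = torch.stack([tokens[i:i + seq_len] for i in idx])
    y = torch.stack([tokens[i + 1:i + seq_len + 1] for i in idx])
    return x, y

for it in range(iters):
    x, y = get_batch()
    # forward/backward over the LoRA weights would go here
    if it % eval_period == 0:
        # no validation set: generate gen_tokens tokens from the prompt
        # and inspect the completion instead
        pass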


@@ -15,7 +15,6 @@ device = 'mps' # mps for macbooks
seq_len = 1024
batch_size = 4
lr = 1e-4
offload_to = 'disk'
# type used for computation. Might be different from storage type (which is bfloat16)
compute_dtype = torch.float32 # float32 for macbooks
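
To illustrate the storage vs. compute dtype split described in that comment (the tensors below are toy stand-ins): frozen weights stay in bfloat16 and are cast to compute_dtype for the actual math:

import torch

compute_dtype = torch.float32                     # float32 for macbooks, as above
frozen = torch.randn(8, 8, dtype=torch.bfloat16)  # storage dtype of the frozen weights
x = torch.randn(2, 8, dtype=compute_dtype)
out = x @ frozen.to(compute_dtype)                # cast to compute dtype before the matmul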


@@ -15,7 +15,6 @@ class ModelArgs:
dropout: float = 0.0 # unused unless we bring dropout back
ffn_dim_multiplier: Optional[float] = None
compute_dtype: torch.dtype = torch.float32
offload_location: str = 'disk' # 'disk' or 'ram'
rope_theta: float = 10000.0
lora_rank: int = 8
lora_alpha: int = 64
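
For reference, a minimal sketch of how lora_rank and lora_alpha are typically used in a LoRA adapter (this LoRALayer and its alpha / rank scaling are illustrative, not slowllama's exact implementation):

import torch
import torch.nn as nn

class LoRALayer(nn.Module):
    # low-rank update: (N x lora_rank) @ (lora_rank x M), scaled by alpha / rank
    def __init__(self, n: int, m: int, rank: int = 8, alpha: int = 64):
        super().__init__()
        self.A = nn.Parameter(torch.randn(n, rank) * 0.01)
        self.B = nn.Parameter(torch.zeros(rank, m))  # zero init: no update at the start
        self.scale = alpha / rank

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return (x @ self.A @ self.B) * self.scale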


@@ -11,4 +11,4 @@ logging.basicConfig(format='%(asctime)s %(message)s',
torch.random.manual_seed(seed)
prepare_model(llama2_path=llama2_model_path, frozen_path=frozen_model_path, compute_dtype=compute_dtype,
-              offload_location=offload_to, lora_rank=lora_rank, frozen_dtype=frozen_dtype)
+              lora_rank=lora_rank, frozen_dtype=frozen_dtype)