slowllama: comments in conf.py
parent 048669061b
commit cffad8c619

conf.py | 32
@@ -1,30 +1,54 @@
import logging

offload_to = 'disk'

# which device to use for finetuning
# 'cpu', 'mps' (for Apple devices) or 'cuda'
device = 'mps'

# random seed to use. Makes runs reproducible.
seed = 54321

# learning rate
lr = 1e-4

# logging gradient and weight distribution to log file
# useful for debugging, but generates more log output
log_lora_grad = False
log_lora_weight = False

# how wide should the LoRA layers be? (N x lora_rank) and (lora_rank x M).
# Larger number - larger layer - more capacity.
lora_rank = 4

log_level = logging.DEBUG

# training settings

# total number of iterations to run. No microbatching so far.
iters = 20

# how long should the sequence to train on be?
# we pick seq_len tokens and try to predict token [seq_len + 1]
seq_len = 128

# how large should the batch size be?
batch_size = 16

# the current script doesn't have a validation set at all;
# instead, we run prompt completion every eval_period iterations
# and check what the completion looks like
eval_before_training = False
eval_period = 20

# how many tokens to generate for such a test completion
gen_tokens = 32

# what prompt to use for the test completion
prompt = 'Cubestat reports the following metrics: '

# where to save LoRA snapshots
snapshots_path = 'out'

# plaintext input file which will be tokenized and used for training
finetune_file = './test_data/cubestat.txt'

# which model to use - path to the raw model
llama2_model_path = '/Volumes/LLAMAS/llama-2-7b'
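As a side note on the seq_len / batch_size / seed settings above: a minimal sketch of how such a next-token training batch could be drawn from the tokenized finetune_file (an illustration under assumptions, not slowllama's actual code; sample_batch is a hypothetical name):

import torch

# Pick batch_size random windows of seq_len tokens; the targets are the
# same windows shifted by one token, so position i is trained to predict
# token i + 1 (i.e. we try to predict token [seq_len + 1]).
def sample_batch(tokens: torch.Tensor, seq_len: int, batch_size: int):
    starts = torch.randint(0, len(tokens) - seq_len - 1, (batch_size,)).tolist()
    xs = torch.stack([tokens[s:s + seq_len] for s in starts])
    ys = torch.stack([tokens[s + 1:s + seq_len + 1] for s in starts])
    return xs, ys  # both have shape (batch_size, seq_len)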
@@ -15,7 +15,6 @@ device = 'mps' # mps for macbooks
seq_len = 1024
batch_size = 4
lr = 1e-4
offload_to = 'disk'

# type used for computation. Might be different from storage type (which is bfloat16)
compute_dtype = torch.float32 # float32 for macbooks
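To make the storage-vs-compute distinction concrete, here is a small illustration (an assumption-level sketch, not code from the repo): weights can be kept in the compact bfloat16 storage type and upcast to compute_dtype only for the actual computation.

import torch

storage_dtype = torch.bfloat16  # how frozen weights are stored
compute_dtype = torch.float32   # what the forward pass runs in

w = torch.randn(16, 16, dtype=storage_dtype)  # stored weight
x = torch.randn(4, 16, dtype=compute_dtype)   # activations
y = x @ w.to(compute_dtype)                    # upcast only for the matmul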
@@ -15,7 +15,6 @@ class ModelArgs:
    dropout: float = 0.0 # unless we bring back
    ffn_dim_multiplier: Optional[float] = None
    compute_dtype: torch.dtype = torch.float32
    offload_location: str = 'disk' # 'disk' or 'ram'
    rope_theta: float = 10000.0
    lora_rank: int = 8
    lora_alpha: int = 64
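lora_rank and lora_alpha here follow the usual LoRA convention: a frozen base weight plus a trainable low-rank update (N x lora_rank) @ (lora_rank x M), scaled by lora_alpha / lora_rank. A self-contained sketch of that convention (the standard formulation, assumed rather than copied from slowllama's sources):

import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    def __init__(self, in_f: int, out_f: int, lora_rank: int = 8, lora_alpha: int = 64):
        super().__init__()
        self.base = nn.Linear(in_f, out_f, bias=False)
        self.base.weight.requires_grad_(False)  # frozen pretrained weight
        self.A = nn.Parameter(torch.randn(in_f, lora_rank) * 0.01)  # small init
        self.B = nn.Parameter(torch.zeros(lora_rank, out_f))        # zero init
        self.scale = lora_alpha / lora_rank

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.base(x) + (x @ self.A @ self.B) * self.scale

With B initialized to zero, the adapter starts as an exact no-op and only gradually deviates from the frozen model as training proceeds.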
@@ -11,4 +11,4 @@ logging.basicConfig(format='%(asctime)s %(message)s',
 torch.random.manual_seed(seed)
 
 prepare_model(llama2_path=llama2_model_path, frozen_path=frozen_model_path, compute_dtype=compute_dtype,
-              offload_location=offload_to, lora_rank=lora_rank, frozen_dtype=frozen_dtype)
+              lora_rank=lora_rank, frozen_dtype=frozen_dtype)