almugabo · October 6, 2024 14:25
diff --git a/example_config.yaml b/example_config.yaml
 # Base directories for working with paths in torchtune. Other entries in
 # this file refer to them as ${path_model_downloaded} and
 # ${path_model_finetuned}, so each location is written out only once.
 path_model_downloaded: '/home/mike/_torch_tune/_xmodel_downloaded/MetaLlama-3.1-8B'
 path_model_finetuned: '/home/mike/_torch_tune/_xmodel_finetuned/MetaLlama-3.1-8B_finetuned'

 # Tokenizer: `path` points into the downloaded model directory.
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
   path: ${path_model_downloaded}/original/tokenizer.model
   max_seq_len: 2048

 # Dataset: a local JSON file, using its `text` column and `train` split.
 dataset:
   _component_: torchtune.datasets.text_completion_dataset
   source: json
   data_files: /home/mike/data_infrastructure/text_monolingual_train.json
   column: text
   split: train
 # seed and shuffle are top-level keys (siblings of `dataset`, not nested in it).
 seed: null
 shuffle: true

 # Model Arguments
 model:
   _component_: torchtune.models.llama3_1.llama3_1_8b

 # Checkpointer: checkpoint_dir is the downloaded model directory and
 # output_dir is the fine-tuned model directory.
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: ${path_model_downloaded}
   # The four safetensors shards, one entry per file.
   checkpoint_files:
     - model-00001-of-00004.safetensors
     - model-00002-of-00004.safetensors
     - model-00003-of-00004.safetensors
     - model-00004-of-00004.safetensors
   recipe_checkpoint: null
   output_dir: ${path_model_finetuned}
   model_type: LLAMA3
 # Top-level key (sibling of `checkpointer`, not nested in it).
 resume_from_checkpoint: false

 # Fine-tuning arguments
 batch_size: 2
 epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW8bit
   # Written with a decimal point so YAML 1.1 loaders also read it as a float
   # (a bare `2e-5` is a string under plain PyYAML).
   lr: 2.0e-5
 loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
 optimizer_in_bwd: true
 compile: false  # set it to true for better memory and performance

 # Training environment
 device: cuda

 # Memory management
 enable_activation_checkpointing: true

 # Reduced precision
 dtype: bf16

 # Logging: logs go to a `logs` subdirectory of the fine-tuned model directory.
 metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: ${path_model_finetuned}/logs
 # Top-level keys (siblings of `metric_logger`, not nested in it).
 output_dir: ${path_model_finetuned}
 log_every_n_steps: 1
 log_peak_memory_stats: false

 # Profiler (disabled via `enabled: false`)
 profiler:
   _component_: torchtune.training.setup_torch_profiler
   enabled: false

   # Output directory of trace artifacts
   output_dir: ${path_model_finetuned}/profiling_outputs

   # `torch.profiler.ProfilerActivity` types to trace
   cpu: true
   cuda: true

   # Trace options passed to `torch.profiler.profile`
   profile_memory: true
   with_stack: false
   record_shapes: true
   with_flops: false

   # `torch.profiler.schedule` options:
   # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
   wait_steps: 1
   warmup_steps: 2
   active_steps: 1
   num_cycles: 1
 # Base directories for working with paths in torchtune; referenced below as
 # ${path_model_downloaded} and ${path_model_finetuned}.
 path_model_downloaded: /home/mike/_torch_tune/_xmodel_downloaded/MetaLlama-3.1-8B
 path_model_finetuned: /home/mike/_torch_tune/_xmodel_finetuned/MetaLlama-3.1-8B_finetuned

 # Tokenizer
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
   path: ${path_model_downloaded}/original/tokenizer.model
   max_seq_len: 2048

 # Dataset
 dataset:
   _component_: torchtune.datasets.text_completion_dataset
   source: json
   data_files: /home/mike/data_infrastructure/text_monolingual_train.json
   column: text
   split: train
 seed: null
 shuffle: true

 # Model Arguments
 model:
   _component_: torchtune.models.llama3_1.llama3_1_8b

 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: ${path_model_downloaded}
   checkpoint_files:
     - model-00001-of-00004.safetensors
     - model-00002-of-00004.safetensors
     - model-00003-of-00004.safetensors
     - model-00004-of-00004.safetensors
   recipe_checkpoint: null
   output_dir: ${path_model_finetuned}
   model_type: LLAMA3
 resume_from_checkpoint: false

 # Fine-tuning arguments
 batch_size: 2
 epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW8bit
   # Written with a decimal point so YAML 1.1 loaders also read it as a float.
   lr: 2.0e-5
 loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
 optimizer_in_bwd: true
 compile: false  # set it to true for better memory and performance

 # Training environment
 device: cuda

 # Memory management
 enable_activation_checkpointing: true

 # Reduced precision
 dtype: bf16

 # Logging
 metric_logger:
   _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: ${path_model_finetuned}/logs
 output_dir: ${path_model_finetuned}
 log_every_n_steps: 1
 log_peak_memory_stats: false

 # Profiler (disabled via `enabled: false`)
 profiler:
   _component_: torchtune.training.setup_torch_profiler
   enabled: false

   # Output directory of trace artifacts
   output_dir: ${path_model_finetuned}/profiling_outputs

   # `torch.profiler.ProfilerActivity` types to trace
   cpu: true
   cuda: true

   # Trace options passed to `torch.profiler.profile`
   profile_memory: true
   with_stack: false
   record_shapes: true
   with_flops: false

   # `torch.profiler.schedule` options:
   # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
   wait_steps: 1
   warmup_steps: 2
   active_steps: 1
   num_cycles: 1
No results found