# Chat configuration section.
chat:
  # Predefined setting or environment that influences the behavior and responses of
  # the chat assistant. Each context is associated with a specific prompt that
  # guides the assistant on how to respond to user inputs. Available contexts:
  # default, cli_helper.
  # Default: default
  context: default
  # Directory where chat logs are stored.
  # Default: /mnt/.local/share/instructlab/chatlogs
  logs_dir: /mnt/.local/share/instructlab/chatlogs
  # The maximum number of tokens that can be generated in the chat completion. Be
  # aware that larger values use more memory.
  # Default: None
  max_tokens:
  # Model to be used for chatting with.
  # Default: /mnt/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  model: /mnt/.cache/instructlab/models/granite-3.1-8b-lab-v2
  # Filepath of a dialog session file.
  # Default: None
  session:
  # Controls the randomness of the model's responses. Lower values make the output
  # more deterministic, while higher values produce more random results.
  # Default: 1.0
  temperature: 1.0
  # Enable vim keybindings for chat.
  # Default: False
  vi_mode: false
  # Renders vertical overflow if enabled, displays ellipses otherwise.
  # Default: True
  visible_overflow: true
# Evaluate configuration section.
evaluate:
  # Base taxonomy branch.
  # Default: None
  base_branch:
  # Base model to compare with 'model' for mt_bench_branch and mmlu_branch.
  # Default: instructlab/granite-7b-lab
  base_model: /mnt/.cache/instructlab/models/granite-3.1-8b-starter-v2
  # Taxonomy branch containing custom skills/knowledge that should be used for
  # evaluation runs.
  # Default: None
  branch:
  # Settings to run DK-Bench against a file of user-created questions, reference
  # answers, and responses. If responses are not provided, they are generated from
  # a model.
  dk_bench:
    # File with questions and reference answers used for evaluation during DK-Bench.
    # The file must be a valid '.jsonl' file with the fields 'user_input' and
    # 'reference' in each entry.
    # Default: None
    input_questions:
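    # Illustrative 'input_questions' entry (each line of the file is a JSON object
    # with the 'user_input' and 'reference' fields described above; the content
    # below is a made-up example, not a required value):
    #   {"user_input": "What does DK-Bench evaluate?", "reference": "Domain knowledge."}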
    # Judge model for DK-Bench.
    # Default: gpt-4o
    judge_model: gpt-4o
    # Directory where DK-Bench evaluation results are stored.
    # Default: /mnt/.local/share/instructlab/internal/eval_data/dk_bench
    output_dir: /mnt/.local/share/instructlab/internal/eval_data/dk_bench
    # Comma-separated list of file formats for results of the DK-Bench evaluation.
    # Ex: 'csv,jsonl'. Valid options in the list are csv, jsonl, and xlsx. If this
    # option is not provided, the results are written as a .jsonl file.
    # Default: jsonl
    output_file_formats: jsonl
  # Number of GPUs to use for running evaluation.
  # Default: None
  gpus: 8
  # MMLU benchmarking settings.
  mmlu:
    # Batch size for evaluation. Valid values are a positive integer or 'auto' to
    # select the largest batch size that will fit in memory.
    # Default: auto
    batch_size: auto
    # Number of question-answer pairs provided in the context preceding the question
    # used for evaluation.
    # Default: 5
    few_shots: 5
  # Settings to run MMLU against a branch of taxonomy containing custom
  # skills/knowledge used for training.
  mmlu_branch:
    # Directory where custom MMLU tasks are stored.
    # Default: /mnt/.local/share/instructlab/datasets
    tasks_dir: /mnt/.local/share/instructlab/datasets
  # Model to be evaluated.
  # Default: None
  model:
  # Multi-turn benchmarking settings for skills.
  mt_bench:
    # Judge model for MT-Bench.
    # Default: prometheus-eval/prometheus-8x7b-v2.0
    judge_model: /mnt/.cache/instructlab/models/prometheus-8x7b-v2-0
    # Number of workers to use for evaluation with mt_bench or mt_bench_branch.
    # Must be a positive integer or 'auto'.
    # Default: auto
    max_workers: auto
    # Directory where MT-Bench evaluation results are stored.
    # Default: /mnt/.local/share/instructlab/internal/eval_data/mt_bench
    output_dir: /mnt/.local/share/instructlab/internal/eval_data/mt_bench
  # Settings to run MT-Bench against a branch of taxonomy containing custom
  # skills/knowledge used for training.
  mt_bench_branch:
    # Judge model for MT-Bench-Branch.
    # Default: prometheus-eval/prometheus-8x7b-v2.0
    judge_model: /mnt/.cache/instructlab/models/prometheus-8x7b-v2-0
    # Directory where MT-Bench-Branch evaluation results are stored.
    # Default: /mnt/.local/share/instructlab/internal/eval_data/mt_bench_branch
    output_dir: /mnt/.local/share/instructlab/internal/eval_data/mt_bench_branch
    # Path to where the base taxonomy is stored.
    # Default: /mnt/.local/share/instructlab/taxonomy
    taxonomy_path: /mnt/.local/share/instructlab/taxonomy
  # System prompt for the model generating responses during DK-Bench.
  # Default: None
  system_prompt:
  # Temperature for the model generating responses during DK-Bench. Temperature
  # controls the randomness of the model's responses. Lower values make the output
  # more deterministic, while higher values produce more random results.
  # Default: 0.0
  temperature: 0.0
# General configuration section.
general:
  # Debug level for logging.
  # Default: 0
  debug_level: 0
  # Log format. https://docs.python.org/3/library/logging.html#logrecord-attributes
  # Default: %(levelname)s %(asctime)s %(name)s:%(lineno)d: %(message)s
  log_format: '%(levelname)s %(asctime)s %(name)s:%(lineno)d: %(message)s'
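  # With the format above, a rendered log line looks roughly like the following
  # (illustrative; the logger name and line number depend on the code path):
  #   INFO 2025-01-01 12:00:00,000 instructlab.model.serve:42: Starting server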
  # Log level for logging.
  # Default: INFO
  log_level: INFO
  # ID of the student model to be used for training.
  # Default: None
  student_model_id: granite-3.1-starter-v2
  # ID of the teacher model to be used for data generation.
  # Default: None
  teacher_model_id:
  # Use the legacy IBM Granite chat template (the default uses the 3.0 Instruct
  # template).
  # Default: False
  use_legacy_tmpl: false
# Generate configuration section.
generate:
  # Number of batches to send for generation on each core.
  # Default: 8
  batch_size: 256
  # Maximum number of words per chunk.
  # Default: 1000
  chunk_word_count: 1000
  # The maximum number of tokens for the model to generate during knowledge
  # generation. A lower number yields less data but a faster SDG run; lowering it
  # is recommended on consumer hardware.
  # Default: 4096
  max_num_tokens: 4096
  # Teacher model that will be used to synthetically generate training data.
  # Default: /mnt/.cache/instructlab/models/mistral-7b-instruct-v0.2.Q4_K_M.gguf
  model: /mnt/.cache/instructlab/models/mixtral-8x7b-instruct-v0-1
  # Number of CPU cores to use for generation.
  # Default: 10
  num_cpus: 2
  # Number of instructions to use.
  # Default: -1
  # Deprecated: see 'sdg_scale_factor' instead.
  num_instructions: -1
  # Directory where generated datasets are stored.
  # Default: /mnt/.local/share/instructlab/datasets
  output_dir: /mnt/.local/share/instructlab/datasets
  # Data generation pipeline to use. Available: 'simple', 'full', or a valid path to
  # a directory of pipeline workflow YAML files. Note that 'full' requires a larger
  # teacher model, Mixtral-8x7b.
  # Default: full
  pipeline: /usr/share/instructlab/sdg/pipelines/agentic
  # The total number of instructions to be generated.
  # Default: 30
  sdg_scale_factor: 30
  # Branch of taxonomy used to calculate the diff against.
  # Default: origin/main
  taxonomy_base: empty
  # Directory where taxonomy is stored and accessed from.
  # Default: /mnt/.local/share/instructlab/taxonomy
  taxonomy_path: /mnt/.local/share/instructlab/taxonomy
  # Teacher configuration.
  teacher:
    # Serving backend to use to host the model.
    # Default: None
    # Examples:
    # - vllm
    # - llama-cpp
    backend: vllm
    # Chat template to supply to the model. Possible values: 'auto' (default),
    # 'tokenizer', or a path to a jinja2 file.
    # Default: None
    # Examples:
    # - auto
    # - tokenizer
    # - A filesystem path expressing the location of a custom template
    chat_template: tokenizer
    # llama-cpp serving settings.
    llama_cpp:
      # Number of model layers to offload to GPU. -1 means all layers.
      # Default: -1
      gpu_layers: -1
      # Large Language Model Family
      # Default: ''
      # Examples:
      # - granite
      # - mixtral
      llm_family: ''
      # Maximum number of tokens that can be processed by the model.
      # Default: 4096
      max_ctx_size: 4096
    # Directory where the model to be served is stored.
    # Default: /mnt/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
    model_path: /mnt/.cache/instructlab/models/mixtral-8x7b-instruct-v0-1
    # Server configuration including host and port.
    # Default: host='127.0.0.1' port=8000 backend_type='' current_max_ctx_size=4096
    server:
      # Backend Instance Type
      # Default: ''
      # Examples:
      # - llama-cpp
      # - vllm
      backend_type: ''
      # Maximum number of tokens that can be processed by the currently served model.
      # Default: 4096
      current_max_ctx_size: 4096
      # Host to serve on.
      # Default: 127.0.0.1
      host: 127.0.0.1
      # Port to serve on.
      # Default: 8000
      port: 8000
    # vLLM serving settings.
    vllm:
      # Number of GPUs to use.
      # Default: None
      gpus: 8
      # Large Language Model Family
      # Default: ''
      # Examples:
      # - granite
      # - mixtral
      llm_family: mixtral
      # Maximum number of attempts to start the vLLM server.
      # Default: 120
      max_startup_attempts: 120
      vllm_args:
        - --max-num-seqs
        - '512'
        - --enable-lora
        - --enable-prefix-caching
        - --max-lora-rank
        - '64'
        - --dtype
        - bfloat16
        - --lora-dtype
        - bfloat16
        - --fully-sharded-loras
        - --lora-modules
        - skill-classifier-v3-clm=/mnt/.cache/instructlab/models/skills-adapter-v3
        - text-classifier-knowledge-v3-clm=/mnt/.cache/instructlab/models/knowledge-adapter-v3
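      # Entries in 'vllm_args' are passed through to the vLLM server's command line,
      # so any flag accepted by 'vllm serve' may appear here. In this particular
      # config the LoRA flags load the two classifier adapters alongside the Mixtral
      # teacher (a reading of the values above, not additional required settings).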
# Metadata pertaining to the specifics of the system which the Configuration is
# meant to be applied to.
metadata:
  # Manufacturer, Family, and SKU of the system CPU, ex: Apple M3 Max
  # Default: None
  cpu_info:
  # Number of GPUs on the system, ex: 8
  # Default: None
  gpu_count: 8
  # Family of the system GPU, ex: H100
  # Default: None
  gpu_family: MI300X
  # Manufacturer of the system GPU, ex: Nvidia
  # Default: None
  gpu_manufacturer: AMD
  # Specific SKU-related information about the given GPU, ex: PCIe, NVL
  # Default: None
  gpu_sku:
models:
  - # Family the model belongs to.
    family: llama
    # Internal ID referring to a particular model from the list.
    id: llama-3.3
    # Path to where the model can be found. Can be either a HF reference or a local
    # filepath.
    path: /mnt/.cache/instructlab/models/meta-llama/Llama-3.3-70B-Instruct
    # The initial message used to prompt the conversation with this model, e.g. "You
    # are a helpful AI assistant..."
    # Default: None
    system_prompt:
  - # Family the model belongs to.
    family: granite
    # Internal ID referring to a particular model from the list.
    id: granite-3.1-starter-v2
    # Path to where the model can be found. Can be either a HF reference or a local
    # filepath.
    path: /mnt/.cache/instructlab/models/granite-3.1-8b-starter-v2
    # The initial message used to prompt the conversation with this model, e.g. "You
    # are a helpful AI assistant..."
    # Default: None
    system_prompt: You are a Red Hat® Instruct Model, an AI language model developed by Red Hat and IBM Research based on the granite-3.1-8b-base model. Your primary role is to serve as a chat assistant.
# RAG configuration section.
rag:
  # RAG convert configuration section.
  convert:
    # Directory where converted documents are stored.
    # Default: /mnt/.local/share/instructlab/converted_documents
    output_dir: /mnt/.local/share/instructlab/converted_documents
    # Branch of taxonomy used to calculate the diff against.
    # Default: origin/main
    taxonomy_base: origin/main
    # Directory where taxonomy is stored and accessed from.
    # Default: /mnt/.local/share/instructlab/taxonomy
    taxonomy_path: /mnt/.local/share/instructlab/taxonomy
  # Document store configuration for RAG.
  document_store:
    # Document store collection name.
    # Default: ilab
    collection_name: ilab
    # Document store service URI.
    # Default: /mnt/.local/share/instructlab/embeddings.db
    uri: /mnt/.local/share/instructlab/embeddings.db
  # Embedding model configuration for RAG.
  embedding_model:
    # Embedding model to use for RAG.
    # Default: /mnt/.cache/instructlab/models/ibm-granite/granite-embedding-125m-english
    embedding_model_path: /mnt/.cache/instructlab/models/ibm-granite/granite-embedding-125m-english
  # Flag for enabling RAG functionality.
  # Default: False
  enabled: false
  # Retrieval configuration parameters for RAG.
  retriever:
    # The maximum number of documents to retrieve.
    # Default: 3
    top_k: 3
# Serve configuration section.
serve:
  # Serving backend to use to host the model.
  # Default: None
  # Examples:
  # - vllm
  # - llama-cpp
  backend: vllm
  # Chat template to supply to the model. Possible values: 'auto' (default),
  # 'tokenizer', or a path to a jinja2 file.
  # Default: None
  # Examples:
  # - auto
  # - tokenizer
  # - A filesystem path expressing the location of a custom template
  chat_template: auto
  # llama-cpp serving settings.
  llama_cpp:
    # Number of model layers to offload to GPU. -1 means all layers.
    # Default: -1
    gpu_layers: -1
    # Large Language Model Family
    # Default: ''
    # Examples:
    # - granite
    # - mixtral
    llm_family: ''
    # Maximum number of tokens that can be processed by the model.
    # Default: 4096
    max_ctx_size: 4096
  # Directory where the model to be served is stored.
  # Default: /mnt/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  model_path: /mnt/.cache/instructlab/models/granite-3.1-8b-lab-v2
  # Server configuration including host and port.
  # Default: host='127.0.0.1' port=8000 backend_type='' current_max_ctx_size=4096
  server:
    # Backend Instance Type
    # Default: ''
    # Examples:
    # - llama-cpp
    # - vllm
    backend_type: ''
    # Maximum number of tokens that can be processed by the currently served model.
    # Default: 4096
    current_max_ctx_size: 4096
    # Host to serve on.
    # Default: 127.0.0.1
    host: 127.0.0.1
    # Port to serve on.
    # Default: 8000
    port: 8000
  # vLLM serving settings.
  vllm:
    # Number of GPUs to use.
    # Default: None
    gpus: 8
    # Large Language Model Family
    # Default: ''
    # Examples:
    # - granite
    # - mixtral
    llm_family: ''
    # Maximum number of attempts to start the vLLM server.
    # Default: 120
    max_startup_attempts: 120
    vllm_args:
      - --tensor-parallel-size
      - '8'
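    # Illustrative smoke test (assumes the host/port defaults above and the vLLM
    # backend, which exposes an OpenAI-compatible API):
    #   curl http://127.0.0.1:8000/v1/models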
# Train configuration section.
train:
  # Additional arguments to pass to the training script. These arguments are passed
  # as key-value pairs to the training script.
  # Default: {}
  additional_args:
    learning_rate: 6e-6
    lora_alpha: 32
    lora_dropout: 0.1
    warmup_steps: 25
    use_dolomite: false
  # Save a checkpoint at the end of each epoch.
  # Default: True
  checkpoint_at_epoch: true
  # Directory where periodic training checkpoints are stored.
  # Default: /mnt/.local/share/instructlab/checkpoints
  ckpt_output_dir: /mnt/.local/share/instructlab/checkpoints
  # Directory where the processed training data is stored (post
  # filtering/tokenization/masking).
  # Default: /mnt/.local/share/instructlab/internal
  data_output_dir: /mnt/.local/share/instructlab/internal
  # For the training library (pipelines 'full' or 'accelerated'), this must specify
  # the path to the dataset '.jsonl' file. For legacy training (pipeline 'simple'),
  # this specifies the path to the directory.
  # Default: /mnt/.local/share/instructlab/datasets
  data_path: /mnt/.local/share/instructlab/datasets
  # Allow CPU offload for the DeepSpeed optimizer.
  # Default: False
  deepspeed_cpu_offload_optimizer: false
  # PyTorch device to use. Use 'cpu' for 'simple' and 'full' training on Linux. Use
  # 'mps' for 'full' training on macOS Metal Performance Shaders. Use 'cuda' for
  # Nvidia CUDA / AMD ROCm GPUs. Use 'hpu' for Intel Gaudi GPUs.
  # Default: cpu
  # Examples:
  # - cpu
  # - mps
  # - cuda
  # - hpu
  device: cuda
  # Whether to disable the use of flash-attention during training. This is useful
  # when using older GPUs.
  # Default: False
  disable_flash_attn: false
  # Pick a distributed training backend framework for GPU-accelerated full
  # fine-tuning.
  # Default: fsdp
  distributed_backend: fsdp
  # The number of samples in a batch that the model should see before its parameters
  # are updated.
  # Default: 64
  effective_batch_size: 128
  # Allow CPU offload for the FSDP optimizer.
  # Default: False
  fsdp_cpu_offload_optimizer: false
  # Boolean to indicate if the model being trained is a padding-free transformer
  # model such as Granite.
  # Default: False
  is_padding_free: false
  # The data type for quantization in LoRA training. Valid options are 'None' and
  # 'nf4'.
  # Default: nf4
  # Examples:
  # - nf4
  lora_quantize_dtype:
  # Rank of the low-rank matrices to be used during training.
  # Default: 0
  lora_rank: 0
  # Maximum tokens per GPU for each batch that will be handled in a single step. If
  # running into out-of-memory errors, this value can be lowered, but not below the
  # 'max_seq_len'.
  # Default: 5000
  max_batch_len: 120000
  # Maximum sequence length to be included in the training set. Samples exceeding
  # this length will be dropped.
  # Default: 4096
  max_seq_len: 10000
  # Directory where the model to be trained is stored.
  # Default: instructlab/granite-7b-lab
  model_path: /mnt/.cache/instructlab/models/granite-3.1-8b-starter-v2
  # Number of GPUs to use for training. This value is not supported in legacy
  # training or on macOS.
  # Default: 1
  nproc_per_node: 8
  # Number of epochs to run training for.
  # Default: 10
  num_epochs: 8
  # Base directory for organization of end-to-end intermediate outputs.
  # Default: /mnt/.local/share/instructlab/phased
  phased_base_dir: /mnt/.local/share/instructlab/phased
  # Judge model path for phased MT-Bench evaluation.
  # Default: /mnt/.cache/instructlab/models/prometheus-eval/prometheus-8x7b-v2.0
  phased_mt_bench_judge: /mnt/.cache/instructlab/models/prometheus-8x7b-v2-0
  # Phased phase1 effective batch size.
  # Default: 128
  phased_phase1_effective_batch_size: 128
  # Learning rate for phase1 knowledge training.
  # Default: 2e-05
  phased_phase1_learning_rate: 2e-05
  # Number of epochs to run training for during phase1 (the experimentally optimal
  # number is 7).
  # Default: 7
  phased_phase1_num_epochs: 7
  # Number of samples the model should see before saving a checkpoint during phase1.
  # Disabled when set to 0.
  # Default: 0
  phased_phase1_samples_per_save: 0
  # Phased phase2 effective batch size.
  # Default: 3840
  phased_phase2_effective_batch_size: 3840
  # Learning rate for phase2 skills training.
  # Default: 6e-06
  phased_phase2_learning_rate: 6e-06
  # Number of epochs to run training for during phase2.
  # Default: 10
  phased_phase2_num_epochs: 10
  # Number of samples the model should see before saving a checkpoint during phase2.
  # Disabled when set to 0.
  # Default: 0
  phased_phase2_samples_per_save: 0
  # Training pipeline to use. 'simple' is for systems with limited resources, 'full'
  # is for more capable consumer systems (64 GB of RAM), and 'accelerated' is for
  # systems with a dedicated GPU.
  # Default: full
  # Examples:
  # - simple
  # - full
  # - accelerated
  pipeline: accelerated
  # Number of samples the model should see before saving a checkpoint.
  # Default: 250000
  save_samples: 0
  # Optional path to a YAML file that tracks the progress of multiphase training.
  # Default: None
  training_journal:
# Configuration file structure version.
# Default: 1.0.0
version: 1.0.0
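# With 'pipeline: accelerated' and the phased_* settings above, multiphase training
# is typically launched with something like the following (illustrative; exact flags
# vary by InstructLab release, and the data paths are placeholders):
#   ilab model train --strategy lab-multiphase \
#     --phased-phase1-data <knowledge.jsonl> --phased-phase2-data <skills.jsonl>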