# Chat configuration section.
chat:
  # Predefined setting or environment that influences the behavior and responses of
  # the chat assistant. Each context is associated with a specific prompt that
  # guides the assistant on how to respond to user inputs. Available contexts:
  # default, cli_helper.
  # Default: default
  context: default
  # Directory where chat logs are stored.
  # Default: /mnt/.local/share/instructlab/chatlogs
  logs_dir: /mnt/.local/share/instructlab/chatlogs
  # The maximum number of tokens that can be generated in the chat completion. Be
  # aware that larger values use more memory.
  # Default: None
  max_tokens:
  # Model to be used for chatting with.
  # Default: /mnt/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  model: /mnt/.cache/instructlab/models/granite-3.1-8b-lab-v2
  # Filepath of a dialog session file.
  # Default: None
  session:
  # Controls the randomness of the model's responses. Lower values make the output
  # more deterministic, while higher values produce more random results.
  # Default: 1.0
  temperature: 1.0
  # Enable vim keybindings for chat.
  # Default: False
  vi_mode: false
  # Renders vertical overflow if enabled, displays ellipses otherwise.
  # Default: True
  visible_overflow: true
# Evaluate configuration section.
evaluate:
  # Base taxonomy branch.
  # Default: None
  base_branch:
  # Base model to compare with 'model' for mt_bench_branch and mmlu_branch.
  # Default: instructlab/granite-7b-lab
  base_model: /mnt/.cache/instructlab/models/granite-3.1-8b-starter-v2
  # Taxonomy branch containing custom skills/knowledge that should be used for
  # evaluation runs.
  # Default: None
  branch:
  # Settings to run DK-Bench against a file of user-created questions, reference
  # answers, and responses. If responses are not provided, they are generated from
  # a model.
  dk_bench:
    # File with questions and reference answers used for evaluation during DK-Bench.
    # The file must be a valid '.jsonl' file with the fields 'user_input' and
    # 'reference' in each entry.
    # Default: None
    input_questions:
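    # Illustrative 'input_questions' entry (each line of the file is a JSON object
    # with the 'user_input' and 'reference' fields described above; the content
    # below is a made-up example, not a required value):
    #   {"user_input": "What does DK-Bench evaluate?", "reference": "Domain knowledge."}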
    # Judge model for DK-Bench.
    # Default: gpt-4o
    judge_model: gpt-4o
    # Directory where DK-Bench evaluation results are stored.
    # Default: /mnt/.local/share/instructlab/internal/eval_data/dk_bench
    output_dir: /mnt/.local/share/instructlab/internal/eval_data/dk_bench
    # Comma-separated list of file formats for results of the DK-Bench evaluation.
    # Ex: 'csv,jsonl'. Valid options in the list are csv, jsonl, and xlsx. If this
    # option is not provided, the results are written as a .jsonl file.
    # Default: jsonl
    output_file_formats: jsonl
  # Number of GPUs to use for running evaluation.
  # Default: None
  gpus: 8
  # MMLU benchmarking settings.
  mmlu:
    # Batch size for evaluation. Valid values are a positive integer or 'auto' to
    # select the largest batch size that will fit in memory.
    # Default: auto
    batch_size: auto
    # Number of question-answer pairs provided in the context preceding the question
    # used for evaluation.
    # Default: 5
    few_shots: 5
  # Settings to run MMLU against a branch of taxonomy containing custom
  # skills/knowledge used for training.
  mmlu_branch:
    # Directory where custom MMLU tasks are stored.
    # Default: /mnt/.local/share/instructlab/datasets
    tasks_dir: /mnt/.local/share/instructlab/datasets
  # Model to be evaluated.
  # Default: None
  model:
  # Multi-turn benchmarking settings for skills.
  mt_bench:
    # Judge model for MT-Bench.
    # Default: prometheus-eval/prometheus-8x7b-v2.0
    judge_model: /mnt/.cache/instructlab/models/prometheus-8x7b-v2-0
    # Number of workers to use for evaluation with mt_bench or mt_bench_branch.
    # Must be a positive integer or 'auto'.
    # Default: auto
    max_workers: auto
    # Directory where MT-Bench evaluation results are stored.
    # Default: /mnt/.local/share/instructlab/internal/eval_data/mt_bench
    output_dir: /mnt/.local/share/instructlab/internal/eval_data/mt_bench
  # Settings to run MT-Bench against a branch of taxonomy containing custom
  # skills/knowledge used for training.
  mt_bench_branch:
    # Judge model for MT-Bench-Branch.
    # Default: prometheus-eval/prometheus-8x7b-v2.0
    judge_model: /mnt/.cache/instructlab/models/prometheus-8x7b-v2-0
    # Directory where MT-Bench-Branch evaluation results are stored.
    # Default: /mnt/.local/share/instructlab/internal/eval_data/mt_bench_branch
    output_dir: /mnt/.local/share/instructlab/internal/eval_data/mt_bench_branch
    # Path to where the base taxonomy is stored.
    # Default: /mnt/.local/share/instructlab/taxonomy
    taxonomy_path: /mnt/.local/share/instructlab/taxonomy
  # System prompt for the model generating responses during DK-Bench.
  # Default: None
  system_prompt:
  # Temperature for the model generating responses during DK-Bench. Temperature
  # controls the randomness of the model's responses. Lower values make the output
  # more deterministic, while higher values produce more random results.
  # Default: 0.0
  temperature: 0.0
# General configuration section.
general:
  # Debug level for logging.
  # Default: 0
  debug_level: 0
  # Log format. https://docs.python.org/3/library/logging.html#logrecord-attributes
  # Default: %(levelname)s %(asctime)s %(name)s:%(lineno)d: %(message)s
  log_format: '%(levelname)s %(asctime)s %(name)s:%(lineno)d: %(message)s'
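  # With the format above, a rendered log line looks roughly like the following
  # (illustrative; the logger name and line number depend on the code path):
  #   INFO 2025-01-01 12:00:00,000 instructlab.model.serve:42: Starting server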
  # Log level for logging.
  # Default: INFO
  log_level: INFO
  # ID of the student model to be used for training.
  # Default: None
  student_model_id: granite-3.1-starter-v2
  # ID of the teacher model to be used for data generation.
  # Default: None
  teacher_model_id:
  # Use the legacy IBM Granite chat template (the default uses the 3.0 Instruct
  # template).
  # Default: False
  use_legacy_tmpl: false
# Generate configuration section.
generate:
  # Number of batches to send for generation on each core.
  # Default: 8
  batch_size: 256
  # Maximum number of words per chunk.
  # Default: 1000
  chunk_word_count: 1000
  # The maximum number of tokens for the model to generate during knowledge
  # generation. A lower number yields less data but a faster SDG run; lowering it
  # is recommended on consumer hardware.
  # Default: 4096
  max_num_tokens: 4096
  # Teacher model that will be used to synthetically generate training data.
  # Default: /mnt/.cache/instructlab/models/mistral-7b-instruct-v0.2.Q4_K_M.gguf
  model: /mnt/.cache/instructlab/models/mixtral-8x7b-instruct-v0-1
  # Number of CPU cores to use for generation.
  # Default: 10
  num_cpus: 2
  # Number of instructions to use.
  # Default: -1
  # Deprecated: see 'sdg_scale_factor' instead.
  num_instructions: -1
  # Directory where generated datasets are stored.
  # Default: /mnt/.local/share/instructlab/datasets
  output_dir: /mnt/.local/share/instructlab/datasets
  # Data generation pipeline to use. Available: 'simple', 'full', or a valid path to
  # a directory of pipeline workflow YAML files. Note that 'full' requires a larger
  # teacher model, Mixtral-8x7b.
  # Default: full
  pipeline: /usr/share/instructlab/sdg/pipelines/agentic
  # The total number of instructions to be generated.
  # Default: 30
  sdg_scale_factor: 30
  # Branch of taxonomy used to calculate the diff against.
  # Default: origin/main
  taxonomy_base: empty
  # Directory where taxonomy is stored and accessed from.
  # Default: /mnt/.local/share/instructlab/taxonomy
  taxonomy_path: /mnt/.local/share/instructlab/taxonomy
  # Teacher configuration.
  teacher:
    # Serving backend to use to host the model.
    # Default: None
    # Examples:
    # - vllm
    # - llama-cpp
    backend: vllm
    # Chat template to supply to the model. Possible values: 'auto' (default),
    # 'tokenizer', or a path to a jinja2 file.
    # Default: None
    # Examples:
    # - auto
    # - tokenizer
    # - A filesystem path expressing the location of a custom template
    chat_template: tokenizer
    # llama-cpp serving settings.
    llama_cpp:
      # Number of model layers to offload to GPU. -1 means all layers.
      # Default: -1
      gpu_layers: -1
      # Large Language Model Family
      # Default: ''
      # Examples:
      # - granite
      # - mixtral
      llm_family: ''
      # Maximum number of tokens that can be processed by the model.
      # Default: 4096
      max_ctx_size: 4096
    # Directory where the model to be served is stored.
    # Default: /mnt/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
    model_path: /mnt/.cache/instructlab/models/mixtral-8x7b-instruct-v0-1
    # Server configuration including host and port.
    # Default: host='127.0.0.1' port=8000 backend_type='' current_max_ctx_size=4096
    server:
      # Backend Instance Type
      # Default: ''
      # Examples:
      # - llama-cpp
      # - vllm
      backend_type: ''
      # Maximum number of tokens that can be processed by the currently served model.
      # Default: 4096
      current_max_ctx_size: 4096
      # Host to serve on.
      # Default: 127.0.0.1
      host: 127.0.0.1
      # Port to serve on.
      # Default: 8000
      port: 8000
    # vLLM serving settings.
    vllm:
      # Number of GPUs to use.
      # Default: None
      gpus: 8
      # Large Language Model Family
      # Default: ''
      # Examples:
      # - granite
      # - mixtral
      llm_family: mixtral
      # Maximum number of attempts to start the vLLM server.
      # Default: 120
      max_startup_attempts: 120
      vllm_args:
        - --max-num-seqs
        - '512'
        - --enable-lora
        - --enable-prefix-caching
        - --max-lora-rank
        - '64'
        - --dtype
        - bfloat16
        - --lora-dtype
        - bfloat16
        - --fully-sharded-loras
        - --lora-modules
        - skill-classifier-v3-clm=/mnt/.cache/instructlab/models/skills-adapter-v3
        - text-classifier-knowledge-v3-clm=/mnt/.cache/instructlab/models/knowledge-adapter-v3
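      # Entries in 'vllm_args' are passed through to the vLLM server's command line,
      # so any flag accepted by 'vllm serve' may appear here. In this particular
      # config the LoRA flags load the two classifier adapters alongside the Mixtral
      # teacher (a reading of the values above, not additional required settings).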
# Metadata pertaining to the specifics of the system which the Configuration is
# meant to be applied to.
metadata:
  # Manufacturer, Family, and SKU of the system CPU, ex: Apple M3 Max
  # Default: None
  cpu_info:
  # Number of GPUs on the system, ex: 8
  # Default: None
  gpu_count: 8
  # Family of the system GPU, ex: H100
  # Default: None
  gpu_family: MI300X
  # Manufacturer of the system GPU, ex: Nvidia
  # Default: None
  gpu_manufacturer: AMD
  # Specific SKU-related information about the given GPU, ex: PCIe, NVL
  # Default: None
  gpu_sku:
models:
  - # Family the model belongs to.
    family: llama
    # Internal ID referring to a particular model from the list.
    id: llama-3.3
    # Path to where the model can be found. Can be either a HF reference or a local
    # filepath.
    path: /mnt/.cache/instructlab/models/meta-llama/Llama-3.3-70B-Instruct
    # The initial message used to prompt the conversation with this model, e.g. "You
    # are a helpful AI assistant..."
    # Default: None
    system_prompt:
  - # Family the model belongs to.
    family: granite
    # Internal ID referring to a particular model from the list.
    id: granite-3.1-starter-v2
    # Path to where the model can be found. Can be either a HF reference or a local
    # filepath.
    path: /mnt/.cache/instructlab/models/granite-3.1-8b-starter-v2
    # The initial message used to prompt the conversation with this model, e.g. "You
    # are a helpful AI assistant..."
    # Default: None
    system_prompt: You are a Red Hat® Instruct Model, an AI language model developed by Red Hat and IBM Research based on the granite-3.1-8b-base model. Your primary role is to serve as a chat assistant.
# RAG configuration section.
rag:
  # RAG convert configuration section.
  convert:
    # Directory where converted documents are stored.
    # Default: /mnt/.local/share/instructlab/converted_documents
    output_dir: /mnt/.local/share/instructlab/converted_documents
    # Branch of taxonomy used to calculate the diff against.
    # Default: origin/main
    taxonomy_base: origin/main
    # Directory where taxonomy is stored and accessed from.
    # Default: /mnt/.local/share/instructlab/taxonomy
    taxonomy_path: /mnt/.local/share/instructlab/taxonomy
  # Document store configuration for RAG.
  document_store:
    # Document store collection name.
    # Default: ilab
    collection_name: ilab
    # Document store service URI.
    # Default: /mnt/.local/share/instructlab/embeddings.db
    uri: /mnt/.local/share/instructlab/embeddings.db
  # Embedding model configuration for RAG.
  embedding_model:
    # Embedding model to use for RAG.
    # Default: /mnt/.cache/instructlab/models/ibm-granite/granite-embedding-125m-english
    embedding_model_path: /mnt/.cache/instructlab/models/ibm-granite/granite-embedding-125m-english
  # Flag for enabling RAG functionality.
  # Default: False
  enabled: false
  # Retrieval configuration parameters for RAG.
  retriever:
    # The maximum number of documents to retrieve.
    # Default: 3
    top_k: 3
# Serve configuration section.
serve:
  # Serving backend to use to host the model.
  # Default: None
  # Examples:
  # - vllm
  # - llama-cpp
  backend: vllm
  # Chat template to supply to the model. Possible values: 'auto' (default),
  # 'tokenizer', or a path to a jinja2 file.
  # Default: None
  # Examples:
  # - auto
  # - tokenizer
  # - A filesystem path expressing the location of a custom template
  chat_template: auto
  # llama-cpp serving settings.
  llama_cpp:
    # Number of model layers to offload to GPU. -1 means all layers.
    # Default: -1
    gpu_layers: -1
    # Large Language Model Family
    # Default: ''
    # Examples:
    # - granite
    # - mixtral
    llm_family: ''
    # Maximum number of tokens that can be processed by the model.
    # Default: 4096
    max_ctx_size: 4096
  # Directory where the model to be served is stored.
  # Default: /mnt/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  model_path: /mnt/.cache/instructlab/models/granite-3.1-8b-lab-v2
  # Server configuration including host and port.
  # Default: host='127.0.0.1' port=8000 backend_type='' current_max_ctx_size=4096
  server:
    # Backend Instance Type
    # Default: ''
    # Examples:
    # - llama-cpp
    # - vllm
    backend_type: ''
    # Maximum number of tokens that can be processed by the currently served model.
    # Default: 4096
    current_max_ctx_size: 4096
    # Host to serve on.
    # Default: 127.0.0.1
    host: 127.0.0.1
    # Port to serve on.
    # Default: 8000
    port: 8000
  # vLLM serving settings.
  vllm:
    # Number of GPUs to use.
    # Default: None
    gpus: 8
    # Large Language Model Family
    # Default: ''
    # Examples:
    # - granite
    # - mixtral
    llm_family: ''
    # Maximum number of attempts to start the vLLM server.
    # Default: 120
    max_startup_attempts: 120
    vllm_args:
      - --tensor-parallel-size
      - '8'
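    # Illustrative smoke test (assumes the host/port defaults above and the vLLM
    # backend, which exposes an OpenAI-compatible API):
    #   curl http://127.0.0.1:8000/v1/models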
# Train configuration section.
train:
  # Additional arguments to pass to the training script. These arguments are passed
  # as key-value pairs to the training script.
  # Default: {}
  additional_args:
    learning_rate: 6e-6
    lora_alpha: 32
    lora_dropout: 0.1
    warmup_steps: 25
    use_dolomite: false
  # Save a checkpoint at the end of each epoch.
  # Default: True
  checkpoint_at_epoch: true
  # Directory where periodic training checkpoints are stored.
  # Default: /mnt/.local/share/instructlab/checkpoints
  ckpt_output_dir: /mnt/.local/share/instructlab/checkpoints
  # Directory where the processed training data is stored (post
  # filtering/tokenization/masking).
  # Default: /mnt/.local/share/instructlab/internal
  data_output_dir: /mnt/.local/share/instructlab/internal
  # For the training library (pipelines 'full' or 'accelerated'), this must specify
  # the path to the dataset '.jsonl' file. For legacy training (pipeline 'simple'),
  # this specifies the path to the directory.
  # Default: /mnt/.local/share/instructlab/datasets
  data_path: /mnt/.local/share/instructlab/datasets
  # Allow CPU offload for the DeepSpeed optimizer.
  # Default: False
  deepspeed_cpu_offload_optimizer: false
  # PyTorch device to use. Use 'cpu' for 'simple' and 'full' training on Linux. Use
  # 'mps' for 'full' training on macOS Metal Performance Shaders. Use 'cuda' for
  # Nvidia CUDA / AMD ROCm GPUs. Use 'hpu' for Intel Gaudi GPUs.
  # Default: cpu
  # Examples:
  # - cpu
  # - mps
  # - cuda
  # - hpu
  device: cuda
  # Whether to disable the use of flash-attention during training. This is useful
  # when using older GPUs.
  # Default: False
  disable_flash_attn: false
  # Pick a distributed training backend framework for GPU-accelerated full
  # fine-tuning.
  # Default: fsdp
  distributed_backend: fsdp
  # The number of samples in a batch that the model should see before its parameters
  # are updated.
  # Default: 64
  effective_batch_size: 128
  # Allow CPU offload for the FSDP optimizer.
  # Default: False
  fsdp_cpu_offload_optimizer: false
  # Boolean to indicate if the model being trained is a padding-free transformer
  # model such as Granite.
  # Default: False
  is_padding_free: false
  # The data type for quantization in LoRA training. Valid options are 'None' and
  # 'nf4'.
  # Default: nf4
  # Examples:
  # - nf4
  lora_quantize_dtype:
  # Rank of the low-rank matrices to be used during training.
  # Default: 0
  lora_rank: 0
  # Maximum tokens per GPU for each batch that will be handled in a single step. If
  # running into out-of-memory errors, this value can be lowered, but not below the
  # 'max_seq_len'.
  # Default: 5000
  max_batch_len: 120000
  # Maximum sequence length to be included in the training set. Samples exceeding
  # this length will be dropped.
  # Default: 4096
  max_seq_len: 10000
  # Directory where the model to be trained is stored.
  # Default: instructlab/granite-7b-lab
  model_path: /mnt/.cache/instructlab/models/granite-3.1-8b-starter-v2
  # Number of GPUs to use for training. This value is not supported in legacy
  # training or on macOS.
  # Default: 1
  nproc_per_node: 8
  # Number of epochs to run training for.
  # Default: 10
  num_epochs: 8
  # Base directory for organization of end-to-end intermediate outputs.
  # Default: /mnt/.local/share/instructlab/phased
  phased_base_dir: /mnt/.local/share/instructlab/phased
  # Judge model path for phased MT-Bench evaluation.
  # Default: /mnt/.cache/instructlab/models/prometheus-eval/prometheus-8x7b-v2.0
  phased_mt_bench_judge: /mnt/.cache/instructlab/models/prometheus-8x7b-v2-0
  # Phased phase1 effective batch size.
  # Default: 128
  phased_phase1_effective_batch_size: 128
  # Learning rate for phase1 knowledge training.
  # Default: 2e-05
  phased_phase1_learning_rate: 2e-05
  # Number of epochs to run training for during phase1 (the experimentally optimal
  # number is 7).
  # Default: 7
  phased_phase1_num_epochs: 7
  # Number of samples the model should see before saving a checkpoint during phase1.
  # Disabled when set to 0.
  # Default: 0
  phased_phase1_samples_per_save: 0
  # Phased phase2 effective batch size.
  # Default: 3840
  phased_phase2_effective_batch_size: 3840
  # Learning rate for phase2 skills training.
  # Default: 6e-06
  phased_phase2_learning_rate: 6e-06
  # Number of epochs to run training for during phase2.
  # Default: 10
  phased_phase2_num_epochs: 10
  # Number of samples the model should see before saving a checkpoint during phase2.
  # Disabled when set to 0.
  # Default: 0
  phased_phase2_samples_per_save: 0
  # Training pipeline to use. 'simple' is for systems with limited resources, 'full'
  # is for more capable consumer systems (64 GB of RAM), and 'accelerated' is for
  # systems with a dedicated GPU.
  # Default: full
  # Examples:
  # - simple
  # - full
  # - accelerated
  pipeline: accelerated
  # Number of samples the model should see before saving a checkpoint.
  # Default: 250000
  save_samples: 0
  # Optional path to a YAML file that tracks the progress of multiphase training.
  # Default: None
  training_journal:
# Configuration file structure version.
# Default: 1.0.0
version: 1.0.0
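# With 'pipeline: accelerated' and the phased_* settings above, multiphase training
# is typically launched with something like the following (illustrative; exact flags
# vary by InstructLab release, and the data paths are placeholders):
#   ilab model train --strategy lab-multiphase \
#     --phased-phase1-data <knowledge.jsonl> --phased-phase2-data <skills.jsonl>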