
PEFT Libraries: Practical Tools for Efficient Fine-Tuning

Several libraries make PEFT accessible to practitioners. Today we'll explore the most widely used of them, Hugging Face's peft library, and its practical usage alongside Trainer, bitsandbytes, and TRL.

Hugging Face PEFT Library

# Install
# pip install peft transformers accelerate

from peft import (
    get_peft_model,
    LoraConfig,
    TaskType,
    PeftModel,
    prepare_model_for_kbit_training
)
from transformers import AutoModelForCausalLM, AutoTokenizer

# Basic workflow
model_name = "meta-llama/Llama-2-7b-hf"

# 1. Load base model
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. Configure PEFT method
config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"]
)

# 3. Create PEFT model
peft_model = get_peft_model(model, config)

# 4. Train (with a standard training loop or Trainer; see the loop sketch below)
# 5. Save adapter
peft_model.save_pretrained("./my_adapter")
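
For step 4, a minimal manual training loop looks like the sketch below. This is illustrative only: train_dataloader is an assumed PyTorch DataLoader yielding tokenized batches, and the Trainer-based route is covered later in this post.

from torch.optim import AdamW

# Only the adapter parameters require gradients, so optimize just those
optimizer = AdamW(
    (p for p in peft_model.parameters() if p.requires_grad), lr=2e-4
)

peft_model.train()
for batch in train_dataloader:  # assumed DataLoader of tokenized batches
    loss = peft_model(**batch).loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()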

PEFT Configuration Options

from peft import (
    LoraConfig,
    PrefixTuningConfig,
    PromptTuningConfig,
    AdaLoraConfig,
    IA3Config
)

# LoRA Configuration
lora_config = LoraConfig(
    r=8,                           # Rank
    lora_alpha=32,                 # Scaling
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",                   # Don't train biases
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False           # Training mode
)

# Prefix Tuning Configuration
prefix_config = PrefixTuningConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    num_virtual_tokens=20,
    encoder_hidden_size=128,
    prefix_projection=True
)

# Prompt Tuning Configuration
prompt_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    num_virtual_tokens=8,
    prompt_tuning_init="TEXT",
    prompt_tuning_init_text="Classify if this is positive or negative:",
    tokenizer_name_or_path=model_name
)

# AdaLoRA Configuration (Adaptive LoRA)
adalora_config = AdaLoraConfig(
    init_r=12,                     # Initial rank
    target_r=8,                    # Target rank
    beta1=0.85,
    beta2=0.85,
    tinit=200,
    tfinal=1000,
    deltaT=10,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
    task_type=TaskType.CAUSAL_LM
)

# IA3 Configuration
ia3_config = IA3Config(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["k_proj", "v_proj", "down_proj"],
    feedforward_modules=["down_proj"]
)
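
To see what each configuration costs in trainable parameters, you can attach it to a fresh copy of the base model and use PEFT's built-in counter. A minimal sketch for the LoRA and IA3 configs above; reloading a 7B model per config is slow, so in practice try this on a small model first, and expect the percentages to vary by architecture:

for label, cfg in [("LoRA", lora_config), ("IA3", ia3_config)]:
    base = AutoModelForCausalLM.from_pretrained(model_name)
    print(label)
    # e.g. LoRA at r=8 typically trains well under 1% of the weights
    get_peft_model(base, cfg).print_trainable_parameters()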

Training with Hugging Face Trainer

from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling
from datasets import load_dataset

# Load and prepare dataset
dataset = load_dataset("json", data_files="train.jsonl")

def tokenize_function(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=512,
        padding="max_length"
    )

tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset["train"].column_names
)

# Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False  # Causal LM
)

# Training arguments
training_args = TrainingArguments(
    output_dir="./peft_output",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    warmup_ratio=0.1,
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    optim="adamw_torch",
    report_to="none"  # or "wandb", "tensorboard"
)

# Create trainer
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    data_collator=data_collator
)

# Train
trainer.train()

Loading and Using Adapters

from peft import PeftModel, PeftConfig

# Method 1: Load adapter onto base model
base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
peft_model = PeftModel.from_pretrained(base_model, "./my_adapter")

# Method 2: Auto-detect and load
config = PeftConfig.from_pretrained("./my_adapter")
base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
peft_model = PeftModel.from_pretrained(base_model, "./my_adapter")

# Method 3: From Hub
peft_model = PeftModel.from_pretrained(base_model, "username/my-adapter")

# Merge adapter into base model
merged_model = peft_model.merge_and_unload()
merged_model.save_pretrained("./merged_model")
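
With the adapter loaded (or merged), inference works exactly like a regular transformers model. A quick sketch reusing the tokenizer loaded earlier; the prompt is just an illustration:

inputs = tokenizer("Summarize LoRA in one sentence:", return_tensors="pt")
outputs = peft_model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))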

Using with bitsandbytes

from transformers import BitsAndBytesConfig
import torch

# 4-bit quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

# Load quantized model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

# Prepare for k-bit training
model = prepare_model_for_kbit_training(model)

# Apply PEFT
peft_model = get_peft_model(model, lora_config)
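
Two quick sanity checks before launching a QLoRA run; get_memory_footprint() and print_trainable_parameters() are standard transformers/PEFT methods, and the actual numbers depend on the model and quantization settings:

# The 4-bit base weights dominate memory; LoRA adds only a sliver of trainable params
print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")
peft_model.print_trainable_parameters()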

Adapter Fusion

# Combine multiple adapters
from peft import PeftModel

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(model_name)

# Load first adapter
model = PeftModel.from_pretrained(base_model, "./adapter1", adapter_name="task1")

# Load additional adapters
model.load_adapter("./adapter2", adapter_name="task2")
model.load_adapter("./adapter3", adapter_name="task3")

# List loaded adapters
print(model.peft_config.keys())  # dict_keys(['task1', 'task2', 'task3'])

# Switch active adapter
model.set_adapter("task1")
output1 = model.generate(...)

model.set_adapter("task2")
output2 = model.generate(...)

# Combine adapters with weighted merging
model.add_weighted_adapter(
    adapters=["task1", "task2"],
    weights=[0.7, 0.3],
    adapter_name="merged"
)
model.set_adapter("merged")

Integration with TRL

# TRL (Transformer Reinforcement Learning) integration
from trl import SFTTrainer
from peft import LoraConfig

# Configure LoRA
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# SFT Trainer handles PEFT automatically
trainer = SFTTrainer(
    model=model_name,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=512,
    args=TrainingArguments(
        output_dir="./sft_output",
        per_device_train_batch_size=4,
        num_train_epochs=3,
        learning_rate=2e-4
    )
)

trainer.train()
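
Note that the TRL API has shifted across releases: in newer versions, dataset_text_field and max_seq_length are set on SFTConfig rather than passed to SFTTrainer directly. A rough sketch of the newer style; field names have continued to change, so check the docs for your installed version:

from trl import SFTConfig, SFTTrainer

sft_config = SFTConfig(
    output_dir="./sft_output",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    dataset_text_field="text",
    max_seq_length=512
)

trainer = SFTTrainer(
    model=model_name,
    train_dataset=dataset,
    peft_config=peft_config,
    args=sft_config
)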

Debugging PEFT Models

# Check trainable parameters (PeftModel also provides a built-in print_trainable_parameters())
def print_trainable_params(model):
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    print(f"Trainable: {trainable:,} / {total:,} = {trainable/total:.2%}")

print_trainable_params(peft_model)

# Inspect PEFT configuration
print(peft_model.peft_config)

# Check which modules have PEFT applied
for name, module in peft_model.named_modules():
    if "lora" in name.lower():
        print(f"LoRA module: {name}")

# Get the adapter-only state dict via PEFT's utility function
from peft import get_peft_model_state_dict
adapter_state = get_peft_model_state_dict(peft_model)
print(f"Adapter parameters: {len(adapter_state)}")

Tomorrow we’ll explore Hugging Face integration with Azure.


Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.