
Fine-Tuning with LoRA: Efficient Model Customization

Low-Rank Adaptation (LoRA) makes fine-tuning efficient by training only a small number of injected parameters while keeping the base model weights frozen. This cuts the number of trainable parameters by orders of magnitude and substantially reduces GPU memory requirements, while achieving results comparable to full fine-tuning on many tasks.

Understanding LoRA

Instead of updating all model weights, LoRA injects trainable low-rank matrices into transformer layers. These adapters capture task-specific knowledge without modifying the original model.
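Concretely, each adapted layer computes W·x + (alpha/r)·B·A·x, where W is frozen and only the small matrices A and B are trained. Here is a toy sketch of that update (illustrative only, not how peft implements it internally):

import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    """Toy illustration: a frozen linear layer plus a trainable low-rank update."""
    def __init__(self, base: nn.Linear, r: int = 16, alpha: int = 32):
        super().__init__()
        self.base = base
        for p in self.base.parameters():
            p.requires_grad = False  # base weights stay frozen
        # A (r x in) gets a small random init; B (out x r) starts at zero,
        # so the wrapped layer initially behaves exactly like the base layer.
        self.A = nn.Parameter(torch.randn(r, base.in_features) * 0.01)
        self.B = nn.Parameter(torch.zeros(base.out_features, r))
        self.scaling = alpha / r  # corresponds to lora_alpha / r in LoraConfig

    def forward(self, x):
        # W x + (alpha/r) * B A x
        return self.base(x) + (x @ self.A.T @ self.B.T) * self.scaling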

from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from datasets import load_dataset
import torch

# Load base model
model_name = "microsoft/phi-2"
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
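# phi-2's tokenizer has no pad token by default, so reuse the EOS token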
tokenizer.pad_token = tokenizer.eos_token

# Configure LoRA
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,  # Rank of update matrices
    lora_alpha=32,  # Scaling factor
    lora_dropout=0.1,
    target_modules=["q_proj", "k_proj", "v_proj", "dense"],
    bias="none"
)

# Apply LoRA to model
model = get_peft_model(base_model, lora_config)

# Check trainable parameters
def print_trainable_parameters(model):
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    print(f"Trainable: {trainable:,} / {total:,} ({100 * trainable / total:.2f}%)")

print_trainable_parameters(model)
# Example output (exact counts vary with model size, rank, and target
# modules); with r=16 on phi-2 the trainable share is well under 1%
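peft's wrapped model also exposes an equivalent built-in helper, so the manual function above is optional:

model.print_trainable_parameters()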

Training with LoRA

from trl import SFTTrainer

def prepare_dataset(examples):
    """Format examples for instruction fine-tuning."""
    prompts = []
    for instruction, response in zip(examples["instruction"], examples["response"]):
        prompt = f"### Instruction:\n{instruction}\n\n### Response:\n{response}"
        prompts.append(prompt)
    return {"text": prompts}

# Load and prepare dataset
dataset = load_dataset("json", data_files="training_data.json")
dataset = dataset.map(prepare_dataset, batched=True)
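# prepare_dataset assumes each JSON record carries "instruction" and
# "response" fields, e.g. (hypothetical sample):
#   {"instruction": "Explain what LoRA does.", "response": "LoRA trains..."}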

# Training configuration
training_args = TrainingArguments(
    output_dir="./lora-output",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    warmup_steps=100,
    logging_steps=10,
    save_strategy="epoch",
    fp16=True
)
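# Effective batch size: 4 per device x 4 accumulation steps = 16 sequences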

# Initialize trainer
# (these keyword arguments match older trl releases; recent versions
# move dataset_text_field and max_seq_length into SFTConfig)
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    tokenizer=tokenizer,
    dataset_text_field="text",
    max_seq_length=512
)

# Train
trainer.train()

# Save LoRA adapters (small file!)
model.save_pretrained("./lora-adapters")

Merging and Deployment

from peft import PeftModel

# Load base model and merge LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(model_name)
lora_model = PeftModel.from_pretrained(base_model, "./lora-adapters")

# Merge adapters into base model
merged_model = lora_model.merge_and_unload()

# Save merged model and tokenizer together for deployment
merged_model.save_pretrained("./merged-model")
tokenizer.save_pretrained("./merged-model")
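Once merged, the directory behaves like any standard Hugging Face checkpoint. A quick sanity check, sketched with the transformers text-generation pipeline (the prompt is illustrative):

from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="./merged-model",
    tokenizer="./merged-model",
    device_map="auto"
)

result = generator(
    "### Instruction:\nExplain LoRA in one sentence.\n\n### Response:\n",
    max_new_tokens=64
)
print(result[0]["generated_text"])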

LoRA enables rapid experimentation with model customization. Train multiple adapters for different tasks and swap them at inference time without reloading the base model.
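With peft this is straightforward: adapters are attached to one loaded base model under names and switched on demand. A sketch, assuming a second hypothetical adapter directory ./lora-sql-adapters:

from transformers import AutoModelForCausalLM
from peft import PeftModel

# Load the multi-GB base model once
base = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", device_map="auto")

# Attach the first adapter under a name, then add a second
# ("./lora-sql-adapters" is a hypothetical second adapter directory)
model = PeftModel.from_pretrained(base, "./lora-adapters", adapter_name="instruct")
model.load_adapter("./lora-sql-adapters", adapter_name="sql")

# Switch tasks without reloading the base weights
model.set_adapter("sql")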

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.