
Transformers Library: The Swiss Army Knife of NLP

The Hugging Face Transformers library provides a unified API for working with thousands of pre-trained models. Today we’ll explore its core features and patterns.

Core Concepts

# Transformers library components
components = {
    "models": "Pre-trained model architectures",
    "tokenizers": "Text preprocessing",
    "pipelines": "High-level inference API",
    "trainers": "Training utilities",
    "configs": "Model configurations"
}

# Installation (pick the variant you need)
# pip install transformers torch           # Core library + PyTorch
# pip install "transformers[torch]"        # With PyTorch extras
# pip install "transformers[tf]"           # With TensorFlow extras
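
Each of these maps to a concrete import. As a quick orientation (a sketch, not an exhaustive list; AutoConfig lets you inspect a model's hyperparameters without downloading its weights):

# How the components map to imports
from transformers import (
    AutoModel,       # models: architectures + weights
    AutoTokenizer,   # tokenizers: text preprocessing
    pipeline,        # pipelines: high-level inference
    Trainer,         # trainers: training utilities
    AutoConfig,      # configs: model configurations
)

config = AutoConfig.from_pretrained("gpt2")
print(config.n_layer, config.n_head)  # 12 12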

Pipelines: Easy Inference

from transformers import pipeline

# Text generation
generator = pipeline("text-generation", model="gpt2")
result = generator("The future of AI is", max_length=50, num_return_sequences=2)

# Sentiment analysis
classifier = pipeline("sentiment-analysis")
result = classifier("I love this product!")
# [{'label': 'POSITIVE', 'score': 0.9998}]

# Named entity recognition
ner = pipeline("ner", aggregation_strategy="simple")
result = ner("Microsoft is based in Redmond, Washington")

# Question answering
qa = pipeline("question-answering")
result = qa(question="What is the capital?", context="France is a country. Paris is its capital.")

# Summarization
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
long_text = "..."  # any document longer than a few sentences
result = summarizer(long_text, max_length=100, min_length=30)

# Translation
translator = pipeline("translation_en_to_fr")
result = translator("Hello, how are you?")

# Zero-shot classification
classifier = pipeline("zero-shot-classification")
result = classifier(
    "I need to book a flight to Paris",
    candidate_labels=["travel", "cooking", "technology"]
)
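
Pipelines also take a device argument and can batch lists of inputs. A small sketch, assuming a CUDA GPU at index 0 (drop device to stay on CPU):

# GPU inference and batching
classifier = pipeline("sentiment-analysis", device=0)
texts = ["Great service!", "Terrible experience.", "It was okay."]
results = classifier(texts, batch_size=8)
for text, res in zip(texts, results):
    print(text, "->", res["label"], round(res["score"], 3))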

Working with Models and Tokenizers

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load tokenizer and model
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Tokenization
text = "Hello, how are you?"
tokens = tokenizer(text, return_tensors="pt")
print(tokens)
# {'input_ids': tensor([[15496,    11,   703,   389,   345,    30]]),
#  'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}

# Decode back to text
decoded = tokenizer.decode(tokens["input_ids"][0])
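
To see how the text was split into subwords, convert the ids back to token strings (the Ġ prefix marks a leading space in GPT-2's byte-level BPE):

# Inspect individual subword tokens
print(tokenizer.convert_ids_to_tokens(tokens["input_ids"][0].tolist()))
# ['Hello', ',', 'Ġhow', 'Ġare', 'Ġyou', '?']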

# Batch tokenization
# GPT-2 ships without a pad token, so assign one before padding batches
tokenizer.pad_token = tokenizer.eos_token

texts = ["First sentence", "Second sentence"]
batch_tokens = tokenizer(
    texts,
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="pt"
)

# Generation
outputs = model.generate(
    tokens["input_ids"],
    attention_mask=tokens["attention_mask"],
    pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no dedicated pad token
    max_length=50,
    num_return_sequences=1,
    temperature=0.7,
    do_sample=True
)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

Generation Parameters

# Comprehensive generation configuration
# (grouped for reference; pick one length limit, and either sampling or beams)
generation_config = {
    # Length control (max_new_tokens takes precedence over max_length)
    "max_length": 100,           # Total length, prompt included
    "max_new_tokens": 50,        # Tokens generated beyond the prompt
    "min_length": 10,

    # Sampling strategies
    "do_sample": True,           # Enable sampling
    "temperature": 0.7,          # Randomness (lower = more deterministic)
    "top_k": 50,                 # Keep only the k most likely tokens
    "top_p": 0.9,                # Nucleus sampling

    # Beam search
    "num_beams": 4,              # Beam search beams
    "early_stopping": True,

    # Repetition control
    "repetition_penalty": 1.2,
    "no_repeat_ngram_size": 3,

    # Output control
    "num_return_sequences": 3,
    "return_dict_in_generate": True,
    "output_scores": True
}

# Unpack into generate (drop whichever conflicting keys you are not using)
outputs = model.generate(**tokens, **generation_config)
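
The same settings can be bundled into a GenerationConfig object instead of a raw dict, which can be saved and reloaded alongside the model; a minimal sketch:

from transformers import GenerationConfig

gen_config = GenerationConfig(
    max_new_tokens=50,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)
outputs = model.generate(**tokens, generation_config=gen_config)
gen_config.save_pretrained("./my_model")  # stored as generation_config.json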

Different Model Types

from transformers import (
    AutoModelForCausalLM,           # GPT-style generation
    AutoModelForSeq2SeqLM,          # T5-style encoder-decoder
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    AutoModelForQuestionAnswering,
    AutoModelForMaskedLM
)

# Causal LM (GPT, LLaMA, etc.)
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Seq2Seq (T5, BART for generation)
model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")

# Classification
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=3
)

# Token classification (NER)
model = AutoModelForTokenClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=9
)
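
AutoModelForMaskedLM is imported above but not exercised; here is a minimal fill-in-the-blank sketch with BERT:

# Masked LM: predict the [MASK] token
from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch

mlm_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
mlm_model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")

inputs = mlm_tokenizer("The capital of France is [MASK].", return_tensors="pt")
with torch.no_grad():
    logits = mlm_model(**inputs).logits

# Locate [MASK] and take the highest-scoring token at that position
mask_pos = (inputs["input_ids"] == mlm_tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
predicted_id = logits[0, mask_pos].argmax(dim=-1)
print(mlm_tokenizer.decode(predicted_id))  # expected: "paris"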

Training with Trainer

from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset

# IMDB is binary sentiment, so pair it with a matching classification model
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Load dataset
dataset = load_dataset("imdb")

# Tokenize
def tokenize_function(examples):
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=512
    )

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=100,
    evaluation_strategy="epoch",  # renamed to eval_strategy in newer releases
    save_strategy="epoch",
    load_best_model_at_end=True
)

# Create trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer
)

# Train
trainer.train()

# Evaluate
results = trainer.evaluate()
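
By default evaluate() reports only the loss; to track task metrics, pass a compute_metrics function when building the Trainer. A minimal accuracy sketch:

import numpy as np

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": (predictions == labels).mean()}

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)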

Saving and Loading

# Save model and tokenizer
model.save_pretrained("./my_model")
tokenizer.save_pretrained("./my_model")

# Load later
model = AutoModelForCausalLM.from_pretrained("./my_model")
tokenizer = AutoTokenizer.from_pretrained("./my_model")

# Push to Hub (requires authenticating first, e.g. huggingface-cli login)
model.push_to_hub("username/my-model")
tokenizer.push_to_hub("username/my-model")

Memory Optimization

import torch

# Half precision
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16
)

# Device mapping (requires the accelerate package)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto"  # Automatic placement
)

# Memory efficient loading
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True
)

# Gradient checkpointing (for training)
model.gradient_checkpointing_enable()
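
For larger models, quantized loading reduces memory further. A sketch assuming the bitsandbytes and accelerate packages are installed:

# 8-bit quantized loading (pip install bitsandbytes accelerate)
from transformers import BitsAndBytesConfig

quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
)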

Tomorrow we’ll explore the Accelerate library for distributed training.


Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.