Hugging Face Model Hub: Finding and Using Pre-trained Models

The Hugging Face Model Hub hosts over 200,000 models (as of August 2023) spanning text, vision, and audio tasks. Today we'll explore how to find, evaluate, and use models from the Hub effectively.

Understanding the Model Hub

# Model Hub statistics (as of August 2023)
hub_stats = {
    "total_models": 200000,
    "model_types": [
        "Text Generation",
        "Text Classification",
        "Token Classification",
        "Question Answering",
        "Translation",
        "Summarization",
        "Image Classification",
        "Object Detection",
        "Speech Recognition",
        "Text-to-Speech"
    ],
    "frameworks": ["PyTorch", "TensorFlow", "JAX", "ONNX"],
    "licenses": ["Apache 2.0", "MIT", "CC", "OpenRAIL", "Custom"]
}

Searching for Models

Using the Hub API

from huggingface_hub import HfApi, ModelFilter

api = HfApi()

# Search for text generation models
models = api.list_models(
    filter=ModelFilter(
        task="text-generation",
        library="transformers"
    ),
    sort="downloads",
    direction=-1,
    limit=10
)

for model in models:
    print(f"{model.modelId}: {model.downloads:,} downloads")

# Search with multiple filters
models = api.list_models(
    filter=ModelFilter(
        task="text-classification",
        language="en",
        trained_dataset="imdb"
    )
)
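
A note on API drift: in newer releases of huggingface_hub, ModelFilter is deprecated and the filter fields are passed straight to list_models as keyword arguments. If the import above fails in your environment, the equivalent search looks roughly like this (a sketch assuming a recent huggingface_hub version):

# Same search without ModelFilter (newer huggingface_hub releases)
models = api.list_models(
    task="text-generation",
    library="transformers",
    sort="downloads",
    direction=-1,
    limit=10
)

for model in models:
    print(f"{model.id}: {model.downloads:,} downloads")  # model.id replaces model.modelId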

Filtering by Properties

# Find models by specific criteria
def find_models(task, min_downloads=1000, license_type=None):
    models = api.list_models(
        filter=ModelFilter(task=task),
        sort="downloads",
        direction=-1
    )

    results = []
    for model in models:
        downloads = model.downloads or 0
        if downloads < min_downloads:
            break  # results arrive sorted by downloads, so we can stop here
        tags = model.tags or []
        if license_type is None or any(license_type in tag for tag in tags):
            results.append({
                "name": model.modelId,
                "downloads": downloads,
                "likes": model.likes,
                "tags": tags
            })

    return results

# Example: Find popular summarization models
summarization_models = find_models("summarization", min_downloads=10000)

Evaluating Models

Model Card Information

from huggingface_hub import ModelCard

# Get model card
card = ModelCard.load("facebook/bart-large-cnn")

# Model card contains:
# - Description
# - Intended use
# - Training data
# - Evaluation results
# - Limitations
# - Bias considerations
print(card.content[:1000])  # First 1000 chars

# Get specific metadata
from huggingface_hub import model_info

info = model_info("facebook/bart-large-cnn")
print(f"Pipeline tag: {info.pipeline_tag}")
print(f"Downloads: {info.downloads}")
print(f"Library: {info.library_name}")
print(f"Tags: {info.tags}")

Testing Models Quickly

from transformers import pipeline

# Quick test with pipeline API
def test_model(model_name, task):
    try:
        pipe = pipeline(task, model=model_name)
        # Run a simple smoke test per task
        if task == "text-generation":
            result = pipe("Hello, I am", max_length=50)
        elif task == "summarization":
            result = pipe("Long text to summarize...", max_length=100)
        elif task == "sentiment-analysis":
            result = pipe("I love this product!")
        else:
            raise ValueError(f"No test input defined for task: {task}")
        return {"status": "success", "result": result}
    except Exception as e:
        return {"status": "error", "message": str(e)}

# Test multiple models
models_to_test = [
    ("gpt2", "text-generation"),
    ("facebook/bart-large-cnn", "summarization"),
    ("distilbert-base-uncased-finetuned-sst-2-english", "sentiment-analysis")
]

for model, task in models_to_test:
    result = test_model(model, task)
    print(f"{model}: {result['status']}")

Loading Models

Basic Loading

from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM

# Auto classes detect the right model type
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModel.from_pretrained("gpt2")

# Task-specific auto classes
model = AutoModelForCausalLM.from_pretrained("gpt2")

# With specific revision
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    revision="main"  # or specific commit hash
)

Loading with Options

import torch

# Load with specific configurations
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    torch_dtype=torch.float16,    # Half precision
    device_map="auto",            # Automatic device placement
    low_cpu_mem_usage=True,       # Memory efficient loading
    trust_remote_code=True        # For custom model code
)

# Load with quantization
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    quantization_config=bnb_config,
    device_map="auto"
)
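
Note that 4-bit loading relies on the bitsandbytes and accelerate packages and, in the typical setup, a CUDA GPU; a quick sanity check before attempting it (a sketch, not a hard requirement of every bitsandbytes version):

# Quick environment check before 4-bit loading (typical CUDA-only setup)
import importlib.util
import torch

assert importlib.util.find_spec("bitsandbytes") is not None, "pip install bitsandbytes"
assert importlib.util.find_spec("accelerate") is not None, "pip install accelerate"
assert torch.cuda.is_available(), "4-bit quantization typically requires a CUDA GPU"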

Loading Gated Models

# Some models require access approval
# 1. Go to the model page
# 2. Accept the license
# 3. Use your token

from huggingface_hub import login

# Login with token
login(token="hf_your_token_here")

# Or set environment variable
import os
os.environ["HF_TOKEN"] = "hf_your_token_here"

# Now load gated models
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    token=True  # Use stored token
)

Downloading Models for Offline Use

from huggingface_hub import snapshot_download

# Download entire model repository
local_dir = snapshot_download(
    repo_id="facebook/bart-large-cnn",
    local_dir="./models/bart-large-cnn",
    local_dir_use_symlinks=False
)

# Download specific files only
local_dir = snapshot_download(
    repo_id="meta-llama/Llama-2-7b-hf",
    allow_patterns=["*.json", "*.safetensors"],
    ignore_patterns=["*.bin"]  # Prefer safetensors
)

# Load from local directory
model = AutoModelForCausalLM.from_pretrained(local_dir)
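
To guarantee no network calls at load time, pass local_files_only=True (or set the HF_HUB_OFFLINE=1 environment variable before starting the process); a minimal sketch reusing the directory downloaded above:

# Load strictly from local files; raise instead of attempting a download
model = AutoModelForCausalLM.from_pretrained(
    local_dir,
    local_files_only=True
)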

Model Comparison

def compare_models(model_names, test_inputs, task="text-generation"):
    """Compare multiple models on same inputs."""
    results = {}

    for model_name in model_names:
        print(f"Testing {model_name}...")
        pipe = pipeline(task, model=model_name)

        model_results = []
        for input_text in test_inputs:
            output = pipe(input_text, max_length=100)
            model_results.append({
                "input": input_text,
                "output": output
            })

        results[model_name] = model_results

    return results

# Compare summarization models
models = [
    "facebook/bart-large-cnn",
    "google/pegasus-xsum",
    "philschmid/bart-large-cnn-samsum"
]

test_texts = [
    "Long article text here...",
    "Another article to summarize..."
]

comparison = compare_models(models, test_texts, task="summarization")
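
To eyeball the outputs side by side, walk the returned dictionary (this assumes the summarization pipeline's default output format, a list of dicts with a summary_text field):

# Print each model's summaries next to the input they came from
for model_name, outputs in comparison.items():
    print(f"\n=== {model_name} ===")
    for item in outputs:
        print(f"Input:   {item['input'][:60]}...")
        print(f"Summary: {item['output'][0]['summary_text']}")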

Publishing to the Hub

from huggingface_hub import HfApi, create_repo

# Create repository
create_repo("username/my-model", private=False)

# Push model
model.push_to_hub("username/my-model")
tokenizer.push_to_hub("username/my-model")

# Or upload files directly
api = HfApi()
api.upload_folder(
    folder_path="./my_model",
    repo_id="username/my-model",
    repo_type="model"
)
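
It's also worth publishing a model card alongside the weights so the repository documents intended use and limitations; a minimal sketch using ModelCard from huggingface_hub (the content below is placeholder text, not a complete card):

from huggingface_hub import ModelCard

card_content = """---
license: apache-2.0
tags:
  - text-classification
---

# My Model

Describe the model, intended use, training data, evaluation, and limitations here.
"""

ModelCard(card_content).push_to_hub("username/my-model")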

Tomorrow we’ll explore the Transformers library in depth.

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.