Skip to content
Back to Blog
1 min read

Hugging Face Model Hub: Finding and Using Pre-trained Models

This post shares practical, production-minded guidance on the Hugging Face Model Hub: how to find, evaluate, load, download, compare, and publish pre-trained models.

Understanding the Model Hub

# Headline figures for the Model Hub (as of August 2023)
hub_stats = dict(
    total_models=200_000,
    model_types=[
        # Text tasks
        "Text Generation",
        "Text Classification",
        "Token Classification",
        "Question Answering",
        "Translation",
        "Summarization",
        # Vision tasks
        "Image Classification",
        "Object Detection",
        # Audio tasks
        "Speech Recognition",
        "Text-to-Speech",
    ],
    frameworks=["PyTorch", "TensorFlow", "JAX", "ONNX"],
    licenses=["Apache 2.0", "MIT", "CC", "OpenRAIL", "Custom"],
)

Searching for Models

Using the Hub API

from huggingface_hub import HfApi, ModelFilter

api = HfApi()

# Ten text-generation models for the transformers library,
# ordered by download count (direction=-1 means descending)
generation_filter = ModelFilter(library="transformers", task="text-generation")
models = api.list_models(
    filter=generation_filter, sort="downloads", direction=-1, limit=10
)

for model in models:
    print(f"{model.modelId}: {model.downloads:,} downloads")

# Several criteria can be combined in a single ModelFilter
imdb_filter = ModelFilter(
    task="text-classification", language="en", trained_dataset="imdb"
)
models = api.list_models(filter=imdb_filter)

Filtering by Properties

# Find models by specific criteria
def find_models(task, min_downloads=1000, license_type=None):
    """Return Hub models for *task* that have at least *min_downloads* downloads.

    Args:
        task: Task name handed to ``ModelFilter`` (e.g. ``"summarization"``).
        min_downloads: Minimum download count a model needs to be included.
        license_type: Optional license identifier such as ``"apache-2.0"`` or
            ``"mit"``. When given, a model is kept only if one of its
            ``license:`` tags contains this text (case-insensitive).

    Returns:
        A list of dicts with ``"name"``, ``"downloads"``, ``"likes"`` and
        ``"tags"`` keys, in the order the Hub returned them (most downloaded
        first).
    """
    license_prefix = "license:"
    wanted_license = None if license_type is None else license_type.lower()

    models = api.list_models(
        filter=ModelFilter(task=task),
        sort="downloads",
        direction=-1
    )

    results = []
    for model in models:
        downloads = model.downloads
        if downloads is None:
            # No download count in the listing: it cannot be compared, so skip
            # the entry instead of raising TypeError on `None >= int`.
            continue
        if downloads < min_downloads:
            # The listing is requested in descending download order, so no
            # later entry can qualify; stop instead of paging through the
            # rest of the Hub.
            break

        if wanted_license is not None:
            # Match only against license tags (expected form on the Hub:
            # "license:apache-2.0") so that e.g. "mit" does not match an
            # unrelated tag that merely contains those letters.
            license_values = [
                tag[len(license_prefix):].lower()
                for tag in (model.tags or [])
                if tag.lower().startswith(license_prefix)
            ]
            if not any(wanted_license in value for value in license_values):
                continue

        results.append({
            "name": model.modelId,
            "downloads": downloads,
            "likes": model.likes,
            "tags": model.tags
        })

    return results

# Example: summarization models with at least 10,000 downloads
summarization_models = find_models(task="summarization", min_downloads=10_000)

Evaluating Models

Model Card Information

from huggingface_hub import ModelCard

# Load the model card for one repository
bart_repo = "facebook/bart-large-cnn"
card = ModelCard.load(bart_repo)

# A model card contains:
#   description, intended use, training data,
#   evaluation results, limitations, bias considerations
card_preview = card.content[:1000]  # First 1000 chars
print(card_preview)

# Structured metadata for the same repository
from huggingface_hub import model_info

info = model_info(bart_repo)
metadata_lines = [
    f"Pipeline tag: {info.pipeline_tag}",
    f"Downloads: {info.downloads}",
    f"Library: {info.library_name}",
    f"Tags: {info.tags}",
]
print("\n".join(metadata_lines))

Testing Models Quickly

from transformers import pipeline

# Quick test with pipeline API
def test_model(model_name, task):
    """Smoke-test *model_name* by running one sample input through a pipeline.

    Args:
        model_name: Model identifier passed to ``pipeline(..., model=...)``.
        task: Pipeline task. Supported values are ``"text-generation"``,
            ``"summarization"`` and ``"sentiment-analysis"``.

    Returns:
        ``{"status": "success", "result": <pipeline output>}`` when the sample
        call succeeds, otherwise ``{"status": "error", "message": <text>}``.
        An unsupported task is reported as an error before any model is loaded.
    """
    # Sample input and call options per supported task.
    sample_calls = {
        "text-generation": ("Hello, I am", {"max_length": 50}),
        "summarization": ("Long text to summarize...", {"max_length": 100}),
        "sentiment-analysis": ("I love this product!", {}),
    }

    if task not in sample_calls:
        # Fail fast with a clear message instead of loading the model and then
        # tripping over an unassigned `result`.
        supported = ", ".join(sorted(sample_calls))
        return {
            "status": "error",
            "message": f"Unsupported task {task!r}; supported tasks: {supported}",
        }

    sample_text, call_options = sample_calls[task]
    try:
        pipe = pipeline(task, model=model_name)
        result = pipe(sample_text, **call_options)
    except Exception as e:
        # Deliberately broad: this is a best-effort probe, and any failure
        # (download, load, inference) is reported rather than raised.
        return {"status": "error", "message": str(e)}
    return {"status": "success", "result": result}


# The name starts with "test_", so pytest would otherwise collect this helper
# as a test case and fail looking for `model_name` / `task` fixtures.
test_model.__test__ = False

# Smoke-test several (model, task) pairs
models_to_test = [
    ("gpt2", "text-generation"),
    ("facebook/bart-large-cnn", "summarization"),
    ("distilbert-base-uncased-finetuned-sst-2-english", "sentiment-analysis"),
]

for model, task in models_to_test:
    result = test_model(model_name=model, task=task)
    print(f"{model}: {result['status']}")

Loading Models

Basic Loading

from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM

# Auto classes detect the right model type for a checkpoint
gpt2_checkpoint = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(gpt2_checkpoint)
model = AutoModel.from_pretrained(gpt2_checkpoint)

# A task-specific auto class for the same checkpoint
model = AutoModelForCausalLM.from_pretrained(gpt2_checkpoint)

# Loading a specific revision ("main" here; a commit hash also works)
llama_revision = "main"
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf", revision=llama_revision
)

Loading with Options

import torch

# Load with specific configurations
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    torch_dtype=torch.float16,    # Half precision
    device_map="auto",            # Automatic device placement
    low_cpu_mem_usage=True,       # Memory efficient loading
    # SECURITY: trust_remote_code=True runs Python code shipped inside the
    # model repository. Keep it off unless the repo needs custom model code,
    # you have reviewed that code, and you pin `revision` to a commit hash.
    trust_remote_code=False
)

# Load with quantization
from transformers import BitsAndBytesConfig

# 4-bit loading with float16 compute
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16, load_in_4bit=True
)

quantized_load_options = {
    "quantization_config": bnb_config,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf", **quantized_load_options
)

Loading Gated Models

# Some models require access approval
# 1. Go to the model page
# 2. Accept the license
# 3. Use your token

import os

from huggingface_hub import login

# Never hard-code an access token in source: it ends up in version control,
# notebooks and logs. Provide it from outside the code instead, e.g.
#   export HF_TOKEN=hf_...
# When HF_TOKEN is set, the Hugging Face libraries pick it up from the
# environment; otherwise fall back to an interactive login, which prompts
# for the token and stores it locally.
if not os.environ.get("HF_TOKEN"):
    login()

# Now load gated models
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    token=True  # Use the token from the environment / stored login
)

Downloading Models for Offline Use

from huggingface_hub import snapshot_download

# Download an entire model repository into a chosen folder
bart_download_options = {
    "local_dir": "./models/bart-large-cnn",
    "local_dir_use_symlinks": False,
}
local_dir = snapshot_download(
    repo_id="facebook/bart-large-cnn", **bart_download_options
)

# Download only selected files from a repository
wanted_files = ["*.json", "*.safetensors"]
skipped_files = ["*.bin"]  # Prefer safetensors
local_dir = snapshot_download(
    repo_id="meta-llama/Llama-2-7b-hf",
    allow_patterns=wanted_files,
    ignore_patterns=skipped_files,
)

# Load the model from the downloaded directory
model = AutoModelForCausalLM.from_pretrained(local_dir)

Model Comparison

def compare_models(model_names, test_inputs, task="text-generation", max_length=100):
    """Run every model in *model_names* on the same inputs and collect the outputs.

    Args:
        model_names: Iterable of model identifiers passed to ``pipeline``.
        test_inputs: Iterable of input texts. It is read once up front, so a
            one-shot iterator or generator is still applied to every model.
        task: Pipeline task used for all models.
        max_length: ``max_length`` value forwarded to each pipeline call
            (defaults to 100, the value previously hard-coded).

    Returns:
        A dict mapping each model name to a list of
        ``{"input": <text>, "output": <pipeline output>}`` dicts, one per
        input, in input order.
    """
    # Materialise once: iterating the caller's object inside the model loop
    # would leave nothing for the second and later models if it is an iterator.
    inputs = list(test_inputs)

    results = {}
    for model_name in model_names:
        print(f"Testing {model_name}...")
        pipe = pipeline(task, model=model_name)
        results[model_name] = [
            {"input": input_text, "output": pipe(input_text, max_length=max_length)}
            for input_text in inputs
        ]

    return results

# Summarization models to compare against each other
models = [
    "facebook/bart-large-cnn",
    "google/pegasus-xsum",
    "philschmid/bart-large-cnn-samsum",
]

# Inputs that every model is run on
test_texts = ["Long article text here...", "Another article to summarize..."]

comparison = compare_models(
    model_names=models,
    test_inputs=test_texts,
    task="summarization",
)

Publishing to the Hub

from huggingface_hub import HfApi, create_repo

# Create repository
# exist_ok=True keeps this step from failing when the repository already
# exists, so the script can be re-run to push updated weights.
create_repo("username/my-model", private=False, exist_ok=True)

# Push model
model.push_to_hub("username/my-model")
tokenizer.push_to_hub("username/my-model")

# Or upload files directly
api = HfApi()
api.upload_folder(
    folder_path="./my_model",
    repo_id="username/my-model",
    repo_type="model"
)

Tomorrow we’ll explore the Transformers library in depth.

Resources

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.