Hugging Face Model Hub: Finding and Using Pre-trained Models
The Hugging Face Model Hub hosts over 200,000 models for various ML tasks. Today we’ll explore how to effectively find, evaluate, and use models from the Hub.
Understanding the Model Hub
# Model Hub statistics (as of August 2023)
hub_stats = {
"total_models": 200000,
"model_types": [
"Text Generation",
"Text Classification",
"Token Classification",
"Question Answering",
"Translation",
"Summarization",
"Image Classification",
"Object Detection",
"Speech Recognition",
"Text-to-Speech"
],
"frameworks": ["PyTorch", "TensorFlow", "JAX", "ONNX"],
"licenses": ["Apache 2.0", "MIT", "CC", "OpenRAIL", "Custom"]
}
Searching for Models
Using the Hub API
from huggingface_hub import HfApi, ModelFilter
api = HfApi()
# Search for text generation models
models = api.list_models(
filter=ModelFilter(
task="text-generation",
library="transformers"
),
sort="downloads",
direction=-1,
limit=10
)
for model in models:
    print(f"{model.modelId}: {model.downloads:,} downloads")
# Search with multiple filters
models = api.list_models(
filter=ModelFilter(
task="text-classification",
language="en",
trained_dataset="imdb"
)
)
Filtering by Properties
# Find models by specific criteria
def find_models(task, min_downloads=1000, license_type=None):
    filters = ModelFilter(task=task)
    models = api.list_models(
        filter=filters,
        sort="downloads",
        direction=-1
    )
    results = []
    for model in models:
        # Some entries report no download count; treat missing values as 0
        if (model.downloads or 0) >= min_downloads:
            if license_type is None or license_type in str(model.tags):
                results.append({
                    "name": model.modelId,
                    "downloads": model.downloads,
                    "likes": model.likes,
                    "tags": model.tags
                })
    return results
# Example: Find popular summarization models
summarization_models = find_models("summarization", min_downloads=10000)
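For example, to peek at the top hits (the field names here come from the find_models helper defined above):
# Show a few of the most-downloaded matches
for m in summarization_models[:5]:
    print(f"{m['name']}: {m['downloads']:,} downloads, {m['likes']} likes")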
Evaluating Models
Model Card Information
from huggingface_hub import ModelCard
# Get model card
card = ModelCard.load("facebook/bart-large-cnn")
# Model card contains:
# - Description
# - Intended use
# - Training data
# - Evaluation results
# - Limitations
# - Bias considerations
print(card.content[:1000]) # First 1000 chars
# Get specific metadata
from huggingface_hub import model_info
info = model_info("facebook/bart-large-cnn")
print(f"Pipeline tag: {info.pipeline_tag}")
print(f"Downloads: {info.downloads}")
print(f"Library: {info.library_name}")
print(f"Tags: {info.tags}")
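Beyond the metadata, it is worth checking which weight formats a repository actually ships, for example safetensors versus pickle-based .bin files. A quick sketch using list_repo_files:
from huggingface_hub import list_repo_files
# List the files in the repo and pick out the weight files
files = list_repo_files("facebook/bart-large-cnn")
weight_files = [f for f in files if f.endswith((".safetensors", ".bin"))]
print(weight_files)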
Testing Models Quickly
from transformers import pipeline
# Quick test with pipeline API
def test_model(model_name, task):
    try:
        pipe = pipeline(task, model=model_name)
        # Run a simple smoke test per task
        if task == "text-generation":
            result = pipe("Hello, I am", max_length=50)
        elif task == "summarization":
            result = pipe("Long text to summarize...", max_length=100)
        elif task == "sentiment-analysis":
            result = pipe("I love this product!")
        else:
            return {"status": "error", "message": f"No test input defined for task: {task}"}
        return {"status": "success", "result": result}
    except Exception as e:
        return {"status": "error", "message": str(e)}
# Test multiple models
models_to_test = [
("gpt2", "text-generation"),
("facebook/bart-large-cnn", "summarization"),
("distilbert-base-uncased-finetuned-sst-2-english", "sentiment-analysis")
]
for model, task in models_to_test:
    result = test_model(model, task)
    print(f"{model}: {result['status']}")
Loading Models
Basic Loading
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
# Auto classes detect the right model type
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModel.from_pretrained("gpt2")
# Task-specific auto classes
model = AutoModelForCausalLM.from_pretrained("gpt2")
# With specific revision
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf",
revision="main" # or specific commit hash
)
Loading with Options
import torch
# Load with specific configurations
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf",
torch_dtype=torch.float16, # Half precision
device_map="auto", # Automatic device placement
low_cpu_mem_usage=True, # Memory efficient loading
trust_remote_code=True # For custom model code
)
# Load with quantization
from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16
)
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf",
quantization_config=bnb_config,
device_map="auto"
)
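To confirm the quantized load actually shrank the model, one rough check is the model's reported memory footprint (this counts parameters and buffers, not total GPU usage):
# Rough size check after loading
footprint_gb = model.get_memory_footprint() / 1024**3
print(f"Approximate model memory footprint: {footprint_gb:.2f} GB")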
Loading Gated Models
# Some models require access approval
# 1. Go to the model page
# 2. Accept the license
# 3. Use your token
from huggingface_hub import login
# Login with token
login(token="hf_your_token_here")
# Or set environment variable
import os
os.environ["HF_TOKEN"] = "hf_your_token_here"
# Now load gated models
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf",
token=True # Use stored token
)
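If a gated model still raises an authorization error, it helps to confirm which account the stored token resolves to:
from huggingface_hub import whoami
# Raises if no valid token is configured; otherwise shows the account name
print(whoami()["name"])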
Downloading Models for Offline Use
from huggingface_hub import snapshot_download
# Download entire model repository
local_dir = snapshot_download(
repo_id="facebook/bart-large-cnn",
local_dir="./models/bart-large-cnn",
local_dir_use_symlinks=False
)
# Download specific files only
local_dir = snapshot_download(
repo_id="meta-llama/Llama-2-7b-hf",
allow_patterns=["*.json", "*.safetensors"],
ignore_patterns=["*.bin"] # Prefer safetensors
)
# Load from local directory
model = AutoModelForCausalLM.from_pretrained(local_dir)
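Once the files are on disk, you can make sure nothing goes back to the network, either per call with local_files_only or process-wide with the HF_HUB_OFFLINE environment variable (set it before any Hub call):
import os
# Per-call: fail instead of downloading if something is missing locally
model = AutoModelForCausalLM.from_pretrained(local_dir, local_files_only=True)
# Process-wide: force offline mode for Hub downloads
os.environ["HF_HUB_OFFLINE"] = "1"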
Model Comparison
def compare_models(model_names, test_inputs, task="text-generation"):
    """Compare multiple models on the same inputs."""
    results = {}
    for model_name in model_names:
        print(f"Testing {model_name}...")
        pipe = pipeline(task, model=model_name)
        model_results = []
        for input_text in test_inputs:
            output = pipe(input_text, max_length=100)
            model_results.append({
                "input": input_text,
                "output": output
            })
        results[model_name] = model_results
    return results
# Compare summarization models
models = [
"facebook/bart-large-cnn",
"google/pegasus-xsum",
"philschmid/bart-large-cnn-samsum"
]
test_texts = [
"Long article text here...",
"Another article to summarize..."
]
comparison = compare_models(models, test_texts, task="summarization")
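Output quality is not the only axis worth comparing; a rough latency check is often useful too. A simple sketch, not a rigorous benchmark, using a hypothetical time_model helper:
import time
def time_model(model_name, text, task="summarization", runs=3):
    # Average wall-clock time per call (no separate warm-up run)
    pipe = pipeline(task, model=model_name)
    start = time.perf_counter()
    for _ in range(runs):
        pipe(text, max_length=100)
    return (time.perf_counter() - start) / runs
for name in models:
    print(f"{name}: {time_model(name, test_texts[0]):.2f}s per call")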
Publishing to the Hub
from huggingface_hub import HfApi, create_repo
# Create repository
create_repo("username/my-model", private=False)
# Push model
model.push_to_hub("username/my-model")
tokenizer.push_to_hub("username/my-model")
# Or upload files directly
api = HfApi()
api.upload_folder(
folder_path="./my_model",
repo_id="username/my-model",
repo_type="model"
)
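A freshly pushed repo has no model card, which makes it hard for others to evaluate (see the model card section above). A minimal sketch for generating and pushing one; the metadata values below are placeholders:
from huggingface_hub import ModelCard, ModelCardData
# Fill in basic metadata; every value here is an example
card_data = ModelCardData(language="en", license="apache-2.0", library_name="transformers")
card = ModelCard.from_template(card_data, model_id="username/my-model")
card.push_to_hub("username/my-model")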
Tomorrow we’ll explore the Transformers library in depth.