1 min read
Hugging Face Model Hub: Finding and Using Pre-trained Models
This post shares practical, production-minded guidance on finding, evaluating, and using pre-trained models from the Hugging Face Model Hub.
Understanding the Model Hub
# Snapshot of Model Hub figures (as of August 2023)
hub_stats = dict(
    total_models=200_000,
    model_types=[
        "Text Generation",
        "Text Classification",
        "Token Classification",
        "Question Answering",
        "Translation",
        "Summarization",
        "Image Classification",
        "Object Detection",
        "Speech Recognition",
        "Text-to-Speech",
    ],
    frameworks=["PyTorch", "TensorFlow", "JAX", "ONNX"],
    licenses=["Apache 2.0", "MIT", "CC", "OpenRAIL", "Custom"],
)
Searching for Models
Using the Hub API
from huggingface_hub import HfApi, ModelFilter
api = HfApi()

# Ten most-downloaded text-generation models for the transformers library
generation_filter = ModelFilter(task="text-generation", library="transformers")
models = api.list_models(
    filter=generation_filter,
    sort="downloads",
    direction=-1,  # -1 requests descending order
    limit=10,
)
for model in models:
    print(f"{model.modelId}: {model.downloads:,} downloads")

# Several criteria can be combined in a single filter
imdb_filter = ModelFilter(
    task="text-classification",
    language="en",
    trained_dataset="imdb",
)
models = api.list_models(filter=imdb_filter)
Filtering by Properties
# Find models by specific criteria
def find_models(task, min_downloads=1000, license_type=None):
    """Return Hub models for ``task`` that meet a download threshold.

    The listing is requested sorted by downloads in descending order, so the
    returned list is ordered from most to least downloaded.

    Args:
        task: Task identifier passed to ``ModelFilter`` (e.g. "summarization").
        min_downloads: Minimum download count a model needs to be included.
        license_type: Optional text that must appear in at least one of the
            model's tags (e.g. "apache-2.0"). ``None`` disables the check.

    Returns:
        A list of dicts with the keys "name", "downloads", "likes" and "tags".
    """
    filters = ModelFilter(task=task)
    models = api.list_models(
        filter=filters,
        sort="downloads",
        direction=-1,
    )
    results = []
    for model in models:
        downloads = model.downloads
        if downloads is None:
            # No count reported for this entry, so it cannot be compared.
            continue
        if downloads < min_downloads:
            # The listing is sorted by downloads (descending): every later
            # entry is below the threshold too, so stop paging through the Hub.
            break
        if license_type is not None:
            # Match within individual tags rather than the list's string
            # form, which could match across tag boundaries.
            tags = model.tags or []
            if not any(license_type in tag for tag in tags):
                continue
        results.append({
            "name": model.modelId,
            "downloads": downloads,
            "likes": model.likes,
            "tags": model.tags,
        })
    return results
# Example: popular summarization models (at least 10,000 downloads)
summarization_models = find_models(task="summarization", min_downloads=10000)
Evaluating Models
Model Card Information
from huggingface_hub import ModelCard
# Load the model card for a repository
card = ModelCard.load("facebook/bart-large-cnn")

# A model card contains:
#   - Description
#   - Intended use
#   - Training data
#   - Evaluation results
#   - Limitations
#   - Bias considerations
print(card.content[:1000])  # Preview: first 1000 chars

# Fetch specific metadata for the same repository
from huggingface_hub import model_info
info = model_info("facebook/bart-large-cnn")
for label, attribute in (
    ("Pipeline tag", "pipeline_tag"),
    ("Downloads", "downloads"),
    ("Library", "library_name"),
    ("Tags", "tags"),
):
    # Attributes are read one at a time, in the order they are printed.
    print(f"{label}: {getattr(info, attribute)}")
Testing Models Quickly
from transformers import pipeline
# Quick test with pipeline API
def test_model(model_name, task):
    """Smoke-test a Hub model by running one sample input through a pipeline.

    Args:
        model_name: Model id handed to ``pipeline``.
        task: Pipeline task; one of "text-generation", "summarization" or
            "sentiment-analysis".

    Returns:
        ``{"status": "success", "result": ...}`` when the pipeline ran, or
        ``{"status": "error", "message": ...}`` when the task is not
        supported or when loading/running the model raised.
    """
    # Sample input and call options for each supported task.
    sample_calls = {
        "text-generation": ("Hello, I am", {"max_length": 50}),
        "summarization": ("Long text to summarize...", {"max_length": 100}),
        "sentiment-analysis": ("I love this product!", {}),
    }
    if task not in sample_calls:
        # Reject unknown tasks before loading anything; otherwise the model
        # would be loaded only to fail later with an unrelated error message.
        return {"status": "error", "message": f"Unsupported task: {task!r}"}

    sample_text, call_options = sample_calls[task]
    try:
        pipe = pipeline(task, model=model_name)
        # Run simple test
        result = pipe(sample_text, **call_options)
    except Exception as e:
        # Deliberately broad: any load or inference failure is reported to
        # the caller as an error status instead of aborting a batch of checks.
        return {"status": "error", "message": str(e)}
    return {"status": "success", "result": result}


# The "test_" prefix would otherwise make pytest collect this helper as a test.
test_model.__test__ = False
# Smoke-test one model per task
models_to_test = [
    ("gpt2", "text-generation"),
    ("facebook/bart-large-cnn", "summarization"),
    ("distilbert-base-uncased-finetuned-sst-2-english", "sentiment-analysis"),
]
for model, task in models_to_test:
    result = test_model(model, task)
    # Prints "<model>: <status>"
    print(model, result["status"], sep=": ")
Loading Models
Basic Loading
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
# The Auto* classes work out the right model type from the checkpoint
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModel.from_pretrained("gpt2")

# Task-specific Auto class
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Pin a revision: "main" here, or a specific commit hash
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", revision="main")
Loading with Options
import torch
# Load with specific configurations
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    torch_dtype=torch.float16,  # Half precision
    device_map="auto",          # Automatic device placement
    low_cpu_mem_usage=True,     # Memory efficient loading
    # trust_remote_code=True runs Python code shipped in the model repository
    # on this machine. This checkpoint loads with the library's built-in
    # classes, so keep it off; enable it only for repositories that need
    # custom model code and whose code you have reviewed.
    trust_remote_code=False,
)

# Load with quantization
from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    quantization_config=bnb_config,
    device_map="auto",
)
Loading Gated Models
# Gated models require access approval before they can be loaded:
#   1. Go to the model page
#   2. Accept the license
#   3. Use your token
# NOTE: "hf_your_token_here" is a placeholder; never commit a real token.
import os
from huggingface_hub import login

# Option A: log in with the token
login(token="hf_your_token_here")

# Option B: or set the environment variable
os.environ["HF_TOKEN"] = "hf_your_token_here"

# Gated models can now be loaded; token=True uses the stored token
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", token=True)
Downloading Models for Offline Use
from huggingface_hub import snapshot_download
# Download a whole model repository into a local folder
local_dir = snapshot_download(
    repo_id="facebook/bart-large-cnn",
    local_dir_use_symlinks=False,
    local_dir="./models/bart-large-cnn",
)

# Download only selected files
local_dir = snapshot_download(
    repo_id="meta-llama/Llama-2-7b-hf",
    ignore_patterns=["*.bin"],  # Prefer safetensors
    allow_patterns=["*.json", "*.safetensors"],
)

# Load the model from the directory returned by the last download
model = AutoModelForCausalLM.from_pretrained(local_dir)
Model Comparison
def compare_models(model_names, test_inputs, task="text-generation"):
    """Run each model over the same inputs and gather the outputs per model.

    Args:
        model_names: Model ids; one pipeline is created for each.
        test_inputs: Texts fed to every model, each with ``max_length=100``.
        task: Pipeline task used for every model.

    Returns:
        A dict mapping each model name to a list of
        ``{"input": ..., "output": ...}`` dicts, in input order.
    """
    comparison = {}
    for name in model_names:
        print(f"Testing {name}...")
        run = pipeline(task, model=name)
        comparison[name] = [
            {"input": text, "output": run(text, max_length=100)}
            for text in test_inputs
        ]
    return comparison
# Run three summarization models over the same articles
models = [
    "facebook/bart-large-cnn",
    "google/pegasus-xsum",
    "philschmid/bart-large-cnn-samsum",
]
test_texts = [
    "Long article text here...",
    "Another article to summarize...",
]
comparison = compare_models(
    model_names=models,
    test_inputs=test_texts,
    task="summarization",
)
Publishing to the Hub
from huggingface_hub import HfApi, create_repo
# Target repository, shared by every call below
repo_id = "username/my-model"

# Create repository; exist_ok=True keeps a re-run from failing when the
# repository already exists
create_repo(repo_id, private=False, exist_ok=True)

# Push model
model.push_to_hub(repo_id)
tokenizer.push_to_hub(repo_id)

# Or upload files directly
api = HfApi()
api.upload_folder(
    folder_path="./my_model",
    repo_id=repo_id,
    repo_type="model",
)
Tomorrow we’ll explore the Transformers library in depth.
Resources
- Hugging Face Hub
- Hub Documentation
- Model Cards

## Takeaways

Add a concise, personal takeaway and recommended next steps here.