2 min read
Databricks AI: Unified Data and AI Platform
Databricks provides a unified platform for data engineering, ML, and AI. Here’s how to leverage its AI capabilities.
Databricks AI Patterns
# Databricks AI integration patterns
from databricks import sql
from databricks.sdk import WorkspaceClient
from mlflow import MlflowClient
import mlflow
class DatabricksAI:
    """Convenience wrapper around several Databricks AI services.

    Groups calls to the Foundation Model API, Vector Search, Model Serving,
    Feature Serving, and a small retrieval-augmented generation (RAG) helper.
    Service-specific SDKs are imported lazily inside each method, so only the
    packages for the features actually used need to be installed.
    """

    def __init__(self):
        """Create the workspace and MLflow clients with default arguments."""
        self.workspace = WorkspaceClient()
        self.mlflow_client = MlflowClient()

    def foundation_model_api(self, prompt: str, model: str = "databricks-meta-llama-3-70b-instruct"):
        """Send ``prompt`` to a Databricks Foundation Model chat session.

        Args:
            prompt: User message to send to the model.
            model: Name of the foundation model to chat with.

        Returns:
            The assistant's reply for this single-turn session.
        """
        from databricks_genai_inference import ChatSession

        chat = ChatSession(
            model=model,
            system_message="You are a helpful data analyst assistant.",
        )
        reply = chat.reply(prompt)
        # The documented way to read the answer is ``chat.last``; ``reply()``
        # records it on the session. Fall back to it when nothing is returned.
        return reply if reply is not None else chat.last

    def vector_search(self, query: str, index_name: str, num_results: int = 10, columns=None):
        """Run a similarity search against a Databricks Vector Search index.

        Args:
            query: Free-text query to search for.
            index_name: Name of the vector search index.
            num_results: Maximum number of rows to return.
            columns: Optional list of column names to return. Defaults to
                ``["id", "content", "metadata"]``.

        Returns:
            The raw response of ``similarity_search`` (a dict carrying a
            ``manifest`` of columns and a ``result`` with ``data_array`` rows).
        """
        from databricks.vector_search.client import VectorSearchClient

        if columns is None:
            columns = ["id", "content", "metadata"]

        client = VectorSearchClient()
        # Pass by keyword: the first positional parameter of ``get_index`` is
        # the endpoint name, not the index name.
        index = client.get_index(index_name=index_name)
        return index.similarity_search(
            query_text=query,
            num_results=num_results,
            columns=list(columns),
        )

    def create_vector_index(
        self,
        table_name: str,
        embedding_col: str,
        *,
        endpoint_name: str = "vector-search-endpoint",
        primary_key: str = "id",
        pipeline_type: str = "TRIGGERED",
        embedding_model_endpoint_name: str = "databricks-bge-large-en",
    ):
        """Create a Delta Sync vector search index on a Delta table.

        The index is named ``f"{table_name}_index"`` and its embeddings are
        computed by Databricks from the text in ``embedding_col`` using the
        given embedding model endpoint.

        Args:
            table_name: Source Delta table to index.
            embedding_col: Source text column to embed.
            endpoint_name: Vector search endpoint that hosts the index.
            primary_key: Primary-key column of the source table.
            pipeline_type: Sync mode, ``"TRIGGERED"`` or ``"CONTINUOUS"``.
            embedding_model_endpoint_name: Serving endpoint of the embedding
                model used to compute the vectors.

        Returns:
            The index object returned by ``create_delta_sync_index``.
        """
        from databricks.vector_search.client import VectorSearchClient

        client = VectorSearchClient()
        return client.create_delta_sync_index(
            endpoint_name=endpoint_name,
            index_name=f"{table_name}_index",
            source_table_name=table_name,
            pipeline_type=pipeline_type,
            primary_key=primary_key,
            # With a model endpoint supplied, the column holds source text to
            # embed, so it is passed as ``embedding_source_column``.
            embedding_source_column=embedding_col,
            embedding_model_endpoint_name=embedding_model_endpoint_name,
        )

    def serve_model(self, model_name: str, model_version: str):
        """Create a Model Serving endpoint for a registered model version.

        The endpoint is named ``f"{model_name}-endpoint"`` and is configured
        with a ``"Small"`` workload size and scale-to-zero enabled.

        Args:
            model_name: Name of the registered model.
            model_version: Version of the model to serve.

        Returns:
            The response of the deployment client's ``create_endpoint`` call.
        """
        from mlflow.deployments import get_deploy_client

        served_model = {
            "model_name": model_name,
            "model_version": model_version,
            "workload_size": "Small",
            "scale_to_zero_enabled": True,
        }
        deploy_client = get_deploy_client("databricks")
        return deploy_client.create_endpoint(
            name=f"{model_name}-endpoint",
            config={"served_models": [served_model]},
        )

    def feature_serving(self, feature_spec: dict):
        """Create the ``customer-features`` feature serving endpoint.

        Args:
            feature_spec: Mapping that must contain a ``"name"`` key holding
                the feature spec name to serve.

        Returns:
            The result of ``create_feature_serving_endpoint``.

        Raises:
            KeyError: If ``feature_spec`` has no ``"name"`` key.
        """
        from databricks.feature_engineering import FeatureEngineeringClient

        served_entity = {
            "entity_name": "customer",
            "feature_spec_name": feature_spec["name"],
        }
        fe = FeatureEngineeringClient()
        return fe.create_feature_serving_endpoint(
            name="customer-features",
            config={"served_entities": [served_entity]},
        )

    @staticmethod
    def _extract_column(results, column):
        """Return the non-null values of ``column`` from a similarity-search response.

        The response stores rows positionally in ``result.data_array`` and the
        column order in ``manifest.columns``, so the column name is first
        mapped to its position.

        Args:
            results: Response dict as returned by ``similarity_search``.
            column: Name of the column to extract.

        Returns:
            A list of the column's values as strings, skipping ``None``
            entries; empty when the response contains no rows.

        Raises:
            KeyError: If rows are present but ``column`` is not in the manifest.
        """
        rows = (results.get("result") or {}).get("data_array") or []
        if not rows:
            return []

        manifest_columns = (results.get("manifest") or {}).get("columns") or []
        names = [entry["name"] for entry in manifest_columns]
        if column not in names:
            raise KeyError(f"column {column!r} not present in search results: {names}")

        position = names.index(column)
        return [str(row[position]) for row in rows if row[position] is not None]

    def rag_with_unity_catalog(self, question: str, table_name: str):
        """Answer ``question`` with retrieval-augmented generation.

        Retrieves documents from the ``f"{table_name}_index"`` vector index,
        joins their ``content`` values into a context block, and asks the
        foundation model to answer using that context.

        Args:
            question: The user's question.
            table_name: Table whose vector index supplies the context.

        Returns:
            The foundation model's answer.
        """
        # Get relevant documents from the vector index.
        search_results = self.vector_search(question, f"{table_name}_index")

        # The response is a dict (manifest + data_array), not a list of row
        # dicts, so the content column has to be pulled out by position.
        passages = self._extract_column(search_results, "content")
        context = "\n\n".join(passages)

        # Generate the response using the Foundation Model.
        prompt = f"Context:\n{context}\n\nQuestion: {question}\n\nAnswer:"
        return self.foundation_model_api(prompt)
# AutoML for quick model training
from databricks import automl

# `train_df` is the training dataset and must be defined before this point.
# The run is bounded by `timeout_minutes` only: `max_trials` is deprecated
# and is not supported on Databricks Runtime 11.0 ML and above.
summary = automl.classify(
    dataset=train_df,
    target_col="label",
    timeout_minutes=30,
)

# Load the model produced by the best-scoring trial of the AutoML run.
best_model = summary.best_trial.load_model()
Databricks unifies data, ML, and AI on a single lakehouse platform.