Back to Blog
5 min read

Dashboards in Databricks: Building Data Applications

Databricks dashboards (including Lakeview) provide powerful visualization capabilities integrated with the lakehouse. This guide covers building effective dashboards.

Dashboard Types in Databricks

# Reference matrix of the dashboarding options Databricks offers. Each entry
# maps a dashboard flavor to a short description, its headline capabilities,
# and the audience/use case it suits best.
DASHBOARD_TYPES = {
    # Lakeview: the native, current-generation dashboard experience.
    "lakeview": {
        "description": "Native Databricks dashboards with AI features",
        "capabilities": [
            "Drag-and-drop design",
            "AI-generated visualizations",
            "Natural language filters",
            "Automatic formatting"
        ],
        "best_for": "Quick, self-service analytics"
    },
    # Classic Databricks SQL dashboards: query-backed, refreshed on a schedule.
    "sql_dashboards": {
        "description": "SQL-based dashboards with scheduled refresh",
        "capabilities": [
            "Parameter-driven queries",
            "Multiple visualization types",
            "Scheduled refresh",
            "Alerts and notifications"
        ],
        "best_for": "Operational dashboards"
    },
    # Notebook dashboards: full Python, so anything you can plot you can show.
    "notebook_dashboards": {
        "description": "Notebook-based dashboards with Python visualizations",
        "capabilities": [
            "Custom visualizations",
            "Complex calculations",
            "Interactive widgets",
            "ML model integration"
        ],
        "best_for": "Advanced analytics and ML dashboards"
    }
}

Creating SQL Dashboards

-- Dashboard query examples

-- KPI Card: Total Revenue (last complete month, with month-over-month change)
-- NOTE: filtering to a single month BEFORE applying LAG() leaves the window
-- with one row, so change_from_last was always NULL. Aggregate per month
-- first, compute the delta across all months, then keep the target month.
WITH monthly AS (
    SELECT
        month,
        SUM(revenue) AS total_revenue
    FROM monthly_sales
    GROUP BY month
)
SELECT
    total_revenue,
    total_revenue - LAG(total_revenue) OVER (ORDER BY month) AS change_from_last
FROM monthly
QUALIFY month = DATE_TRUNC('month', CURRENT_DATE - INTERVAL 1 MONTH);

-- Time Series: Revenue Trend
-- Daily revenue for the trailing 90 days plus a 7-day rolling average.
WITH daily AS (
    SELECT
        date,
        SUM(revenue) AS daily_revenue
    FROM sales
    WHERE date >= DATE_SUB(CURRENT_DATE, 90)
    GROUP BY date
)
SELECT
    date,
    daily_revenue,
    AVG(daily_revenue) OVER (
        ORDER BY date
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS rolling_7d_avg
FROM daily
ORDER BY date;

-- Bar Chart: Revenue by Category
-- Top 10 categories by revenue over the trailing 30 days.
SELECT
    p.category,
    SUM(s.revenue) AS revenue,
    COUNT(DISTINCT s.order_id) AS orders
FROM sales AS s
INNER JOIN products AS p
    ON s.product_id = p.product_id
WHERE s.date >= DATE_SUB(CURRENT_DATE, 30)
GROUP BY p.category
ORDER BY revenue DESC
LIMIT 10;

-- Table: Top Customers
-- NOTE: sales appears to hold one row per line item (the query already uses
-- COUNT(DISTINCT order_id)), so AVG(revenue) was the average *line item*
-- value, not the average order value. Derive it from the totals instead.
SELECT
    customer_name,
    SUM(revenue) AS total_revenue,
    COUNT(DISTINCT order_id) AS order_count,
    SUM(revenue) / COUNT(DISTINCT order_id) AS avg_order_value,
    MAX(date) AS last_order_date
FROM sales
JOIN customers USING (customer_id)
GROUP BY customer_name
ORDER BY total_revenue DESC
LIMIT 20;

-- Map: Revenue by State
-- Per-state revenue and distinct-customer counts for US customers only.
SELECT
    c.state,
    SUM(s.revenue) AS revenue,
    COUNT(DISTINCT s.customer_id) AS customers
FROM sales AS s
INNER JOIN customers AS c
    ON s.customer_id = c.customer_id
WHERE country = 'USA'
GROUP BY c.state;

Dashboard with Parameters

-- Parameterized dashboard queries
--
-- Date range parameter: {{ date_range }}
-- Category parameter:   {{ category }}
--
-- NOTE: dashboard parameters are substituted as raw text. The original
-- predicate ({{ category }} IS NULL OR category = {{ category }}) injected
-- the value unquoted, so a value like Electronics parses as a column
-- reference and the query fails. Quote the placeholder and treat an empty
-- value as "show all categories".
SELECT
    date,
    category,
    SUM(revenue) AS revenue
FROM sales
JOIN products USING (product_id)
WHERE date BETWEEN '{{ date_range.start }}' AND '{{ date_range.end }}'
  AND ('{{ category }}' = '' OR category = '{{ category }}')
GROUP BY date, category
ORDER BY date;

Python Notebook Dashboards

# Notebook-based dashboard with interactive widgets

import ipywidgets as widgets
from IPython.display import display
import plotly.express as px
import plotly.graph_objects as go
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

class InteractiveDashboard:
    """Interactive sales dashboard built from ipywidgets and Plotly.

    Loads a year of pre-aggregated sales data from the lakehouse once at
    construction time; all subsequent filtering happens on the cached pandas
    DataFrame, so widget interactions never re-query Spark.
    """

    def __init__(self):
        # Query Spark exactly once; widget callbacks only filter the cache.
        self.df = self._load_data()
        self._create_widgets()

    def _load_data(self):
        """Return the trailing 365 days of sales, aggregated per day/region/category."""
        return spark.sql("""
            SELECT
                date,
                region,
                category,
                SUM(revenue) as revenue,
                COUNT(*) as transactions
            FROM sales
            WHERE date >= DATE_SUB(CURRENT_DATE, 365)
            GROUP BY date, region, category
        """).toPandas()

    def _create_widgets(self):
        """Create the filter widgets, the refresh button, and the output area."""

        # 'All' is a sentinel option meaning "no filtering on this dimension".
        self.region_filter = widgets.SelectMultiple(
            options=['All'] + sorted(self.df['region'].unique().tolist()),
            value=['All'],
            description='Region:',
            disabled=False
        )

        self.category_filter = widgets.SelectMultiple(
            options=['All'] + sorted(self.df['category'].unique().tolist()),
            value=['All'],
            description='Category:',
            disabled=False
        )

        # Optional lower bound on dates; value None means "no bound".
        self.date_range = widgets.DatePicker(
            description='Start Date:',
            disabled=False
        )

        # Button that re-renders all visualizations from the current filters.
        self.update_btn = widgets.Button(
            description='Update Dashboard',
            button_style='primary'
        )
        self.update_btn.on_click(self._update_dashboard)

        # Output area the KPIs and charts are rendered into.
        self.output = widgets.Output()

    def _filter_data(self):
        """Return a copy of the data with every active widget filter applied."""
        filtered = self.df.copy()

        if 'All' not in self.region_filter.value:
            filtered = filtered[filtered['region'].isin(self.region_filter.value)]

        if 'All' not in self.category_filter.value:
            filtered = filtered[filtered['category'].isin(self.category_filter.value)]

        # BUG FIX: the start-date picker existed but was never applied.
        start = self.date_range.value
        if start is not None:
            # Local import keeps the module import block untouched; pandas is
            # already a transitive dependency of toPandas().
            import pandas as pd
            # Normalize both sides: toPandas() may yield datetime64 or
            # datetime.date for 'date', while DatePicker yields a date.
            filtered = filtered[pd.to_datetime(filtered['date']) >= pd.Timestamp(start)]

        return filtered

    def _update_dashboard(self, btn):
        """Re-render the KPIs and charts from the currently filtered data.

        `btn` is the clicked Button instance (unused; None on initial render).
        """

        self.output.clear_output()

        with self.output:
            filtered = self._filter_data()

            # Headline KPIs.
            total_revenue = filtered['revenue'].sum()
            total_transactions = filtered['transactions'].sum()
            # Guard against an empty selection to avoid division by zero.
            avg_transaction = total_revenue / total_transactions if total_transactions > 0 else 0

            print(f"Total Revenue: ${total_revenue:,.0f}")
            print(f"Total Transactions: {total_transactions:,}")
            print(f"Average Transaction: ${avg_transaction:,.2f}")

            # Revenue trend over time.
            trend_df = filtered.groupby('date')['revenue'].sum().reset_index()
            fig1 = px.line(trend_df, x='date', y='revenue', title='Revenue Trend')
            fig1.show()

            # Revenue by region.
            region_df = filtered.groupby('region')['revenue'].sum().reset_index()
            fig2 = px.bar(region_df, x='region', y='revenue', title='Revenue by Region')
            fig2.show()

            # Revenue share by category.
            cat_df = filtered.groupby('category')['revenue'].sum().reset_index()
            fig3 = px.pie(cat_df, values='revenue', names='category', title='Revenue by Category')
            fig3.show()

    def display(self):
        """Lay out the widgets, display the dashboard, and render it once."""

        # BUG FIX: the DatePicker was created but never displayed; it now
        # sits on the filter row with the other controls.
        filters = widgets.HBox([
            self.region_filter,
            self.category_filter,
            self.date_range
        ])

        display(widgets.VBox([
            widgets.HTML("<h1>Sales Dashboard</h1>"),
            filters,
            self.update_btn,
            self.output
        ]))

        # Initial render with default (unfiltered) selections.
        self._update_dashboard(None)

# Create and display dashboard
# dashboard = InteractiveDashboard()
# dashboard.display()

Dashboard API

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.sql import (
    CreateDashboardRequest,
    Dashboard,
    Widget
)

class DashboardManager:
    """Manage Databricks SQL dashboards programmatically via the SDK."""

    def __init__(self):
        # WorkspaceClient picks up host/auth from the environment or
        # ~/.databrickscfg — no explicit configuration here.
        self.client = WorkspaceClient()

    def create_dashboard(
        self,
        name: str,
        warehouse_id: str,
        queries: list
    ) -> Dashboard:
        """Create a dashboard with one widget per query config.

        Args:
            name: Display name of the new dashboard.
            warehouse_id: SQL warehouse used for any query config that does
                not carry its own 'warehouse_id' key.
            queries: Dicts with required 'name' and 'sql' keys, plus optional
                'warehouse_id', 'chart_type', 'options', and 'width'.

        Returns:
            The created Dashboard object.
        """

        dashboard = self.client.dashboards.create(
            name=name,
            parent=None  # Root folder
        )

        # One widget per query, laid out in a two-column grid.
        for i, query_config in enumerate(queries):
            self._add_widget(
                dashboard_id=dashboard.id,
                query_config=query_config,
                position=i,
                default_warehouse_id=warehouse_id
            )

        return dashboard

    def _add_widget(
        self,
        dashboard_id: str,
        query_config: dict,
        position: int,
        default_warehouse_id=None
    ):
        """Create a query and visualization, then attach them to the dashboard.

        BUG FIX: create_dashboard() accepted warehouse_id but never used it,
        while this method required query_config['warehouse_id'] — which the
        documented usage does not supply (KeyError). The per-query value now
        falls back to the dashboard-level default.
        """

        # Create the backing query first.
        query = self.client.queries.create(
            name=query_config['name'],
            data_source_id=query_config.get('warehouse_id', default_warehouse_id),
            query=query_config['sql']
        )

        # Attach a visualization to the query (defaults to a plain table).
        visualization = self.client.query_visualizations.create(
            query_id=query.id,
            type=query_config.get('chart_type', 'TABLE'),
            options=query_config.get('options', {})
        )

        # Place the widget on the dashboard in a two-column grid.
        self.client.dashboard_widgets.create(
            dashboard_id=dashboard_id,
            visualization_id=visualization.id,
            width=query_config.get('width', 1),
            options={
                "position": {"col": position % 2, "row": position // 2}
            }
        )

    def refresh_dashboard(self, dashboard_id: str):
        """Re-execute every query backing the dashboard's widgets."""
        dashboard = self.client.dashboards.get(dashboard_id)
        for widget in dashboard.widgets:
            # Widgets without a visualization (e.g. text boxes) have no query.
            if widget.visualization:
                self.client.queries.execute(widget.visualization.query.id)

    def schedule_refresh(
        self,
        dashboard_id: str,
        schedule: str  # Cron expression
    ):
        """Schedule automatic refresh (not yet implemented).

        TODO: implement against the available scheduling APIs.
        """
        pass

# Usage
# manager = DashboardManager()
# dashboard = manager.create_dashboard(
#     name="Sales Overview",
#     warehouse_id="warehouse_id",
#     queries=[
#         {"name": "Revenue KPI", "sql": "SELECT SUM(revenue) FROM sales", "chart_type": "counter"},
#         {"name": "Revenue Trend", "sql": "SELECT date, SUM(revenue) FROM sales GROUP BY date", "chart_type": "line"}
#     ]
# )

Conclusion

Databricks dashboards provide flexible options from quick Lakeview dashboards to sophisticated notebook-based visualizations. Choose the right type based on your audience and update requirements.

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.