Skip to content
Back to Blog
1 min read

Space APIs: Working with Satellite Data on Azure

I wrote “Space APIs: Working with Satellite Data on Azure” to share practical, production-minded guidance on this topic.

Azure Space Data APIs

Microsoft’s partnerships with space industry providers give you access to several sources of satellite data:

// Configure satellite data client
/// <summary>
/// Thin HTTP client for the satellite data API: image catalog search and image download.
/// </summary>
/// <remarks>
/// Each instance creates and owns its own <see cref="HttpClient"/>, which is not disposed by
/// this class; reuse one instance for the lifetime of the application.
/// </remarks>
public class SpaceDataClient
{
    private readonly HttpClient _httpClient;
    private readonly string _apiKey;

    /// <summary>
    /// Creates a client that sends <paramref name="apiKey"/> in the <c>X-API-Key</c> header
    /// of every request.
    /// </summary>
    /// <param name="apiKey">API key for the service; must not be null, empty or whitespace.</param>
    /// <exception cref="ArgumentException">Thrown when <paramref name="apiKey"/> is missing.</exception>
    public SpaceDataClient(string apiKey)
    {
        if (string.IsNullOrWhiteSpace(apiKey))
        {
            throw new ArgumentException("An API key is required.", nameof(apiKey));
        }

        _apiKey = apiKey;
        _httpClient = new HttpClient
        {
            BaseAddress = new Uri("https://api.spacedata.azure.com/v1/")
        };
        _httpClient.DefaultRequestHeaders.Add("X-API-Key", apiKey);
    }

    /// <summary>
    /// Searches the image catalog for Sentinel-2 and Landsat-8 scenes that match the filters.
    /// </summary>
    /// <param name="areaOfInterest">Polygon the search is restricted to.</param>
    /// <param name="startDate">Start of the acquisition date range.</param>
    /// <param name="endDate">End of the acquisition date range.</param>
    /// <param name="maxCloudCover">Upper bound on cloud cover; defaults to 20.</param>
    /// <returns>The catalog deserialized from the JSON response body.</returns>
    /// <exception cref="HttpRequestException">Thrown when the response status is not successful.</exception>
    public async Task<SatelliteImageCatalog> SearchImagesAsync(
        GeoPolygon areaOfInterest,
        DateTime startDate,
        DateTime endDate,
        double maxCloudCover = 20)
    {
        var request = new ImageSearchRequest
        {
            Geometry = areaOfInterest,
            DateRange = new DateRange(startDate, endDate),
            MaxCloudCover = maxCloudCover,
            Providers = new[] { "Sentinel-2", "Landsat-8" }
        };

        // Dispose the response once the body has been read so the connection is released.
        using var response = await _httpClient.PostAsJsonAsync("images/search", request);
        response.EnsureSuccessStatusCode();

        return await response.Content.ReadFromJsonAsync<SatelliteImageCatalog>();
    }

    /// <summary>
    /// Opens a download stream for one image, optionally limited to specific bands.
    /// </summary>
    /// <param name="imageId">Identifier of the image; must not be null, empty or whitespace.</param>
    /// <param name="bands">
    /// Band names to request, sent as a comma-separated <c>bands</c> query parameter.
    /// When null or empty, no <c>bands</c> parameter is sent.
    /// </param>
    /// <returns>The response body stream; the caller is responsible for disposing it.</returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="imageId"/> is missing.</exception>
    public async Task<Stream> DownloadImageAsync(
        string imageId,
        string[] bands = null)
    {
        if (string.IsNullOrWhiteSpace(imageId))
        {
            throw new ArgumentException("An image id is required.", nameof(imageId));
        }

        // Escape caller-supplied values so characters such as '/', '?', '&' or '#'
        // cannot change the request path or inject extra query parameters.
        var url = $"images/{Uri.EscapeDataString(imageId)}/download";
        if (bands != null && bands.Length > 0)
        {
            var escapedBands = Array.ConvertAll(bands, band => Uri.EscapeDataString(band));
            url += $"?bands={string.Join(",", escapedBands)}";
        }

        return await _httpClient.GetStreamAsync(url);
    }
}

/// <summary>
/// Request body that <c>SpaceDataClient.SearchImagesAsync</c> posts to the
/// <c>images/search</c> endpoint.
/// </summary>
public record ImageSearchRequest
{
    /// <summary>Area of interest the search is restricted to.</summary>
    public GeoPolygon Geometry { get; init; }
    /// <summary>Acquisition date range to search within.</summary>
    public DateRange DateRange { get; init; }
    /// <summary>
    /// Upper bound on cloud cover. The client defaults this to 20 —
    /// presumably a percentage; confirm against the API.
    /// </summary>
    public double MaxCloudCover { get; init; }
    /// <summary>Names of the imagery providers to search (the client sends "Sentinel-2" and "Landsat-8").</summary>
    public string[] Providers { get; init; }
}

/// <summary>
/// Polygon geometry used for search areas and image footprints.
/// </summary>
public record GeoPolygon
{
    /// <summary>Geometry type discriminator; always the literal "Polygon".</summary>
    public string Type => "Polygon";
    /// <summary>
    /// Polygon coordinates as a three-level nested array of doubles.
    /// NOTE(review): presumably GeoJSON nesting (rings, then positions, then ordinates) — confirm.
    /// </summary>
    public double[][][] Coordinates { get; init; }
}

/// <summary>
/// A pair of date-times delimiting an interval. Whether the bounds are inclusive
/// is decided by the consumer of this type, not by the record itself.
/// </summary>
public record DateRange(DateTime Start, DateTime End);

/// <summary>
/// Result of an image search, as deserialized by <c>SpaceDataClient.SearchImagesAsync</c>.
/// </summary>
public record SatelliteImageCatalog
{
    /// <summary>Images returned by the search.</summary>
    public List<SatelliteImage> Images { get; init; }
    /// <summary>
    /// Count reported by the service.
    /// NOTE(review): may exceed <c>Images.Count</c> if the API pages results — confirm.
    /// </summary>
    public int TotalCount { get; init; }
}

/// <summary>
/// Metadata describing a single satellite image in a search result.
/// </summary>
public record SatelliteImage
{
    /// <summary>Image identifier; this is the value passed to <c>SpaceDataClient.DownloadImageAsync</c>.</summary>
    public string Id { get; init; }
    /// <summary>Name of the imagery provider.</summary>
    public string Provider { get; init; }
    /// <summary>When the image was acquired. NOTE(review): time zone is not specified here — confirm (likely UTC).</summary>
    public DateTime AcquisitionDate { get; init; }
    /// <summary>Cloud cover of the image; presumably on the same scale as <c>ImageSearchRequest.MaxCloudCover</c> — confirm.</summary>
    public double CloudCover { get; init; }
    /// <summary>Polygon outlining the area covered by the image.</summary>
    public GeoPolygon Footprint { get; init; }
    /// <summary>Band names that can be requested when downloading the image.</summary>
    public string[] AvailableBands { get; init; }
    /// <summary>Spatial resolution. NOTE(review): units are not stated here — presumably metres per pixel; confirm.</summary>
    public double Resolution { get; init; }
}

Processing Satellite Imagery

Use Azure Functions for image processing:

# Azure Function for satellite image analysis
import json
import os

import azure.functions as func
import numpy as np
import rasterio
from azure.storage.blob import BlobServiceClient
from rasterio.io import MemoryFile

def main(blob: func.InputStream) -> str:
    """Process satellite imagery for vegetation analysis.

    Reads the whole blob into an in-memory raster, derives NDVI from raster
    bands 4 and 8, uploads the NDVI raster via ``save_ndvi_image`` and returns
    the vegetation statistics.

    Args:
        blob: Input stream of the image blob; its full content is read into
            memory.

    Returns:
        The dictionary produced by ``calculate_vegetation_stats``, serialized
        as a JSON string.
    """
    # Band indexes are 1-based raster positions. 4 (red) and 8 (NIR) assume
    # the file stacks Sentinel-2 bands in their native order -- TODO confirm
    # for every input source.
    with MemoryFile(blob.read()) as memfile, memfile.open() as dataset:
        red = dataset.read(4).astype(float)
        nir = dataset.read(8).astype(float)
        profile = dataset.profile

    # Everything needed is now in memory, so the dataset is closed before the
    # compute and upload steps run.
    ndvi = calculate_ndvi(red, nir)
    stats = calculate_vegetation_stats(ndvi)
    save_ndvi_image(ndvi, profile, blob.name)

    return json.dumps(stats)


def _as_float_band(band: np.ndarray) -> np.ndarray:
    """Return *band* as a floating-point array, converting only when needed."""
    band = np.asarray(band)
    if np.issubdtype(band.dtype, np.floating):
        return band
    return band.astype(np.float64)


def calculate_ndvi(red: np.ndarray, nir: np.ndarray) -> np.ndarray:
    """Calculate NDVI from red and NIR bands.

    NDVI is ``(nir - red) / (nir + red)``, clipped to ``[-1, 1]``.

    Args:
        red: Red band values.
        nir: Near-infrared band values, broadcast-compatible with ``red``.

    Returns:
        Floating-point NDVI array. Pixels where ``nir + red == 0`` yield 0.
    """
    # Integer rasters (e.g. uint16) would wrap around in ``nir - red`` and can
    # overflow in ``nir + red``; promote them to float before any arithmetic.
    red = _as_float_band(red)
    nir = _as_float_band(nir)

    # Avoid division by zero: the numerator is also 0 for those pixels, so
    # dividing by 1 produces an NDVI of 0 there.
    denominator = nir + red
    denominator[denominator == 0] = 1

    ndvi = (nir - red) / denominator

    # Clip to valid NDVI range
    return np.clip(ndvi, -1, 1)


def calculate_vegetation_stats(ndvi: np.ndarray) -> dict:
    """Calculate vegetation statistics from NDVI.

    NaN pixels are ignored everywhere: they are excluded from mean/max/min
    and from the denominator of the class percentages, so the percentages
    describe the valid pixels only.

    Classes (by NDVI value):
        bare soil            [-1.0, 0.1)
        sparse vegetation    [0.1, 0.3)
        moderate vegetation  [0.3, 0.5)
        dense vegetation     [0.5, 1.0]

    Args:
        ndvi: NDVI values; may contain NaN.

    Returns:
        Dict of floats with keys ``mean_ndvi``, ``max_ndvi``, ``min_ndvi`` and
        one ``*_percent`` key per class. When there are no valid pixels the
        three summary values are NaN and every percentage is 0.0.
    """
    ndvi = np.asarray(ndvi)
    valid_pixels = int(np.count_nonzero(~np.isnan(ndvi)))

    if valid_pixels == 0:
        # Nothing to summarize (empty or all-NaN input); avoid dividing by zero.
        nan = float("nan")
        return {
            "mean_ndvi": nan,
            "max_ndvi": nan,
            "min_ndvi": nan,
            "bare_soil_percent": 0.0,
            "sparse_vegetation_percent": 0.0,
            "moderate_vegetation_percent": 0.0,
            "dense_vegetation_percent": 0.0,
        }

    def percent_between(lower: float, upper: float, include_upper: bool = False) -> float:
        """Share of valid pixels with lower <= NDVI < upper (or <= upper)."""
        below_upper = (ndvi <= upper) if include_upper else (ndvi < upper)
        # Comparisons against NaN are False, so NaN pixels never count here.
        in_class = np.count_nonzero((ndvi >= lower) & below_upper)
        return float(in_class / valid_pixels * 100)

    return {
        "mean_ndvi": float(np.nanmean(ndvi)),
        "max_ndvi": float(np.nanmax(ndvi)),
        "min_ndvi": float(np.nanmin(ndvi)),
        "bare_soil_percent": percent_between(-1, 0.1),
        "sparse_vegetation_percent": percent_between(0.1, 0.3),
        "moderate_vegetation_percent": percent_between(0.3, 0.5),
        "dense_vegetation_percent": percent_between(0.5, 1, include_upper=True),
    }


def save_ndvi_image(ndvi: np.ndarray, profile: dict, original_name: str):
    """Save NDVI as a single-band float32 raster and upload it to blob storage.

    The raster is written in memory using ``profile`` (overridden to one
    float32 band with LZW compression) and uploaded to the
    ``processed-imagery`` container, overwriting any blob of the same name.

    Args:
        ndvi: NDVI values to write as band 1.
        profile: Raster profile of the source image; it is not modified.
        original_name: Name of the source blob. ``_ndvi`` is inserted before
            its extension to form the output name.

    Raises:
        KeyError: If ``STORAGE_CONNECTION_STRING`` is not set in the
            environment.
    """
    # Work on a copy so the caller's profile keeps its original band count
    # and dtype.
    output_profile = dict(profile)
    output_profile.update(
        dtype=rasterio.float32,
        count=1,
        compress='lzw'
    )

    # Insert the suffix before the final extension only. A plain
    # str.replace('.tif', ...) would rewrite every occurrence in the name and
    # would leave names such as 'scene.TIF' unchanged.
    stem, extension = os.path.splitext(original_name)
    output_name = f"{stem}_ndvi{extension}"

    blob_service = BlobServiceClient.from_connection_string(
        os.environ['STORAGE_CONNECTION_STRING']
    )
    container = blob_service.get_container_client('processed-imagery')

    with MemoryFile() as memfile:
        # Closing the dataset flushes the encoded raster into memfile.
        with memfile.open(**output_profile) as dst:
            dst.write(ndvi.astype(rasterio.float32), 1)

        # Rewind so the upload reads the file from its first byte.
        memfile.seek(0)
        container.upload_blob(output_name, memfile, overwrite=True)

Time-Series Analysis

Analyze vegetation changes over time:

# Synapse notebook for time-series satellite analysis
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.window import Window

# Read NDVI time series data
ndvi_df = spark.read.parquet(
    "abfss://satellite-data@datalake.dfs.core.windows.net/ndvi/"
)

# Calculate seasonal trends: one row per region and calendar month
seasonal_trend = (
    ndvi_df
    .withColumn("year", year("acquisition_date"))
    .withColumn("month", month("acquisition_date"))
    .groupBy("region_id", "year", "month")
    .agg(
        avg("mean_ndvi").alias("monthly_avg_ndvi"),
        stddev("mean_ndvi").alias("ndvi_stddev"),
        count("*").alias("observation_count"),
    )
)

# Detect anomalies using rolling statistics.
# The frame is the current row plus the 11 preceding rows, i.e. 12 monthly
# rows in total (rowsBetween(-12, 0) would span 13). The frame counts rows,
# not calendar months, so it only equals 12 months when a region has no
# missing months.
window_spec = (
    Window
    .partitionBy("region_id")
    .orderBy("year", "month")
    .rowsBetween(-11, 0)
)

# z_score is null while the rolling standard deviation is null or zero
# (e.g. the first row of a region); such rows are never flagged as anomalies.
anomaly_detection = (
    seasonal_trend
    .withColumn("rolling_avg", avg("monthly_avg_ndvi").over(window_spec))
    .withColumn("rolling_std", stddev("monthly_avg_ndvi").over(window_spec))
    .withColumn(
        "z_score",
        (col("monthly_avg_ndvi") - col("rolling_avg")) / col("rolling_std"),
    )
    .withColumn("is_anomaly", abs(col("z_score")) > 2)
)

# Identify regions with significant vegetation decline.
# z_score < -2 already implies is_anomaly, so a single filter is enough.
declining_regions = (
    anomaly_detection
    .filter(col("z_score") < -2)
    .select("region_id", "year", "month", "monthly_avg_ndvi", "z_score")
    .orderBy("z_score")
)

# Save results
declining_regions.write.format("delta").mode("overwrite").save(
    "abfss://satellite-data@datalake.dfs.core.windows.net/vegetation_alerts/"
)

display(declining_regions)

Integration with Weather Data

Correlate satellite imagery with weather:

# Combine satellite and weather data
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.stat import Correlation

weather_df = spark.read.parquet(
    "abfss://weather-data@datalake.dfs.core.windows.net/daily/"
)

# Join datasets: keep every NDVI observation and attach the weather record
# for the same region and date when one exists.
combined_df = (
    ndvi_df.alias("sat")
    .join(
        weather_df.alias("weather"),
        (col("sat.region_id") == col("weather.region_id")) &
        (col("sat.acquisition_date") == col("weather.date")),
        "left",
    )
    .select(
        "sat.region_id",
        "sat.acquisition_date",
        "sat.mean_ndvi",
        "weather.temperature_avg",
        "weather.precipitation",
        "weather.humidity",
    )
)

# Analyze correlation between weather and vegetation.
# mean_ndvi must be part of the vector: without it the matrix would only
# correlate the weather variables with each other. Rows and columns of the
# matrix follow the order of this list.
correlation_columns = ["mean_ndvi", "temperature_avg", "precipitation", "humidity"]

# Create feature vector
assembler = VectorAssembler(
    inputCols=correlation_columns,
    outputCol="weather_features"
)

# VectorAssembler rejects null inputs, so drop incomplete rows first
# (this also removes NDVI observations that had no weather match).
features_df = assembler.transform(combined_df.dropna())

# Calculate correlation matrix
correlation_matrix = Correlation.corr(features_df, "weather_features")
print("Weather-Vegetation Correlation Matrix:")
print(correlation_columns)
print(correlation_matrix.collect()[0][0].toArray())

Real-Time Monitoring Dashboard

Power BI integration for visualization:

// KQL query for satellite monitoring dashboard
// Summarizes NDVI per region and day over the last 30 days, attaches region
// metadata and derives a health label from the daily average.
SatelliteObservations
// Keep only observations from the last 30 days.
| where Timestamp > ago(30d)
// One row per region and day (Timestamp is bucketed into 1-day bins).
| summarize
    AvgNDVI = avg(MeanNDVI),
    MinNDVI = min(MeanNDVI),
    MaxNDVI = max(MeanNDVI),
    ObservationCount = count()
    by RegionId, bin(Timestamp, 1d)
// Inner join: regions without a matching row in Regions are dropped.
| join kind=inner (
    Regions | project RegionId, RegionName, CropType
) on RegionId
// MinNDVI, MaxNDVI and ObservationCount are not projected, so they do not
// reach the chart.
| project
    Timestamp,
    RegionName,
    CropType,
    AvgNDVI,
    // case() returns the label of the first matching threshold; anything
    // below 0.2 falls through to "Critical".
    HealthStatus = case(
        AvgNDVI >= 0.6, "Healthy",
        AvgNDVI >= 0.4, "Moderate",
        AvgNDVI >= 0.2, "Stressed",
        "Critical"
    )
| render timechart

Summary

Space APIs and satellite data enable:

  • Agricultural monitoring at scale
  • Climate change analysis
  • Urban development tracking
  • Disaster response support
  • Environmental compliance

Azure provides the infrastructure to process petabytes of satellite imagery.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.