Back to Blog
2 min read

Azure AI Vision Updates: New Features for 2024

Azure AI Vision has received significant updates. Here’s what’s new and how to use these capabilities.

Image Analysis 4.0

from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
from azure.identity import DefaultAzureCredential

# Shared Image Analysis client referenced by the helper functions in this
# file. The endpoint is a placeholder: substitute your own Vision resource URL.
_endpoint = "https://your-vision.cognitiveservices.azure.com/"
_credential = DefaultAzureCredential()
client = ImageAnalysisClient(endpoint=_endpoint, credential=_credential)

def analyze_image(image_url: str) -> dict:
    """Analyze an image and flatten the result into a plain dict.

    Issues a single ``client.analyze_from_url`` call requesting caption,
    dense captions, objects, tags, people and read (OCR). Smart crops are
    not requested here because this function never returned them; use
    ``smart_crop`` for that feature.

    Args:
        image_url: URL of the image to analyze.

    Returns:
        A dict with the keys ``caption`` (str or None), ``dense_captions``
        (list of str), ``objects`` (list of ``{"name", "confidence"}`` dicts
        built from each object's first tag), ``tags`` (list of str),
        ``people_count`` (int) and ``text`` (the ``result.read.blocks``
        value exactly as returned by the SDK, or ``[]``).
    """
    result = client.analyze_from_url(
        image_url=image_url,
        visual_features=[
            VisualFeatures.CAPTION,
            VisualFeatures.DENSE_CAPTIONS,
            VisualFeatures.OBJECTS,
            VisualFeatures.TAGS,
            VisualFeatures.PEOPLE,
            VisualFeatures.READ,
        ],
    )

    # Each result section may be absent, so every field falls back to an
    # empty value instead of raising AttributeError.
    caption = result.caption.text if result.caption else None

    dense_captions = []
    if result.dense_captions:
        dense_captions = [c.text for c in result.dense_captions.list]

    objects = []
    if result.objects:
        # Skip objects without tags: indexing tags[0] on an empty list
        # would raise IndexError and discard the whole analysis.
        objects = [
            {"name": o.tags[0].name, "confidence": o.tags[0].confidence}
            for o in result.objects.list
            if o.tags
        ]

    tags = [t.name for t in result.tags.list] if result.tags else []
    people_count = len(result.people.list) if result.people else 0
    text = result.read.blocks if result.read else []

    return {
        "caption": caption,
        "dense_captions": dense_captions,
        "objects": objects,
        "tags": tags,
        "people_count": people_count,
        "text": text,
    }

Dense Captioning

def get_dense_captions(image_url: str) -> list[dict]:
    """Get multiple captions describing different regions of an image.

    Args:
        image_url: URL of the image to analyze.

    Returns:
        One dict per dense caption with the keys ``text``, ``confidence``
        and ``bounding_box`` (the SDK's bounding-box object, passed through
        unchanged). Returns an empty list when the result carries no
        dense-captions section.
    """
    result = client.analyze_from_url(
        image_url=image_url,
        visual_features=[VisualFeatures.DENSE_CAPTIONS],
    )

    # Same guard analyze_image uses: a missing section yields an empty
    # list instead of an AttributeError on ``None.list``.
    if not result.dense_captions:
        return []

    return [
        {
            "text": caption.text,
            "confidence": caption.confidence,
            "bounding_box": caption.bounding_box,
        }
        for caption in result.dense_captions.list
    ]

Smart Cropping

def smart_crop(image_url: str, aspect_ratios: list[float]) -> list[dict]:
    """Get smart crop suggestions for multiple aspect ratios.

    Args:
        image_url: URL of the image to analyze.
        aspect_ratios: Target width/height ratios, forwarded to the service
            as ``smart_crops_aspect_ratios``. NOTE(review): the service
            reference lists supported values as 0.75 to 1.8 inclusive;
            values outside that range are presumably rejected. Confirm
            against the current API documentation.

    Returns:
        One dict per suggested crop with the keys ``aspect_ratio`` and
        ``bounding_box`` (a dict of ``x``, ``y``, ``width``, ``height``).
        Returns an empty list when the result carries no smart-crops
        section.
    """
    result = client.analyze_from_url(
        image_url=image_url,
        visual_features=[VisualFeatures.SMART_CROPS],
        smart_crops_aspect_ratios=aspect_ratios,
    )

    # A missing section yields an empty list instead of an AttributeError
    # on ``None.list``.
    if not result.smart_crops:
        return []

    return [
        {
            "aspect_ratio": crop.aspect_ratio,
            "bounding_box": {
                "x": crop.bounding_box.x,
                "y": crop.bounding_box.y,
                "width": crop.bounding_box.width,
                "height": crop.bounding_box.height,
            },
        }
        for crop in result.smart_crops.list
    ]

# Get crops for social media.
# The service documents supported smart-crop aspect ratios as 0.75-1.8
# (inclusive), so the 1.91:1 wide-card ratio is clamped to 1.8 here.
# `image_url` is expected to be defined by the caller before this line runs.
crops = smart_crop(image_url, [1.0, 1.8, 0.8])  # Square, Twitter (clamped from 1.91), Pinterest

Background Removal

def remove_background(image_url: str) -> bytes:
    """Remove background from image.

    Calls ``client.analyze_from_url`` with an empty ``visual_features`` list
    and ``background_removal_mode="foreground"``, then returns the
    ``foreground_image_data`` attribute of the result.

    NOTE(review): the published ``azure-ai-vision-imageanalysis`` reference
    does not appear to list a ``background_removal_mode`` keyword on
    ``analyze_from_url`` or a ``foreground_image_data`` attribute on the
    result. Background removal was presumably exposed through the separate
    Segment REST API / older preview SDK. Verify this call against the
    installed SDK version before relying on it.

    Args:
        image_url: URL of the image to process.

    Returns:
        Whatever ``result.foreground_image_data`` holds; annotated as bytes.
    """

    result = client.analyze_from_url(
        image_url=image_url,
        visual_features=[],  # Background removal is separate
        # NOTE(review): confirm this keyword is accepted by the client.
        background_removal_mode="foreground"
    )

    # NOTE(review): confirm this attribute exists on the result object.
    return result.foreground_image_data

Best Practices

  1. Use appropriate features - Only request what you need
  2. Handle confidence scores - Filter low-confidence results
  3. Combine with GPT-4V - Vision 4.0 for detection, GPT-4V for reasoning
  4. Batch processing - Process multiple images efficiently
  5. Cache results - Store analysis for reuse

Conclusion

Azure AI Vision 4.0 brings powerful new features such as dense captioning and smart cropping. Combine it with GPT-4 Vision (GPT-4V) for comprehensive image understanding.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.