1 min read
Azure AI Vision Updates: New Features for 2024
This post shares practical, production-minded guidance on the new Azure AI Vision features for 2024, with Python examples for each one.
Image Analysis 4.0
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
from azure.identity import DefaultAzureCredential
# Module-level Image Analysis client shared by every helper below.
# The endpoint is a placeholder: replace it with your own Vision resource URL.
# DefaultAzureCredential is used instead of an API key, so no secret is
# embedded in the source.
client = ImageAnalysisClient(
    endpoint="https://your-vision.cognitiveservices.azure.com/",
    credential=DefaultAzureCredential()
)
def analyze_image(image_url: str) -> dict:
    """Analyze an image with several Image Analysis 4.0 features in one call.

    Requests caption, dense captions, objects, tags, people and read (OCR)
    results for the image at ``image_url`` and flattens the response into a
    plain dictionary. Smart crops are not requested here because nothing in
    the returned summary uses them; call ``smart_crop`` for crop suggestions.

    Args:
        image_url: URL of the image to analyze.

    Returns:
        A dict with the keys:

        * ``caption``: caption text, or ``None`` if no caption was returned.
        * ``dense_captions``: list of region caption strings.
        * ``objects``: list of ``{"name", "confidence"}`` dicts built from the
          first tag of each detected object; objects without any tag are
          skipped.
        * ``tags``: list of tag names.
        * ``people_count``: number of detected people.
        * ``text``: ``result.read.blocks`` passed through as-is from the
          response, or ``[]`` if no read result was returned.
    """
    result = client.analyze_from_url(
        image_url=image_url,
        visual_features=[
            VisualFeatures.CAPTION,
            VisualFeatures.DENSE_CAPTIONS,
            VisualFeatures.OBJECTS,
            VisualFeatures.TAGS,
            VisualFeatures.PEOPLE,
            VisualFeatures.READ,
        ],
    )

    # Guard on `o.tags` so an object with an empty tag list is skipped
    # instead of raising IndexError on `tags[0]`.
    objects = []
    if result.objects:
        objects = [
            {"name": o.tags[0].name, "confidence": o.tags[0].confidence}
            for o in result.objects.list
            if o.tags
        ]

    return {
        "caption": result.caption.text if result.caption else None,
        "dense_captions": (
            [c.text for c in result.dense_captions.list]
            if result.dense_captions
            else []
        ),
        "objects": objects,
        "tags": [t.name for t in result.tags.list] if result.tags else [],
        "people_count": len(result.people.list) if result.people else 0,
        "text": result.read.blocks if result.read else [],
    }
Dense Captioning
def get_dense_captions(image_url: str) -> list[dict]:
    """Get multiple captions describing different regions of an image.

    Args:
        image_url: URL of the image to analyze.

    Returns:
        One dict per region caption with the keys ``text``, ``confidence``
        and ``bounding_box`` (the bounding box is passed through unchanged
        from the response). Returns an empty list when the response carries
        no dense-caption section.
    """
    result = client.analyze_from_url(
        image_url=image_url,
        visual_features=[VisualFeatures.DENSE_CAPTIONS],
    )

    # Same guard as analyze_image: a missing section yields an empty result
    # rather than an AttributeError on `.list`.
    dense_captions = result.dense_captions
    if not dense_captions:
        return []

    return [
        {
            "text": caption.text,
            "confidence": caption.confidence,
            "bounding_box": caption.bounding_box,
        }
        for caption in dense_captions.list
    ]
Smart Cropping
def smart_crop(image_url: str, aspect_ratios: list[float]) -> list[dict]:
    """Get smart crop suggestions for one or more aspect ratios.

    Args:
        image_url: URL of the image to analyze.
        aspect_ratios: Target aspect ratios (width divided by height). Per
            the service documentation, supported values lie between 0.75
            and 1.8 inclusive; values outside that range are rejected by
            the service, not by this function.

    Returns:
        One dict per suggested crop with the keys ``aspect_ratio`` and
        ``bounding_box`` (itself a dict with ``x``, ``y``, ``width`` and
        ``height``). Returns an empty list when the response carries no
        smart-crop section.
    """
    result = client.analyze_from_url(
        image_url=image_url,
        visual_features=[VisualFeatures.SMART_CROPS],
        smart_crops_aspect_ratios=aspect_ratios,
    )

    # Same guard as analyze_image: a missing section yields an empty result
    # rather than an AttributeError on `.list`.
    smart_crops = result.smart_crops
    if not smart_crops:
        return []

    return [
        {
            "aspect_ratio": crop.aspect_ratio,
            "bounding_box": {
                "x": crop.bounding_box.x,
                "y": crop.bounding_box.y,
                "width": crop.bounding_box.width,
                "height": crop.bounding_box.height,
            },
        }
        for crop in smart_crops.list
    ]
# Get crops for social media.
# The service documents a supported aspect-ratio range (width / height) of
# 0.75 to 1.8, so a 1.91:1 link-card format has to be approximated with 1.8.
# `image_url` is expected to be defined by the surrounding code.
crops = smart_crop(image_url, [1.0, 1.8, 0.8])  # Square, widest landscape, 4:5 portrait
Background Removal
def remove_background(image_url: str) -> bytes:
    """Request a background-removed (foreground) version of an image.

    NOTE(review): ``background_removal_mode`` and ``foreground_image_data``
    do not appear among the documented ``analyze_from_url`` keywords or
    result attributes of the Image Analysis client. Confirm against the
    installed SDK version before relying on this helper. TODO verify.

    Args:
        image_url: URL of the image to process.

    Returns:
        Whatever the response exposes as ``foreground_image_data``.
    """
    request_options = {
        "image_url": image_url,
        "visual_features": [],  # Background removal is separate
        "background_removal_mode": "foreground",
    }
    response = client.analyze_from_url(**request_options)
    return response.foreground_image_data
Best Practices
- Use appropriate features - Only request what you need
- Handle confidence scores - Filter low-confidence results
- Combine with GPT-4V - Vision 4.0 for detection, GPT-4V for reasoning
- Batch processing - Process multiple images efficiently
- Cache results - Store analysis for reuse
Conclusion
Azure AI Vision 4.0 brings powerful new features such as dense captioning and smart cropping. Combine it with GPT-4 Vision for comprehensive image understanding.