Back to Blog
4 min read

Azure Cognitive Services Updates: What's New in Early 2023

Azure Cognitive Services continues to evolve rapidly. Let’s explore the recent updates and new capabilities that make it easier to build more powerful AI applications.

Vision Services Updates

from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials

# Module-level Computer Vision client authenticated with a subscription key.
# NOTE(review): this is the azure-cognitiveservices-vision-computervision
# client; the Image Analysis 4.0 call in this file goes through
# ImageAnalysisClient instead - confirm which API version this client targets.
vision_client = ComputerVisionClient(
    credentials=CognitiveServicesCredentials("your-key"),
    endpoint="https://your-resource.cognitiveservices.azure.com/",
)

# New Image Analysis 4.0 features
def analyze_image_v4(image_url: str) -> dict:
    """Analyze an image by URL with the Image Analysis 4.0 client.

    Requests caption, dense captions, objects, tags, people, smart crops and
    read (OCR) features in a single call and flattens the response into a
    plain dictionary.

    Args:
        image_url: URL of the image to analyze.

    Returns:
        A dict with the keys ``caption``, ``dense_captions``, ``objects``,
        ``tags``, ``people`` (number of detected people) and ``text``.
        Features missing from the response map to ``None``, ``[]`` or ``0``.
        Smart-crop results are requested but not included in the dict.
    """
    from azure.ai.vision.imageanalysis import ImageAnalysisClient
    from azure.ai.vision.imageanalysis.models import VisualFeatures
    # Imported here because the function uses AzureKeyCredential but the only
    # other import of it sits much further down the file; without this the
    # snippet raises NameError when used on its own.
    from azure.core.credentials import AzureKeyCredential

    client = ImageAnalysisClient(
        endpoint="https://your-resource.cognitiveservices.azure.com/",
        credential=AzureKeyCredential("your-key")
    )

    # NOTE(review): later releases of azure-ai-vision-imageanalysis appear to
    # move URL input to `analyze_from_url` and reshape parts of the result
    # (e.g. object names, read output) - confirm against the installed version.
    result = client.analyze(
        image_url=image_url,
        visual_features=[
            VisualFeatures.CAPTION,
            VisualFeatures.DENSE_CAPTIONS,
            VisualFeatures.OBJECTS,
            VisualFeatures.TAGS,
            VisualFeatures.PEOPLE,
            VisualFeatures.SMART_CROPS,
            VisualFeatures.READ
        ]
    )

    return {
        "caption": result.caption.text if result.caption else None,
        "dense_captions": [c.text for c in result.dense_captions.list] if result.dense_captions else [],
        "objects": [{"name": o.name, "confidence": o.confidence} for o in result.objects.list] if result.objects else [],
        "tags": [{"name": t.name, "confidence": t.confidence} for t in result.tags.list] if result.tags else [],
        "people": len(result.people.list) if result.people else 0,
        "text": result.read.content if result.read else None
    }

Speech Services Updates

import azure.cognitiveservices.speech as speechsdk

# Short neural voice names grouped by locale code.
NEURAL_VOICES = {
    "en-US": [
        "Jenny",
        "Guy",
        "Aria",
        "Davis",
    ],
    "en-GB": [
        "Sonia",
        "Ryan",
        "Libby",
    ],
    "ja-JP": [
        "Nanami",
        "Keita",
    ],
    "zh-CN": [
        "Xiaoxiao",
        "Yunxi",
    ],
}

def _build_express_as_ssml(text: str, voice_name: str, style: str) -> str:
    """Return an SSML document speaking *text* with *voice_name* in *style*."""
    from xml.sax.saxutils import escape, quoteattr

    # Escape every interpolated value so characters such as "&", "<" or
    # quotes cannot break the document or inject extra markup.
    return (
        '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" '
        'xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">'
        f"<voice name={quoteattr(voice_name)}>"
        f"<mstts:express-as style={quoteattr(style)}>"
        f"{escape(text)}"
        "</mstts:express-as>"
        "</voice>"
        "</speak>"
    )


def text_to_speech_neural(
    text: str,
    voice_name: str = "en-US-JennyNeural",
    style: str = None
) -> bytes:
    """Convert text to speech with a neural voice.

    Args:
        text: Plain text to synthesize.
        voice_name: Full voice name, e.g. ``"en-US-JennyNeural"``.
        style: Optional speaking style. When given, the request is sent as
            SSML with an ``mstts:express-as`` element; otherwise the text is
            synthesized directly with ``voice_name``.

    Returns:
        The synthesized audio as raw bytes.

    Raises:
        RuntimeError: If synthesis does not complete; the message contains
            the result reason and, for cancellations, the error details.
    """
    speech_config = speechsdk.SpeechConfig(
        subscription="your-key",
        region="your-region"
    )
    # Must be set before the synthesizer is created. In the SSML branch the
    # <voice> element names the same voice explicitly.
    speech_config.speech_synthesis_voice_name = voice_name

    synthesizer = speechsdk.SpeechSynthesizer(
        speech_config=speech_config,
        audio_config=None  # No output device: audio is returned in the result
    )

    if style:
        # SSML has to be submitted through speak_ssml_async; assigning it to
        # an attribute on the config has no effect, so the style (and voice)
        # were previously ignored.
        ssml = _build_express_as_ssml(text, voice_name, style)
        result = synthesizer.speak_ssml_async(ssml).get()
    else:
        result = synthesizer.speak_text_async(text).get()

    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        return result.audio_data

    message = f"Speech synthesis failed: {result.reason}"
    if result.reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        message += f" ({details.reason}: {details.error_details})"
    raise RuntimeError(message)

# Speech-to-text with new models
def speech_to_text_continuous(audio_file: str, timeout: float = None) -> list:
    """Transcribe an audio file with continuous speech recognition.

    Recognition runs until the SDK signals that the session stopped or was
    canceled (which includes reaching the end of the file), instead of
    sleeping for a fixed time that truncates long files and wastes time on
    short ones.

    Args:
        audio_file: Path of the audio file to transcribe.
        timeout: Maximum number of seconds to wait for recognition to
            finish. ``None`` (default) waits until the session ends.

    Returns:
        A list of dicts, one per recognized phrase, with the keys ``text``,
        ``offset`` and ``duration`` as reported by the SDK.
    """
    import threading

    speech_config = speechsdk.SpeechConfig(
        subscription="your-key",
        region="your-region"
    )

    # Silence (in ms) after which the service closes the current phrase.
    speech_config.set_property(
        speechsdk.PropertyId.Speech_SegmentationSilenceTimeoutMs, "1000"
    )

    audio_config = speechsdk.AudioConfig(filename=audio_file)

    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_config
    )

    results = []
    finished = threading.Event()

    def handle_result(evt):
        if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
            results.append({
                "text": evt.result.text,
                "offset": evt.result.offset,
                "duration": evt.result.duration
            })

    def handle_stop(evt):
        # Fired for both normal session end and cancellation.
        finished.set()

    recognizer.recognized.connect(handle_result)
    recognizer.session_stopped.connect(handle_stop)
    recognizer.canceled.connect(handle_stop)

    recognizer.start_continuous_recognition()
    try:
        finished.wait(timeout)
    finally:
        # Always release the recognizer, even if waiting was interrupted.
        recognizer.stop_continuous_recognition()

    return results

Language Services Updates

from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

# Shared module-level Text Analytics client, authenticated with an API key.
text_client = TextAnalyticsClient(
    credential=AzureKeyCredential("your-key"),
    endpoint="https://your-resource.cognitiveservices.azure.com/",
)

# New conversational language understanding
def analyze_conversation(messages: list) -> dict:
    """Analyze the latest message of a conversation with CLU.

    Only the last entry of ``messages`` is sent to the Conversational
    Language Understanding project.

    Args:
        messages: Non-empty list of utterance strings, oldest first.

    Returns:
        A dict with ``intent`` (top intent name), ``confidence`` (score of
        that intent, or ``None`` if it is not listed) and ``entities`` (list
        of ``{"category", "text"}`` dicts).

    Raises:
        ValueError: If ``messages`` is empty.
    """
    if not messages:
        raise ValueError("messages must contain at least one utterance")

    from azure.ai.language.conversations import ConversationAnalysisClient

    client = ConversationAnalysisClient(
        endpoint="https://your-resource.cognitiveservices.azure.com/",
        credential=AzureKeyCredential("your-key")
    )

    # Close the client's transport once the single request is done.
    with client:
        response = client.analyze_conversation(
            task={
                "kind": "Conversation",
                "analysisInput": {
                    "conversationItem": {
                        "participantId": "user",
                        "id": "1",
                        "text": messages[-1]
                    }
                },
                "parameters": {
                    "projectName": "your-project",
                    "deploymentName": "your-deployment"
                }
            }
        )

    # With a dict-shaped task the client returns the service JSON as a dict,
    # so fields use the REST (camelCase) names rather than attributes.
    prediction = response["result"]["prediction"]
    top_intent = prediction["topIntent"]

    # Look the score up by name instead of assuming the first entry is the
    # top intent.
    confidence = next(
        (
            intent["confidenceScore"]
            for intent in prediction.get("intents") or []
            if intent["category"] == top_intent
        ),
        None,
    )

    return {
        "intent": top_intent,
        "confidence": confidence,
        "entities": [
            {"category": entity["category"], "text": entity["text"]}
            for entity in prediction.get("entities") or []
        ]
    }

# PII detection improvements
def detect_pii_enhanced(text: str) -> dict:
    """Detect a fixed set of PII categories in ``text``.

    Uses the module-level ``text_client`` and restricts detection to phone
    numbers, e-mail addresses, postal addresses, credit card numbers, IP
    addresses, IBANs and US social security numbers.

    Args:
        text: The document to scan.

    Returns:
        A dict with ``redacted_text`` and ``entities`` (list of dicts with
        ``text``, ``category``, ``confidence`` and ``offset``).

    Raises:
        RuntimeError: If the service reports an error for the document.
    """
    result = text_client.recognize_pii_entities(
        documents=[text],
        categories_filter=[
            "PhoneNumber", "Email", "Address",
            "CreditCardNumber", "IPAddress",
            "InternationalBankingAccountNumber",
            # The US SSN category is named "USSocialSecurityNumber";
            # "SocialSecurityNumber" is not a recognized category value.
            "USSocialSecurityNumber"
        ]
    )[0]

    # A per-document failure comes back as an error item that has no
    # redacted_text/entities, so surface it explicitly.
    if result.is_error:
        raise RuntimeError(
            f"PII detection failed: {result.error.code} - {result.error.message}"
        )

    return {
        "redacted_text": result.redacted_text,
        "entities": [
            {
                "text": e.text,
                "category": e.category,
                "confidence": e.confidence_score,
                "offset": e.offset
            }
            for e in result.entities
        ]
    }

Decision Services Updates

# Personalizer updates
from azure.cognitiveservices.personalizer import PersonalizerClient

def rank_with_personalizer(
    context: dict,
    actions: list
) -> dict:
    """Rank actions using Personalizer.

    Args:
        context: Feature dict describing the current context; sent as the
            single entry of ``contextFeatures``.
        actions: List of dicts, each with an ``"id"`` and a ``"features"``
            entry; every action's features are sent as a one-element list.

    Returns:
        A dict with ``event_id`` (the generated id for this rank call),
        ``recommended_action`` and ``ranking`` (list of
        ``{"id", "probability"}`` dicts).
    """
    # uuid is not imported anywhere else in the file.
    import uuid

    # This msrest-based client takes ``credentials`` (plural), like the
    # other CognitiveServicesCredentials clients in this file.
    personalizer_client = PersonalizerClient(
        endpoint="https://your-personalizer.cognitiveservices.azure.com/",
        credentials=CognitiveServicesCredentials("your-key")
    )

    rank_request = {
        "contextFeatures": [context],
        "actions": [
            {"id": a["id"], "features": [a["features"]]}
            for a in actions
        ],
        "excludedActions": [],
        "eventId": str(uuid.uuid4()),
        "deferActivation": False
    }

    response = personalizer_client.rank(rank_request)

    return {
        "event_id": rank_request["eventId"],
        "recommended_action": response.reward_action_id,
        "ranking": [
            {"id": r.id, "probability": r.probability}
            for r in response.ranking
        ]
    }

# Content Moderator updates
from azure.cognitiveservices.contentmoderator import ContentModeratorClient

def moderate_content_enhanced(text: str) -> dict:
    """Screen text with Content Moderator, including classification and PII.

    Args:
        text: The text to moderate.

    Returns:
        A dict with ``classification`` (scores for ``category1`` to
        ``category3``, each ``None`` when the service returns no
        classification), ``pii``, ``terms`` and ``autocorrected``.
    """
    import io

    moderator_client = ContentModeratorClient(
        endpoint="https://your-moderator.cognitiveservices.azure.com/",
        credentials=CognitiveServicesCredentials("your-key")
    )

    # screen_text uploads the content as a stream, so wrap the string in a
    # readable bytes buffer instead of passing the str itself.
    screen_result = moderator_client.text_moderation.screen_text(
        text_content_type="text/plain",
        text_content=io.BytesIO(text.encode("utf-8")),
        language="eng",
        autocorrect=True,
        pii=True,
        classify=True
    )

    def score_of(category):
        return category.score if category is not None else None

    # Classification may be absent from the response; report None scores
    # rather than failing on attribute access.
    classification = screen_result.classification
    scores = {
        name: score_of(getattr(classification, name, None))
        for name in ("category1", "category2", "category3")
    }

    return {
        "classification": scores,
        "pii": screen_result.pii,
        "terms": screen_result.terms,
        # The result model exposes the corrected text as auto_corrected_text.
        "autocorrected": screen_result.auto_corrected_text
    }

Multi-Service Solutions

class CognitiveServicesHub:
    """Unified access to multiple Cognitive Services.

    Holds one endpoint and key credential and uses them for every service
    client it creates.
    """

    def __init__(self, endpoint: str, key: str):
        """Store the endpoint and wrap ``key`` in an ``AzureKeyCredential``."""
        self.credential = AzureKeyCredential(key)
        self.endpoint = endpoint

    async def process_document_complete(self, document_url: str) -> dict:
        """Process a document with multiple services.

        Extracts the text with the ``prebuilt-read`` model, then runs key
        phrase extraction, sentiment analysis, language detection and PII
        detection on it.

        The SDK clients used here are synchronous, so the work runs in the
        event loop's default executor to avoid blocking other coroutines.

        Args:
            document_url: URL of the document to analyze.

        Returns:
            A dict with ``extracted_text``, ``key_phrases``, ``sentiment``,
            ``language`` and ``pii_detected``. When no text is extracted the
            text-based values are ``[]``, ``None``, ``None`` and ``False``.

        Raises:
            RuntimeError: If Text Analytics reports an error for the
                document in any of the four analyses.
        """
        import asyncio

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, self._process_document_blocking, document_url
        )

    def _process_document_blocking(self, document_url: str) -> dict:
        """Run the full (blocking) extraction and analysis pipeline."""
        from azure.ai.formrecognizer import DocumentAnalysisClient

        # Context managers close each client's transport when done.
        with DocumentAnalysisClient(self.endpoint, self.credential) as doc_client:
            poller = doc_client.begin_analyze_document_from_url(
                "prebuilt-read", document_url
            )
            doc_result = poller.result()

        # One line of text per detected line, each terminated by "\n".
        text_content = "".join(
            f"{line.content}\n"
            for page in doc_result.pages
            for line in page.lines or []
        )

        results = {"extracted_text": text_content}

        if not text_content.strip():
            # Nothing to analyze; skip the Text Analytics calls, which would
            # only report errors for an empty document.
            results["key_phrases"] = []
            results["sentiment"] = None
            results["language"] = None
            results["pii_detected"] = False
            return results

        # NOTE(review): Text Analytics limits the size of a single document;
        # very long extracted text may need chunking - confirm the limits.
        documents = [text_content]
        with TextAnalyticsClient(self.endpoint, self.credential) as text_client:
            key_phrases = self._require_success(
                text_client.extract_key_phrases(documents)[0], "Key phrase extraction"
            )
            results["key_phrases"] = key_phrases.key_phrases

            sentiment = self._require_success(
                text_client.analyze_sentiment(documents)[0], "Sentiment analysis"
            )
            results["sentiment"] = sentiment.sentiment

            language = self._require_success(
                text_client.detect_language(documents)[0], "Language detection"
            )
            results["language"] = language.primary_language.iso6391_name

            pii = self._require_success(
                text_client.recognize_pii_entities(documents)[0], "PII detection"
            )
            results["pii_detected"] = len(pii.entities) > 0

        return results

    @staticmethod
    def _require_success(document_result, operation: str):
        """Return ``document_result`` or raise if it is a per-document error."""
        if document_result.is_error:
            error = document_result.error
            raise RuntimeError(f"{operation} failed: {error.code} - {error.message}")
        return document_result

What’s Coming

  • GPT-4 integration in Azure OpenAI
  • More neural voice options
  • Enhanced document understanding
  • Improved multi-modal capabilities

Resources

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.