May 5, 2021 2 min read

Azure Cognitive Services: Building Intelligent Applications

Azure AI Cognitive Services Machine Learning NLP

Azure Cognitive Services provides pre-built AI capabilities that developers can integrate into applications without deep machine learning expertise. As Microsoft continues to expand these services, let’s explore how to build intelligent applications using the current offerings.

Why Azure Cognitive Services?

Instead of training custom models from scratch, Cognitive Services offers:

Pre-trained models - Ready to use immediately
API-first design - Simple REST APIs and SDKs
Enterprise security - Azure compliance and data protection
Scalability - Handle production workloads
Continuous improvement - Microsoft updates models regularly

Service Categories

Vision

from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials

# Initialize client
# NOTE: subscription_key, endpoint and image_url are not defined in this
# snippet; supply them (for example from environment variables) first.
credentials = CognitiveServicesCredentials(subscription_key)
client = ComputerVisionClient(endpoint, credentials)

# Analyze image: a single call requests every listed visual feature.
analysis = client.analyze_image(
    image_url,
    visual_features=['Categories', 'Description', 'Tags', 'Objects', 'Faces']
)

# Guard the index: an empty caption list would make captions[0] raise
# IndexError and abort the script before the tags are printed.
captions = analysis.description.captions
if captions:
    print(f"Description: {captions[0].text}")
else:
    print("Description: (no caption returned)")
print(f"Tags: {[tag.name for tag in analysis.tags]}")

# OCR - Extract text from images
# raw=True returns the raw response so its headers can be read; the last
# path segment of Operation-Location is used as the operation ID.
ocr_result = client.read(image_url, raw=True)
operation_location = ocr_result.headers["Operation-Location"]
operation_id = operation_location.split("/")[-1]

# Wait for result: poll once per second until the operation leaves the
# 'notStarted'/'running' states.
import time
while True:
    result = client.get_read_result(operation_id)
    if result.status not in ['notStarted', 'running']:
        break
    time.sleep(1)

# Extract text
# The loop above also exits on a failed operation, so only walk the pages
# when the read succeeded. The status is compared as a string, exactly as
# the polling check does.
if result.status == 'succeeded':
    for page in result.analyze_result.read_results:
        for line in page.lines:
            print(line.text)
else:
    print(f"Read operation ended with status: {result.status}")

Language

from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

# Initialize client
# NOTE: endpoint and key are not defined in this snippet; supply them first.
client = TextAnalyticsClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(key)
)

# Sentiment Analysis
documents = [
    "The product quality is excellent and the customer service was amazing!",
    "Disappointed with the delivery time. Very slow shipping.",
    "It's an okay product. Nothing special."
]

response = client.analyze_sentiment(documents)
for doc in response:
    # A batch response can mix successful results with per-document error
    # items; error items carry no sentiment data, so report and skip them.
    if doc.is_error:
        print(f"Document {doc.id} could not be analyzed: {doc.error.message}")
        continue
    print(f"Sentiment: {doc.sentiment}")
    print(f"Scores - Positive: {doc.confidence_scores.positive:.2f}, "
          f"Neutral: {doc.confidence_scores.neutral:.2f}, "
          f"Negative: {doc.confidence_scores.negative:.2f}")

# Entity Recognition
response = client.recognize_entities(documents)
for doc in response:
    # Per-document error items have no entities attribute; skip them rather
    # than letting one bad document abort the whole loop.
    if doc.is_error:
        print(f"Document {doc.id} could not be analyzed: {doc.error.message}")
        continue
    for entity in doc.entities:
        print(f"Entity: {entity.text}, Category: {entity.category}, "
              f"Confidence: {entity.confidence_score:.2f}")

# Key Phrase Extraction
response = client.extract_key_phrases(documents)
for doc in response:
    # Per-document error items have no key_phrases attribute; skip them.
    if doc.is_error:
        print(f"Document {doc.id} could not be analyzed: {doc.error.message}")
        continue
    print(f"Key phrases: {doc.key_phrases}")

Speech

import azure.cognitiveservices.speech as speechsdk

# Speech-to-Text
# NOTE: speech_key and service_region are not defined in this snippet;
# supply them first.
speech_config = speechsdk.SpeechConfig(
    subscription=speech_key,
    region=service_region
)
audio_config = speechsdk.audio.AudioConfig(filename="audio.wav")
speech_recognizer = speechsdk.SpeechRecognizer(
    speech_config=speech_config,
    audio_config=audio_config
)

# recognize_once() reports its outcome through result.reason. Handle the
# non-success reasons too, so a bad key or silent audio is reported
# instead of the script printing nothing.
result = speech_recognizer.recognize_once()
if result.reason == speechsdk.ResultReason.RecognizedSpeech:
    print(f"Recognized: {result.text}")
elif result.reason == speechsdk.ResultReason.NoMatch:
    print(f"No speech could be recognized: {result.no_match_details}")
elif result.reason == speechsdk.ResultReason.Canceled:
    cancellation = result.cancellation_details
    print(f"Speech recognition canceled: {cancellation.reason}")
    if cancellation.reason == speechsdk.CancellationReason.Error:
        print(f"Error details: {cancellation.error_details}")

# Text-to-Speech
# Reuses speech_config from the speech-to-text setup and selects a voice.
speech_config.speech_synthesis_voice_name = "en-US-JennyNeural"
synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)

# speak_text_async() returns a future; .get() blocks until it completes.
result = synthesizer.speak_text_async("Hello, this is Azure Speech Service.").get()
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized successfully")
elif result.reason == speechsdk.ResultReason.Canceled:
    # Report why synthesis was canceled (for example bad credentials)
    # instead of ending silently.
    cancellation = result.cancellation_details
    print(f"Speech synthesis canceled: {cancellation.reason}")
    if cancellation.reason == speechsdk.CancellationReason.Error:
        print(f"Error details: {cancellation.error_details}")

# Real-time transcription
import threading

# Continuous recognition delivers results through callbacks after
# start_continuous_recognition() returns, so the script has to stay alive
# until the session ends and then stop the recognizer explicitly.
recognition_done = threading.Event()


def recognized_callback(evt):
    """Print the text of a recognition result delivered by the recognizer."""
    print(f"Recognized: {evt.result.text}")


def stopped_callback(evt):
    """Wake the main thread once the session has stopped or been canceled."""
    recognition_done.set()


speech_recognizer.recognized.connect(recognized_callback)
speech_recognizer.session_stopped.connect(stopped_callback)
speech_recognizer.canceled.connect(stopped_callback)

speech_recognizer.start_continuous_recognition()
# NOTE(review): with file input the session should stop at end of audio;
# with a live microphone this waits until recognition is canceled — confirm
# that this matches how you intend to run the snippet.
recognition_done.wait()
speech_recognizer.stop_continuous_recognition()

Translator

import requests
import uuid

# Translate text
def translate_text(text, target_languages, timeout=10):
    """Translate ``text`` through the Translator v3 ``/translate`` endpoint.

    Authentication uses the module-level ``translator_key`` and
    ``service_region`` values.

    Args:
        text: The string to translate.
        target_languages: Value sent as the ``to`` query parameter: a
            language code or a list of codes.
        timeout: Seconds to wait for the service, so a stalled connection
            cannot block the caller indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    path = '/translate'
    url = f"https://api.cognitive.microsofttranslator.com{path}"

    params = {
        'api-version': '3.0',
        'to': target_languages
    }

    headers = {
        'Ocp-Apim-Subscription-Key': translator_key,
        'Ocp-Apim-Subscription-Region': service_region,
        'Content-type': 'application/json',
        # Fresh ID per request, sent as the client trace ID.
        'X-ClientTraceId': str(uuid.uuid4())
    }

    body = [{'text': text}]

    response = requests.post(
        url, params=params, headers=headers, json=body, timeout=timeout
    )
    # Fail here on authentication or quota errors rather than returning an
    # error payload that callers would index as if it were a result list.
    response.raise_for_status()
    return response.json()

# Usage: translate one sentence into Spanish, French, German and Japanese.
source_text = "Hello, how are you today?"
target_codes = ['es', 'fr', 'de', 'ja']
result = translate_text(source_text, target_codes)

# translate_text sends a single text, and its translations are read from
# the first entry of the response.
first_entry = result[0]
for translation in first_entry['translations']:
    print(f"{translation['to']}: {translation['text']}")

# Detect language
def detect_language(text, timeout=10):
    """Detect the language of ``text`` through the Translator v3 ``/detect`` endpoint.

    Authentication uses the module-level ``translator_key`` and
    ``service_region`` values.

    Args:
        text: The string whose language should be identified.
        timeout: Seconds to wait for the service, so a stalled connection
            cannot block the caller indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout``.
    """
    path = '/detect'
    url = f"https://api.cognitive.microsofttranslator.com{path}"

    params = {'api-version': '3.0'}
    headers = {
        'Ocp-Apim-Subscription-Key': translator_key,
        # Send the region as well, matching translate_text.
        # NOTE(review): regional and multi-service resources are expected to
        # reject requests without it — confirm for your resource type.
        'Ocp-Apim-Subscription-Region': service_region,
        'Content-type': 'application/json'
    }
    body = [{'text': text}]

    response = requests.post(
        url, params=params, headers=headers, json=body, timeout=timeout
    )
    # Fail here on authentication or quota errors rather than returning an
    # error payload that callers would index as if it were a result list.
    response.raise_for_status()
    return response.json()

# Usage: detect the language of a French sample sentence and print the
# language and score read from the first entry of the response.
sample_text = "Bonjour, comment allez-vous?"
detected = detect_language(sample_text)
print(f"Language: {detected[0]['language']}, Confidence: {detected[0]['score']}")

Form Recognizer

from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential

# Initialize client
# NOTE: endpoint and key are not defined in this snippet; supply them first.
client = DocumentAnalysisClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(key)
)

# Analyze invoice
with open("invoice.pdf", "rb") as f:
    poller = client.begin_analyze_document("prebuilt-invoice", f)

# result() blocks until the analysis has finished.
result = poller.result()

# fields.get() returns None for a field that is absent from the result, so
# look each field up first instead of dereferencing .value on None.
invoice_fields = (
    ("Invoice number", "InvoiceId"),
    ("Vendor", "VendorName"),
    ("Total", "InvoiceTotal"),
)
for document in result.documents:
    for label, field_name in invoice_fields:
        field = document.fields.get(field_name)
        value = field.value if field is not None else None
        print(f"{label}: {value}")

# Analyze receipts
with open("receipt.jpg", "rb") as f:
    poller = client.begin_analyze_document("prebuilt-receipt", f)

# result() blocks until the analysis has finished.
result = poller.result()

# fields.get() returns None for a field that is absent from the result, so
# look each field up first instead of dereferencing .value on None.
receipt_fields = (
    ("Merchant", "MerchantName"),
    ("Total", "Total"),
)
for document in result.documents:
    for label, field_name in receipt_fields:
        field = document.fields.get(field_name)
        value = field.value if field is not None else None
        print(f"{label}: {value}")

Building a Document Intelligence Solution

Combine multiple services for a complete solution:

import json
import os

from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.ai.textanalytics import TextAnalyticsClient
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.core.credentials import AzureKeyCredential

class DocumentProcessor:
    """Run a document through Form Recognizer and then Text Analytics.

    Form Recognizer extracts the document text; Text Analytics then derives
    sentiment, entities and key phrases from that text.
    """

    def __init__(self, config):
        """Create the two service clients.

        Args:
            config: Mapping that provides the keys ``form_endpoint``,
                ``form_key``, ``text_endpoint`` and ``text_key``.
        """
        self.form_client = DocumentAnalysisClient(
            config['form_endpoint'],
            AzureKeyCredential(config['form_key'])
        )
        self.text_client = TextAnalyticsClient(
            config['text_endpoint'],
            AzureKeyCredential(config['text_key'])
        )

    def process_document(self, document_path):
        """Process a document through multiple cognitive services.

        Args:
            document_path: Path of the file to analyze; opened in binary mode.

        Returns:
            A dict with the keys ``extracted_text`` (str), ``sentiment``
            (overall label plus positive/neutral/negative scores),
            ``entities`` (list of text/category/confidence dicts) and
            ``key_phrases``.
        """
        results = {}

        # Extract text and structure with Form Recognizer
        with open(document_path, "rb") as f:
            poller = self.form_client.begin_analyze_document(
                "prebuilt-document", f
            )
        form_result = poller.result()

        # Extract all text
        # The analysis result exposes the full document text on ``content``.
        # Page objects hold words and lines but no ``content`` attribute, so
        # joining page.content raises AttributeError.
        full_text = form_result.content or ""
        results['extracted_text'] = full_text

        # NOTE(review): the whole text goes to Text Analytics as a single
        # document; very long documents may exceed the service's
        # per-document size limit — confirm against your inputs.

        # Analyze sentiment
        sentiment = self.text_client.analyze_sentiment([full_text])[0]
        results['sentiment'] = {
            'overall': sentiment.sentiment,
            'scores': {
                'positive': sentiment.confidence_scores.positive,
                'neutral': sentiment.confidence_scores.neutral,
                'negative': sentiment.confidence_scores.negative
            }
        }

        # Extract entities
        entities = self.text_client.recognize_entities([full_text])[0]
        results['entities'] = [
            {
                'text': e.text,
                'category': e.category,
                'confidence': e.confidence_score
            }
            for e in entities.entities
        ]

        # Extract key phrases
        key_phrases = self.text_client.extract_key_phrases([full_text])[0]
        results['key_phrases'] = key_phrases.key_phrases

        return results

# Usage: read each endpoint and key from the environment, build the
# processor, and print the analysis of one document as indented JSON.
config = dict(
    form_endpoint=os.environ['FORM_ENDPOINT'],
    form_key=os.environ['FORM_KEY'],
    text_endpoint=os.environ['TEXT_ENDPOINT'],
    text_key=os.environ['TEXT_KEY'],
)

processor = DocumentProcessor(config)
results = processor.process_document("contract.pdf")

print(json.dumps(results, indent=2))

Deploying to Azure Functions

import azure.functions as func
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
import json
import os

# Initialize client
# Built at module level, outside main(), from two environment variables.
text_analytics_endpoint = os.environ['TEXT_ANALYTICS_ENDPOINT']
text_analytics_credential = AzureKeyCredential(os.environ['TEXT_ANALYTICS_KEY'])
client = TextAnalyticsClient(
    endpoint=text_analytics_endpoint,
    credential=text_analytics_credential
)

def main(req: func.HttpRequest) -> func.HttpResponse:
    """Analyze the ``text`` field of a JSON request body.

    Args:
        req: The incoming HTTP request; its body is expected to be a JSON
            object with a non-empty ``text`` entry.

    Returns:
        A JSON response: 200 with sentiment, confidence scores, entities and
        key phrases; 400 when the body is not valid JSON, is not an object,
        or has no ``text``; 500 when the analysis itself fails.
    """
    def json_response(payload, status_code=200):
        # Every response is JSON, so error responses get the same mimetype
        # as the success response.
        return func.HttpResponse(
            json.dumps(payload),
            status_code=status_code,
            mimetype='application/json'
        )

    # get_json() raises ValueError for a body that is not valid JSON. That
    # is a client mistake, so answer 400 rather than falling through to the
    # generic 500 handler.
    try:
        body = req.get_json()
    except ValueError:
        return json_response({'error': 'Request body must be valid JSON'}, 400)

    # Valid JSON may still be a list or scalar, which has no .get().
    text = body.get('text') if isinstance(body, dict) else None
    if not text:
        return json_response({'error': 'Text is required'}, 400)

    try:
        # Analyze text
        sentiment = client.analyze_sentiment([text])[0]
        entities = client.recognize_entities([text])[0]
        key_phrases = client.extract_key_phrases([text])[0]

        result = {
            'sentiment': sentiment.sentiment,
            'confidence': {
                'positive': sentiment.confidence_scores.positive,
                'negative': sentiment.confidence_scores.negative,
                'neutral': sentiment.confidence_scores.neutral
            },
            'entities': [
                {'text': e.text, 'category': e.category}
                for e in entities.entities
            ],
            'key_phrases': key_phrases.key_phrases
        }

        return json_response(result)

    except Exception as e:
        # Top-level boundary: turn any analysis failure into a 500 response.
        # NOTE(review): str(e) exposes internal error text to the caller —
        # consider logging it and returning a generic message instead.
        return json_response({'error': str(e)}, 500)

Best Practices

Handle rate limits - Implement retry logic with exponential backoff
Batch requests - Process multiple documents in a single API call
Cache results - Store analysis results to avoid redundant calls
Monitor costs - Use Azure Cost Management to track usage
Secure credentials - Use Azure Key Vault for API keys

Cost Optimization

Tip	Benefit
Use free tier for dev	Free monthly quota (e.g. 5,000 transactions/month; the allowance varies by service)
Batch processing	Reduce number of API calls
Choose right tier	Match capacity to workload
Cache results	Avoid duplicate processing

Conclusion

Azure Cognitive Services enables developers to add sophisticated AI capabilities without deep machine learning expertise. The pre-built models handle common scenarios effectively, while custom training options allow for domain-specific optimization. Combined with Azure’s infrastructure, you can build scalable, intelligent applications.