Back to Blog
5 min read

Azure Cognitive Services: Building Intelligent Applications

Azure Cognitive Services provides pre-built AI capabilities that developers can integrate into applications without deep machine learning expertise. As Microsoft continues to expand these services, let’s explore how to build intelligent applications using the current offerings.

Why Azure Cognitive Services?

Instead of training custom models from scratch, Cognitive Services offers:

  • Pre-trained models - Ready to use immediately
  • API-first design - Simple REST APIs and SDKs
  • Enterprise security - Azure compliance and data protection
  • Scalability - Handle production workloads
  • Continuous improvement - Microsoft updates models regularly

Service Categories

Vision

from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials

import time

# Initialize client
credentials = CognitiveServicesCredentials(subscription_key)
client = ComputerVisionClient(endpoint, credentials)

# Analyze image: request several feature sets in a single call
analysis = client.analyze_image(
    image_url,
    visual_features=['Categories', 'Description', 'Tags', 'Objects', 'Faces']
)

# Guard against an empty caption list instead of indexing it blindly
captions = analysis.description.captions
if captions:
    print(f"Description: {captions[0].text}")
else:
    print("Description: (no caption returned)")
print(f"Tags: {[tag.name for tag in analysis.tags]}")

# OCR - Extract text from images
# read() only starts the operation; the ID needed to fetch the result is the
# last path segment of the Operation-Location response header.
ocr_result = client.read(image_url, raw=True)
operation_location = ocr_result.headers["Operation-Location"]
operation_id = operation_location.split("/")[-1]

# Wait for result, but give up after a bounded time instead of polling forever
READ_TIMEOUT_SECONDS = 60
deadline = time.monotonic() + READ_TIMEOUT_SECONDS
while True:
    result = client.get_read_result(operation_id)
    if result.status not in ['notStarted', 'running']:
        break
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Read operation {operation_id} did not finish "
            f"within {READ_TIMEOUT_SECONDS} seconds"
        )
    time.sleep(1)

# Extract text
# Any terminal status other than 'succeeded' carries no usable analyze_result,
# so fail with a clear message rather than an AttributeError further down.
if result.status != 'succeeded':
    raise RuntimeError(f"Read operation ended with status: {result.status}")

for page in result.analyze_result.read_results:
    for line in page.lines:
        print(line.text)

Language

from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

# Initialize client
client = TextAnalyticsClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(key)
)


def successful(results):
    """Yield the per-document results that succeeded, reporting the failed ones.

    Each batch call returns one item per input document; an item whose
    ``is_error`` flag is set carries an ``error`` instead of analysis fields,
    so it must be skipped before reading sentiment, entities or key phrases.
    """
    for doc in results:
        if doc.is_error:
            print(f"Document {doc.id} failed: "
                  f"{doc.error.code} - {doc.error.message}")
        else:
            yield doc


documents = [
    "The product quality is excellent and the customer service was amazing!",
    "Disappointed with the delivery time. Very slow shipping.",
    "It's an okay product. Nothing special."
]

# Sentiment Analysis
response = client.analyze_sentiment(documents)
for doc in successful(response):
    print(f"Sentiment: {doc.sentiment}")
    print(f"Scores - Positive: {doc.confidence_scores.positive:.2f}, "
          f"Neutral: {doc.confidence_scores.neutral:.2f}, "
          f"Negative: {doc.confidence_scores.negative:.2f}")

# Entity Recognition
response = client.recognize_entities(documents)
for doc in successful(response):
    for entity in doc.entities:
        print(f"Entity: {entity.text}, Category: {entity.category}, "
              f"Confidence: {entity.confidence_score:.2f}")

# Key Phrase Extraction
response = client.extract_key_phrases(documents)
for doc in successful(response):
    print(f"Key phrases: {doc.key_phrases}")

Speech

import azure.cognitiveservices.speech as speechsdk

import threading

# Speech-to-Text
speech_config = speechsdk.SpeechConfig(
    subscription=speech_key,
    region=service_region
)
audio_config = speechsdk.audio.AudioConfig(filename="audio.wav")
speech_recognizer = speechsdk.SpeechRecognizer(
    speech_config=speech_config,
    audio_config=audio_config
)

# recognize_once() returns after a single utterance; report every outcome
# rather than staying silent when nothing was recognized.
result = speech_recognizer.recognize_once()
if result.reason == speechsdk.ResultReason.RecognizedSpeech:
    print(f"Recognized: {result.text}")
elif result.reason == speechsdk.ResultReason.NoMatch:
    print(f"No speech could be recognized: {result.no_match_details}")
elif result.reason == speechsdk.ResultReason.Canceled:
    cancellation = result.cancellation_details
    print(f"Speech recognition canceled: {cancellation.reason}")
    if cancellation.reason == speechsdk.CancellationReason.Error:
        print(f"Error details: {cancellation.error_details}")

# Text-to-Speech
speech_config.speech_synthesis_voice_name = "en-US-JennyNeural"
synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)

result = synthesizer.speak_text_async("Hello, this is Azure Speech Service.").get()
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized successfully")

# Real-time transcription
def recognized_callback(evt):
    """Print each recognized phrase as it arrives."""
    print(f"Recognized: {evt.result.text}")


# start_continuous_recognition() returns immediately, so the script has to
# wait for the session to end; otherwise it exits before anything is heard.
recognition_done = threading.Event()


def stopped_callback(evt):
    """Signal the main thread that the session stopped or was canceled."""
    recognition_done.set()


speech_recognizer.recognized.connect(recognized_callback)
speech_recognizer.session_stopped.connect(stopped_callback)
speech_recognizer.canceled.connect(stopped_callback)
speech_recognizer.start_continuous_recognition()

recognition_done.wait()
speech_recognizer.stop_continuous_recognition()

Translator

import requests
import uuid

# Translate text
def translate_text(text, target_languages, timeout=10):
    """Translate *text* into each of *target_languages* with the Translator v3 REST API.

    Args:
        text: The text to translate.
        target_languages: Language code(s) to translate into, e.g. ['es', 'fr'].
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response body.

    Raises:
        ValueError: If no target language is given.
        requests.HTTPError: If the service answers with an error status.
    """
    if not target_languages:
        raise ValueError("At least one target language is required")

    path = '/translate'
    url = f"https://api.cognitive.microsofttranslator.com{path}"

    params = {
        'api-version': '3.0',
        'to': target_languages
    }

    headers = {
        'Ocp-Apim-Subscription-Key': translator_key,
        'Ocp-Apim-Subscription-Region': service_region,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4())
    }

    body = [{'text': text}]

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        url, params=params, headers=headers, json=body, timeout=timeout
    )
    # Fail loudly on 4xx/5xx instead of handing an error payload back as if
    # it were a translation result.
    response.raise_for_status()
    return response.json()

# Usage: translate one sentence into four languages in a single request
target_languages = ['es', 'fr', 'de', 'ja']
result = translate_text("Hello, how are you today?", target_languages)

# Only one text was sent, so its translations are under the first response entry
for item in result[0]['translations']:
    print(f"{item['to']}: {item['text']}")

# Detect language
def detect_language(text, timeout=10):
    """Detect the language of *text* with the Translator v3 REST API.

    Args:
        text: The text whose language should be identified.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    path = '/detect'
    url = f"https://api.cognitive.microsofttranslator.com{path}"

    params = {'api-version': '3.0'}
    headers = {
        'Ocp-Apim-Subscription-Key': translator_key,
        # Send the same region header as translate_text so both calls
        # authenticate the same way.
        'Ocp-Apim-Subscription-Region': service_region,
        'Content-type': 'application/json'
    }
    body = [{'text': text}]

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        url, params=params, headers=headers, json=body, timeout=timeout
    )
    # Surface 4xx/5xx as an exception instead of returning the error payload.
    response.raise_for_status()
    return response.json()

# Detect the language of a French sample and report the first response entry
detected = detect_language("Bonjour, comment allez-vous?")
top_guess = detected[0]
print(f"Language: {top_guess['language']}, Confidence: {top_guess['score']}")

Form Recognizer

from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential

# Initialize client
client = DocumentAnalysisClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(key)
)


def field_value(document, name):
    """Return the extracted value of field *name*, or None if it is absent.

    ``fields.get(name)`` yields None for a field that was not extracted, so
    reading ``.value`` on it directly would raise AttributeError.
    """
    field = document.fields.get(name)
    return field.value if field is not None else None


# Analyze invoice
with open("invoice.pdf", "rb") as f:
    poller = client.begin_analyze_document("prebuilt-invoice", f)

result = poller.result()

for document in result.documents:
    print(f"Invoice number: {field_value(document, 'InvoiceId')}")
    print(f"Vendor: {field_value(document, 'VendorName')}")
    print(f"Total: {field_value(document, 'InvoiceTotal')}")

# Analyze receipts
with open("receipt.jpg", "rb") as f:
    poller = client.begin_analyze_document("prebuilt-receipt", f)

result = poller.result()
for document in result.documents:
    print(f"Merchant: {field_value(document, 'MerchantName')}")
    print(f"Total: {field_value(document, 'Total')}")

Building a Document Intelligence Solution

Combine multiple services for a complete solution:

import json
import os

from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.ai.textanalytics import TextAnalyticsClient
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.core.credentials import AzureKeyCredential

class DocumentProcessor:
    """Run a document through Form Recognizer and then Text Analytics.

    Form Recognizer extracts the document's text; Text Analytics then
    supplies sentiment, entities and key phrases for that text.
    """

    def __init__(self, config):
        """Create the two service clients.

        Args:
            config: Mapping with the keys 'form_endpoint', 'form_key',
                'text_endpoint' and 'text_key'.
        """
        self.form_client = DocumentAnalysisClient(
            config['form_endpoint'],
            AzureKeyCredential(config['form_key'])
        )
        self.text_client = TextAnalyticsClient(
            config['text_endpoint'],
            AzureKeyCredential(config['text_key'])
        )

    @staticmethod
    def _require_success(result, operation):
        """Return *result* unchanged, or raise if the service flagged it as an error."""
        if result.is_error:
            raise RuntimeError(
                f"{operation} failed: "
                f"{result.error.code} - {result.error.message}"
            )
        return result

    def process_document(self, document_path):
        """Process a document through multiple cognitive services.

        Args:
            document_path: Path of the file to analyze.

        Returns:
            A dict with the keys 'extracted_text', 'sentiment', 'entities'
            and 'key_phrases'.

        Raises:
            RuntimeError: If Text Analytics reports an error for the text.
        """
        results = {}

        # Extract text and structure with Form Recognizer
        with open(document_path, "rb") as f:
            poller = self.form_client.begin_analyze_document(
                "prebuilt-document", f
            )
        form_result = poller.result()

        # Extract all text. The full text is exposed on the analysis result
        # itself; the individual page objects have no `content` attribute.
        full_text = form_result.content
        results['extracted_text'] = full_text

        # NOTE(review): Text Analytics limits the size of a single document;
        # long extractions may need to be split first. Confirm the limits.

        # Analyze sentiment
        sentiment = self._require_success(
            self.text_client.analyze_sentiment([full_text])[0],
            "Sentiment analysis"
        )
        results['sentiment'] = {
            'overall': sentiment.sentiment,
            'scores': {
                'positive': sentiment.confidence_scores.positive,
                'neutral': sentiment.confidence_scores.neutral,
                'negative': sentiment.confidence_scores.negative
            }
        }

        # Extract entities
        entities = self._require_success(
            self.text_client.recognize_entities([full_text])[0],
            "Entity recognition"
        )
        results['entities'] = [
            {
                'text': e.text,
                'category': e.category,
                'confidence': e.confidence_score
            }
            for e in entities.entities
        ]

        # Extract key phrases
        key_phrases = self._require_success(
            self.text_client.extract_key_phrases([full_text])[0],
            "Key phrase extraction"
        )
        results['key_phrases'] = key_phrases.key_phrases

        return results

# Usage
# Each config key is read from the environment variable of the same name in
# upper case (form_endpoint -> FORM_ENDPOINT, and so on).
config = {
    name: os.environ[name.upper()]
    for name in ('form_endpoint', 'form_key', 'text_endpoint', 'text_key')
}

processor = DocumentProcessor(config)
results = processor.process_document("contract.pdf")

# Pretty-print the combined analysis
print(json.dumps(results, indent=2))

Deploying to Azure Functions

import azure.functions as func
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
import json
import os

# Initialize client
# Built at module level, so it is created when the module is imported rather
# than on every call to main().
_endpoint = os.environ['TEXT_ANALYTICS_ENDPOINT']
_credential = AzureKeyCredential(os.environ['TEXT_ANALYTICS_KEY'])
client = TextAnalyticsClient(endpoint=_endpoint, credential=_credential)

def _json_response(payload, status_code=200):
    """Serialize *payload* as an HTTP response with a JSON content type."""
    return func.HttpResponse(
        json.dumps(payload),
        status_code=status_code,
        mimetype='application/json'
    )


def main(req: func.HttpRequest) -> func.HttpResponse:
    """Analyze the 'text' field of a JSON request body.

    Expects a JSON object such as ``{"text": "..."}`` and responds with the
    sentiment, confidence scores, entities and key phrases for that text.

    Returns:
        200 with the analysis, 400 when the body is not valid JSON or has no
        usable 'text' string, 500 when the analysis itself fails.
    """
    import logging

    # A malformed body is a client error, so answer 400 rather than letting
    # it fall into the generic 500 handler.
    try:
        body = req.get_json()
    except ValueError:
        return _json_response({'error': 'Request body must be valid JSON'}, 400)

    # Valid JSON can still be a list, number or string; only objects have 'text'.
    text = body.get('text') if isinstance(body, dict) else None
    if not text or not isinstance(text, str):
        return _json_response({'error': 'Text is required'}, 400)

    try:
        # Analyze text
        sentiment = client.analyze_sentiment([text])[0]
        entities = client.recognize_entities([text])[0]
        key_phrases = client.extract_key_phrases([text])[0]

        result = {
            'sentiment': sentiment.sentiment,
            'confidence': {
                'positive': sentiment.confidence_scores.positive,
                'negative': sentiment.confidence_scores.negative,
                'neutral': sentiment.confidence_scores.neutral
            },
            'entities': [
                {'text': e.text, 'category': e.category}
                for e in entities.entities
            ],
            'key_phrases': key_phrases.key_phrases
        }
    except Exception:
        # Keep the details in the function log; do not send internal error
        # text back to the caller.
        logging.exception("Text analysis failed")
        return _json_response({'error': 'Internal server error'}, 500)

    return _json_response(result)

Best Practices

  1. Handle rate limits - Implement retry logic with exponential backoff
  2. Batch requests - Process multiple documents in a single API call
  3. Cache results - Store analysis results to avoid redundant calls
  4. Monitor costs - Use Azure Cost Management to track usage
  5. Secure credentials - Use Azure Key Vault for API keys

Cost Optimization

| Tip | Benefit |
| --- | --- |
| Use free tier for dev | 5,000 transactions/month free |
| Batch processing | Reduce number of API calls |
| Choose right tier | Match capacity to workload |
| Cache results | Avoid duplicate processing |

Conclusion

Azure Cognitive Services enables developers to add sophisticated AI capabilities without deep machine learning expertise. The pre-built models handle common scenarios effectively, while custom training options allow for domain-specific optimization. Combined with Azure’s infrastructure, you can build scalable, intelligent applications.

Resources

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.