Back to Blog
4 min read

Conversation Summarization: Managing Long Chat Histories

As conversations grow, managing context becomes challenging. Today we explore techniques for summarizing conversations to fit within context limits.

The Challenge

# Why long chat histories need summarization at all — each entry names a
# pressure that grows with the length of the message list.
challenges = {
    "context_limit": "Models have finite context windows",
    "cost": "Longer contexts = higher costs",
    "relevance": "Old messages may be less relevant",
    "coherence": "Need to maintain conversation flow"
}

Basic Summarization

from openai import AzureOpenAI

class ConversationSummarizer:
    """Summarize a chat history into short free text via an LLM call."""

    def __init__(self, client, model="gpt-3.5-turbo"):
        """
        Args:
            client: Chat-completions client (e.g. AzureOpenAI).
            model: Model/deployment name for summarization requests.
                Previously hard-coded in summarize(); parameterized for
                reuse — the default preserves the old behavior.
        """
        self.client = client
        self.model = model

    def summarize(self, messages, max_tokens=500):
        """Summarize a conversation history.

        Args:
            messages: List of {"role": ..., "content": ...} dicts.
            max_tokens: Cap on the generated summary length.

        Returns:
            The summary text produced by the model.
        """
        conversation_text = self._format_messages(messages)

        prompt = f"""Summarize the following conversation, capturing:
1. Main topics discussed
2. Key decisions or conclusions
3. Important context for continuation

Conversation:
{conversation_text}

Summary:"""

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens
        )

        return response.choices[0].message.content

    def _format_messages(self, messages):
        """Render messages as "Role: content" paragraphs separated by blank lines."""
        return "\n\n".join(
            f"{msg['role'].capitalize()}: {msg['content']}" for msg in messages
        )

Rolling Summary

class RollingSummary:
    """Keep a compact rolling summary plus a short tail of raw messages."""

    def __init__(self, client, summary_threshold=10):
        """
        Args:
            client: Chat-completions client, handed to the summarizer.
            summary_threshold: Buffer size that triggers a summarization
                pass; the older half of the buffer gets folded into the
                running summary, the newer half stays verbatim.
        """
        self.client = client
        self.summarizer = ConversationSummarizer(client)
        self.threshold = summary_threshold
        self.summary = ""
        self.recent_messages = []

    def add_message(self, message):
        """Buffer a message, compacting once the buffer hits the threshold."""
        self.recent_messages.append(message)
        if len(self.recent_messages) < self.threshold:
            return
        self._update_summary()

    def _update_summary(self):
        """Fold the older half of the buffer into the running summary."""
        half = self.threshold // 2
        older = self.recent_messages[:half]
        formatted = self.summarizer._format_messages(older)

        # Seed the summarization input with the prior summary (if any) so
        # context from earlier passes is carried forward, not lost.
        if self.summary:
            combined = f"Previous summary: {self.summary}\n\nNew messages:\n{formatted}"
        else:
            combined = formatted

        self.summary = self.summarizer.summarize(
            [{"role": "user", "content": combined}]
        )

        # Drop what was summarized; the newer half survives verbatim.
        self.recent_messages = self.recent_messages[half:]

    def get_context(self):
        """Get summary + recent messages for context."""
        prefix = (
            [{"role": "system", "content": f"Conversation summary: {self.summary}"}]
            if self.summary
            else []
        )
        return prefix + list(self.recent_messages)

Hierarchical Summarization

class HierarchicalSummarizer:
    """Multi-level summarization for very long conversations.

    Raw messages accumulate in an hourly bucket; when a message crosses an
    hour boundary the bucket is summarized into ``hourly_summaries``, and
    crossing a day boundary collapses all hourly summaries into one daily
    summary. Timestamps are assumed datetime-like (need ``.date()`` and the
    ``year/month/day/hour`` attributes) — confirm against callers.

    Fixes vs. the original: ``_new_hour``, ``_new_day``, ``_generate_summary``
    and ``_format_messages`` were called but never defined (AttributeError);
    they are implemented here. ``get_context("recent")`` also joined its two
    parts with no separator; now joined with a newline.
    """

    def __init__(self, client):
        self.client = client
        self.hourly_summaries = []        # one summary string per completed hour
        self.daily_summary = ""           # rollup of a completed day's hourlies
        self.current_hour_messages = []   # raw messages for the in-progress hour
        self._last_timestamp = None       # previous timestamp, for boundary checks

    def add_message(self, message, timestamp):
        """Record a message; roll buckets up when an hour/day boundary is crossed.

        Boundaries are checked before appending so the first message of a
        new hour/day starts a fresh bucket instead of being folded into the
        old one.
        """
        if self._new_day(timestamp):
            self._summarize_hour()   # flush the partial hour first
            self._summarize_day()
        elif self._new_hour(timestamp):
            self._summarize_hour()

        self.current_hour_messages.append({
            **message,
            "timestamp": timestamp
        })
        self._last_timestamp = timestamp

    def _new_hour(self, timestamp):
        """True iff timestamp is in a different clock hour than the last message."""
        prev = self._last_timestamp
        if prev is None:
            return False
        return ((timestamp.year, timestamp.month, timestamp.day, timestamp.hour)
                != (prev.year, prev.month, prev.day, prev.hour))

    def _new_day(self, timestamp):
        """True iff timestamp is on a different calendar day than the last message."""
        if self._last_timestamp is None:
            return False
        return timestamp.date() != self._last_timestamp.date()

    def _summarize_hour(self):
        """Summarize and clear the in-progress hour bucket (no-op when empty)."""
        if self.current_hour_messages:
            summary = self._generate_summary(self.current_hour_messages, "hour")
            self.hourly_summaries.append(summary)
            self.current_hour_messages = []

    def _summarize_day(self):
        """Collapse all hourly summaries into a single daily summary."""
        if self.hourly_summaries:
            combined = "\n\n".join(self.hourly_summaries)
            self.daily_summary = self._generate_summary(
                [{"role": "system", "content": combined}],
                "day"
            )
            self.hourly_summaries = []

    def _generate_summary(self, messages, period):
        """Ask the LLM for a summary of messages covering the given period."""
        text = self._format_messages(messages)
        prompt = f"Summarize this {period} of conversation:\n{text}"
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300
        )
        return response.choices[0].message.content

    def _format_messages(self, messages):
        """Render messages as "Role: content" lines."""
        return "\n".join(
            f"{m['role'].capitalize()}: {m['content']}" for m in messages
        )

    def get_context(self, detail_level="recent"):
        """Get appropriate context based on detail level.

        "full"   -> daily summary plus every hourly summary.
        "recent" -> last two hourly summaries plus last five raw messages.
        other    -> just the daily summary.
        """
        if detail_level == "full":
            return self.daily_summary + "\n" + "\n".join(self.hourly_summaries)
        elif detail_level == "recent":
            parts = self.hourly_summaries[-2:]
            parts = parts + [self._format_messages(self.current_hour_messages[-5:])]
            return "\n".join(parts)
        else:
            return self.daily_summary

Topic-Aware Summarization

class TopicAwareSummarizer:
    """Summarize a conversation by topic clusters rather than chronology."""

    def __init__(self, client):
        """
        Args:
            client: Chat-completions client used for per-topic summaries.
        """
        # Imported locally (matching the local sklearn import below); the
        # original referenced SentenceTransformer with no import anywhere
        # in the file, which raised NameError at construction time.
        from sentence_transformers import SentenceTransformer

        self.client = client
        self.model = SentenceTransformer("all-MiniLM-L6-v2")

    def summarize_by_topics(self, messages, num_topics=3):
        """Cluster messages into num_topics groups and summarize each group.

        Returns:
            Dict mapping cluster label -> summary string for that topic.
        """
        topics = self._cluster_messages(messages, num_topics)
        return {
            topic_id: self._summarize_topic(topic_messages)
            for topic_id, topic_messages in topics.items()
        }

    def _cluster_messages(self, messages, num_topics):
        """Group messages by KMeans cluster over their sentence embeddings."""
        from sklearn.cluster import KMeans

        texts = [m["content"] for m in messages]
        embeddings = self.model.encode(texts)

        # Fixed seed keeps cluster labels reproducible across runs.
        kmeans = KMeans(n_clusters=num_topics, random_state=42)
        labels = kmeans.fit_predict(embeddings)

        topics = {}
        for msg, label in zip(messages, labels):
            topics.setdefault(label, []).append(msg)
        return topics

    def _summarize_topic(self, messages):
        """Ask the LLM for a short summary of one topic's messages."""
        text = "\n".join(m["content"] for m in messages)
        prompt = f"Summarize this discussion topic:\n{text}"

        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=200
        )
        return response.choices[0].message.content

Selective Memory

class SelectiveMemory:
    """Keep important messages verbatim; fold the rest into background text.

    Fixes vs. the original: ``_update_summary`` was called from
    ``add_message`` but never defined (AttributeError on any low-importance
    message), and the declared "agreement" indicator was never applied in
    scoring.
    """

    def __init__(self, client):
        self.client = client                 # kept for future LLM-based compaction
        self.important_messages = []         # high-importance messages, verbatim
        self.summarized_context = ""         # running text of low-importance ones

    def add_message(self, message):
        """Route a message to verbatim storage or the background context."""
        importance = self._score_importance(message)

        if importance > 0.7:
            self.important_messages.append(message)
        else:
            self._update_summary(message)

    def _update_summary(self, message):
        """Fold a low-importance message into the running background context.

        Plain text accumulation for now; a production version could
        periodically compress self.summarized_context via self.client.
        """
        snippet = f"{message['role']}: {message['content']}"
        if self.summarized_context:
            self.summarized_context += "\n" + snippet
        else:
            self.summarized_context = snippet

    def _score_importance(self, message):
        """Score message importance (0-1) from keyword heuristics."""
        indicators = {
            "decision": 0.3,
            "action": 0.3,
            "question": 0.2,
            "agreement": 0.2
        }

        score = 0
        content = message["content"].lower()

        if any(word in content for word in ["decided", "conclude", "final"]):
            score += indicators["decision"]
        if any(word in content for word in ["will", "shall", "must", "need to"]):
            score += indicators["action"]
        if "?" in content:
            score += indicators["question"]
        # Previously declared but never used — now wired into the score.
        if any(word in content for word in ["agree", "sounds good"]):
            score += indicators["agreement"]

        return min(1.0, score)

    def get_context(self):
        """Return background context (system msg) plus last 10 important messages."""
        context = []
        if self.summarized_context:
            context.append({
                "role": "system",
                "content": f"Background: {self.summarized_context}"
            })
        context.extend(self.important_messages[-10:])
        return context

Tomorrow we’ll explore memory management patterns for LLM applications.

Resources

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.