Layer 2: Memory

The Memory Layer (L2) manages all state and context for an AI agent, providing short-term memory (context windows), long-term persistence, and knowledge storage capabilities. It’s built on top of the Runtime Layer and serves as the foundation for agent cognition.
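The subsystems described below can be composed behind a single facade that the agent layer talks to. A minimal sketch of that composition, using the interfaces defined later in this section (the MemoryLayer name and the remember/recall methods are illustrative, not part of the spec):

// Illustrative composition of the memory subsystems; names are assumptions,
// the member interfaces are the ones defined later on this page.
interface MemoryLayer {
  shortTerm: ShortTermMemory
  conversations: ConversationStore
  facts: FactStore
  vectors: VectorStore
  graph: KnowledgeGraph

  // Record a turn in working memory and persist it asynchronously
  remember(role: string, content: string): Promise<void>
  // Assemble context: recent messages plus semantically related memories
  recall(query: string, maxTokens?: number): Promise<Message[]>
}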

[Architecture diagram: the Memory Controller coordinates Short-Term Memory, Long-Term Memory, the Vector Store, and the Knowledge Graph, with storage backends such as Redis, PostgreSQL, Pinecone, and Neo4j.]
Short-Term Memory

Maintains recent conversation history and working memory.

// Supporting types (assumed; not shown in the original listing)
type PruneStrategy = 'fifo' | 'summarize' | 'semantic'

interface Message {
  role: string
  content: string
  timestamp: number
}

interface ShortTermMemory {
  // Add to context
  addMessage(role: string, content: string): void
  // Retrieve context
  getContext(maxTokens?: number): Message[]
  // Manage context window
  prune(strategy: PruneStrategy): void
  clear(): void
  // Context stats
  getTokenCount(): number
  isFull(): boolean
}

class ContextWindow implements ShortTermMemory {
  private messages: Message[] = []
  private maxTokens: number = 4096

  addMessage(role: string, content: string): void {
    this.messages.push({ role, content, timestamp: Date.now() })
    // Auto-prune if over limit
    if (this.getTokenCount() > this.maxTokens) {
      this.prune('fifo')
    }
  }

  getContext(maxTokens?: number): Message[] {
    const limit = maxTokens || this.maxTokens
    let tokenCount = 0
    const context: Message[] = []
    // Walk backwards, keeping the most recent messages that fit
    for (let i = this.messages.length - 1; i >= 0; i--) {
      const msg = this.messages[i]
      const msgTokens = this.estimateTokens(msg.content)
      if (tokenCount + msgTokens > limit) break
      context.unshift(msg)
      tokenCount += msgTokens
    }
    return context
  }

  prune(strategy: PruneStrategy): void {
    switch (strategy) {
      case 'fifo':
        // Remove oldest messages until the window fits again
        while (this.getTokenCount() > this.maxTokens) {
          this.messages.shift()
        }
        break
      case 'summarize': {
        // Summarize old context, keeping the last 10 messages verbatim
        const oldContext = this.messages.slice(0, -10)
        const summary = this.summarize(oldContext)
        this.messages = [summary, ...this.messages.slice(-10)]
        break
      }
      case 'semantic':
        // Keep semantically important messages
        this.messages = this.rankByImportance(this.messages)
          .slice(0, this.calculateMaxMessages())
        break
    }
  }

  clear(): void {
    this.messages = []
  }

  getTokenCount(): number {
    return this.messages.reduce(
      (sum, m) => sum + this.estimateTokens(m.content), 0)
  }

  isFull(): boolean {
    return this.getTokenCount() >= this.maxTokens
  }

  // Rough heuristic: ~4 characters per token
  private estimateTokens(text: string): number {
    return Math.ceil(text.length / 4)
  }

  // summarize(), rankByImportance(), and calculateMaxMessages() are
  // strategy-specific helpers whose implementations are elided here
}
import time
from typing import List, Dict, Optional
from enum import Enum


class PruneStrategy(Enum):
    FIFO = "fifo"
    SUMMARIZE = "summarize"
    SEMANTIC = "semantic"


class ShortTermMemory:
    """Context window management"""

    def __init__(self, max_tokens: int = 4096):
        self.messages: List[Dict] = []
        self.max_tokens = max_tokens

    def add_message(self, role: str, content: str) -> None:
        """Add message to context"""
        self.messages.append({
            'role': role,
            'content': content,
            'timestamp': time.time()
        })
        # Auto-prune if over limit
        if self.get_token_count() > self.max_tokens:
            self.prune(PruneStrategy.FIFO)

    def get_context(self, max_tokens: Optional[int] = None) -> List[Dict]:
        """Get context within token limit"""
        limit = max_tokens or self.max_tokens
        token_count = 0
        context = []
        # Walk backwards, keeping the most recent messages that fit
        for msg in reversed(self.messages):
            msg_tokens = self.estimate_tokens(msg['content'])
            if token_count + msg_tokens > limit:
                break
            context.insert(0, msg)
            token_count += msg_tokens
        return context

    def prune(self, strategy: PruneStrategy) -> None:
        """Prune context using strategy"""
        if strategy == PruneStrategy.FIFO:
            # Remove oldest messages until the window fits again
            while self.get_token_count() > self.max_tokens and self.messages:
                self.messages.pop(0)
        elif strategy == PruneStrategy.SUMMARIZE:
            # Summarize old context, keeping the last 10 messages verbatim
            old_context = self.messages[:-10]
            summary = self.summarize(old_context)
            self.messages = [summary] + self.messages[-10:]
        elif strategy == PruneStrategy.SEMANTIC:
            # Keep semantically important messages
            ranked = self.rank_by_importance(self.messages)
            max_count = self.calculate_max_messages()
            self.messages = ranked[:max_count]

    def get_token_count(self) -> int:
        """Total estimated tokens across all messages"""
        return sum(self.estimate_tokens(m['content']) for m in self.messages)

    def estimate_tokens(self, text: str) -> int:
        """Rough heuristic: ~4 characters per token"""
        return max(1, len(text) // 4)

    # summarize(), rank_by_importance(), and calculate_max_messages() are
    # strategy-specific helpers whose implementations are elided here
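A quick usage sketch of the TypeScript ContextWindow above; the surrounding agent loop and the example messages are illustrative:

// Hypothetical turn of an agent loop using the ContextWindow defined above
const memory = new ContextWindow()

memory.addMessage('user', 'What storage backends does the memory layer support?')
memory.addMessage('assistant', 'Redis, PostgreSQL, Pinecone, or Neo4j, depending on the store.')

// Build the prompt from whatever still fits in the window
const prompt = memory.getContext(2048)
console.log(`Sending ${prompt.length} messages, ~${memory.getTokenCount()} tokens`)

// Force a summarization pass before a long tool-use episode
if (memory.isFull()) {
  memory.prune('summarize')
}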

Long-Term Memory

Stores agent state, conversation history, and learned knowledge permanently.

{
  "memory": {
    "persistence": "postgresql",
    "config": {
      "host": "localhost",
      "port": 5432,
      "database": "agent_memory",
      "retention": {
        "conversations": "30d",
        "facts": "1y",
        "preferences": "permanent"
      }
    },
    "schema": {
      "conversations": {
        "id": "uuid",
        "agent_id": "string",
        "user_id": "string",
        "messages": "jsonb",
        "created_at": "timestamp",
        "updated_at": "timestamp"
      },
      "facts": {
        "id": "uuid",
        "content": "text",
        "confidence": "float",
        "source": "string",
        "created_at": "timestamp"
      }
    }
  }
}
Conversation History
interface ConversationStore {
  // Save conversation; returns the conversation ID
  saveConversation(
    agentId: string,
    userId: string,
    messages: Message[]
  ): Promise<string>
  // Retrieve conversation
  getConversation(id: string): Promise<Conversation>
  // Search conversations
  searchConversations(query: SearchQuery): Promise<Conversation[]>
  // Delete old conversations; returns the number removed
  pruneConversations(olderThan: Date): Promise<number>
}
Learned Facts
interface FactStore {
  // Store fact
  storeFact(fact: Fact): Promise<string>
  // Retrieve facts
  getFactsAbout(subject: string): Promise<Fact[]>
  // Update confidence
  updateConfidence(factId: string, confidence: number): Promise<void>
  // Forget outdated facts; returns the number removed
  forgetFacts(filter: FactFilter): Promise<number>
}

interface Fact {
  id: string
  content: string
  confidence: number // 0.0 to 1.0
  source: string
  timestamp: Date
  metadata: Record<string, any>
}
User Preferences
interface PreferenceStore {
  // Set preference
  setPreference(userId: string, key: string, value: any): Promise<void>
  // Get preference
  getPreference(userId: string, key: string): Promise<any>
  // Get all preferences
  getAllPreferences(userId: string): Promise<Record<string, any>>
  // Delete preference
  deletePreference(userId: string, key: string): Promise<void>
}
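A sketch of how an agent might persist what it learned at the end of a turn, assuming concrete implementations of the FactStore and PreferenceStore interfaces above; the function name and example values are illustrative:

// Illustrative end-of-turn persistence; factStore and prefStore are assumed
// to be concrete implementations of the interfaces defined above.
async function persistTurn(
  factStore: FactStore,
  prefStore: PreferenceStore,
  userId: string
): Promise<void> {
  // Record something the agent inferred, with a confidence score
  await factStore.storeFact({
    id: 'fact-001',
    content: 'Alice prefers Python for data pipelines',
    confidence: 0.8,
    source: 'conversation',
    timestamp: new Date(),
    metadata: { userId }
  })

  // Remember an explicit user preference permanently
  await prefStore.setPreference(userId, 'language', 'python')
}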

Vector Store

Stores embeddings for semantic search and retrieval.

interface VectorStore {
  // Upsert embeddings
  upsert(vectors: Vector[]): Promise<void>
  // Semantic search
  search(query: number[], topK: number): Promise<SearchResult[]>
  // Delete vectors
  delete(ids: string[]): Promise<void>
  // Get stats
  getStats(): Promise<VectorStats>
}

interface Vector {
  id: string
  values: number[] // Embedding vector
  metadata: {
    text: string
    timestamp: Date
    source: string
  }
}
// Example: Using Pinecone
class PineconeVectorStore implements VectorStore {
  // Pinecone index handle, created during initialization (elided)
  private index: any

  async upsert(vectors: Vector[]): Promise<void> {
    await this.index.upsert({
      vectors: vectors.map(v => ({
        id: v.id,
        values: v.values,
        metadata: v.metadata
      }))
    })
  }

  async search(query: number[], topK: number): Promise<SearchResult[]> {
    const results = await this.index.query({
      vector: query,
      topK,
      includeMetadata: true
    })
    return results.matches.map(match => ({
      id: match.id,
      score: match.score,
      text: match.metadata.text,
      metadata: match.metadata
    }))
  }
}
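Storing and retrieving memories then looks roughly like this; the embed callback stands in for whatever embedding model you use and is not part of the layer:

// Illustrative round trip through the VectorStore interface above.
// embed() is a hypothetical helper that calls your embedding model.
async function rememberAndRecall(
  store: VectorStore,
  embed: (text: string) => Promise<number[]>
): Promise<void> {
  // Store a memory as an embedding plus metadata
  await store.upsert([{
    id: 'mem-001',
    values: await embed('Alice prefers Python for data pipelines'),
    metadata: {
      text: 'Alice prefers Python for data pipelines',
      timestamp: new Date(),
      source: 'conversation'
    }
  }])

  // Later: retrieve the memories most similar to the current query
  const results = await store.search(await embed('What language does Alice like?'), 5)
  for (const r of results) {
    console.log(r.score.toFixed(2), r.text)
  }
}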

Knowledge Graph

Structured knowledge representation with entities and relationships.

[Diagram: an example knowledge graph with entities User: Alice, Language: Python, Project: ARAL, Layer: Runtime, and Platform: Node.js, connected by relationships such as PREFERS, WORKS_ON, USES, HAS_LAYER, and DEPENDS_ON.]

interface KnowledgeGraph {
  // Add entities
  addEntity(entity: Entity): Promise<string>
  // Add relationships
  addRelationship(from: string, to: string, type: string): Promise<void>
  // Query graph
  query(cypher: string): Promise<QueryResult[]>
  // Get neighbors
  getNeighbors(entityId: string, depth: number): Promise<Entity[]>
}

// Example: Neo4j query
const findUserPreferences = `
  MATCH (u:User {id: $userId})-[:PREFERS]->(p)
  RETURN p.name, p.value
`
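A sketch of populating and querying the graph through the interface above, mirroring the example diagram; the entity shape and the function name are assumptions:

// Illustrative use of the KnowledgeGraph interface; entity fields are assumed
async function recordPreference(graph: KnowledgeGraph): Promise<void> {
  const alice = await graph.addEntity({ type: 'User', name: 'Alice' } as Entity)
  const python = await graph.addEntity({ type: 'Language', name: 'Python' } as Entity)

  // Alice PREFERS Python, as in the example graph above
  await graph.addRelationship(alice, python, 'PREFERS')

  // Run the Cypher query defined earlier (parameter binding elided)
  const prefs = await graph.query(findUserPreferences)
  console.log(prefs)

  // Or walk outward from an entity
  const related = await graph.getNeighbors(alice, 2)
  console.log(related.length, 'related entities')
}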
Putting it together, a typical memory layer configuration:

{
  "layers": {
    "memory": {
      "shortTerm": {
        "type": "context-window",
        "maxTokens": 4096,
        "pruneStrategy": "summarize"
      },
      "longTerm": {
        "type": "postgresql",
        "connection": {
          "host": "localhost",
          "port": 5432,
          "database": "agent_memory"
        },
        "retention": {
          "conversations": "30d",
          "facts": "1y"
        }
      },
      "vectorStore": {
        "type": "pinecone",
        "dimension": 1536,
        "metric": "cosine",
        "index": "agent-memory"
      },
      "knowledgeGraph": {
        "type": "neo4j",
        "connection": {
          "uri": "bolt://localhost:7687"
        }
      }
    }
  }
}
Context Management

DO:

  • Keep context within token limits
  • Summarize old context intelligently
  • Prioritize recent and important messages
  • Monitor token usage (see the sketch below)

DON’T:

  • Let context grow unbounded
  • Lose important context
  • Include irrelevant history
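For instance, a thin wrapper can check the token budget and log usage before every model call; the function name and the 90% threshold are illustrative:

// Illustrative budget check built on the ShortTermMemory interface above
function buildPrompt(memory: ShortTermMemory, budget = 4096): Message[] {
  const used = memory.getTokenCount()
  console.log(`context tokens: ${used}/${budget}`)

  // Summarize rather than silently dropping context once we near the limit
  if (used > budget * 0.9) {
    memory.prune('summarize')
  }
  return memory.getContext(budget)
}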
Data Persistence

DO:

  • Use appropriate storage for each data type
  • Implement backup strategies
  • Encrypt sensitive data
  • Set retention policies (example after this list)

DON’T:

  • Store everything in memory
  • Keep data indefinitely
  • Store plaintext secrets
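Retention policies such as the 30-day conversation window from the configuration above can be enforced with a periodic job; this sketch assumes a concrete ConversationStore implementation:

// Hypothetical nightly retention job built on the ConversationStore interface
async function enforceRetention(conversations: ConversationStore): Promise<void> {
  // Matches the "conversations": "30d" retention setting shown earlier
  const cutoff = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000)
  const removed = await conversations.pruneConversations(cutoff)
  console.log(`retention: removed ${removed} conversations older than 30 days`)
}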
Vector Search

DO:

  • Use appropriate embedding models
  • Batch upsert operations
  • Set reasonable similarity thresholds (see the filtering sketch below)
  • Cache frequently accessed vectors

DON’T:

  • Use low-quality embeddings
  • Perform individual upserts
  • Return too many results
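Similarity thresholds keep low-relevance matches out of the context; a sketch over the VectorStore interface above, where the 0.75 cutoff is an illustrative value to tune per embedding model:

// Filter out weak matches instead of stuffing every result into the prompt
async function relevantMemories(
  store: VectorStore,
  queryEmbedding: number[],
  minScore = 0.75
): Promise<SearchResult[]> {
  const results = await store.search(queryEmbedding, 10)
  return results.filter(r => r.score >= minScore)
}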
Caching frequently accessed context avoids a database round trip on every turn:

import { LRUCache } from 'lru-cache'

class CachedMemoryLayer {
  private cache = new LRUCache({ max: 1000 })
  private db: any // long-term store handle (initialization elided)

  async getContext(userId: string): Promise<Context> {
    // Check cache first
    const cached = this.cache.get(userId)
    if (cached) return cached
    // Fetch from database
    const context = await this.db.getContext(userId)
    // Cache result
    this.cache.set(userId, context)
    return context
  }
}
Batching vector upserts amortizes network overhead instead of issuing one request per vector:

class BatchedVectorStore {
  private batch: Vector[] = []
  private batchSize = 100
  private vectorStore: VectorStore // underlying store (initialization elided)

  async upsert(vector: Vector): Promise<void> {
    this.batch.push(vector)
    // Flush once the batch is full
    if (this.batch.length >= this.batchSize) {
      await this.flush()
    }
  }

  async flush(): Promise<void> {
    if (this.batch.length === 0) return
    await this.vectorStore.upsert(this.batch)
    this.batch = []
  }
}
-- Index for fast conversation lookup
CREATE INDEX idx_conversations_user_agent
ON conversations(user_id, agent_id, created_at DESC);
-- Index for fact search
CREATE INDEX idx_facts_content_gin
ON facts USING gin(to_tsvector('english', content));
-- Index for recency-ordered lookups of active conversations
-- (PostgreSQL does not allow NOW() in a partial-index predicate,
--  so index the column and apply the time filter at query time)
CREATE INDEX idx_conversations_updated_at
ON conversations(updated_at DESC);