Layer 2: Memory
Overview
The Memory Layer (L2) manages all state and context for an AI agent, providing short-term memory (context windows), long-term persistence, and knowledge storage. It is built on top of the Runtime Layer and serves as the foundation for agent cognition.
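A rough way to picture the layer is as a single facade composed of the subsystems documented below. The MemoryLayer shape in this sketch is purely illustrative (it is not an API defined on this page); the member interfaces are the ones introduced in the Architecture section.

// Sketch only: a possible facade over the memory subsystems described on this page.
// The MemoryLayer name and its members are illustrative, not a defined API.
interface MemoryLayer {
  shortTerm: ShortTermMemory                                  // context window (working memory)
  longTerm: ConversationStore & FactStore & PreferenceStore   // durable state across sessions
  vectors: VectorStore                                        // semantic (embedding) memory
  knowledge: KnowledgeGraph                                   // structured entities and relationships
}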
Responsibilities
Context Management
Manage conversation context and working memory windows
State Persistence
Store and retrieve agent state across sessions
Knowledge Storage
Vector databases, knowledge graphs, embeddings
Memory Lifecycle
Retention policies, garbage collection, archival (see the sketch after this list)
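The lifecycle responsibility has no dedicated example later on this page, so the following sketch shows what a periodic retention sweep could look like. It relies on the ConversationStore and FactStore interfaces defined further down; the RetentionPolicy shape, the runRetentionSweep helper, and the { maxConfidence } filter field are assumptions made here for illustration.

// Hypothetical retention sweep built on the stores defined later on this page.
// RetentionPolicy and the { maxConfidence } filter field are assumed, not defined APIs.
interface RetentionPolicy {
  conversationDays: number   // delete conversations older than this many days
  minFactConfidence: number  // forget facts that stayed below this confidence
}

async function runRetentionSweep(
  conversations: ConversationStore,
  facts: FactStore,
  policy: RetentionPolicy
): Promise<void> {
  const cutoff = new Date(Date.now() - policy.conversationDays * 24 * 60 * 60 * 1000)
  const prunedConversations = await conversations.pruneConversations(cutoff)
  const forgottenFacts = await facts.forgetFacts({ maxConfidence: policy.minFactConfidence })
  console.log(`retention sweep removed ${prunedConversations} conversations, ${forgottenFacts} facts`)
}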
Architecture
Memory Types
1. Short-Term Memory (Context Window)
Maintains recent conversation history and working memory.
interface ShortTermMemory {
  // Add to context
  addMessage(role: string, content: string): void

  // Retrieve context
  getContext(maxTokens?: number): Message[]

  // Manage context window
  prune(strategy: PruneStrategy): void
  clear(): void

  // Context stats
  getTokenCount(): number
  isFull(): boolean
}
class ContextWindow implements ShortTermMemory {
  private messages: Message[] = []
  private maxTokens: number = 4096

  addMessage(role: string, content: string): void {
    this.messages.push({ role, content, timestamp: Date.now() })

    // Auto-prune if over limit
    if (this.getTokenCount() > this.maxTokens) {
      this.prune('fifo')
    }
  }

  getContext(maxTokens?: number): Message[] {
    const limit = maxTokens || this.maxTokens
    let tokenCount = 0
    const context: Message[] = []

    // Get most recent messages that fit
    for (let i = this.messages.length - 1; i >= 0; i--) {
      const msg = this.messages[i]
      const msgTokens = this.estimateTokens(msg.content)

      if (tokenCount + msgTokens > limit) break

      context.unshift(msg)
      tokenCount += msgTokens
    }

    return context
  }

  prune(strategy: PruneStrategy): void {
    switch (strategy) {
      case 'fifo':
        // Remove oldest messages
        while (this.getTokenCount() > this.maxTokens) {
          this.messages.shift()
        }
        break

      case 'summarize':
        // Summarize old context
        const oldContext = this.messages.slice(0, -10)
        const summary = this.summarize(oldContext)
        this.messages = [summary, ...this.messages.slice(-10)]
        break

      case 'semantic':
        // Keep semantically important messages
        this.messages = this.rankByImportance(this.messages)
          .slice(0, this.calculateMaxMessages())
        break
    }
  }
}

# Python equivalent of the context-window implementation above
import time
from typing import List, Dict, Optional
from enum import Enum
class PruneStrategy(Enum):
    FIFO = "fifo"
    SUMMARIZE = "summarize"
    SEMANTIC = "semantic"


class ShortTermMemory:
    """Context window management"""

    def __init__(self, max_tokens: int = 4096):
        self.messages: List[Dict] = []
        self.max_tokens = max_tokens

    def add_message(self, role: str, content: str) -> None:
        """Add message to context"""
        self.messages.append({
            'role': role,
            'content': content,
            'timestamp': time.time()
        })

        # Auto-prune if over limit
        if self.get_token_count() > self.max_tokens:
            self.prune(PruneStrategy.FIFO)

    def get_context(self, max_tokens: Optional[int] = None) -> List[Dict]:
        """Get context within token limit"""
        limit = max_tokens or self.max_tokens
        token_count = 0
        context = []

        # Get most recent messages that fit
        for msg in reversed(self.messages):
            msg_tokens = self.estimate_tokens(msg['content'])

            if token_count + msg_tokens > limit:
                break

            context.insert(0, msg)
            token_count += msg_tokens

        return context

    def prune(self, strategy: PruneStrategy) -> None:
        """Prune context using strategy"""
        if strategy == PruneStrategy.FIFO:
            # Remove oldest messages
            while self.get_token_count() > self.max_tokens:
                self.messages.pop(0)

        elif strategy == PruneStrategy.SUMMARIZE:
            # Summarize old context
            old_context = self.messages[:-10]
            summary = self.summarize(old_context)
            self.messages = [summary] + self.messages[-10:]

        elif strategy == PruneStrategy.SEMANTIC:
            # Keep semantically important messages
            ranked = self.rank_by_importance(self.messages)
            max_count = self.calculate_max_messages()
            self.messages = ranked[:max_count]
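Before moving on to persistent storage, here is a brief usage sketch of the TypeScript ContextWindow above. The message text and token budget are arbitrary, and the private helpers (estimateTokens, summarize, rankByImportance) are assumed to be implemented.

// Usage sketch for the ContextWindow class defined above.
const window = new ContextWindow()

window.addMessage('user', 'What did we decide about the deployment schedule?')
window.addMessage('assistant', 'We agreed to ship the memory layer changes on Friday.')

// Fetch only what fits in the model's budget; oldest messages drop out first.
const context = window.getContext(2048)
console.log(`sending ${context.length} messages, ${window.getTokenCount()} tokens tracked`)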
2. Long-Term Memory (Persistence)
Stores agent state, conversation history, and learned knowledge durably across sessions.
{
  "memory": {
    "persistence": "postgresql",
    "config": {
      "host": "localhost",
      "port": 5432,
      "database": "agent_memory",
      "retention": {
        "conversations": "30d",
        "facts": "1y",
        "preferences": "permanent"
      }
    },
    "schema": {
      "conversations": {
        "id": "uuid",
        "agent_id": "string",
        "user_id": "string",
        "messages": "jsonb",
        "created_at": "timestamp",
        "updated_at": "timestamp"
      },
      "facts": {
        "id": "uuid",
        "content": "text",
        "confidence": "float",
        "source": "string",
        "created_at": "timestamp"
      }
    }
  }
}

Conversation History
interface ConversationStore {
  // Save conversation; returns the conversation ID
  saveConversation(
    agentId: string,
    userId: string,
    messages: Message[]
  ): Promise<string>

  // Retrieve conversation
  getConversation(id: string): Promise<Conversation>

  // Search conversations
  searchConversations(query: SearchQuery): Promise<Conversation[]>

  // Delete old conversations
  pruneConversations(olderThan: Date): Promise<number>
}

Learned Facts
interface FactStore {
  // Store fact
  storeFact(fact: Fact): Promise<string>

  // Retrieve facts
  getFactsAbout(subject: string): Promise<Fact[]>

  // Update confidence
  updateConfidence(factId: string, confidence: number): Promise<void>

  // Forget outdated facts
  forgetFacts(filter: FactFilter): Promise<number>
}

interface Fact {
  id: string
  content: string
  confidence: number // 0.0 to 1.0
  source: string
  timestamp: Date
  metadata: Record<string, any>
}
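As a usage sketch of FactStore, an agent might record a fact with moderate confidence, raise it once the user confirms it, and periodically forget facts that never got corroborated. The factStore variable, the concrete values, and the { maxConfidence } filter shape are assumptions (FactFilter is not defined on this page).

// Usage sketch for FactStore; values and the filter shape are illustrative.
const factId = await factStore.storeFact({
  id: crypto.randomUUID(),
  content: 'User prefers staging deploys on Thursdays',
  confidence: 0.6,
  source: 'conversation:1234',
  timestamp: new Date(),
  metadata: {}
})

// Later, the user confirms the preference, so raise confidence.
await factStore.updateConfidence(factId, 0.9)

// Periodically drop facts that never rose above a low confidence (filter shape assumed).
await factStore.forgetFacts({ maxConfidence: 0.3 })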
User Preferences

interface PreferenceStore {
  // Set preference
  setPreference(userId: string, key: string, value: any): Promise<void>

  // Get preference
  getPreference(userId: string, key: string): Promise<any>

  // Get all preferences
  getAllPreferences(userId: string): Promise<Record<string, any>>

  // Delete preference
  deletePreference(userId: string, key: string): Promise<void>
}

3. Vector Store (Semantic Memory)
Stores embeddings for semantic search and retrieval.
interface VectorStore {
  // Upsert embeddings
  upsert(vectors: Vector[]): Promise<void>

  // Semantic search
  search(query: number[], topK: number): Promise<SearchResult[]>

  // Delete vectors
  delete(ids: string[]): Promise<void>

  // Get stats
  getStats(): Promise<VectorStats>
}

interface Vector {
  id: string
  values: number[] // Embedding vector
  metadata: {
    text: string
    timestamp: Date
    source: string
  }
}

// Example: Using Pinecone
class PineconeVectorStore implements VectorStore {
  async upsert(vectors: Vector[]): Promise<void> {
    await this.index.upsert({
      vectors: vectors.map(v => ({
        id: v.id,
        values: v.values,
        metadata: v.metadata
      }))
    })
  }

  async search(query: number[], topK: number): Promise<SearchResult[]> {
    const results = await this.index.query({
      vector: query,
      topK,
      includeMetadata: true
    })

    return results.matches.map(match => ({
      id: match.id,
      score: match.score,
      text: match.metadata.text,
      metadata: match.metadata
    }))
  }
}
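A typical write/read path through this store embeds text before upserting and again before querying. The sketch below assumes a hypothetical embed(text) helper that returns a vector matching the index dimension (1536 in the configuration example later on this page); it is not a specific embedding API.

// Usage sketch against the VectorStore interface above; embed() is a hypothetical helper.
async function remember(store: VectorStore, text: string, source: string): Promise<void> {
  const values = await embed(text) // e.g. a 1536-dimensional embedding
  await store.upsert([{
    id: crypto.randomUUID(),
    values,
    metadata: { text, timestamp: new Date(), source }
  }])
}

async function recall(store: VectorStore, question: string): Promise<string[]> {
  const results = await store.search(await embed(question), 5)
  return results.map(r => r.text)
}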
4. Knowledge Graph
Structured knowledge representation with entities and relationships.
interface KnowledgeGraph {
  // Add entities
  addEntity(entity: Entity): Promise<string>

  // Add relationships
  addRelationship(from: string, to: string, type: string): Promise<void>

  // Query graph
  query(cypher: string): Promise<QueryResult[]>

  // Get neighbors
  getNeighbors(entityId: string, depth: number): Promise<Entity[]>}

// Example: Neo4j query
const findUserPreferences = `
  MATCH (u:User {id: $userId})-[:PREFERS]->(p)
  RETURN p.name, p.value
`
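Putting the interface and the query together, a sketch of recording and reading back a preference might look like the following. The graph variable, the Entity field names, and the ID values are assumptions; the Cypher string reuses findUserPreferences from above.

// Usage sketch for the KnowledgeGraph interface; Entity fields and IDs are assumed.
const userNode = await graph.addEntity({ type: 'User', id: 'user-1', properties: {} })
const prefNode = await graph.addEntity({
  type: 'Preference',
  id: 'pref-theme',
  properties: { name: 'theme', value: 'dark' }
})
await graph.addRelationship(userNode, prefNode, 'PREFERS')

// Read it back with the Cypher query defined above
// (parameter binding for $userId is elided; the interface takes only a query string).
const rows = await graph.query(findUserPreferences)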
Configuration Example

{
  "layers": {
    "memory": {
      "shortTerm": {
        "type": "context-window",
        "maxTokens": 4096,
        "pruneStrategy": "summarize"
      },
      "longTerm": {
        "type": "postgresql",
        "connection": {
          "host": "localhost",
          "port": 5432,
          "database": "agent_memory"
        },
        "retention": {
          "conversations": "30d",
          "facts": "1y"
        }
      },
      "vectorStore": {
        "type": "pinecone",
        "dimension": 1536,
        "metric": "cosine",
        "index": "agent-memory"
      },
      "knowledgeGraph": {
        "type": "neo4j",
        "connection": {
          "uri": "bolt://localhost:7687"
        }
      }
    }
  }
}

Best Practices
Context Management
✅ DO:
- Keep context within token limits
- Summarize old context intelligently
- Prioritize recent and important messages
- Monitor token usage
❌ DON’T:
- Let context grow unbounded
- Lose important context
- Include irrelevant history
Data Persistence
✅ DO:
- Use appropriate storage for each data type
- Implement backup strategies
- Encrypt sensitive data (see the sketch after this list)
- Set retention policies
❌ DON’T:
- Store everything in memory
- Keep data indefinitely
- Store plaintext secrets
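As a sketch of the "encrypt sensitive data" guideline, preference values could be encrypted before they reach the preference store. This uses Node's built-in crypto module with AES-256-GCM; key management (where encryptionKey comes from, rotation) is deliberately out of scope here.

import { randomBytes, createCipheriv, createDecipheriv } from 'node:crypto'

// Sketch: encrypt a preference value before persisting it (AES-256-GCM).
// Key management is out of scope; encryptionKey must be a 32-byte secret.
function encryptValue(plaintext: string, encryptionKey: Buffer): string {
  const iv = randomBytes(12)
  const cipher = createCipheriv('aes-256-gcm', encryptionKey, iv)
  const ciphertext = Buffer.concat([cipher.update(plaintext, 'utf8'), cipher.final()])
  // Store the IV and auth tag alongside the ciphertext so it can be decrypted later.
  return [iv, cipher.getAuthTag(), ciphertext].map(b => b.toString('base64')).join('.')
}

function decryptValue(encoded: string, encryptionKey: Buffer): string {
  const [iv, tag, ciphertext] = encoded.split('.').map(p => Buffer.from(p, 'base64'))
  const decipher = createDecipheriv('aes-256-gcm', encryptionKey, iv)
  decipher.setAuthTag(tag)
  return Buffer.concat([decipher.update(ciphertext), decipher.final()]).toString('utf8')
}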
Vector Search
✅ DO:
- Use appropriate embedding models
- Batch upsert operations
- Set reasonable similarity thresholds (see the sketch after this list)
- Cache frequently accessed vectors
❌ DON’T:
- Use low-quality embeddings
- Perform individual upserts
- Return too many results
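One way to apply a similarity threshold is to filter search results before they reach the prompt, so marginally related memories never get injected. The 0.75 cutoff below is an arbitrary illustration; tune it against your embedding model and distance metric.

// Sketch: drop weak matches before using recalled memories (threshold value is illustrative).
async function recallRelevant(store: VectorStore, queryVector: number[]): Promise<SearchResult[]> {
  const results = await store.search(queryVector, 10)
  return results.filter(r => r.score >= 0.75) // cosine-similarity cutoff
}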
Performance Tips
class CachedMemoryLayer {
  private cache = new LRUCache({ max: 1000 })

  async getContext(userId: string): Promise<Context> {
    // Check cache first
    const cached = this.cache.get(userId)
    if (cached) return cached

    // Fetch from database
    const context = await this.db.getContext(userId)

    // Cache result
    this.cache.set(userId, context)

    return context
  }
}

class BatchedVectorStore {
  private batch: Vector[] = []
  private batchSize = 100

  async upsert(vector: Vector): Promise<void> {
    this.batch.push(vector)

    if (this.batch.length >= this.batchSize) {
      await this.flush()
    }
  }

  async flush(): Promise<void> {
    if (this.batch.length === 0) return

    await this.vectorStore.upsert(this.batch)
    this.batch = []
  }
}

-- Index for fast conversation lookup
CREATE INDEX idx_conversations_user_agent
ON conversations(user_id, agent_id, created_at DESC);

-- Index for fact search
CREATE INDEX idx_facts_content_gin
ON facts USING gin(to_tsvector('english', content));
-- Index for recently active conversations
-- (a rolling "last 24 hours" partial index is not possible in PostgreSQL,
--  since partial-index predicates must be immutable and cannot use NOW())
CREATE INDEX idx_active_conversations
ON conversations(updated_at DESC);