Layer 2: Memory

The Memory Layer (L2) manages all state and context for an AI agent, providing short-term memory (context windows), long-term persistence, and knowledge storage capabilities. It’s built on top of the Runtime Layer and serves as the foundation for agent cognition.
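The subsystems described below can be composed behind a single facade that the agent layer talks to. A minimal sketch of that composition, using the interfaces defined later in this section (the MemoryLayer name and the remember/recall methods are illustrative, not part of the spec):

// Illustrative composition of the memory subsystems; names are assumptions,
// the member interfaces are the ones defined later on this page.
interface MemoryLayer {
  shortTerm: ShortTermMemory
  conversations: ConversationStore
  facts: FactStore
  vectors: VectorStore
  graph: KnowledgeGraph

  // Record a turn in working memory and persist it asynchronously
  remember(role: string, content: string): Promise<void>
  // Assemble context: recent messages plus semantically related memories
  recall(query: string, maxTokens?: number): Promise<Message[]>
}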

[Architecture diagram: the Memory Controller coordinates Short-Term Memory, Long-Term Memory, the Vector Store, and the Knowledge Graph, with storage backends such as Redis, PostgreSQL, Pinecone, and Neo4j.]
Short-Term Memory

Maintains recent conversation history and working memory.

// Supporting types (assumed; not shown in the original listing)
type PruneStrategy = 'fifo' | 'summarize' | 'semantic'

interface Message {
  role: string
  content: string
  timestamp: number
}

interface ShortTermMemory {
  // Add to context
  addMessage(role: string, content: string): void
  // Retrieve context
  getContext(maxTokens?: number): Message[]
  // Manage context window
  prune(strategy: PruneStrategy): void
  clear(): void
  // Context stats
  getTokenCount(): number
  isFull(): boolean
}

class ContextWindow implements ShortTermMemory {
  private messages: Message[] = []
  private maxTokens: number = 4096

  addMessage(role: string, content: string): void {
    this.messages.push({ role, content, timestamp: Date.now() })
    // Auto-prune if over limit
    if (this.getTokenCount() > this.maxTokens) {
      this.prune('fifo')
    }
  }

  getContext(maxTokens?: number): Message[] {
    const limit = maxTokens || this.maxTokens
    let tokenCount = 0
    const context: Message[] = []
    // Walk backwards, keeping the most recent messages that fit
    for (let i = this.messages.length - 1; i >= 0; i--) {
      const msg = this.messages[i]
      const msgTokens = this.estimateTokens(msg.content)
      if (tokenCount + msgTokens > limit) break
      context.unshift(msg)
      tokenCount += msgTokens
    }
    return context
  }

  prune(strategy: PruneStrategy): void {
    switch (strategy) {
      case 'fifo':
        // Remove oldest messages until the window fits again
        while (this.getTokenCount() > this.maxTokens) {
          this.messages.shift()
        }
        break
      case 'summarize': {
        // Summarize old context, keeping the last 10 messages verbatim
        const oldContext = this.messages.slice(0, -10)
        const summary = this.summarize(oldContext)
        this.messages = [summary, ...this.messages.slice(-10)]
        break
      }
      case 'semantic':
        // Keep semantically important messages
        this.messages = this.rankByImportance(this.messages)
          .slice(0, this.calculateMaxMessages())
        break
    }
  }

  clear(): void {
    this.messages = []
  }

  getTokenCount(): number {
    return this.messages.reduce(
      (sum, m) => sum + this.estimateTokens(m.content), 0)
  }

  isFull(): boolean {
    return this.getTokenCount() >= this.maxTokens
  }

  // Rough heuristic: ~4 characters per token
  private estimateTokens(text: string): number {
    return Math.ceil(text.length / 4)
  }

  // summarize(), rankByImportance(), and calculateMaxMessages() are
  // strategy-specific helpers whose implementations are elided here
}
import time
from typing import List, Dict, Optional
from enum import Enum


class PruneStrategy(Enum):
    FIFO = "fifo"
    SUMMARIZE = "summarize"
    SEMANTIC = "semantic"


class ShortTermMemory:
    """Context window management"""

    def __init__(self, max_tokens: int = 4096):
        self.messages: List[Dict] = []
        self.max_tokens = max_tokens

    def add_message(self, role: str, content: str) -> None:
        """Add message to context"""
        self.messages.append({
            'role': role,
            'content': content,
            'timestamp': time.time()
        })
        # Auto-prune if over limit
        if self.get_token_count() > self.max_tokens:
            self.prune(PruneStrategy.FIFO)

    def get_context(self, max_tokens: Optional[int] = None) -> List[Dict]:
        """Get context within token limit"""
        limit = max_tokens or self.max_tokens
        token_count = 0
        context = []
        # Walk backwards, keeping the most recent messages that fit
        for msg in reversed(self.messages):
            msg_tokens = self.estimate_tokens(msg['content'])
            if token_count + msg_tokens > limit:
                break
            context.insert(0, msg)
            token_count += msg_tokens
        return context

    def prune(self, strategy: PruneStrategy) -> None:
        """Prune context using strategy"""
        if strategy == PruneStrategy.FIFO:
            # Remove oldest messages until the window fits again
            while self.get_token_count() > self.max_tokens and self.messages:
                self.messages.pop(0)
        elif strategy == PruneStrategy.SUMMARIZE:
            # Summarize old context, keeping the last 10 messages verbatim
            old_context = self.messages[:-10]
            summary = self.summarize(old_context)
            self.messages = [summary] + self.messages[-10:]
        elif strategy == PruneStrategy.SEMANTIC:
            # Keep semantically important messages
            ranked = self.rank_by_importance(self.messages)
            max_count = self.calculate_max_messages()
            self.messages = ranked[:max_count]

    def get_token_count(self) -> int:
        """Total estimated tokens across all messages"""
        return sum(self.estimate_tokens(m['content']) for m in self.messages)

    def estimate_tokens(self, text: str) -> int:
        """Rough heuristic: ~4 characters per token"""
        return max(1, len(text) // 4)

    # summarize(), rank_by_importance(), and calculate_max_messages() are
    # strategy-specific helpers whose implementations are elided here
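A quick usage sketch of the TypeScript ContextWindow above; the surrounding agent loop and the example messages are illustrative:

// Hypothetical turn of an agent loop using the ContextWindow defined above
const memory = new ContextWindow()

memory.addMessage('user', 'What storage backends does the memory layer support?')
memory.addMessage('assistant', 'Redis, PostgreSQL, Pinecone, or Neo4j, depending on the store.')

// Build the prompt from whatever still fits in the window
const prompt = memory.getContext(2048)
console.log(`Sending ${prompt.length} messages, ~${memory.getTokenCount()} tokens`)

// Force a summarization pass before a long tool-use episode
if (memory.isFull()) {
  memory.prune('summarize')
}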

Long-Term Memory

Stores agent state, conversation history, and learned knowledge permanently.

{
  "memory": {
    "persistence": "postgresql",
    "config": {
      "host": "localhost",
      "port": 5432,
      "database": "agent_memory",
      "retention": {
        "conversations": "30d",
        "facts": "1y",
        "preferences": "permanent"
      }
    },
    "schema": {
      "conversations": {
        "id": "uuid",
        "agent_id": "string",
        "user_id": "string",
        "messages": "jsonb",
        "created_at": "timestamp",
        "updated_at": "timestamp"
      },
      "facts": {
        "id": "uuid",
        "content": "text",
        "confidence": "float",
        "source": "string",
        "created_at": "timestamp"
      }
    }
  }
}
Conversation History
interface ConversationStore {
  // Save conversation; returns the conversation ID
  saveConversation(
    agentId: string,
    userId: string,
    messages: Message[]
  ): Promise<string>
  // Retrieve conversation
  getConversation(id: string): Promise<Conversation>
  // Search conversations
  searchConversations(query: SearchQuery): Promise<Conversation[]>
  // Delete old conversations; returns the number removed
  pruneConversations(olderThan: Date): Promise<number>
}
Learned Facts
interface FactStore {
  // Store fact
  storeFact(fact: Fact): Promise<string>
  // Retrieve facts
  getFactsAbout(subject: string): Promise<Fact[]>
  // Update confidence
  updateConfidence(factId: string, confidence: number): Promise<void>
  // Forget outdated facts; returns the number removed
  forgetFacts(filter: FactFilter): Promise<number>
}

interface Fact {
  id: string
  content: string
  confidence: number // 0.0 to 1.0
  source: string
  timestamp: Date
  metadata: Record<string, any>
}
User Preferences
interface PreferenceStore {
  // Set preference
  setPreference(userId: string, key: string, value: any): Promise<void>
  // Get preference
  getPreference(userId: string, key: string): Promise<any>
  // Get all preferences
  getAllPreferences(userId: string): Promise<Record<string, any>>
  // Delete preference
  deletePreference(userId: string, key: string): Promise<void>
}
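A sketch of how an agent might persist what it learned at the end of a turn, assuming concrete implementations of the FactStore and PreferenceStore interfaces above; the function name and example values are illustrative:

// Illustrative end-of-turn persistence; factStore and prefStore are assumed
// to be concrete implementations of the interfaces defined above.
async function persistTurn(
  factStore: FactStore,
  prefStore: PreferenceStore,
  userId: string
): Promise<void> {
  // Record something the agent inferred, with a confidence score
  await factStore.storeFact({
    id: 'fact-001',
    content: 'Alice prefers Python for data pipelines',
    confidence: 0.8,
    source: 'conversation',
    timestamp: new Date(),
    metadata: { userId }
  })

  // Remember an explicit user preference permanently
  await prefStore.setPreference(userId, 'language', 'python')
}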

Vector Store

Stores embeddings for semantic search and retrieval.

interface VectorStore {
  // Upsert embeddings
  upsert(vectors: Vector[]): Promise<void>
  // Semantic search
  search(query: number[], topK: number): Promise<SearchResult[]>
  // Delete vectors
  delete(ids: string[]): Promise<void>
  // Get stats
  getStats(): Promise<VectorStats>
}

interface Vector {
  id: string
  values: number[] // Embedding vector
  metadata: {
    text: string
    timestamp: Date
    source: string
  }
}
// Example: Using Pinecone
class PineconeVectorStore implements VectorStore {
  // Pinecone index handle, created during initialization (elided)
  private index: any

  async upsert(vectors: Vector[]): Promise<void> {
    await this.index.upsert({
      vectors: vectors.map(v => ({
        id: v.id,
        values: v.values,
        metadata: v.metadata
      }))
    })
  }

  async search(query: number[], topK: number): Promise<SearchResult[]> {
    const results = await this.index.query({
      vector: query,
      topK,
      includeMetadata: true
    })
    return results.matches.map(match => ({
      id: match.id,
      score: match.score,
      text: match.metadata.text,
      metadata: match.metadata
    }))
  }
}
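Storing and retrieving memories then looks roughly like this; the embed callback stands in for whatever embedding model you use and is not part of the layer:

// Illustrative round trip through the VectorStore interface above.
// embed() is a hypothetical helper that calls your embedding model.
async function rememberAndRecall(
  store: VectorStore,
  embed: (text: string) => Promise<number[]>
): Promise<void> {
  // Store a memory as an embedding plus metadata
  await store.upsert([{
    id: 'mem-001',
    values: await embed('Alice prefers Python for data pipelines'),
    metadata: {
      text: 'Alice prefers Python for data pipelines',
      timestamp: new Date(),
      source: 'conversation'
    }
  }])

  // Later: retrieve the memories most similar to the current query
  const results = await store.search(await embed('What language does Alice like?'), 5)
  for (const r of results) {
    console.log(r.score.toFixed(2), r.text)
  }
}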

Knowledge Graph

Structured knowledge representation with entities and relationships.

[Diagram: an example knowledge graph with entities User: Alice, Language: Python, Project: ARAL, Layer: Runtime, and Platform: Node.js, connected by relationships such as PREFERS, WORKS_ON, USES, HAS_LAYER, and DEPENDS_ON.]

interface KnowledgeGraph {
  // Add entities
  addEntity(entity: Entity): Promise<string>
  // Add relationships
  addRelationship(from: string, to: string, type: string): Promise<void>
  // Query graph
  query(cypher: string): Promise<QueryResult[]>
  // Get neighbors
  getNeighbors(entityId: string, depth: number): Promise<Entity[]>
}

// Example: Neo4j query
const findUserPreferences = `
  MATCH (u:User {id: $userId})-[:PREFERS]->(p)
  RETURN p.name, p.value
`
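A sketch of populating and querying the graph through the interface above, mirroring the example diagram; the entity shape and the function name are assumptions:

// Illustrative use of the KnowledgeGraph interface; entity fields are assumed
async function recordPreference(graph: KnowledgeGraph): Promise<void> {
  const alice = await graph.addEntity({ type: 'User', name: 'Alice' } as Entity)
  const python = await graph.addEntity({ type: 'Language', name: 'Python' } as Entity)

  // Alice PREFERS Python, as in the example graph above
  await graph.addRelationship(alice, python, 'PREFERS')

  // Run the Cypher query defined earlier (parameter binding elided)
  const prefs = await graph.query(findUserPreferences)
  console.log(prefs)

  // Or walk outward from an entity
  const related = await graph.getNeighbors(alice, 2)
  console.log(related.length, 'related entities')
}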
Putting it together, a typical memory layer configuration:

{
  "layers": {
    "memory": {
      "shortTerm": {
        "type": "context-window",
        "maxTokens": 4096,
        "pruneStrategy": "summarize"
      },
      "longTerm": {
        "type": "postgresql",
        "connection": {
          "host": "localhost",
          "port": 5432,
          "database": "agent_memory"
        },
        "retention": {
          "conversations": "30d",
          "facts": "1y"
        }
      },
      "vectorStore": {
        "type": "pinecone",
        "dimension": 1536,
        "metric": "cosine",
        "index": "agent-memory"
      },
      "knowledgeGraph": {
        "type": "neo4j",
        "connection": {
          "uri": "bolt://localhost:7687"
        }
      }
    }
  }
}
Context Management

DO:

  • Keep context within token limits
  • Summarize old context intelligently
  • Prioritize recent and important messages
  • Monitor token usage (see the sketch below)

DON’T:

  • Let context grow unbounded
  • Lose important context
  • Include irrelevant history
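For instance, a thin wrapper can check the token budget and log usage before every model call; the function name and the 90% threshold are illustrative:

// Illustrative budget check built on the ShortTermMemory interface above
function buildPrompt(memory: ShortTermMemory, budget = 4096): Message[] {
  const used = memory.getTokenCount()
  console.log(`context tokens: ${used}/${budget}`)

  // Summarize rather than silently dropping context once we near the limit
  if (used > budget * 0.9) {
    memory.prune('summarize')
  }
  return memory.getContext(budget)
}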
Data Persistence

DO:

  • Use appropriate storage for each data type
  • Implement backup strategies
  • Encrypt sensitive data
  • Set retention policies (example after this list)

DON’T:

  • Store everything in memory
  • Keep data indefinitely
  • Store plaintext secrets
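Retention policies such as the 30-day conversation window from the configuration above can be enforced with a periodic job; this sketch assumes a concrete ConversationStore implementation:

// Hypothetical nightly retention job built on the ConversationStore interface
async function enforceRetention(conversations: ConversationStore): Promise<void> {
  // Matches the "conversations": "30d" retention setting shown earlier
  const cutoff = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000)
  const removed = await conversations.pruneConversations(cutoff)
  console.log(`retention: removed ${removed} conversations older than 30 days`)
}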
Vector Search

DO:

  • Use appropriate embedding models
  • Batch upsert operations
  • Set reasonable similarity thresholds (see the filtering sketch below)
  • Cache frequently accessed vectors

DON’T:

  • Use low-quality embeddings
  • Perform individual upserts
  • Return too many results
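Similarity thresholds keep low-relevance matches out of the context; a sketch over the VectorStore interface above, where the 0.75 cutoff is an illustrative value to tune per embedding model:

// Filter out weak matches instead of stuffing every result into the prompt
async function relevantMemories(
  store: VectorStore,
  queryEmbedding: number[],
  minScore = 0.75
): Promise<SearchResult[]> {
  const results = await store.search(queryEmbedding, 10)
  return results.filter(r => r.score >= minScore)
}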
Caching frequently accessed context avoids a database round trip on every turn:

import { LRUCache } from 'lru-cache'

class CachedMemoryLayer {
  private cache = new LRUCache({ max: 1000 })
  private db: any // long-term store handle (initialization elided)

  async getContext(userId: string): Promise<Context> {
    // Check cache first
    const cached = this.cache.get(userId)
    if (cached) return cached
    // Fetch from database
    const context = await this.db.getContext(userId)
    // Cache result
    this.cache.set(userId, context)
    return context
  }
}
Batching vector upserts amortizes network overhead instead of issuing one request per vector:

class BatchedVectorStore {
  private batch: Vector[] = []
  private batchSize = 100
  private vectorStore: VectorStore // underlying store (initialization elided)

  async upsert(vector: Vector): Promise<void> {
    this.batch.push(vector)
    // Flush once the batch is full
    if (this.batch.length >= this.batchSize) {
      await this.flush()
    }
  }

  async flush(): Promise<void> {
    if (this.batch.length === 0) return
    await this.vectorStore.upsert(this.batch)
    this.batch = []
  }
}
-- Index for fast conversation lookup
CREATE INDEX idx_conversations_user_agent
ON conversations(user_id, agent_id, created_at DESC);
-- Index for fact search
CREATE INDEX idx_facts_content_gin
ON facts USING gin(to_tsvector('english', content));
-- Index for recency-ordered lookups of active conversations
-- (PostgreSQL does not allow NOW() in a partial-index predicate,
--  so index the column and apply the time filter at query time)
CREATE INDEX idx_conversations_updated_at
ON conversations(updated_at DESC);