def openai_chat(system: str, user: str) -> str:
    resp = client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        temperature=0.3,
    )
    return resp.choices[0].message.content
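# Note: `client`, `OPENAI_MODEL`, and the `USE_OPENAI` flag used below are
# assumed to come from an earlier setup cell. A minimal sketch of what that
# setup might look like (illustrative, not part of the original snippet):
#   import os
#   from openai import OpenAI
#   USE_OPENAI = bool(os.environ.get("OPENAI_API_KEY"))
#   client = OpenAI() if USE_OPENAI else None
#   OPENAI_MODEL = "gpt-4o-mini"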
def heuristic_responder(context: str, question: str) -> str:
    # Offline fallback: mine the memory context text directly instead of calling an LLM.
    lessons = re.findall(r"Lessons=(.*)", context)
    avoid = re.findall(r"Avoid=(.*)", context)
    ltm_lines = [ln for ln in context.splitlines() if ln.startswith("[LTM:")]

    steps = []
    if lessons:
        for chunk in lessons[:2]:
            for s in [x.strip() for x in chunk.split(";") if x.strip()]:
                steps.append(s)
    for ln in ltm_lines:
        if "[ltm:procedure]" in ln.lower():
            proc = re.sub(r"^\[LTM:procedure\]\s*", "", ln, flags=re.I)
            proc = proc.split("(salience=")[0].strip()
            for part in [p.strip() for p in proc.split("|") if p.strip()]:
                steps.append(part)
    steps = steps[:8] if steps else [
        "Clarify the target outcome and constraints.",
        "Use semantic recall + episodic lessons to propose a plan.",
        "Execute, then store lessons learned."
    ]

    pitfalls = []
    if avoid:
        for chunk in avoid[:2]:
            for s in [x.strip() for x in chunk.split(";") if x.strip()]:
                pitfalls.append(s)
    pitfalls = pitfalls[:6]

    prefs = [ln for ln in ltm_lines if "[ltm:preference]" in ln.lower()]
    facts = [ln for ln in ltm_lines if "[ltm:fact]" in ln.lower() or "[ltm:constraint]" in ln.lower()]

    out = []
    out.append("Answer (memory-informed, offline fallback)\n")
    if prefs:
        out.append("Relevant preferences/constraints remembered:")
        for ln in (prefs + facts)[:6]:
            out.append("  - " + ln.split("] ", 1)[1].split(" (salience=")[0].strip())
        out.append("")
    out.append("Recommended approach:")
    for i, s in enumerate(steps, 1):
        out.append(f"  {i}. {s}")
    if pitfalls:
        out.append("\nPitfalls to avoid (from episodic traces):")
        for p in pitfalls:
            out.append("  - " + p)
    out.append("\n(If you add an API key, the same memory context will feed a stronger LLM for higher-quality responses.)")
    return "\n".join(out).strip()
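# The fallback above never calls an API: it mines the memory context string
# built by `mem.build_context` (the "Lessons=", "Avoid=", and "[LTM:...]" lines)
# and assembles a structured, memory-informed answer, so the notebook still
# produces useful output when no OpenAI key is configured.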
class MemoryAugmentedAgent:
    def __init__(self, mem: MemoryEngine):
        self.mem = mem

    def answer(self, question: str) -> Dict[str, Any]:
        pack = self.mem.retrieve(question)
        context = self.mem.build_context(question, pack)
        system = (
            "You are a memory-augmented agent. Use the provided memory context.\n"
            "Prioritize:\n"
            "1) Episodic lessons (what worked before)\n"
            "2) Long-term facts/preferences/procedures\n"
            "3) Short-term conversation state\n"
            "Be concrete and stepwise. If memory conflicts, state the uncertainty."
        )
        if USE_OPENAI:
            reply = openai_chat(system=system, user=context + "\n\nUser question:\n" + question)
        else:
            reply = heuristic_responder(context=context, question=question)
        self.mem.st_add("user", question, kind="message")
        self.mem.st_add("assistant", reply, kind="message")
        return {"reply": reply, "pack": pack, "context": context}
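# `answer()` is the full memory loop: retrieve relevant memories, build a
# context block, generate a reply (LLM or offline fallback), then write both
# turns back into short-term memory so follow-up questions see the dialog state.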
mem = MemoryEngine()
agent = MemoryAugmentedAgent(mem)

mem.ltm_add(kind="preference", text="Prefer concise, structured answers with steps and bullet points when helpful.", tags=["style"], pinned=True)
mem.ltm_add(kind="preference", text="Prefer solutions that run on Google Colab without extra setup.", tags=["environment"], pinned=True)
mem.ltm_add(kind="procedure", text="When building agent memory: embed items, store with salience/novelty policy, retrieve with hybrid semantic+episodic, and decay overuse to avoid repetition.", tags=["agent-memory"])
mem.ltm_add(kind="constraint", text="If no API key is available, provide a runnable offline fallback instead of failing.", tags=["robustness"], pinned=True)
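# These pinned entries seed durable preferences/constraints; per the memory
# policy in the episode below ("pin constraints, drop low-salience items"),
# pinning marks items that should survive pruning during consolidation.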
mem.episode_add(
    task="Build an agent memory layer for troubleshooting Python errors in Colab",
    constraints={"offline_ok": True, "single_notebook": True},
    plan=[
        "Capture short-term chat context",
        "Store durable constraints/preferences in long-term vector memory",
        "After solving, extract lessons into episodic traces",
        "On new tasks, retrieve top episodic lessons + semantic facts"
    ],
    actions=[
        {"type": "analysis", "detail": "Identified recurring failure: missing installs and version mismatches."},
        {"type": "action", "detail": "Added pip install block + minimal fallbacks."},
        {"type": "action", "detail": "Added memory policy: pin constraints, drop low-salience items."}
    ],
    result="Notebook became robust: runs with or without external keys; troubleshooting quality improved with episodic lessons.",
    outcome_score=0.90,
    lessons=[
        "Always include a pip install cell for non-standard deps.",
        "Pin hard constraints (e.g., offline fallback) into long-term memory.",
        "Store a post-task 'lesson list' as an episodic trace for reuse."
    ],
    failure_modes=[
        "Assuming an API key exists and crashing when absent.",
        "Storing too much noise into long-term memory causing irrelevant recall context."
    ],
    tags=["colab", "robustness", "memory"]
)
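# Seeding one completed episode gives the retriever a prior success trace
# (plan, lessons, failure modes) to surface on the very first question.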
print("✅ Memory engine initialized.")
print(f"   LTM items: {len(mem.ltm)} | Episodes: {len(mem.episodes)} | ST items: {len(mem.short_term)}")

q1 = "I want to build memory for an agent in Colab. What should I store and how do I retrieve it?"
out1 = agent.answer(q1)
print("\n" + "=" * 90)
print("Q1 REPLY\n")
print(out1["reply"][:1800])

q2 = "How do I avoid my agent repeating the same memory over and over?"
out2 = agent.answer(q2)
print("\n" + "=" * 90)
print("Q2 REPLY\n")
print(out2["reply"][:1800])
def simple_outcome_eval(text: str) -> float:
    hits = 0
    for kw in ["decay", "usage", "penalty", "novelty", "prune", "retrieve", "episodic", "semantic"]:
        if kw in text.lower():
            hits += 1
    return float(np.clip(hits / 8.0, 0.0, 1.0))
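# `simple_outcome_eval` is a crude self-scoring proxy: the fraction of eight
# expected concepts (decay, usage, novelty, pruning, etc.) mentioned in the
# reply, clipped to [0, 1]. It only exists to give the next episode an
# outcome_score; swap in a real evaluator for serious use.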
score2 = simple_outcome_eval(out2["reply"])

mem.episode_add(
    task="Prevent repetitive recall in a memory-augmented agent",
    constraints={"must_be_simple": True, "runs_in_colab": True},
    plan=[
        "Track usage counts per memory item",
        "Apply usage-based penalty during ranking",
        "Boost novelty during storage to reduce duplicates",
        "Optionally prune low-salience memories"
    ],
    actions=[
        {"type": "design", "detail": "Added usage-based penalty 1/(1+alpha*usage)."},
        {"type": "design", "detail": "Used novelty = 1 - max_similarity at store time."}
    ],
    result=out2["reply"][:600],
    outcome_score=score2,
    lessons=[
        "Penalize overused memories during ranking (usage decay).",
        "Enforce novelty threshold at storage time to prevent duplicates.",
        "Keep episodic lessons distilled to avoid bloated recall context."
    ],
    failure_modes=[
        "No usage tracking, causing one high-similarity memory to dominate forever.",
        "Storing raw chat logs as LTM instead of distilled summaries."
    ],
    tags=["ranking", "decay", "policy"]
)
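# This closes the learning loop: the agent's own answer is scored and stored
# as a new episode, so later retrievals can reuse (or avoid repeating) it.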
cons = mem.consolidate()
print("\n" + "=" * 90)
print("CONSOLIDATION RESULT:", cons)

print("\n" + "=" * 90)
print("LTM (top rows):")
display(mem.ltm_df().head(12))

print("\n" + "=" * 90)
print("EPISODES (top rows):")
display(mem.episodes_df().head(12))
def debug_retrieval(query: str):
    pack = mem.retrieve(query)
    ctx = mem.build_context(query, pack)
    sem = []
    for mid, sc in pack["semantic_scored"]:
        it = mem.ltm[mid]
        sem.append({"mem_id": mid, "score": sc, "kind": it.kind, "salience": it.salience, "usage": it.usage, "text": it.text[:160]})
    ep = []
    for eid, sc in pack["episodic_scored"]:
        e = mem.episodes[eid]
        ep.append({"ep_id": eid, "score": sc, "outcome": e.outcome_score, "task": e.task[:140], "lessons": " | ".join(e.lessons[:4])})
    return ctx, pd.DataFrame(sem), pd.DataFrame(ep)
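# `debug_retrieval` returns the raw context string plus two DataFrames of
# scored hits, which makes it easy to check which memories won the ranking
# and whether the usage-decay penalty is taking effect.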
print("\n" + "=" * 90)
ctx, sem_df, ep_df = debug_retrieval("How do I design an agent memory policy for storage and retrieval?")
print(ctx[:1600])
print("\nTop semantic hits:")
display(sem_df)
print("\nTop episodic hits:")
display(ep_df)

print("\n✅ Done. You now have working short-term, long-term vector, and episodic memory with storage/retrieval policies in one Colab snippet.")

