if proxy_alive():
print(“\n[10] Mixed 10-prompt workload…”)
workload = [
“Capital of France?”,
“Read foo.py”,
“Type hint for a list of dicts”,
“Lowercase: HELLO”,
“One-sentence summary of REST”,
“Refactor a callback chain into async/await with proper error handling”,
“Design a sharded multi-region key-value store with linearizable reads”,
“Analyze the asymptotic complexity of this code and prove the bound rigorously”,
“Debug why our gRPC stream stalls when the client TCP window saturates”,
“Compare and contrast B-trees and LSM-trees for write-heavy workloads”,
]
runs = []
client = OpenAI(base_url=f”http://localhost:{PORT}/v1″, api_key=”local”)
for p in workload:
t0 = time.time()
try:
r = client.chat.completions.create(
model=”auto”,
messages=[{“role”: “user”, “content”: p}],
max_tokens=140,
)
usage = getattr(r, “usage”, None)
runs.append({
“prompt”: p[:55],
“model”: r.model,
“latency_s”: round(time.time() – t0, 2),
“in_tok”: getattr(usage, “prompt_tokens”, 0) if usage else 0,
“out_tok”: getattr(usage, “completion_tokens”, 0) if usage else 0,
})
except Exception as e:
runs.append({“prompt”: p[:55], “model”: “ERROR”,
“latency_s”: None, “in_tok”: 0, “out_tok”: 0,
“error”: str(e)[:80]})
rdf = pd.DataFrame(runs)
print(rdf.to_string(index=False))
PRICE = {
“flash”: {“in”: 0.30 / 1e6, “out”: 2.50 / 1e6},
“pro”: {“in”: 1.25 / 1e6, “out”: 10.0 / 1e6},
}
def price_for(model_str, in_t, out_t):
m = (model_str or “”).lower()
tier = “flash” if “flash” in m else “pro”
return in_t * PRICE[tier][“in”] + out_t * PRICE[tier][“out”]
cost_routed = sum(price_for(r[“model”], r[“in_tok”], r[“out_tok”]) for r in runs)
cost_no_route = sum(price_for(“gemini-2.5-pro”, r[“in_tok”], r[“out_tok”]) for r in runs)
print(f”\n[10] Cost (NadirClaw routed) : ${cost_routed:.6f}”)
print(f” Cost (always-Pro baseline) : ${cost_no_route:.6f}”)
if cost_no_route > 0:
print(f” Estimated savings on this run : ”
f”{(1 – cost_routed/cost_no_route) * 100:.1f}%”)
print(“\n[11] `nadirclaw report` (parses the JSONL request log):”)
rep = subprocess.run([“nadirclaw”, “report”], capture_output=True, text=True, timeout=60)
print(rep.stdout or rep.stderr)
if proxy_alive():
print(“\n[12] Stopping the proxy…”)
try:
if hasattr(os, “killpg”):
os.killpg(os.getpgid(server_proc.pid), signal.SIGTERM)
else:
server_proc.terminate()
server_proc.wait(timeout=10)
except Exception:
try:
server_proc.kill()
except Exception:
pass
print(” ✓ proxy stopped.”)
print(“\nDone. 🎉”)
Trending
- Galaxy Watch 9 Classic might be coming with raise-to-talk feature
- Apple has new Beats headphones coming and Barca star Lamine Yamal is already rocking them
- ‘X by Xreal’ wants everyone in on AR glasses, launches entry-level pair for gamers, movie-lovers
- 3 ways you can use Circle to Search to identify songs on any Android phone
- Galaxy Z Fold 8 is allegedly a major display crease improvement
- Out of the blue, Acer just dropped two smart glasses that look pretty stylish
- Motorola Razr 2026 vs. Razr 2024
- I actually don’t hate the new Google Health app, but it could still use some work. Here are my highlights after testing the revamped Fitbit app, and how I think Google can improve

