[{"data":1,"prerenderedAt":129},["ShallowReactive",2],{"nav-stories":3,"footer-stories":61,"project-legacy-monitor":74},[4,16,25,34,43,52],{"id":5,"color":6,"extension":7,"image":8,"label":9,"link":10,"meta":11,"order":12,"stem":13,"text":14,"__hash__":15},"stories\u002Fstories\u002F01-data-center.yml",null,"yml","https:\u002F\u002Fimages.unsplash.com\u002Fphoto-1558494949-ef010cbdcc31?w=1080","DATA_CENTER","https:\u002F\u002Fx.com\u002Fabbeytetteh_",{},1,"stories\u002F01-data-center","Racking new servers. 40gbit backbone online.","0QUZQbaANhdO8WemZxkDdO7vbVopfnynHtH9FxBZb_w",{"id":17,"color":6,"extension":7,"image":18,"label":19,"link":6,"meta":20,"order":21,"stem":22,"text":23,"__hash__":24},"stories\u002Fstories\u002F02-thoughts.yml","https:\u002F\u002Fimages.unsplash.com\u002Fphoto-1498050108023-c5249f4df085?w=1080","THOUGHTS",{},2,"stories\u002F02-thoughts","Late night bug hunting. Found the memory leak.","Gd1am954aasY6HRHD7hCtOuessXb6zYZ8iizS501ICg",{"id":26,"color":27,"extension":7,"image":6,"label":28,"link":6,"meta":29,"order":30,"stem":31,"text":32,"__hash__":33},"stories\u002Fstories\u002F03-coding.yml","#3b82f6","CODING",{},3,"stories\u002F03-coding","Just thinking about how much easier life is with Swarm.","vLAyiGUPtlXB2SHa5KM_U2AaK4QkG3Og85UEUE7qzgM",{"id":35,"color":6,"extension":7,"image":36,"label":37,"link":6,"meta":38,"order":39,"stem":40,"text":41,"__hash__":42},"stories\u002Fstories\u002F04-update.yml","https:\u002F\u002Fimages.unsplash.com\u002Fphoto-1591799264318-7e6ef8ddb7ea?w=1080","UPDATE",{},4,"stories\u002F04-update","New cluster nodes arrived. Prepping for installation.","kyT60N5C6Re_jMonZbgNy0PbQhzXmUWxDbD0D_v43ts",{"id":44,"color":45,"extension":7,"image":6,"label":46,"link":6,"meta":47,"order":48,"stem":49,"text":50,"__hash__":51},"stories\u002Fstories\u002F05-setup.yml","#86868b","SETUP",{},5,"stories\u002F05-setup","Optimizing the telemetry pipeline for 1M req\u002Fs.","cPOBkzoyXsCmPgRO2d80Hj3vm4MP-6nAejtlQ5iuSzw",{"id":53,"color":6,"extension":7,"image":54,"label":55,"link":6,"meta":56,"order":57,"stem":58,"text":59,"__hash__":60},"stories\u002Fstories\u002F06-travel.yml","https:\u002F\u002Fimages.unsplash.com\u002Fphoto-1560969184-10fe8719e047?w=1080","TRAVEL",{},6,"stories\u002F06-travel","Travel log — system architecture workshop in Berlin.","jnOxerdF6usAIHdR35Z-opx0LJAy9kZluXnZhtz62Z0",[62,64,66,68,70,72],{"id":5,"color":6,"extension":7,"image":8,"label":9,"link":10,"meta":63,"order":12,"stem":13,"text":14,"__hash__":15},{},{"id":17,"color":6,"extension":7,"image":18,"label":19,"link":6,"meta":65,"order":21,"stem":22,"text":23,"__hash__":24},{},{"id":26,"color":27,"extension":7,"image":6,"label":28,"link":6,"meta":67,"order":30,"stem":31,"text":32,"__hash__":33},{},{"id":35,"color":6,"extension":7,"image":36,"label":37,"link":6,"meta":69,"order":39,"stem":40,"text":41,"__hash__":42},{},{"id":44,"color":45,"extension":7,"image":6,"label":46,"link":6,"meta":71,"order":48,"stem":49,"text":50,"__hash__":51},{},{"id":53,"color":6,"extension":7,"image":54,"label":55,"link":6,"meta":73,"order":57,"stem":58,"text":59,"__hash__":60},{},{"id":75,"title":76,"body":77,"description":115,"extension":116,"hash":117,"liveUrl":6,"meta":118,"navigation":119,"order":39,"path":120,"rackBay":121,"rackStatus":122,"region":123,"seo":124,"stem":125,"thumbnail":126,"vault":127,"__hash__":128},"projects\u002Fprojects\u002Flegacy-monitor.md","Legacy Monitor",{"type":78,"value":79,"toc":109},"minimark",[80,84,89,92,95,99,102,106],[81,82,83],"p",{},"Legacy Monitor was the first attempt at building an observability layer for the cluster. It was a Python script that ran as a cron job, queried the OpenStack API, and wrote metrics to a flat CSV file.",[85,86,88],"h2",{"id":87},"why-it-was-retired","Why It Was Retired",[81,90,91],{},"The architecture was fundamentally incompatible with scaling. Every metric query was synchronous and blocking. Adding a new hypervisor node increased the cron runtime linearly. At 40 nodes it took longer to collect metrics than the cron interval — meaning metrics were always stale.",[81,93,94],{},"The final straw was a 6-hour outage that the monitor completely failed to surface because it had silently timed out on an unresponsive node while marking it healthy.",[85,96,98],{"id":97},"what-it-taught-us","What It Taught Us",[81,100,101],{},"The failure modes of Legacy Monitor directly shaped Production Engine's design: async metric collection from the start, dead-man's-switch alerts for collector health, and immutable audit logs for every state change.",[85,103,105],{"id":104},"status","Status",[81,107,108],{},"Archived. The codebase is preserved for historical reference but the binary is not running. Replaced by Production Engine in Q2 2025.",{"title":110,"searchDepth":21,"depth":21,"links":111},"",[112,113,114],{"id":87,"depth":21,"text":88},{"id":97,"depth":21,"text":98},{"id":104,"depth":21,"text":105},"The original hand-rolled infrastructure monitoring system, now decommissioned and replaced by Production Engine.","md","J0K1L2",{},true,"\u002Fprojects\u002Flegacy-monitor","BAY 03","offline","AF-SOUTH-1",{"title":76,"description":115},"projects\u002Flegacy-monitor","\u002Fimages\u002Fthumbnails\u002Flegacy-monitor.png",false,"COeQJfFwO5TJJlgs1PCKaOyTgclEx2NhFONFnl95a8s",1779361989071]