id: 5bfcdd98478f4d52b9a9bbd4d8c2a2bc
parent_id: 9d464eec1d0c4f6791903c81c207c635
item_type: 1
item_id: 47031623e2c6451fb36730ffe93380ea
item_updated_time: 1781510535694
title_diff: "[]"
body_diff: "[{\"diffs\":[[0,\"ition, 1\"],[-1,\"5\"],[1,\"4\"],[0,\" registe\"]],\"start1\":200,\"start2\":200,\"length1\":17,\"length2\":17},{\"diffs\":[[0,\"n 2 \"],[-1,\"strategies — rollout-based EHS with nut-aware thresholds\\\n- ✅ V3 production baseline — F1 (trap protection) + F4 (OTB draw call) + F7 (raise safety)\\\n- ✅ V4 MC multi-way — V3 + Monte Carlo multi-way equity engine (HU exhaustive, multi-way MC)\"],[1,\"V4 production baseline — nut-aware (F1+F4+F7) + MC multi-way equity\"],[0,\"\\\n- ✅\"]],\"start1\":299,\"start2\":299,\"length1\":248,\"length2\":75},{\"diffs\":[[0,\"elds\"],[-1,\" (incl. mc_samples/mc_seed)\"],[0,\" wit\"]],\"start1\":425,\"start2\":425,\"length1\":35,\"length2\":8},{\"diffs\":[[0,\"ted \"],[-1,\"6 \"],[0,\"fixes,\"],[1,\" V4\"],[0,\" pro\"]],\"start1\":489,\"start2\":489,\"length1\":16,\"length2\":17},{\"diffs\":[[0,\"romoted \"],[-1,\"3 \"],[0,\"to produ\"]],\"start1\":504,\"start2\":504,\"length1\":18,\"length2\":16},{\"diffs\":[[0,\"ion\\\n\"],[-1,\"- ✅ Parameter sweep framework — 19 params × 2 directions × 2 scenarios = 76 sims completed\\\n\"],[0,\"- ✅ \"]],\"start1\":522,\"start2\":522,\"length1\":99,\"length2\":8},{\"diffs\":[[0,\"pped V2 \"],[1,\"and V3 \"],[0,\"(superse\"]],\"start1\":600,\"start2\":600,\"length1\":16,\"length2\":23},{\"diffs\":[[0,\"🔄\\\n\\\n\"],[-1,\"**Current focus: Gen 3 range-aware postflop strategy tuning**\\\n\\\n\"],[0,\"### \"]],\"start1\":670,\"start2\":670,\"length1\":71,\"length2\":8},{\"diffs\":[[0,\"nge`\"],[-1,\" (strong made hands ∪ strong draws)\"],[0,\"\\\n- ✅\"]],\"start1\":809,\"start2\":809,\"length1\":43,\"length2\":8},{\"diffs\":[[0,\"gine\"],[-1,\" — `compute_vs_range_multiway_shared`\"],[0,\"\\\n- ✅\"]],\"start1\":849,\"start2\":849,\"length1\":45,\"length2\":8},{\"diffs\":[[0,\"ters\"],[-1,\" (6 new weighted-decision weights + sweep_mode flag)\\\n- ✅ Deterministic RNG (`decision_rng`) with `sweep_mode` toggle for live vs sweep\\\n- ✅ Performance optimizations — sparse MC sampling, `select_nth_unstable_by`, fixed-array `is_oesd`\"],[0,\"\\\n- ✅ 34\"],[-1,\"0\"],[1,\"5\"],[0,\" tes\"]],\"start1\":963,\"start2\":963,\"length1\":250,\"length2\":16},{\"diffs\":[[0,\"eep \"],[-1,\"(16 params × 3 values × 4 seeds = 192 runs, 10K hands each)\\\n  - Sweeping: weighted-decision weights + previously-dead params + top performers from round 1\\\n  - Early results: `call_w_pot_odds` shows most sensitivity (0.75 value swings Gen3 positive)\\\n  - Most new weighted-decision parameters show small but non-zero sensitivity (working as intended)\\\n  - Key observation: Gen3 currently\"],[1,\"analysis\\\n- 🔄 Gen3\"],[0,\" und\"]],\"start1\":1030,\"start2\":1030,\"length1\":392,\"length2\":26},{\"diffs\":[[0,\"ms Gen2 \"],[1,\"V4 \"],[0,\"in 5v5 1\"]],\"start1\":1064,\"start2\":1064,\"length1\":16,\"length2\":19},{\"diffs\":[[0,\"rmat\"],[-1,\" (negative delta)\"],[0,\"\\\n\\\n**\"]],\"start1\":1092,\"start2\":1092,\"length1\":25,\"length2\":8},{\"diffs\":[[0,\":**\\\n\"],[-1,\"- [ ] Analyze round 2 results, identify optimal parameter values\\\n- [ ] Address Gen3 underperformance vs Gen2 in large-field format\\\n\"],[0,\"- [ \"]],\"start1\":1107,\"start2\":1107,\"length1\":139,\"length2\":8},{\"diffs\":[[0,\"n)\\\n\\\n\"],[-1,\"### Gen 2 Optimization ✅\\\n- ✅ Gen 2 V4 uses MC multi-way (replacing exhaustive enumeration) for 10-50× speedup in multi-way pots\\\n- [ ] A/B test V4 vs V3 to validate MC doesn't degrade play quality\\\n- [ ] Parameter sweep `mc_samples` for accuracy/speed sweet spot\\\n- [ ] SNG-specific parameter tuning\\\n\\\n---\\\n\\\n## Phase 3: Reinforcement Learning (Future)\\\n\\\n**Goal:** Self-play optimization\\\n\\\n- Training environment using `holdem_core` simulation\\\n- State representation (hand strength, pot odds, position, opponent actions)\\\n- Action space (fold, call, raise sizes)\\\n- Reward function (profit-based with variance reduction)\\\n- Policy network training loop\\\n\\\n---\\\n\\\n## Phase 4: Live Server Infrastructure (Future)\\\n\\\n**Goal:** Real-time bot deployment\\\n\\\n- Network protocol design (TBD: WebSocket, JSON, MQ, etc.)\\\n- Client registration and management\\\n- Game state synchronization\\\n- Action dispatch system\\\n- Event logging for analysis\\\n- Client adapter system (screen/protocol scraping)\\\n- Table selection logic\\\n- Configuration-driven game selection\\\n\\\n---\\\n\\\n## Phase 5: Production Polish (Future)\\\n\\\n**Goal:** Production-ready system\\\n\\\n- Real-time strategy adaptation\\\n- Multi-table coordination\\\n- Bankroll management\\\n- Risk adjustment based on results\\\n- Performance profiling and optimization\\\n- Error handling and recovery\\\n- Documentation and deployment scripts\\\n\\\n\"],[0,\"---\\\n\"]],\"start1\":1321,\"start2\":1321,\"length1\":1341,\"length2\":8},{\"diffs\":[[0,\"gies | 1\"],[-1,\"5\"],[1,\"4\"],[0,\" |\\\n| Tot\"]],\"start1\":1416,\"start2\":1416,\"length1\":17,\"length2\":17},{\"diffs\":[[0,\"sts | 34\"],[-1,\"0\"],[1,\"5\"],[0,\" |\\\n| Gen\"]],\"start1\":1438,\"start2\":1438,\"length1\":17,\"length2\":17},{\"diffs\":[[0,\"|\\\n| \"],[-1,\"Benchmark: Gen3 vs Gen2 vs Gen1 (6-max) | +2.53 / +1.02 / -3.55 BB/hand\"],[1,\"V4 vs V3 (10K hands, 10 seeds) | +0.34 BB/hand (6/10 wins)\"],[0,\" |\"]],\"start1\":1541,\"start2\":1541,\"length1\":77,\"length2\":64}]"
metadata_diff: {"new":{},"deleted":[]}
encryption_cipher_text: 
encryption_applied: 0
updated_time: 2026-06-15T08:06:54.965Z
created_time: 2026-06-15T08:06:54.965Z
type_: 13