use rand::{seq::IteratorRandom, SeedableRng};
use rand_pcg::Pcg64Mcg;
use std::{collections::BTreeMap, hash::Hash};
/// Common interface for a turn-based game state.
///
/// A value of the implementing type is one position of the game. Positions
/// must be `Clone` so that callers can copy a state before mutating it with
/// [`Game::play`].
trait Game: Clone {
    /// Identifier of a participant; must be `Copy`.
    type Player: Copy;
    /// A single move that can be applied with [`Game::play`].
    type Action;
    /// Container of moves produced by [`Game::actions`].
    type Actions: IntoIterator<Item = Self::Action>;
    /// What a given player observes of a position; must be `Clone + Hash + Eq`.
    type InfoSet: Clone + Hash + Eq;

    /// Reports whether the game has ended.
    fn is_terminal(&self) -> bool;

    /// Returns the player who is to act in this position.
    fn player(&self) -> Self::Player;

    /// Returns the moves offered in this position.
    fn actions(&self) -> Self::Actions;

    /// Applies `action` to this position in place.
    fn play(&mut self, action: Self::Action);

    /// Returns this position as observed by player `p`.
    fn info_set(&self, p: Self::Player) -> Self::InfoSet;

    /// Returns the payoff of this position for player `p`.
    fn value(&self, p: Self::Player) -> f32;
}
/// A move in a [`MatrixGame`], identified by its index.
///
/// The wrapped `usize` is used directly as a row or column index into the
/// game's payoff matrix.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct MatrixAction(usize);
/// State of a two-player game over an `N` x `N` payoff matrix.
#[derive(Clone, Debug)]
struct MatrixGame<const N: usize> {
    /// `history[p]` holds the action chosen by player `p`, or `None` while
    /// that player has not moved.
    history: [Option<MatrixAction>; 2],
    /// Payoff matrix: `payouts[a0][a1]` is read as player 0's payoff when
    /// player 0 chose index `a0` and player 1 chose index `a1`.
    payouts: [[f32; N]; N],
}
/// Payoff matrix for a three-action, zero-sum game in the rock-paper-scissors
/// pattern.
///
/// As consumed by `MatrixGame::value`, the row is player 0's action index, the
/// column is player 1's action index, and the entry is player 0's payoff. The
/// entries are consistent with the ordering rock (0), paper (1), scissors (2).
const RPS_PAYOUTS: [[f32; 3]; 3] = [
    [0.0, -1.0, 1.0],
    [1.0, 0.0, -1.0],
    [-1.0, 1.0, 0.0],
];
impl<const N: usize> MatrixGame<N> {
fn new(payouts: [[f32; N]; N]) -> MatrixGame<N> {
MatrixGame {
history: Default::default(),
payouts,
}
}
}
/// [`Game`] implementation for a one-shot, two-player matrix game.
///
/// Player 0 moves first and player 1 second; the game is terminal as soon as
/// player 1's slot in `history` is filled. Players are the indices `0` and `1`.
impl<const N: usize> Game for MatrixGame<N> {
    type Action = MatrixAction;
    type Actions = [Self::Action; N];
    type Player = usize;
    type InfoSet = [Option<MatrixAction>; 2];

    /// Returns `MatrixAction(0)` through `MatrixAction(N - 1)` in index order.
    ///
    /// The result does not depend on the current state.
    fn actions(&self) -> Self::Actions {
        let mut all = [MatrixAction(0); N];
        for (index, slot) in all.iter_mut().enumerate() {
            *slot = MatrixAction(index);
        }
        all
    }

    /// Returns the move history as observed by player `p`.
    ///
    /// While the game is not terminal the opponent's slot (`1 - p`) is masked
    /// to `None`; once it is terminal the full history is returned.
    ///
    /// # Panics
    ///
    /// Panics if the game is not terminal and `p` is neither `0` nor `1`.
    fn info_set(&self, p: Self::Player) -> Self::InfoSet {
        // The history array is `Copy`, so a plain copy replaces `.clone()`.
        let mut infoset = self.history;
        if !self.is_terminal() {
            infoset[1 - p] = None;
        }
        infoset
    }

    /// Returns `0` until player 0 has moved, and `1` afterwards.
    fn player(&self) -> Self::Player {
        match self.history[0] {
            None => 0,
            Some(_) => 1,
        }
    }

    /// Records `action` as the move of the player returned by [`Game::player`].
    ///
    /// Because `player()` keeps returning `1` after player 0 has moved,
    /// calling this on a terminal game overwrites player 1's recorded move.
    fn play(&mut self, action: Self::Action) {
        let mover = self.player();
        self.history[mover] = Some(action);
    }

    /// Returns `true` once player 1 has moved.
    fn is_terminal(&self) -> bool {
        self.history[1].is_some()
    }

    /// Returns the payoff for player `p`.
    ///
    /// Player `0` receives `payouts[a0][a1]`; any other `p` receives the
    /// negation of that entry.
    ///
    /// # Panics
    ///
    /// Panics if either player has not moved yet, or if a recorded action
    /// index is not smaller than `N`.
    fn value(&self, p: Self::Player) -> f32 {
        let row = self.history[0]
            .expect("MatrixGame::value called before player 0 acted")
            .0;
        let col = self.history[1]
            .expect("MatrixGame::value called before player 1 acted")
            .0;
        let val_p0 = self.payouts[row][col];
        if p == 0 {
            val_p0
        } else {
            -val_p0
        }
    }
}
/// Player that selects among a game's offered actions at random.
///
/// Owns the random generator it draws from; see `RandomPlayer::new` for
/// seeding and `RandomPlayer::act` for selection.
struct RandomPlayer {
// PCG generator seeded in `new` and advanced by every call to `act`.
rng: Pcg64Mcg,
}
impl RandomPlayer {
    /// Builds a player whose generator is initialised from `seed`.
    fn new(seed: u64) -> Self {
        let rng = Pcg64Mcg::seed_from_u64(seed);
        Self { rng }
    }

    /// Returns one of `game.actions()`, picked with this player's generator.
    ///
    /// The game itself is not modified; only the generator state advances.
    ///
    /// # Panics
    ///
    /// Panics if `game.actions()` yields no actions.
    fn act<G: Game>(&mut self, game: &G) -> G::Action {
        let candidates = game.actions().into_iter();
        let picked = candidates.choose(&mut self.rng);
        picked.unwrap()
    }
}
/// Per-`(player, info set, action)` tables for a solver over game `G`.
///
/// The name suggests linear counterfactual regret minimisation (LCFR), but
/// nothing in this file reads or writes the tables yet — TODO confirm once the
/// solver methods are implemented.
///
/// NOTE(review): inserting into or looking up in a `BTreeMap` requires the key
/// to be `Ord`, while `Game` only requires `InfoSet: Clone + Hash + Eq` and
/// places no ordering bound on `Player` or `Action`. The solver methods will
/// need extra bounds (or a hash-based map) before they can use these tables.
struct TabularLcfr<G>
where
    G: Game,
{
    /// Presumably the accumulated regret per key — confirm against the solver.
    regret_sum: BTreeMap<(G::Player, G::InfoSet, G::Action), f32>,
    /// Presumably the accumulated average-strategy weight per key — confirm
    /// against the solver.
    avg_strat: BTreeMap<(G::Player, G::InfoSet, G::Action), f32>,
}
impl<G> TabularLcfr<G>
where
    G: Game,
{
    /// Placeholder for a single solver iteration from `game`.
    ///
    /// Not implemented yet: neither the tables nor `game` are touched.
    fn single_iter(&mut self, game: &mut G) {
        let _ = game;
    }

    /// Placeholder for the game-tree traversal from `game`.
    ///
    /// Not implemented yet: neither the tables nor `game` are touched.
    fn walk(&mut self, game: &mut G) {
        let _ = game;
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Plays one scripted rock-paper-scissors round and checks every
    /// intermediate observation, then plays a round with `RandomPlayer` and
    /// checks that both payoffs stay within `[-1, 1]`.
    #[test]
    fn it_works() {
        // Scripted round: player 0 picks index 0, player 1 picks index 1.
        let first = MatrixAction(0);
        let second = MatrixAction(1);

        let mut rps = MatrixGame::new(RPS_PAYOUTS);
        assert!(!rps.is_terminal());
        assert_eq!(rps.player(), 0);

        rps.play(first);
        // Player 1 must not see player 0's move before the game ends.
        assert_eq!(rps.info_set(0), [Some(first), None]);
        assert_eq!(rps.info_set(1), [None, None]);
        assert!(!rps.is_terminal());
        assert_eq!(rps.player(), 1);

        rps.play(second);
        let full_history = [Some(first), Some(second)];
        assert_eq!(rps.info_set(0), full_history);
        assert_eq!(rps.info_set(1), full_history);
        assert!(rps.is_terminal());
        assert_eq!(rps.value(0), -1.0);
        assert_eq!(rps.value(1), 1.0);

        // Random round: two moves drawn by a seeded player.
        let mut rps = MatrixGame::new(RPS_PAYOUTS);
        let mut rand_play = RandomPlayer::new(4);
        for _ in 0..2 {
            let action = rand_play.act(&rps);
            rps.play(action);
        }
        for p in 0..2 {
            let payoff = rps.value(p);
            assert!((-1.0..=1.0).contains(&payoff));
        }
    }
}