Introduction¶

Bor-bor-zan is a famous game played at my high school bdyz. I won't explain the rules here for simplicity, but note that the game has eight basic moves, and the two players choose their moves simultaneously. The game ends when a deadly move kills one player, which can happen on the very first move or after an arbitrary number of moves. I want to train a model to recognize the game's rules and predict the two players' next moves.

Preparing the data¶

Defining the rules¶

I've created a class that can fully represent the game of bor-bor-zan.

In [2]:
import random
class Game:
    def __init__(self):
        self.winner = 0
        self.e1 = 0 # energy of player 1, etc.
        self.e2 = 0
        self.over = False # the game is still on
        self.requires = [0, 0, 0, 1, 1, 0, 1, 3, 2] # energy required to play each move (index 0 is a placeholder)
        self.text = [0, "zan", "shield", "shoot", "reflect", "buddha", "steal", "chick", "big-shield"]
        self.record = []
    def act(self, action1, action2):
        # moves: 1: zan (save energy); 2: shield; 3: shoot; 4: reflect; 5: buddha; 6: steal; 7: chick; 8: big-shield
        # determine whether either player was just killed
        if self.determine(action1, action2):
            # player 1 wins
            self.over = True
            self.winner = 1
        elif self.determine(action2, action1):
            self.over = True
            self.winner = 2
        if action1 == 1:
            self.e1 += 1
        elif action1 in [3, 4, 6]:
            self.e1 -= 1
        elif action1 == 7:
            self.e1 -= 3
        elif action1 == 8:
            self.e1 -= 2
        if action2 == 1:
            self.e2 += 1
        elif action2 in [3, 4, 6]:
            self.e2 -= 1
        elif action2 == 7:
            self.e2 -= 3
        elif action2 == 8:
            self.e2 -= 2
        # special cases for buddha and steal:
        # if one side plays buddha and survives, the opponent's energy is cleared (the same holds if both play buddha)
        if action1 == 5:
            self.e2 = 0
        if action2 == 5:
            self.e1 = 0
        # if both players play steal at the same time, they cancel out and nothing happens
        if action1==6 and action2==6:
            return
        if action1==6 and action2!=5:
            self.e1+=self.e2
            self.e2 = 0
        if action2==6 and action1!=5:
            self.e2+=self.e1
            self.e1 = 0
        self.record.append((action1, action2))  # store the numeric moves; use pretty_record() for the text version
    def determine(self, act1, act2):
        # check (one direction at a time) whether the player using act1 kills the player using act2
        if act1 == 2 and act2 == 5: return True # buddha dies against shield
        if act1 == 3 and (act2 in [1, 5, 6, 8]): return True # shoot kills zan, buddha, steal and big-shield
        if act1 == 4 and act2 == 3: return True # reflect bounces the shot back
        if act1 == 5 and act2 == 4: return True # buddha punishes reflect
        if act1 == 7 and act2 <= 6: return True # chick beats every move from 1 to 6
        if act1 == 8 and act2 == 5: return True # big-shield kills buddha
        return False
    def pretty_record(self):
        return list((self.text[a], self.text[b]) for (a,b) in self.record)

Here's a quick demonstration of how to use the class defined above:

In [3]:
game = Game()
buddha, shield = game.text.index('buddha'), game.text.index('shield')
game.act(buddha, shield)
print(game.over, game.winner)
True 2

Basically, after creating a Game instance, you call the act method with the moves each player has chosen. After act is called, the over and winner properties are updated to show whether the game has ended and, if so, who won. But beware that this class doesn't check for illegal moves, such as playing a move without enough energy.
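For instance, here's a quick illustration of that caveat (a hypothetical opening move, just for demonstration): player 1 shoots on the very first move without having saved any energy, and the class happily accepts it.

game = Game()
# player 1 shoots without any saved energy -- an illegal move in the real game
game.act(game.text.index('shoot'), game.text.index('zan'))
print(game.over, game.winner, game.e1)  # the shot still 'wins', and e1 goes negative

To address this, we define another class that inherits from Game, giving us a basic program that generates a random legal move for player 2.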

In [4]:
class GameAI(Game):
    def __init__(self):
        super().__init__()
        # the machine plays as player 2
    def act(self, action1):
        actions_available = [i for i in range(1, 9) if self.requires[i]<=self.e2]
        act = random.choice(actions_available)   
        super().act(action1, act)
        return act

Now let's test it to see which move it produces:

In [5]:
game = GameAI()
game.text[game.act(game.text.index('zan'))]
Out[5]:
'shield'
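We can also let it play out a whole game, picking random legal moves for player 1 by hand (a quick sketch built on the classes above):

game = GameAI()
while not game.over:
    # mirror what GameAI does for player 2: only pick moves player 1 has the energy for
    legal = [i for i in range(1, 9) if game.requires[i] <= game.e1]
    game.act(random.choice(legal))
print(game.winner, game.pretty_record())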

So the supposed AI is working properly, sticking to the game's rules. It's time to generate some game data.

Generating game data¶

You might have noticed that GameAI only generates a possible action for player 2. To fix that, we need an algorithm that generates possible actions for both players. So I defined a new class, DataGen, for this purpose.

In [6]:
class DataGen(Game):
    def __init__(self):
        super().__init__()
        self.text_record = []
        
    def act(self):
        actions_available_1 = [i for i in range(1, 9) if self.requires[i]<=self.e1]
        actions_available_2 = [i for i in range(1, 9) if self.requires[i]<=self.e2]
        act_combination = random.choice(actions_available_1), random.choice(actions_available_2)
        super().act(*act_combination)
        text = self.text[act_combination[0]], self.text[act_combination[1]]
        self.text_record.append(text)
        return text

Test it:

In [7]:
game = DataGen()
while not game.over:
    game.act()
print(game.text_record)
[('shield', 'buddha')]

Now let's generate 100,000 game records:

In [8]:
def get_one_rec():
    game = DataGen()
    while not game.over:
        game.act()
    return game.text_record

data = []
for i in range(100000):
    data.append(get_one_rec())
In [9]:
data[:5]
Out[9]:
[[('zan', 'buddha'),
  ('zan', 'buddha'),
  ('buddha', 'zan'),
  ('shield', 'shield'),
  ('buddha', 'buddha'),
  ('buddha', 'zan'),
  ('buddha', 'shield')],
 [('buddha', 'buddha'), ('shield', 'shield'), ('buddha', 'shield')],
 [('shield', 'buddha')],
 [('shield', 'buddha')],
 [('buddha', 'buddha'), ('buddha', 'shield')]]

It takes very little time to generate all this data! We now need to concatenate each record into a document that we can feed into our language model:

In [10]:
def lst_to_doc(lst):
    stream = ''
    for each in lst:
        stream += ' to '.join(each)+' , '
    stream += 'end . '
    return stream
In [11]:
lst_to_doc([('buddha', 'zan'), ('buddha', 'zan'), ('zan', 'zan'), ('buddha', 'shoot')])
Out[11]:
'buddha to zan , buddha to zan , zan to zan , buddha to shoot , end . '

We can then map this function over all the items in our 100,000-item list:

In [12]:
documents = []
for each in data:
    documents.append(lst_to_doc(each))
In [13]:
documents[:3]
Out[13]:
['zan to buddha , zan to buddha , buddha to zan , shield to shield , buddha to buddha , buddha to zan , buddha to shield , end . ',
 'buddha to buddha , shield to shield , buddha to shield , end . ',
 'shield to buddha , end . ']

Splitting the dataset and joining the documents together:

In [14]:
train_document = ''.join(documents[:80000])
valid_document = ''.join(documents[80000:])
In [15]:
train_document[900: 1000]
Out[15]:
'eld to shield , zan to buddha , buddha to buddha , zan to buddha , shield to shield , buddha to shie'

Seems right!

Tokenization and Numericalization¶

First we import the libraries:

In [16]:
from fastbook import *
from fastai.text.all import *

Then we split the document into tokens and turn them into corresponding indices:

In [17]:
tokens = train_document.split(' ')
tokens[:10]
Out[17]:
['zan', 'to', 'buddha', ',', 'zan', 'to', 'buddha', ',', 'buddha', 'to']
In [18]:
vocab = L(*tokens).unique()
vocab
Out[18]:
(#13) ['zan','to','buddha',',','shield','end','.','shoot','steal','reflect','chick','big-shield','']
In [19]:
word2idx = {w:i for i,w in enumerate(vocab)}
nums = L(word2idx[i] for i in tokens)
nums
Out[19]:
(#1435321) [0,1,2,3,0,1,2,3,2,1,0,3,4,1,4,3,2,1,2,3...]

Now do the same thing with the validation document:

In [20]:
tokens_v = valid_document.split(' ')
nums_v = L(word2idx[i] for i in tokens_v)

Creating the Dataloaders¶

We want the model to predict the next token based on the previous six tokens. I know that's clearly not enough context, but for simplicity of code we'll have to temporarily endure this inconvenience.

In [105]:
seqs = L((tensor(nums[i:i+6]), nums[i+6]) for i in range(0,len(nums)-7,6))
seqs
Out[105]:
(#239219) [(tensor([0, 1, 2, 3, 0, 1]), 2),(tensor([2, 3, 2, 1, 0, 3]), 4),(tensor([4, 1, 4, 3, 2, 1]), 2),(tensor([2, 3, 2, 1, 0, 3]), 2),(tensor([2, 1, 4, 3, 5, 6]), 2),(tensor([2, 1, 2, 3, 4, 1]), 4),(tensor([4, 3, 2, 1, 4, 3]), 5),(tensor([5, 6, 4, 1, 2, 3]), 5),(tensor([5, 6, 4, 1, 2, 3]), 5),(tensor([5, 6, 2, 1, 2, 3]), 2),(tensor([2, 1, 4, 3, 5, 6]), 0),(tensor([0, 1, 0, 3, 2, 1]), 7),(tensor([7, 3, 5, 6, 2, 1]), 2),(tensor([2, 3, 0, 1, 0, 3]), 8),(tensor([8, 1, 8, 3, 2, 1]), 4),(tensor([4, 3, 5, 6, 2, 1]), 0),(tensor([0, 3, 2, 1, 2, 3]), 0),(tensor([0, 1, 2, 3, 0, 1]), 4),(tensor([4, 3, 0, 1, 0, 3]), 2),(tensor([2, 1, 7, 3, 5, 6]), 0)...]

The same with the validation set:

In [106]:
seqs_v = L((tensor(nums_v[i:i+6]), nums_v[i+6]) for i in range(0,len(nums_v)-7,6))
In [84]:
bs = 128
dls = DataLoaders.from_dsets(seqs, seqs_v, bs=128, shuffle=False)

A simple recurrent neural network¶

In [24]:
class LMModel1(Module):
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)  
        self.h_h = nn.Linear(n_hidden, n_hidden)     
        self.h_o = nn.Linear(n_hidden,vocab_sz)
        
    def forward(self, x):
        h = F.relu(self.h_h(self.i_h(x[:,0])))
        h = h + self.i_h(x[:,1])
        h = F.relu(self.h_h(h))
        h = h + self.i_h(x[:,2])
        h = F.relu(self.h_h(h))
        return self.h_o(h)

Let me explain what the code above means. i_h stands for input-to-hidden: it's an embedding layer, which is basically an optimized way of indexing into a matrix of vectors. h_h stands for hidden-to-hidden: a linear layer whose output we pass through a ReLU for non-linearity. h_o is the final layer, mapping the hidden activations to the output scores. In PyTorch, forward takes a batch of input data passed in as x, so x has shape (batch_size, 6), one row per six-token sequence. This first, simpler model only looks at the first three tokens of each sequence.

In a recurrent neural network, there is a subtle difference in how we produce the activations: the tokens are processed one at a time, in order, reusing the same layers at every step. As we can see from the code, the first word of each sequence is processed first. Let's break the process down step by step:

  • x[:, 0] selects the first word of every sequence in the batch, giving a tensor of shape (batch_size,).
  • self.i_h(x[:,0]) uses the embedding matrix to map each numericalized token to its corresponding vector, so the shape becomes (batch_size, n_hidden). n_hidden plays the same role as the number of latent factors in collaborative filtering.
  • h = F.relu(self.h_h(self.i_h(x[:,0]))) applies the hidden-to-hidden linear layer (its weight matrix is (n_hidden, n_hidden), so the activations keep the shape (batch_size, n_hidden)) and adds a ReLU for non-linearity.
  • Then we add the embedding of the second and third word of each sequence, applying the hidden-to-hidden layer and ReLU after each addition.
  • Finally, self.h_o(h) is called to produce the output scores.

Overall, we use the same weight matrices for all three words, but the activations at each step are influenced by the words that came before it. This way the model can take the order of the words into account, rather than simply adding the word-embedding vectors together.
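To make the shapes concrete, here's a small sanity check (a sketch using the names defined above; the commented shapes are what we expect, not actual notebook output):

m = LMModel1(len(vocab), 64)
xb = torch.stack([seqs[i][0] for i in range(4)])  # a tiny batch of 4 sequences
print(xb.shape)     # (4, 6): six tokens per sequence
print(m(xb).shape)  # (4, len(vocab)): one score per token in the vocabulary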

For the next piece of code: because the final activations are just scores for each token in the vocabulary, this can be treated the same way as image classification, so F.cross_entropy is used as the loss.

In [31]:
learn = Learner(dls, LMModel1(len(vocab), 64), loss_func=F.cross_entropy, 
                metrics=accuracy)
learn.fit_one_cycle(4, 1e-3)
epoch train_loss valid_loss accuracy time
0 1.440551 1.438455 0.297314 00:04
1 1.426698 1.425912 0.301389 00:04
2 1.420663 1.420298 0.302786 00:04
3 1.418132 1.419128 0.302586 00:04
In [27]:
n,counts = 0,torch.zeros(len(vocab))
for x,y in dls.valid:
    n += y.shape[0]
    for i in range_of(vocab): counts[i] += (y==i).long().sum()
idx = torch.argmax(counts)
idx, vocab[idx.item()], counts[idx].item()/n
Out[27]:
(tensor(2), 'buddha', 0.2601081081081081)

As we can see, the most common target token is 'buddha', accounting for about 26% of the validation targets. A model that always predicted 'buddha' would therefore get about 26% accuracy, so our 30% result is better, though not by a lot.

The logic of this piece of code deserves a brief explanation. We iterate through dls.valid, which gives us one batch at a time. y is a vector of 128 targets, corresponding to our batch_size, so the loop is simply counting how often each vocabulary token appears as a target.

Using a loop¶

Now, instead of adding the activations and applying ReLU by hand, we use a loop to automate the process. Note that this loop also runs over all six input tokens, not just the first three.

In [30]:
class LMModel2(Module):
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)  
        self.h_h = nn.Linear(n_hidden, n_hidden)     
        self.h_o = nn.Linear(n_hidden,vocab_sz)
        
    def forward(self, x):
        h = 0
        for i in range(6):
            h = h + self.i_h(x[:,i])
            h = F.relu(self.h_h(h))
        return self.h_o(h)
learn = Learner(dls, LMModel2(len(vocab), 64), loss_func=F.cross_entropy, 
                metrics=accuracy)
learn.fit_one_cycle(4, 1e-3)
epoch train_loss valid_loss accuracy time
0 1.212522 1.203242 0.371975 00:05
1 1.195914 1.188813 0.369530 00:05
2 1.189458 1.183382 0.368815 00:05
3 1.186409 1.182275 0.372624 00:05

Adding detach¶

A problem with our current model is that the hidden state h gets reset to zero on every call to forward. This way, the neural network only ever sees short, disconnected fragments of the real document, which is not good for training. So we now store h as an attribute of the model, so it can remember what it has seen in the past.

However, this creates another problem. The method we just decided to use makes our model really deep, and for each of these layers (there might be 10,000 of them) we would need to backpropagate all the way back to the very start. Memory would explode! So we use PyTorch's detach method so that only the last six layers' derivatives are calculated.

In [32]:
class LMModel3(Module):
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)  
        self.h_h = nn.Linear(n_hidden, n_hidden)     
        self.h_o = nn.Linear(n_hidden,vocab_sz)
        self.h = 0
        
    def forward(self, x):
        for i in range(6):
            self.h = self.h + self.i_h(x[:,i])
            self.h = F.relu(self.h_h(self.h))
        out = self.h_o(self.h)
        self.h = self.h.detach()
        return out
    
    def reset(self): self.h = 0

It is not an easy task to fully understand what detach is doing here. I will try my best to explain what's actually going on:

  • We do the regular forward propagation in the forward method, then we call self.h = self.h.detach().
  • Remember the basic rules of deep learning: PyTorch automatically tracks the forward pass and the calculation of the loss, building a computation graph so it can get the derivative of each parameter. If we never cut that graph while the hidden state h accumulates millions of calculations, the graph would grow extremely long and eventually exhaust memory.
  • So the graph should be cut into several small parts. For example, if we have 1000 words in total to process, then without detach() the last word would need gradients flowing back through 1000 layers. But what if, after each time forward is called, we 'cut' the graph, so that future backward passes stop there? That's exactly what detach() is doing!
  • When detach() is called, previously computed gradients are not affected. It only influences the backpropagation triggered by future forward passes. So each time we call backward(), the gradients are only calculated along the last six layers, keeping training efficient (see the toy example right after this list).
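Here's a tiny toy example (plain tensors, unrelated to our model) showing how detach() stops gradients from flowing through earlier computations:

w = torch.tensor([2.0], requires_grad=True)
h = w * 3            # this computation is recorded in the graph
h = h.detach()       # h is now treated as a constant with no history
loss = (h * w).sum()
loss.backward()
print(w.grad)        # tensor([6.]) -- only the use of w after the detach contributes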
In [107]:
dls = DataLoaders.from_dsets(
    seqs, seqs_v, 
    bs=128, drop_last=True, shuffle=False)
In [108]:
learn = Learner(dls, LMModel3(len(vocab), 64), loss_func=F.cross_entropy,
                metrics=accuracy, cbs=ModelResetter)
learn.fit_one_cycle(4, 0.01)
epoch train_loss valid_loss accuracy time
0 1.216328 1.203639 0.371385 00:06
1 1.202349 1.197370 0.371319 00:06
2 1.191182 1.185475 0.371069 00:06
3 1.187282 1.183284 0.372052 00:06

Note that there is no significant performance boost. Maybe only giving six tokens for our model to predict on is not enough.

Creating more signal¶

We now give the model more to learn from: instead of a single target per sequence, it will predict the next token after every input token (we could also make the sequences longer than six tokens, but we'll keep sl = 6 here). But first we need to introduce another method of data preparation: now that detach() lets the hidden state carry over from batch to batch, the natural order of the sequences matters! And a big problem occurs with our current dataset: the first 128 items go into a single batch, but they are consecutive chunks of the same part of the document, so the network processes them in parallel and the continuity between batches is lost.

So we need a way to 'transpose' our sequences, so that each position in the batch reads its own contiguous stream of the document from one batch to the next. That's where group_chunks comes in.

In [92]:
def group_chunks(ds, bs):
    m = len(ds) // bs
    new_ds = L()
    for i in range(m): new_ds += L(ds[i + m*j] for j in range(bs))
    return new_ds
In [93]:
group_chunks(list(range(1, 1001)), 100)
Out[93]:
(#1000) [1,11,21,31,41,51,61,71,81,91,101,111,121,131,141,151,161,171,181,191...]

I'm using a simpler version of the input data to demonstrate. Suppose we have a sequence [1, 2, ..., 1000] with a natural order, which needs to be split into ten batches (meaning batch_size = 100). We don't want to split it into [1, 2, ..., 100], [101, 102, ..., 200] and so forth, because then each position in a batch would jump by 100 items from one batch to the next, losing the continuity our carried-over hidden state relies on. When we rearrange the list using group_chunks, it becomes [1, 11, 21, ..., 991, 2, 12, ...]: the first batch is [1, 11, 21, ...], the second is [2, 12, 22, ...], and each position in the batch sees a contiguous stream of items across successive batches, so the natural order is maintained.
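A quick peek at how the rearranged toy data would be batched (assuming batches of 100 taken in order, as shuffle=False does):

toy = group_chunks(list(range(1, 1001)), 100)
print(toy[:3])       # 1, 11, 21  -- the start of the first batch
print(toy[100:103])  # 2, 12, 22  -- the second batch continues each position by one step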

With the previous models, the group_chunks method wouldn't have added much of a performance boost. But now it is essential.

We also no longer need to pass drop_last=True, because group_chunks already truncates each dataset to a multiple of the batch size.

In [113]:
sl = 6
seqs_mult = L((tensor(nums[i:i+sl]), tensor(nums[i+1:i+sl+1]))
         for i in range(0,len(nums)-sl-1,sl))
seqs_mult_v = L((tensor(nums_v[i:i+sl]), tensor(nums_v[i+1:i+sl+1]))
         for i in range(0,len(nums_v)-sl-1,sl))
dls = DataLoaders.from_dsets(group_chunks(seqs_mult, bs),
                             group_chunks(seqs_mult_v, bs),
                             bs=bs, shuffle=False)

Also worth mentioning is that we did something different to the sequences. Not only do we want the model to predict the word that follows each six-token sequence; we want it to predict the next token after every single input token. This requires a change in our dataset and model, but the result is worth it: by giving more signal to our model, it has more things to learn from, and therefore gets better performance.

In [129]:
class LMModel4(Module):
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)  
        self.h_h = nn.Linear(n_hidden, n_hidden)     
        self.h_o = nn.Linear(n_hidden,vocab_sz)
        self.h = 0
        
    def forward(self, x):
        outs = []
        for i in range(sl):
            self.h = self.h + self.i_h(x[:,i])
            self.h = F.relu(self.h_h(self.h))
            outs.append(self.h_o(self.h))
        self.h = self.h.detach()
        return torch.stack(outs, dim=1)
    
    def reset(self): self.h = 0

There's a slight change to the forward function too. At each step of the loop we use the current hidden state to make a prediction, collecting all of them in a list of length sl. After being stacked on dim=1, the list outs becomes a tensor of shape (bs, sl, vocab_sz), and doing cross entropy on this higher-dimensional tensor needs a modified version of the loss function: basically we flatten the tensor first, and then call the default F.cross_entropy.

In [132]:
def loss_func(inp, targ):
    return F.cross_entropy(inp.view(-1, len(vocab)), targ.view(-1))
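As a quick sanity check of these shapes (a sketch using the objects defined above, not actual notebook output):

xb, yb = dls.one_batch()
preds = LMModel4(len(vocab), 64)(xb)
print(preds.shape, yb.shape)  # expect (bs, sl, vocab_sz) and (bs, sl)
print(loss_func(preds, yb))   # both tensors are flattened before F.cross_entropy is applied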
In [133]:
learn = Learner(dls, LMModel4(len(vocab), 64), loss_func=loss_func,
                metrics=accuracy, cbs=ModelResetter)
learn.fit_one_cycle(5, 3e-3)
epoch train_loss valid_loss accuracy time
0 0.585487 0.591583 0.683835 00:07
1 0.572355 0.574745 0.685032 00:07
2 0.569663 0.570365 0.684243 00:07
3 0.567611 0.569115 0.684288 00:07
4 0.567054 0.568130 0.684638 00:07

This is a surprising accuracy!

Trying a multilayered RNN¶

What we've actually changed here is switching to PyTorch's built-in nn.RNN module, which does basically the same thing as our previous hand-written loop, but lets us easily increase the depth of our RNN by stacking layers.

In [136]:
class LMModel5(Module):
    def __init__(self, vocab_sz, n_hidden, n_layers):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.rnn = nn.RNN(n_hidden, n_hidden, n_layers, batch_first=True)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        self.h = torch.zeros(n_layers, bs, n_hidden)
        
    def forward(self, x):
        res,h = self.rnn(self.i_h(x), self.h)
        self.h = h.detach()
        return self.h_o(res)
    
    def reset(self): self.h.zero_()
learn = Learner(dls, LMModel5(len(vocab), 64, 3), 
                loss_func=CrossEntropyLossFlat(), 
                metrics=accuracy, cbs=ModelResetter)
learn.fit_one_cycle(5, 3e-3)
epoch train_loss valid_loss accuracy time
0 0.578014 0.584330 0.684266 00:12
1 0.574130 0.577295 0.685140 00:11
2 0.569712 0.574084 0.685457 00:12
3 0.568163 0.571038 0.685151 00:11
4 0.567217 0.569991 0.684593 00:12

Sadly, this doesn't make much of a difference for our model.

Using LSTMs¶

It's time to introduce a powerful tool: long short-term memory (LSTM)!

[Figure: the architecture of an LSTM cell (1750573234874.png)]

First, the arrows for input and old hidden state are joined together. In the RNN we wrote earlier in this chapter, we were adding them together. In the LSTM, we stack them in one big tensor. This means the dimension of our embeddings (which is the dimension of $x_{t}$) can be different than the dimension of our hidden state. If we call those n_in and n_hid, the arrow at the bottom is of size n_in + n_hid; thus all the neural nets (orange boxes) are linear layers with n_in + n_hid inputs and n_hid outputs.

The first gate (looking from left to right) is called the forget gate. Since it’s a linear layer followed by a sigmoid, its output will consist of scalars between 0 and 1. We multiply this result by the cell state to determine which information to keep and which to throw away: values closer to 0 are discarded and values closer to 1 are kept. This gives the LSTM the ability to forget things about its long-term state. For instance, when crossing a period or an 'end' token, we would expect it to (have learned to) reset its cell state.

The second gate is called the input gate. It works with the third gate (which doesn't really have a name but is sometimes called the cell gate) to update the cell state. For instance, we may see a new gender pronoun, in which case we'll need to replace the information about gender that the forget gate removed. Similar to the forget gate, the input gate decides which elements of the cell state to update (values close to 1) or not (values close to 0). The third gate determines what those updated values are, in the range of –1 to 1 (thanks to the tanh function). The result is then added to the cell state.

The last gate is the output gate. It determines which information from the cell state to use to generate the output. The cell state goes through a tanh before being combined with the sigmoid output from the output gate, and the result is the new hidden state.

Here's a simple implementation of the LSTM cell, with a couple of notes below:

In [143]:
class LSTMCell1(Module):
    def __init__(self, ni, nh):
        self.forget_gate = nn.Linear(ni + nh, nh)
        self.input_gate  = nn.Linear(ni + nh, nh)
        self.cell_gate   = nn.Linear(ni + nh, nh)
        self.output_gate = nn.Linear(ni + nh, nh)

    def forward(self, input, state):
        h,c = state
        h = torch.cat([h, input], dim=1)
        forget = torch.sigmoid(self.forget_gate(h))
        c = c * forget
        inp = torch.sigmoid(self.input_gate(h))
        cell = torch.tanh(self.cell_gate(h))
        c = c + inp * cell
        out = torch.sigmoid(self.output_gate(h))
        h = out * torch.tanh(c)
        return h, (h,c)
  • The hidden state and the cell state are passed in together as state, so we unpack them with h,c = state.
  • As described above, we concatenate the hidden state and the input into one big tensor with torch.cat; each gate is then a linear layer on that tensor, followed by a sigmoid (or a tanh for the cell gate).
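A quick sanity check of the cell with random tensors (hypothetical sizes ni=4 and nh=8, just to confirm the shapes):

cell = LSTMCell1(4, 8)
x = torch.randn(2, 4)                         # a batch of 2 inputs
h0, c0 = torch.zeros(2, 8), torch.zeros(2, 8)
out, (h1, c1) = cell(x, (h0, c0))
print(out.shape, h1.shape, c1.shape)          # all three are (2, 8)

Next comes a refactored cell that computes all four gates with one big matrix multiplication instead of four smaller ones, followed by LMModel6, which simply uses PyTorch's built-in nn.LSTM: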
In [137]:
class LSTMCell(Module):
    def __init__(self, ni, nh):
        self.ih = nn.Linear(ni,4*nh)
        self.hh = nn.Linear(nh,4*nh)

    def forward(self, input, state):
        h,c = state
        # One big multiplication for all the gates is better than 4 smaller ones
        gates = (self.ih(input) + self.hh(h)).chunk(4, 1)
        ingate,forgetgate,outgate = map(torch.sigmoid, gates[:3])
        cellgate = gates[3].tanh()

        c = (forgetgate*c) + (ingate*cellgate)
        h = outgate * c.tanh()
        return h, (h,c)
class LMModel6(Module):
    def __init__(self, vocab_sz, n_hidden, n_layers):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.rnn = nn.LSTM(n_hidden, n_hidden, n_layers, batch_first=True)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        self.h = [torch.zeros(n_layers, bs, n_hidden) for _ in range(2)]
        
    def forward(self, x):
        res,h = self.rnn(self.i_h(x), self.h)
        self.h = [h_.detach() for h_ in h]
        return self.h_o(res)
    
    def reset(self): 
        for h in self.h: h.zero_()
In [138]:
learn = Learner(dls, LMModel6(len(vocab), 128, 2), 
                loss_func=CrossEntropyLossFlat(), 
                metrics=accuracy, cbs=ModelResetter)
learn.fit_one_cycle(5, 1e-2)
epoch train_loss valid_loss accuracy time
0 0.575987 0.578387 0.685143 00:20
1 0.571813 0.574853 0.684710 00:20
2 0.569074 0.570702 0.685215 00:21
3 0.567799 0.569034 0.684810 00:20
4 0.567116 0.568250 0.684554 00:20

Weight-tied Regularized LSTM¶

In [139]:
class LMModel7(Module):
    def __init__(self, vocab_sz, n_hidden, n_layers, p):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.rnn = nn.LSTM(n_hidden, n_hidden, n_layers, batch_first=True)
        self.drop = nn.Dropout(p)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        self.h_o.weight = self.i_h.weight  # weight tying: the output layer shares the embedding weights
        self.h = [torch.zeros(n_layers, bs, n_hidden) for _ in range(2)]
        
    def forward(self, x):
        raw,h = self.rnn(self.i_h(x), self.h)
        out = self.drop(raw)  # apply dropout to the LSTM outputs before the final layer
        self.h = [h_.detach() for h_ in h]
        return self.h_o(out),raw,out  # also return the raw and dropped activations for fastai's RNN regularization
    
    def reset(self): 
        for h in self.h: h.zero_()
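What's new here: nn.Dropout(p) randomly zeroes activations during training as a form of regularization, and the assignment self.h_o.weight = self.i_h.weight ties the output weights to the input embedding, so the model uses the same representation for reading tokens and for predicting them. A quick (hypothetical) check that weight tying really shares a single parameter:

m = LMModel7(len(vocab), 64, 2, 0.4)
print(m.h_o.weight is m.i_h.weight)  # True -- both layers point at the same tensor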
In [140]:
learn = TextLearner(dls, LMModel7(len(vocab), 64, 2, 0.4),
                    loss_func=CrossEntropyLossFlat(), metrics=accuracy)
In [142]:
learn.fit_one_cycle(5, 1e-2, wd=0.1)
epoch train_loss valid_loss accuracy time
0 0.577216 0.579187 0.684898 00:13
1 0.573831 0.572951 0.684896 00:13
2 0.570968 0.571168 0.685196 00:13
3 0.568685 0.569038 0.684913 00:13
4 0.567818 0.568213 0.684796 00:13