TuanScientist committed on
Commit
fe7b65d
1 Parent(s): f7d4fd5

Update objects_9x9.py

Browse files
Files changed (1) hide show
  1. objects_9x9.py +5 -5
objects_9x9.py CHANGED
@@ -76,13 +76,13 @@ class State:
76
  # backpropagate reward
77
  if result == 1:
78
  self.p1.feedReward(1)
79
- self.p2.feedReward(0)
80
  elif result == -1:
81
- self.p1.feedReward(0)
82
  self.p2.feedReward(1)
83
  else:
84
  self.p1.feedReward(0.1)
85
- self.p2.feedReward(0.5)
86
 
87
  # board reset
88
  def reset(self):
@@ -189,7 +189,7 @@ class Player:
189
  def __init__(self, name, exp_rate=0.3):
190
  self.name = name
191
  self.states = [] # record all positions taken
192
- self.lr = 0.2
193
  self.exp_rate = exp_rate
194
  self.decay_gamma = 0.9
195
  self.states_value = {} # state -> value
@@ -270,7 +270,7 @@ if __name__ == "__main__":
270
 
271
  st = State(p1, p2)
272
  print("training...")
273
- st.playwithbot(200000)
274
 
275
  p1.savePolicy()
276
  p2.savePolicy()
 
76
  # backpropagate reward
77
  if result == 1:
78
  self.p1.feedReward(1)
79
+ self.p2.feedReward(-1)
80
  elif result == -1:
81
+ self.p1.feedReward(-1)
82
  self.p2.feedReward(1)
83
  else:
84
  self.p1.feedReward(0.1)
85
+ self.p2.feedReward(0.1)
86
 
87
  # board reset
88
  def reset(self):
 
189
  def __init__(self, name, exp_rate=0.3):
190
  self.name = name
191
  self.states = [] # record all positions taken
192
+ self.lr = 0.1
193
  self.exp_rate = exp_rate
194
  self.decay_gamma = 0.9
195
  self.states_value = {} # state -> value
 
270
 
271
  st = State(p1, p2)
272
  print("training...")
273
+ st.playwithbot(500000)
274
 
275
  p1.savePolicy()
276
  p2.savePolicy()