Aimbot-PPO/Aimbot-PPO-Python/Tensorflow/PPOBuffer.py

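"""PPOBuffer: a minimal on-policy rollout buffer for PPO.

Stores states, actor probabilities, actions, rewards, and done flags as
Python lists, and returns them as 2-D numpy arrays for training.
"""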
import numpy as np


class PPOBuffer(object):
    def __init__(self):
        # Rollout storage: one list per trajectory component.
        self.states = []
        self.actorProbs = []
        self.actions = []
        self.rewards = []
        self.dones = []
        print("√√√√√Buffer Initialized Successfully√√√√√")
    def clearBuffer(self):
        # Drop all stored transitions, typically after each policy update.
        self.states = []
        self.actorProbs = []
        self.actions = []
        self.rewards = []
        self.dones = []
    def getStates(self):
        return self.standDims(np.asarray(self.states))

    def getActorProbs(self):
        return self.standDims(np.asarray(self.actorProbs))

    def getActions(self):
        return self.standDims(np.asarray(self.actions))

    def getRewards(self):
        return self.standDims(np.asarray(self.rewards))

    def getDones(self):
        return self.standDims(np.asarray(self.dones))
    def saveState(self, state):
        self.states.append(state)

    def saveAction(self, action):
        self.actions.append(action)

    def saveReward(self, reward):
        self.rewards.append(reward)
    def standDims(self, data):
        # Standardize the array to 2 dimensions:
        # (N, 1, D) -> (N, D); (N,) -> (N, 1); 2-D arrays pass through.
        if np.ndim(data) > 2:
            return np.squeeze(data, axis=1)
        elif np.ndim(data) < 2:
            return np.expand_dims(data, axis=1)
        else:
            return np.asarray(data)
    def saveBuffers(self, state, actorProb, action, reward, done):
        # Store one full transition in a single call.
        self.states.append(state)
        self.actorProbs.append(actorProb)
        self.actions.append(action)
        self.rewards.append(reward)
        self.dones.append(done)
        # Leftover debug output, kept disabled; note `self.values` no longer
        # exists on this class.
        # print("self.states", self.states)
        # print("self.actions", self.actions)
        # print("self.rewards", self.rewards)
        # print("self.dones", self.dones)
        # print("self.values", self.values)