import numpy as np


class PPOBuffer:
    """Trajectory buffer for PPO training.

    Accumulates per-step states, action probabilities, actions, rewards,
    and done flags in parallel Python lists, and exposes them as NumPy
    arrays with a standardized 2-D shape via the ``get*`` accessors.
    """

    def __init__(self):
        # Delegate to clearBuffer so the reset logic lives in one place.
        self.clearBuffer()
        print("√√√√√Buffer Initialized Success√√√√√")

    def clearBuffer(self):
        """Reset all storage lists to empty (e.g. after a PPO update)."""
        self.states = []
        self.actorProbs = []
        self.actions = []
        self.rewards = []
        self.dones = []

    def getStates(self):
        """Return stored states as a 2-D NumPy array."""
        return self.standDims(np.asarray(self.states))

    def getActorProbs(self):
        """Return stored action probabilities as a 2-D NumPy array."""
        return self.standDims(np.asarray(self.actorProbs))

    def getActions(self):
        """Return stored actions as a 2-D NumPy array."""
        return self.standDims(np.asarray(self.actions))

    def getRewards(self):
        """Return stored rewards as a 2-D NumPy array."""
        return self.standDims(np.asarray(self.rewards))

    def getDones(self):
        """Return stored done flags as a 2-D NumPy array."""
        return self.standDims(np.asarray(self.dones))

    def saveState(self, state):
        """Append a single state observation."""
        self.states.append(state)

    def saveActorProb(self, actorProb):
        """Append a single action-probability entry.

        Added for consistency with saveState/saveAction/saveReward.
        """
        self.actorProbs.append(actorProb)

    def saveAction(self, action):
        """Append a single action."""
        self.actions.append(action)

    def saveReward(self, reward):
        """Append a single reward."""
        self.rewards.append(reward)

    def saveDone(self, done):
        """Append a single done flag.

        Added for consistency with saveState/saveAction/saveReward.
        """
        self.dones.append(done)

    def standDims(self, data):
        """Standardize ``data`` to two dimensions.

        - ndim > 2: squeeze axis 1 (assumes axis 1 has size 1 — e.g.
          entries stored with a singleton batch dim; NumPy raises
          ``ValueError`` otherwise — TODO confirm callers guarantee this).
        - ndim < 2: add a trailing axis, giving shape (n, 1).
        - ndim == 2: returned unchanged.
        """
        if np.ndim(data) > 2:
            return np.squeeze(data, axis=1)
        elif np.ndim(data) < 2:
            return np.expand_dims(data, axis=1)
        else:
            return np.asarray(data)

    def saveBuffers(self, state, actorProb, action, reward, done):
        """Append one full transition (state, prob, action, reward, done)."""
        self.states.append(state)
        self.actorProbs.append(actorProb)
        self.actions.append(action)
        self.rewards.append(reward)
        self.dones.append(done)