66 lines
1.8 KiB
Python
66 lines
1.8 KiB
Python
import numpy as np
|
|
|
|
|
|
class PPOBuffer(object):
    """Store per-step transition data in five parallel lists.

    The lists hold states, actor probabilities, actions, rewards and done
    flags. The ``get*`` methods convert a list to a NumPy array and pass it
    through :meth:`standDims` to normalise its dimensions.
    """

    def __init__(self):
        """Create an empty buffer and print a confirmation message."""
        # Reuse clearBuffer so the set of fields is defined in one place.
        self.clearBuffer()
        print("√√√√√Buffer Initialized Success√√√√√")

    def clearBuffer(self):
        """Reset every stored list to a new empty list."""
        self.states = []
        self.actorProbs = []
        self.actions = []
        self.rewards = []
        self.dones = []

    def getStates(self):
        """Return the stored states as an array normalised by standDims."""
        return self.standDims(np.asarray(self.states))

    def getActorProbs(self):
        """Return the stored actor probabilities normalised by standDims."""
        return self.standDims(np.asarray(self.actorProbs))

    def getActions(self):
        """Return the stored actions as an array normalised by standDims."""
        return self.standDims(np.asarray(self.actions))

    def getRewards(self):
        """Return the stored rewards as an array normalised by standDims."""
        return self.standDims(np.asarray(self.rewards))

    def getDones(self):
        """Return the stored done flags as an array normalised by standDims."""
        return self.standDims(np.asarray(self.dones))

    def saveState(self, state):
        """Append a single state to the buffer."""
        self.states.append(state)

    def saveAction(self, action):
        """Append a single action to the buffer."""
        self.actions.append(action)

    def saveReward(self, reward):
        """Append a single reward to the buffer."""
        self.rewards.append(reward)

    def standDims(self, data):
        """Standardize the dimensions of ``data``.

        * more than 2 dimensions: axis 1 is removed with ``np.squeeze``,
          e.g. ``(N, 1, D)`` becomes ``(N, D)``;
        * fewer than 2 dimensions: a new axis is inserted at position 1,
          e.g. ``(N,)`` becomes ``(N, 1)``;
        * exactly 2 dimensions: returned as an array, unchanged.

        NOTE(review): ``np.squeeze(..., axis=1)`` raises ``ValueError`` when
        axis 1 does not have length 1, and ``np.expand_dims(..., axis=1)``
        rejects 0-d input. Confirm callers never store such data.
        """
        dims = np.ndim(data)
        if dims > 2:
            return np.squeeze(data, axis=1)
        if dims < 2:
            return np.expand_dims(data, axis=1)
        return np.asarray(data)

    def saveBuffers(self, state, actorProb, action, reward, done):
        """Append one complete step to all five lists at once."""
        self.states.append(state)
        self.actorProbs.append(actorProb)
        self.actions.append(action)
        self.rewards.append(reward)
        self.dones.append(done)
|