import os
import random

import numpy as np


class GAILMem:
    def __init__(self, targetNum):
        self.targetNum = targetNum
        self.states = [[] for _ in range(self.targetNum)]
        self.actorProbs = [[] for _ in range(self.targetNum)]
        self.actions = [[] for _ in range(self.targetNum)]
        self.rewards = [[] for _ in range(self.targetNum)]
        self.dones = [[] for _ in range(self.targetNum)]
        self.memNum = [0 for _ in range(self.targetNum)]

    def clearMem(self, targetType):
        """Clear all memories stored for one target type."""
        self.states[targetType] = []
        self.actorProbs[targetType] = []
        self.actions[targetType] = []
        self.rewards[targetType] = []
        self.dones[targetType] = []
        self.memNum[targetType] = 0

    def saveMemtoFile(self, dir: str):
        """Save memories as ndarrays to one npz file per target type.

        Args:
            dir (str): save directory, like "GAIL-Expert-Data/"; must end with "/"
        """
        # create the save directory if it does not exist yet
        os.makedirs(dir, exist_ok=True)
        for i in range(self.targetNum):
            statesNP = np.asarray(self.states[i])
            actorProbsNP = np.asarray(self.actorProbs[i])
            actionsNP = np.asarray(self.actions[i])
            rewardsNP = np.asarray(self.rewards[i])
            donesNP = np.asarray(self.dones[i])
            # one pack per target type; the name must match loadMemFile below
            thisSaveDir = dir + "pack-" + str(i)
            np.savez(
                thisSaveDir,
                states=statesNP,
                actorProbs=actorProbsNP,
                actions=actionsNP,
                rewards=rewardsNP,
                dones=donesNP,
            )

    def loadMemFile(self, dir: str):
        """Load memories from npz files.

        Args:
            dir (str): file directory
        """
        for i in range(self.targetNum):
            self.clearMem(i)
            loadDir = dir + "pack-" + str(i) + ".npz"
            memFile = np.load(loadDir, allow_pickle=True)
            self.states[i] = memFile["states"].tolist()
            self.actorProbs[i] = memFile["actorProbs"].tolist()
            self.actions[i] = memFile["actions"].tolist()
            self.rewards[i] = memFile["rewards"].tolist()
            self.dones[i] = memFile["dones"].tolist()
            self.memNum[i] = len(self.states[i])

    def getRandomSample(self, sampleNum: int, targetType: int):
        """Get a random set of unique samples for one target type.

        Args:
            sampleNum (int): number of samples; pass 0 to return all samples.
            targetType (int): which target type to sample from.

        Returns:
            tuple: (states, actorProbs, actions, rewards, dones)
        """
        if sampleNum == 0:
            return (
                self.getStates(targetType),
                self.getActorProbs(targetType),
                self.getActions(targetType),
                self.getRewards(targetType),
                self.getDones(targetType),
            )
        else:
            # sample indices without replacement from this target type's memories
            randIndex = random.sample(range(0, self.memNum[targetType]), sampleNum)
            return (
                self.standDims(np.asarray(self.states[targetType])[randIndex]),
                self.standDims(np.asarray(self.actorProbs[targetType])[randIndex]),
                self.standDims(np.asarray(self.actions[targetType])[randIndex]),
                self.standDims(np.asarray(self.rewards[targetType])[randIndex]),
                self.standDims(np.asarray(self.dones[targetType])[randIndex]),
            )

    def getStates(self, targetType):
        """Get all states data for one target type as an ndarray.

        Returns:
            ndarray: states data
        """
        return self.standDims(np.asarray(self.states[targetType]))

    def getActorProbs(self, targetType):
        """Get all actorProbs data for one target type as an ndarray.

        Returns:
            ndarray: actorProbs data
        """
        return self.standDims(np.asarray(self.actorProbs[targetType]))

    def getActions(self, targetType):
        """Get all actions data for one target type as an ndarray.

        Returns:
            ndarray: actions data
        """
        return self.standDims(np.asarray(self.actions[targetType]))

    def getRewards(self, targetType):
        """Get all rewards data for one target type as an ndarray.

        Returns:
            ndarray: rewards data
        """
        return self.standDims(np.asarray(self.rewards[targetType]))

    def getDones(self, targetType):
        """Get all dones data for one target type as an ndarray.

        Returns:
            ndarray: dones data
        """
        return self.standDims(np.asarray(self.dones[targetType]))

    def standDims(self, data):
        """Standardize data's dimensions to 2-D.

        Args:
            data (list or ndarray): data

        Returns:
            ndarray: 2-D ndarray
        """
        if np.ndim(data) > 2:
            return np.squeeze(data, axis=1)
        elif np.ndim(data) < 2:
            return np.expand_dims(data, axis=1)
        else:
            return np.asarray(data)

    def saveMems(self, state, actorProb, action, reward, done):
        """Save one step of memories.

        Args:
            state (ndarray): state; state[0, 0] encodes the target type
            actorProb (ndarray): actor's predicted action probabilities
            action (ndarray): action chosen by the actor
            reward (float): reward
            done (bool): episode-done flag
        """
        # the target type is encoded in the first element of the state
        targetType = int(state[0, 0])
        self.states[targetType].append(state)
        self.actorProbs[targetType].append(actorProb)
        self.actions[targetType].append(action)
        self.rewards[targetType].append(reward)
        self.dones[targetType].append(done)
        self.memNum[targetType] += 1
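

# A minimal usage sketch (an addition, not part of the original class). It
# assumes 2-D states whose element [0, 0] encodes the target type and a
# hypothetical 2-action policy; shapes, values, and the save path are
# illustrative assumptions only.
if __name__ == "__main__":
    mem = GAILMem(targetNum=2)
    for step in range(8):
        targetType = step % 2
        # saveMems reads the target type from state[0, 0]
        state = np.asarray([[float(targetType), 0.5, -0.5]])
        actorProb = np.asarray([[0.7, 0.3]])  # assumed 2-action policy output
        action = np.asarray([[0]])
        mem.saveMems(state, actorProb, action, reward=1.0, done=False)

    # draw 3 unique random samples from target type 0
    states, actorProbs, actions, rewards, dones = mem.getRandomSample(3, 0)
    print(states.shape, rewards.shape)  # (3, 3) (3, 1)

    # round-trip through npz files; "GAIL-Expert-Data/" is a hypothetical path
    mem.saveMemtoFile("GAIL-Expert-Data/")
    mem.loadMemFile("GAIL-Expert-Data/")
    print(mem.memNum)  # [4, 4]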