GAIL class: fix some bugs, now runnable

GAIL class fixes only, no new functionality
This commit is contained in:
Koha9 2022-10-15 01:08:08 +09:00
parent 2a498f18f6
commit 6ab56880d8
8 changed files with 555 additions and 29 deletions

File diff suppressed because one or more lines are too long

View File

@ -9,7 +9,7 @@ from tensorflow.keras import optimizers
from GAILConfig import GAILConfig
EPS = 1e-8
EPS = 1e-6
class GAIL(object):
@ -32,6 +32,7 @@ class GAIL(object):
self.discriminatorNNShape = gailConfig.discrimNNShape
self.discrimLR = gailConfig.discrimLR
self.discrimTrainEpochs = gailConfig.discrimTrainEpochs
self.discrimSaveDir = gailConfig.discrimSaveDir
self.ppoConfig = gailConfig.ppoConfig
self.ppo = PPO(stateSize, disActShape, conActSize, conActRange, self.ppoConfig)
@ -103,7 +104,7 @@ class GAIL(object):
actionsNum = int(len(actions) / self.totalActSize)
actions = actions.reshape([actionsNum, self.totalActSize])
thisTrajectory = tf.concat([states, actions], axis=1)
thisTrajectory = np.append(states, actions, axis=1)
discrimPredict = self.discriminator(thisTrajectory)
return discrimPredict
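For context, a minimal sketch of the new trajectory construction outside the class (the sizes are made up for illustration; stateSize=90 and totalActSize=4 are assumptions, not values taken from this commit):

import numpy as np

states = np.random.rand(4, 90).astype(np.float32)   # 4 samples, assumed stateSize=90
actions = np.random.rand(4, 4).astype(np.float64)   # 4 samples, assumed totalActSize=4

# np.append with axis=1 joins the two blocks column-wise into a (4, 94) array
# and upcasts mismatched float dtypes instead of raising, which is presumably
# what the earlier tf.concat call tripped over.
thisTrajectory = np.append(states, actions, axis=1)
print(thisTrajectory.shape)  # (4, 94)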
@ -136,8 +137,8 @@ class GAIL(object):
"""
if epochs == None:
epochs = self.discrimTrainEpochs
demoTrajectory = tf.concat([demoStates, demoActions], axis=1)
agentTrajectory = tf.concat([agentStates, agentActions], axis=1)
demoTrajectory = np.append(demoStates, demoActions, axis=1)
agentTrajectory = np.append(agentStates, agentActions, axis=1)
his = self.discriminator.fit(x=agentTrajectory, y=demoTrajectory, epochs=epochs, verbose=0)
demoAcc = np.mean(self.inference(demoStates, demoActions))
@ -145,6 +146,18 @@ class GAIL(object):
return his.history["loss"], demoAcc, 1 - agentAcc
def getActions(self, state: ndarray):
"""Agent choose action to take
Args:
state (ndarray): enviroment state
Returns:
np.array:
actions,
actions list,2dims like [[0],[1],[1.5]]
predictResult,
actor NN predict Result output
"""
actions, predictResult = self.ppo.chooseAction(state)
return actions, predictResult
@ -165,6 +178,12 @@ class GAIL(object):
actorLosses = self.ppo.trainActor(states, oldActorResult, actions, advantage, epochs)
return actorLosses, criticLosses
def saveWeights(self, score: float):
saveDir = self.discrimSaveDir + "discriminator/discriminator.ckpt"
self.discriminator.save_weights(saveDir, save_format="tf")
print("GAIL Model's Weights Saved")
self.ppo.saveWeights(score=score)
def generateAction(self, states: ndarray):
act, actorP = self.ppo.chooseAction(states)
return act, actorP

View File

@ -0,0 +1,45 @@
import matplotlib.pyplot as plt
DarkBlue = "#011627"
DarkWhite = "#c9d2df"
class GAILHistory(object):
def __init__(self):
self.meanRewards = []
self.discrimLosses = []
self.actorLosses = []
self.criticLosses = []
self.demoAccs = []
self.agentAccs = []
def saveHis(self, rewards, dLosses, aLosses, cLosses, demoAcc, agentAcc):
self.meanRewards.extend([rewards])
self.discrimLosses.extend(dLosses)
self.actorLosses.extend(aLosses)
self.criticLosses.extend(cLosses)
self.demoAccs.extend([demoAcc])
self.agentAccs.extend([agentAcc])
def drawHis(self):
def setSubFig(subFig, data, title):
subFig.set_facecolor(DarkBlue)
subFig.tick_params(colors=DarkWhite)
subFig.spines["top"].set_color(DarkWhite)
subFig.spines["bottom"].set_color(DarkWhite)
subFig.spines["left"].set_color(DarkWhite)
subFig.spines["right"].set_color(DarkWhite)
subFig.plot(range(len(data)), data, color=DarkWhite, label=title)
subFig.set_title(title, color=DarkWhite)
fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(
3, 2, figsize=(21, 13), facecolor=DarkBlue
)
plt.tick_params()
setSubFig(ax1, self.meanRewards, "meanRewards")
setSubFig(ax2, self.discrimLosses, "discrimLosses")
setSubFig(ax3, self.demoAccs, "demoAccs")
setSubFig(ax4, self.actorLosses, "actorLosses")
setSubFig(ax5, self.agentAccs, "agentAccs")
setSubFig(ax6, self.criticLosses, "criticLosses")
plt.show()
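A minimal usage sketch of the new history class (assuming the class lives in GAILHistory.py; the numbers are invented for illustration):

from GAILHistory import GAILHistory

gailHis = GAILHistory()
# per-iteration records: the three loss arguments are lists (one entry per
# training epoch), while the reward and the two accuracies are single scalars
gailHis.saveHis(rewards=1.5, dLosses=[0.70, 0.66], aLosses=[0.21], cLosses=[0.43], demoAcc=0.82, agentAcc=0.35)
gailHis.drawHis()  # draws the 3x2 dark-themed matplotlib figure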

View File

@ -61,7 +61,7 @@ class GAILMem(object):
dir (str): file direction
"""
self.clearMem()
memFile = np.load(dir)
memFile = np.load(dir, allow_pickle=True)
self.states = memFile["states"].tolist()
self.actorProbs = memFile["actorProbs"].tolist()
self.actions = memFile["actions"].tolist()
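allow_pickle=True matters because the expert .npz stores object-dtype arrays (the ragged action lists that show up later in the notebook); a small repro sketch with a hypothetical file name:

import numpy as np

# object-dtype array, analogous to the ragged "actions" in the expert data
ragged = np.array([[0, 1, [0.5]], [1, 0, [0.2]]], dtype=object)
np.savez("demo.npz", actions=ragged)

# np.load("demo.npz")["actions"]  # ValueError: Object arrays cannot be loaded when allow_pickle=False
actions = np.load("demo.npz", allow_pickle=True)["actions"]  # loads fine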

View File

@ -68,7 +68,28 @@
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n"
]
},
{
"ename": "AttributeError",
"evalue": "module 'numpy' has no attribute 'aa'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_40408/576030716.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[0mACTSPEC\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mACTION_SPEC\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloadDir\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetSteps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0maa\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 13\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 14\u001b[0m agent = PPO(\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\__init__.py\u001b[0m in \u001b[0;36m__getattr__\u001b[1;34m(attr)\u001b[0m\n\u001b[0;32m 313\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mTester\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 314\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 315\u001b[1;33m raise AttributeError(\"module {!r} has no attribute \"\n\u001b[0m\u001b[0;32m 316\u001b[0m \"{!r}\".format(__name__, attr))\n\u001b[0;32m 317\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mAttributeError\u001b[0m: module 'numpy' has no attribute 'aa'"
]
}
],
"source": [
"# initialize enviroment & buffer class\n",
"env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n",
@ -81,6 +102,7 @@
"CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n",
"ACTSPEC = env.ACTION_SPEC\n",
"_, _, _, loadDir, _ = env.getSteps()\n",
"np.aa\n",
"\n",
"agent = PPO(\n",
" stateSize=STATE_SIZE,\n",

View File

@ -343,8 +343,8 @@ class PPO(object):
totalALoss += continuousAloss
totalActionNum += 1.0
lastConAct += self.muSigSize
loss = tf.divide(totalALoss, totalActionNum)
return loss
# loss = tf.divide(totalALoss, totalActionNum)
return totalALoss
return loss
@ -358,7 +358,7 @@ class PPO(object):
Returns:
np.array:
actions,
actions list,2dims like [[0],[1],[1.5]]
actions list,1dims like [0,1,1.5]
predictResult,
actor NN predict Result output
"""
@ -392,8 +392,9 @@ class PPO(object):
if math.isnan(thisMu) or math.isnan(thisSig):
# check mu or sigma is nan
print("chooseAction:mu or sigma is nan")
print(predictResult)
thisDist = np.random.normal(loc=thisMu, scale=thisSig)
actions.append(np.clip(thisDist, -self.conActRange, self.conActRange))
actions.append(np.clip(thisDist, -self.conActRange, self.conActRange)[0])
lastConAct += 2
return actions, predictResult
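The trailing [0] is presumably needed because the sliced mu/sigma keep a length-1 axis, so np.random.normal returns a one-element array rather than a scalar; a minimal sketch under that assumption (conActRange=10.0 is an assumed value):

import numpy as np

thisMu = np.array([0.3])                                 # mu sliced from the actor output, shape (1,)
thisSig = np.array([0.1])                                # sigma, shape (1,)
sample = np.random.normal(loc=thisMu, scale=thisSig)     # shape (1,), not a plain float
clipped = np.clip(sample, -10.0, 10.0)                   # still shape (1,)
action = clipped[0]                                      # plain float, keeps the actions list 1-D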
@ -534,7 +535,7 @@ class PPO(object):
)
scorefile = open(score_dir, "w")
scorefile.close()
print("Model's Weights Saved")
print("PPO Model's Weights Saved")
def loadWeightToModels(self, loadDir: str):
"""load NN Model. Use "models.load_weights()" method.

View File

@ -74,7 +74,7 @@ class makeEnv(object):
continuousActions = np.asanyarray([[0.0]])
else:
# create continuous actions from actions list
continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE :])
continuousActions = np.asanyarray([actions[self.DISCRETE_SIZE :]])
if behaviorName is None:
behaviorName = self.BEHA_NAME
@ -154,7 +154,7 @@ class makeEnv(object):
self.statesBuffer[-1] = state
# return stacked states
return self.statesBuffer[self.STACK_INDEX]
return np.reshape(self.statesBuffer[self.STACK_INDEX], (self.STATE_SIZE))
def render(self):
"""render enviroment"""

View File

@ -431,32 +431,178 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"[0]\n"
]
"data": {
"text/plain": [
"array([0, 1, 2, 0, 1, 2, 0, 1, 2])"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from collections import deque\n",
"import numpy as np\n",
"\n",
"ss = 1\n",
"si = 0\n",
"buffersize = ss + ((ss-1)*si)\n",
"print(buffersize)\n",
"stackedStates = deque([[0.0] * 10]*6, maxlen=3)\n",
"stackedStates.append([1.0]*10)\n",
"ssnp = stackedStates\n",
"aa = np.array([range(0,3)]*5)\n",
"np.reshape(aa[[0,1,2]],(9))"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'int'>\n",
"<class 'float'>\n",
"<class 'list'>\n",
"300\n",
"256.1\n",
"[300, 256.1]\n",
"300\n",
"256.1\n"
]
}
],
"source": [
"# 変数を設定\n",
"ringo_int = 300\n",
"ringo_float = 256.1\n",
"ringo_list = [ringo_int, ringo_float]\n",
"\n",
"aa = list(range(0,buffersize,si+1))\n",
"print(aa)"
"# 型を確認\n",
"print(type(ringo_int))\n",
"print(type(ringo_float))\n",
"print(type(ringo_list))\n",
"\n",
"# 値を表示\n",
"print(ringo_int)\n",
"print(ringo_float)\n",
"print(ringo_list)\n",
"\n",
"# 配列から要素を取り出す\n",
"print(ringo_list[0]) # ここでエラーになるという。どぼじで???\n",
"print(ringo_list[1])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"dirrr = \"GAIL-Expert-Data/1014-1302/pack-24957.npz\"\n",
"\n",
"memFile = np.load(dirrr, allow_pickle=True)\n",
"states = memFile[\"states\"].tolist()\n",
"actorProbs = memFile[\"actorProbs\"].tolist()\n",
"actions = memFile[\"actions\"].tolist()\n",
"rewards = memFile[\"rewards\"].tolist()\n",
"dones = memFile[\"dones\"].tolist()\n",
"memNum = len(states)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\UCUNI\\AppData\\Local\\Temp/ipykernel_39608/3742051961.py:3: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
" npact = np.array(actions)\n"
]
}
],
"source": [
"states = np.reshape(states, (24957, 90))\n",
"\n",
"npact = np.array(actions)\n",
"\n",
"last = npact[:,3]\n",
"newlast = []\n",
"last[2][0]\n",
"for i in range(len(last)):\n",
" newlast.append(last[i][0])\n",
"\n",
"#print(newlast)\n",
"npact[:,3] = newlast\n",
"\n",
"statesNP = np.asarray(states)\n",
"actorProbsNP = np.asarray(actorProbs)\n",
"actionsNP = np.asarray(npact)\n",
"rewardsNP = np.asarray(rewards)\n",
"donesNP = np.asarray(dones)\n",
"thisSaveDir = \"GAIL-Expert-Data/1014-1302/pack-24957-RE.npz\"\n",
"\n",
"np.savez(\n",
" thisSaveDir,\n",
" states=statesNP,\n",
" actorProbs=actorProbsNP,\n",
" actions=actionsNP,\n",
" rewards=rewardsNP,\n",
" dones=donesNP,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(actions)\n",
"npact = np.array(actions)\n",
"\n",
"last = npact[:,3]\n",
"newlast = []\n",
"last[2][0]\n",
"for i in range(len(last)):\n",
" newlast.append(last[i][0])\n",
"\n",
"#print(newlast)\n",
"npact[:,3] = newlast\n",
"print(npact)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[1. 2. 3. 1. 2. 1.]\n",
" [2. 2. 3. 2. 2. 1.]]\n",
"tf.Tensor(\n",
"[[1 2 3 1 2 1]\n",
" [2 2 3 2 2 1]], shape=(2, 6), dtype=int32)\n"
]
}
],
"source": [
"from matplotlib.pyplot import axis\n",
"import tensorflow as tf\n",
"import numpy as np\n",
"\n",
"aa = np.array([[1,2,3],[2,2,3]])\n",
"bb = np.array([[1,2,1.],[2,2,1.]])\n",
"print(np.append(aa,bb,axis=1))\n",
"print(tf.concat([aa,bb],axis=1))"
]
}
],