GAIL class: fix some bugs, now runnable
Fixes to the GAIL class only; no new functionality
This commit is contained in:
parent 2a498f18f6
commit 6ab56880d8
293  Aimbot-PPO-Python/GAIL-Main.ipynb  Normal file
File diff suppressed because one or more lines are too long
@@ -9,7 +9,7 @@ from tensorflow.keras import optimizers
 from GAILConfig import GAILConfig

-EPS = 1e-8
+EPS = 1e-6


 class GAIL(object):
@@ -32,6 +32,7 @@ class GAIL(object):
         self.discriminatorNNShape = gailConfig.discrimNNShape
         self.discrimLR = gailConfig.discrimLR
+        self.discrimTrainEpochs = gailConfig.discrimTrainEpochs
         self.discrimSaveDir = gailConfig.discrimSaveDir
         self.ppoConfig = gailConfig.ppoConfig

         self.ppo = PPO(stateSize, disActShape, conActSize, conActRange, self.ppoConfig)
@@ -103,7 +104,7 @@ class GAIL(object):
         actionsNum = int(len(actions) / self.totalActSize)
         actions = actions.reshape([actionsNum, self.totalActSize])

-        thisTrajectory = tf.concat([states, actions], axis=1)
+        thisTrajectory = np.append(states, actions, axis=1)
         discrimPredict = self.discriminator(thisTrajectory)
         return discrimPredict
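Note: a plausible reason for replacing tf.concat with np.append here and in trainDiscriminator below: np.append upcasts the state and action columns to a common float dtype and returns one plain NumPy trajectory matrix, whereas the experiment cell at the bottom of this diff records tf.concat taking its dtype from the first input (int32 there). A standalone sketch, not this repo's code:

    import numpy as np

    states = np.array([[0.1, 0.2], [0.3, 0.4]])   # float state rows
    actions = np.array([[1, 0], [0, 1]])           # int discrete-action rows

    # np.append concatenates column-wise and upcasts to a common float dtype,
    # giving one (N, stateSize + actSize) trajectory matrix
    traj = np.append(states, actions, axis=1)
    print(traj.shape, traj.dtype)                  # (2, 4) float64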
@@ -136,8 +137,8 @@ class GAIL(object):
         """
         if epochs == None:
             epochs = self.discrimTrainEpochs
-        demoTrajectory = tf.concat([demoStates, demoActions], axis=1)
-        agentTrajectory = tf.concat([agentStates, agentActions], axis=1)
+        demoTrajectory = np.append(demoStates, demoActions, axis=1)
+        agentTrajectory = np.append(agentStates, agentActions, axis=1)
         his = self.discriminator.fit(x=agentTrajectory, y=demoTrajectory, epochs=epochs, verbose=0)

         demoAcc = np.mean(self.inference(demoStates, demoActions))
@@ -145,6 +146,18 @@ class GAIL(object):
         return his.history["loss"], demoAcc, 1 - agentAcc

+    def getActions(self, state: ndarray):
+        """Agent choose action to take
+
+        Args:
+            state (ndarray): enviroment state
+
+        Returns:
+            np.array:
+                actions,
+                    actions list,2dims like [[0],[1],[1.5]]
+                predictResult,
+                    actor NN predict Result output
+        """
+        actions, predictResult = self.ppo.chooseAction(state)
+        return actions, predictResult
+
@@ -165,6 +178,12 @@ class GAIL(object):
         actorLosses = self.ppo.trainActor(states, oldActorResult, actions, advantage, epochs)
         return actorLosses, criticLosses

+    def saveWeights(self, score: float):
+        saveDir = self.discrimSaveDir + "discriminator/discriminator.ckpt"
+        self.discriminator.save_weights(saveDir, save_format="tf")
+        print("GAIL Model's Weights Saved")
+        self.ppo.saveWeights(score=score)
+
     def generateAction(self, states: ndarray):
         act, actorP = self.ppo.chooseAction(states)
         return act, actorP
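Note: save_weights(..., save_format="tf") writes a TensorFlow checkpoint (several files sharing the .ckpt prefix); restoring it later is symmetric, rebuilding the same architecture and calling load_weights on the prefix. A minimal sketch with a hypothetical stand-in model and an assumed path layout (the real prefix comes from gailConfig.discrimSaveDir):

    import tensorflow as tf

    # hypothetical stand-in for the discriminator network
    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(8,))])

    ckpt_prefix = "GAIL-Model/discriminator/discriminator.ckpt"  # assumed layout
    model.save_weights(ckpt_prefix, save_format="tf")

    # later: rebuild the same architecture, then restore the weights
    restored = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(8,))])
    restored.load_weights(ckpt_prefix)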
45  Aimbot-PPO-Python/GAILHistory.py  Normal file
@@ -0,0 +1,45 @@
+import matplotlib.pyplot as plt
+
+DarkBlue = "#011627"
+DarkWhite = "#c9d2df"
+
+
+class GAILHistory(object):
+    def __init__(self):
+        self.meanRewards = []
+        self.discrimLosses = []
+        self.actorLosses = []
+        self.criticLosses = []
+        self.demoAccs = []
+        self.agentAccs = []
+
+    def saveHis(self, rewards, dLosses, aLosses, cLosses, demoAcc, agentAcc):
+        self.meanRewards.extend([rewards])
+        self.discrimLosses.extend(dLosses)
+        self.actorLosses.extend(aLosses)
+        self.criticLosses.extend(cLosses)
+        self.demoAccs.extend([demoAcc])
+        self.agentAccs.extend([agentAcc])
+
+    def drawHis(self):
+        def setSubFig(subFig, data, title):
+            subFig.set_facecolor(DarkBlue)
+            subFig.tick_params(colors=DarkWhite)
+            subFig.spines["top"].set_color(DarkWhite)
+            subFig.spines["bottom"].set_color(DarkWhite)
+            subFig.spines["left"].set_color(DarkWhite)
+            subFig.spines["right"].set_color(DarkWhite)
+            subFig.plot(range(len(data)), data, color=DarkWhite, label=title)
+            subFig.set_title(title, color=DarkWhite)
+
+        fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(
+            3, 2, figsize=(21, 13), facecolor=DarkBlue
+        )
+        plt.tick_params()
+        setSubFig(ax1, self.meanRewards, "meanRewards")
+        setSubFig(ax2, self.discrimLosses, "discrimLosses")
+        setSubFig(ax3, self.demoAccs, "demoAccs")
+        setSubFig(ax4, self.actorLosses, "actorLosses")
+        setSubFig(ax5, self.agentAccs, "agentAccs")
+        setSubFig(ax6, self.criticLosses, "criticLosses")
+        plt.show()
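Note: a sketch of how this history class is presumably meant to be used from the training loop; the loop and the recorded values below are placeholders, not the repo's actual variable names:

    from GAILHistory import GAILHistory

    history = GAILHistory()

    for episode in range(10):                      # hypothetical training loop
        mean_reward = 0.0                          # placeholder training outputs
        d_losses, a_losses, c_losses = [0.5], [0.3], [0.2]
        demo_acc, agent_acc = 0.9, 0.4
        history.saveHis(mean_reward, d_losses, a_losses, c_losses, demo_acc, agent_acc)

    history.drawHis()  # 3x2 grid: rewards, losses, discriminator accuracies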
@@ -61,7 +61,7 @@ class GAILMem(object):
             dir (str): file direction
         """
         self.clearMem()
-        memFile = np.load(dir)
+        memFile = np.load(dir, allow_pickle=True)
         self.states = memFile["states"].tolist()
         self.actorProbs = memFile["actorProbs"].tolist()
         self.actions = memFile["actions"].tolist()
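Note: the new allow_pickle=True is needed because np.load (NumPy >= 1.16.3) refuses to unpickle object arrays by default, and the expert-data archives apparently contain ragged object arrays (see the VisibleDeprecationWarning captured in the notebook further down). A minimal sketch with a hypothetical file name:

    import numpy as np

    # a ragged recording becomes an object array, which np.save/np.savez pickle
    ragged = np.array([[0, 1], [0, 1, 2]], dtype=object)
    np.savez("demo_mem.npz", actions=ragged)          # hypothetical file name

    mem = np.load("demo_mem.npz", allow_pickle=True)  # without the flag: ValueError
    actions = mem["actions"].tolist()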
@@ -68,7 +68,28 @@
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n"
]
},
{
"ename": "AttributeError",
"evalue": "module 'numpy' has no attribute 'aa'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_40408/576030716.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[0mACTSPEC\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mACTION_SPEC\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloadDir\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetSteps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0maa\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 13\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 14\u001b[0m agent = PPO(\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\__init__.py\u001b[0m in \u001b[0;36m__getattr__\u001b[1;34m(attr)\u001b[0m\n\u001b[0;32m 313\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mTester\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 314\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 315\u001b[1;33m raise AttributeError(\"module {!r} has no attribute \"\n\u001b[0m\u001b[0;32m 316\u001b[0m \"{!r}\".format(__name__, attr))\n\u001b[0;32m 317\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mAttributeError\u001b[0m: module 'numpy' has no attribute 'aa'"
]
}
],
"source": [
"# initialize enviroment & buffer class\n",
"env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n",
@@ -81,6 +102,7 @@
"CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n",
"ACTSPEC = env.ACTION_SPEC\n",
"_, _, _, loadDir, _ = env.getSteps()\n",
"np.aa\n",
"\n",
"agent = PPO(\n",
"    stateSize=STATE_SIZE,\n",
@@ -343,8 +343,8 @@ class PPO(object):
                 totalALoss += continuousAloss
                 totalActionNum += 1.0
                 lastConAct += self.muSigSize
-            loss = tf.divide(totalALoss, totalActionNum)
-            return loss
+            # loss = tf.divide(totalALoss, totalActionNum)
+            return totalALoss

         return loss
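Note: the change keeps the summed per-action loss instead of dividing by totalActionNum. With a fixed number of continuous action heads this only rescales the actor gradient, which the learning rate can absorb. A tiny illustration with made-up values:

    import tensorflow as tf

    losses = [tf.constant(0.3), tf.constant(0.7)]   # per-action losses (illustrative)
    total = tf.add_n(losses)                         # what the code returns now: 1.0
    mean = total / float(len(losses))                # what it returned before: 0.5
    # gradients of `total` are len(losses) times those of `mean`
    print(total.numpy(), mean.numpy())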
@@ -358,7 +358,7 @@ class PPO(object):
         Returns:
             np.array:
                 actions,
-                    actions list,2dims like [[0],[1],[1.5]]
+                    actions list,1dims like [0,1,1.5]
                 predictResult,
                     actor NN predict Result output
         """
@@ -392,8 +392,9 @@ class PPO(object):
             if math.isnan(thisMu) or math.isnan(thisSig):
                 # check mu or sigma is nan
                 print("chooseAction:mu or sigma is nan")
                 print(predictResult)
             thisDist = np.random.normal(loc=thisMu, scale=thisSig)
-            actions.append(np.clip(thisDist, -self.conActRange, self.conActRange))
+            actions.append(np.clip(thisDist, -self.conActRange, self.conActRange)[0])
             lastConAct += 2
         return actions, predictResult
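Note: the appended [0] matters because the mu/sigma slices come out of the actor network as length-1 arrays, so np.clip also returns a length-1 array; indexing [0] stores a plain scalar and makes the returned actions a flat 1-D list, matching the docstring change in the hunk above. A standalone sketch (the array shapes are an assumption):

    import numpy as np

    mu = np.array([0.2])        # network outputs arrive as length-1 arrays (assumed)
    sigma = np.array([0.5])

    sample = np.random.normal(loc=mu, scale=sigma)   # shape (1,)
    clipped = np.clip(sample, -10.0, 10.0)            # still shape (1,)
    print(clipped.shape, clipped[0])                  # [0] yields the plain scalar

    actions = [1, 0, clipped[0]]                      # flat 1-D action list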
@@ -534,7 +535,7 @@ class PPO(object):
         )
         scorefile = open(score_dir, "w")
         scorefile.close()
-        print("Model's Weights Saved")
+        print("PPO Model's Weights Saved")

     def loadWeightToModels(self, loadDir: str):
         """load NN Model. Use "models.load_weights()" method.
@@ -74,7 +74,7 @@ class makeEnv(object):
             continuousActions = np.asanyarray([[0.0]])
         else:
             # create continuous actions from actions list
-            continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE :])
+            continuousActions = np.asanyarray([actions[self.DISCRETE_SIZE :]])

         if behaviorName is None:
             behaviorName = self.BEHA_NAME
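Note: wrapping the slice in an extra list gives the continuous actions a leading batch dimension, so the array has shape (1, n) like the [[0.0]] default in the other branch. A small sketch with illustrative numbers:

    import numpy as np

    actions = [1, 0, 0.37]        # flat action list (discrete + continuous), illustrative
    DISCRETE_SIZE = 2

    old = np.asanyarray(actions[DISCRETE_SIZE:])     # shape (1,)  -> no batch dimension
    new = np.asanyarray([actions[DISCRETE_SIZE:]])   # shape (1, 1) -> matches [[0.0]]
    print(old.shape, new.shape)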
@@ -154,7 +154,7 @@ class makeEnv(object):
         self.statesBuffer[-1] = state

         # return stacked states
-        return self.statesBuffer[self.STACK_INDEX]
+        return np.reshape(self.statesBuffer[self.STACK_INDEX], (self.STATE_SIZE))

     def render(self):
         """render enviroment"""
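Note: indexing the state buffer with STACK_INDEX yields a 2-D (frames, features) block, and the added np.reshape flattens it into the flat STATE_SIZE vector the networks expect; this mirrors the experiment cell near the bottom of this diff (np.reshape(aa[[0,1,2]],(9))). A standalone sketch with illustrative sizes:

    import numpy as np
    from collections import deque

    # 3 stacked frames of 4 features each -> flat state size 12 (illustrative)
    statesBuffer = deque([[0.0] * 4] * 3, maxlen=3)
    statesBuffer.append([1.0] * 4)

    STACK_INDEX = [0, 1, 2]
    stacked = np.array(statesBuffer)[STACK_INDEX]    # shape (3, 4)
    flat = np.reshape(stacked, (12,))                 # flat vector for the NN input
    print(stacked.shape, flat.shape)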
@@ -431,32 +431,178 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"[0]\n"
]
"data": {
"text/plain": [
"array([0, 1, 2, 0, 1, 2, 0, 1, 2])"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from collections import deque\n",
"import numpy as np\n",
"\n",
"ss = 1\n",
"si = 0\n",
"buffersize = ss + ((ss-1)*si)\n",
"print(buffersize)\n",
"stackedStates = deque([[0.0] * 10]*6, maxlen=3)\n",
"stackedStates.append([1.0]*10)\n",
"ssnp = stackedStates\n",
"aa = np.array([range(0,3)]*5)\n",
"np.reshape(aa[[0,1,2]],(9))"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'int'>\n",
"<class 'float'>\n",
"<class 'list'>\n",
"300\n",
"256.1\n",
"[300, 256.1]\n",
"300\n",
"256.1\n"
]
}
],
"source": [
"# set up some variables\n",
"ringo_int = 300\n",
"ringo_float = 256.1\n",
"ringo_list = [ringo_int, ringo_float]\n",
"\n",
"aa = list(range(0,buffersize,si+1))\n",
"print(aa)"
"# check their types\n",
"print(type(ringo_int))\n",
"print(type(ringo_float))\n",
"print(type(ringo_list))\n",
"\n",
"# print the values\n",
"print(ringo_int)\n",
"print(ringo_float)\n",
"print(ringo_list)\n",
"\n",
"# take elements out of the list\n",
"print(ringo_list[0]) # this line supposedly errors. why on earth???\n",
"print(ringo_list[1])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"dirrr = \"GAIL-Expert-Data/1014-1302/pack-24957.npz\"\n",
"\n",
"memFile = np.load(dirrr, allow_pickle=True)\n",
"states = memFile[\"states\"].tolist()\n",
"actorProbs = memFile[\"actorProbs\"].tolist()\n",
"actions = memFile[\"actions\"].tolist()\n",
"rewards = memFile[\"rewards\"].tolist()\n",
"dones = memFile[\"dones\"].tolist()\n",
"memNum = len(states)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\UCUNI\\AppData\\Local\\Temp/ipykernel_39608/3742051961.py:3: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
"  npact = np.array(actions)\n"
]
}
],
"source": [
"states = np.reshape(states, (24957, 90))\n",
"\n",
"npact = np.array(actions)\n",
"\n",
"last = npact[:,3]\n",
"newlast = []\n",
"last[2][0]\n",
"for i in range(len(last)):\n",
"    newlast.append(last[i][0])\n",
"\n",
"#print(newlast)\n",
"npact[:,3] = newlast\n",
"\n",
"statesNP = np.asarray(states)\n",
"actorProbsNP = np.asarray(actorProbs)\n",
"actionsNP = np.asarray(npact)\n",
"rewardsNP = np.asarray(rewards)\n",
"donesNP = np.asarray(dones)\n",
"thisSaveDir = \"GAIL-Expert-Data/1014-1302/pack-24957-RE.npz\"\n",
"\n",
"np.savez(\n",
"    thisSaveDir,\n",
"    states=statesNP,\n",
"    actorProbs=actorProbsNP,\n",
"    actions=actionsNP,\n",
"    rewards=rewardsNP,\n",
"    dones=donesNP,\n",
")\n"
]
},
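Note: this repacking cell unwraps the ragged last action column (each entry was recorded as a length-1 list) so the expert actions become a regular 2-D float array before being re-saved, which matches the chooseAction fix above that now appends plain scalars. A small sketch of the same unwrapping with made-up values (dtype=object is passed explicitly to avoid the deprecation warning seen in the stored output):

    import numpy as np

    # ragged recordings: the continuous action was stored as a length-1 list
    actions = [[1, 0, 2, [0.3]], [0, 1, 2, [0.5]]]
    npact = np.array(actions, dtype=object)          # shape (2, 4) object array

    npact[:, 3] = [row[0] for row in npact[:, 3]]    # unwrap the inner lists
    fixed = npact.astype(np.float64)                 # regular (2, 4) float array
    print(fixed)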
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(actions)\n",
"npact = np.array(actions)\n",
"\n",
"last = npact[:,3]\n",
"newlast = []\n",
"last[2][0]\n",
"for i in range(len(last)):\n",
"    newlast.append(last[i][0])\n",
"\n",
"#print(newlast)\n",
"npact[:,3] = newlast\n",
"print(npact)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[1. 2. 3. 1. 2. 1.]\n",
" [2. 2. 3. 2. 2. 1.]]\n",
"tf.Tensor(\n",
"[[1 2 3 1 2 1]\n",
" [2 2 3 2 2 1]], shape=(2, 6), dtype=int32)\n"
]
}
],
"source": [
"from matplotlib.pyplot import axis\n",
"import tensorflow as tf\n",
"import numpy as np\n",
"\n",
"aa = np.array([[1,2,3],[2,2,3]])\n",
"bb = np.array([[1,2,1.],[2,2,1.]])\n",
"print(np.append(aa,bb,axis=1))\n",
"print(tf.concat([aa,bb],axis=1))"
]
}
],