GAIL class: fix some bugs, now runnable

GAIL class fixes only, no new functionality
This commit is contained in:
Koha9 2022-10-15 01:08:08 +09:00
parent 2a498f18f6
commit 6ab56880d8
8 changed files with 555 additions and 29 deletions

File diff suppressed because one or more lines are too long

View File

@ -9,7 +9,7 @@ from tensorflow.keras import optimizers
from GAILConfig import GAILConfig
EPS = 1e-8
EPS = 1e-6
class GAIL(object):
@ -32,6 +32,7 @@ class GAIL(object):
self.discriminatorNNShape = gailConfig.discrimNNShape
self.discrimLR = gailConfig.discrimLR
self.discrimTrainEpochs = gailConfig.discrimTrainEpochs
self.discrimSaveDir = gailConfig.discrimSaveDir
self.ppoConfig = gailConfig.ppoConfig
self.ppo = PPO(stateSize, disActShape, conActSize, conActRange, self.ppoConfig)
@ -103,7 +104,7 @@ class GAIL(object):
actionsNum = int(len(actions) / self.totalActSize)
actions = actions.reshape([actionsNum, self.totalActSize])
thisTrajectory = tf.concat([states, actions], axis=1)
thisTrajectory = np.append(states, actions, axis=1)
discrimPredict = self.discriminator(thisTrajectory)
return discrimPredict
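For context, a minimal sketch of the new trajectory construction outside the class (the sizes are made up for illustration; stateSize=90 and totalActSize=4 are assumptions, not values taken from this commit):

import numpy as np

states = np.random.rand(4, 90).astype(np.float32)   # 4 samples, assumed stateSize=90
actions = np.random.rand(4, 4).astype(np.float64)   # 4 samples, assumed totalActSize=4

# np.append with axis=1 joins the two blocks column-wise into a (4, 94) array
# and upcasts mismatched float dtypes instead of raising, which is presumably
# what the earlier tf.concat call tripped over.
thisTrajectory = np.append(states, actions, axis=1)
print(thisTrajectory.shape)  # (4, 94)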
@ -136,8 +137,8 @@ class GAIL(object):
"""
if epochs == None:
epochs = self.discrimTrainEpochs
demoTrajectory = tf.concat([demoStates, demoActions], axis=1)
agentTrajectory = tf.concat([agentStates, agentActions], axis=1)
demoTrajectory = np.append(demoStates, demoActions, axis=1)
agentTrajectory = np.append(agentStates, agentActions, axis=1)
his = self.discriminator.fit(x=agentTrajectory, y=demoTrajectory, epochs=epochs, verbose=0)
demoAcc = np.mean(self.inference(demoStates, demoActions))
@ -145,6 +146,18 @@ class GAIL(object):
return his.history["loss"], demoAcc, 1 - agentAcc
def getActions(self, state: ndarray):
"""Agent choose action to take
Args:
state (ndarray): enviroment state
Returns:
np.array:
actions,
actions list,2dims like [[0],[1],[1.5]]
predictResult,
actor NN predict Result output
"""
actions, predictResult = self.ppo.chooseAction(state)
return actions, predictResult
@ -165,6 +178,12 @@ class GAIL(object):
actorLosses = self.ppo.trainActor(states, oldActorResult, actions, advantage, epochs)
return actorLosses, criticLosses
def saveWeights(self, score: float):
saveDir = self.discrimSaveDir + "discriminator/discriminator.ckpt"
self.discriminator.save_weights(saveDir, save_format="tf")
print("GAIL Model's Weights Saved")
self.ppo.saveWeights(score=score)
def generateAction(self, states: ndarray):
act, actorP = self.ppo.chooseAction(states)
return act, actorP

View File

@ -0,0 +1,45 @@
import matplotlib.pyplot as plt
DarkBlue = "#011627"
DarkWhite = "#c9d2df"
class GAILHistory(object):
def __init__(self):
self.meanRewards = []
self.discrimLosses = []
self.actorLosses = []
self.criticLosses = []
self.demoAccs = []
self.agentAccs = []
def saveHis(self, rewards, dLosses, aLosses, cLosses, demoAcc, agentAcc):
self.meanRewards.extend([rewards])
self.discrimLosses.extend(dLosses)
self.actorLosses.extend(aLosses)
self.criticLosses.extend(cLosses)
self.demoAccs.extend([demoAcc])
self.agentAccs.extend([agentAcc])
def drawHis(self):
def setSubFig(subFig, data, title):
subFig.set_facecolor(DarkBlue)
subFig.tick_params(colors=DarkWhite)
subFig.spines["top"].set_color(DarkWhite)
subFig.spines["bottom"].set_color(DarkWhite)
subFig.spines["left"].set_color(DarkWhite)
subFig.spines["right"].set_color(DarkWhite)
subFig.plot(range(len(data)), data, color=DarkWhite, label=title)
subFig.set_title(title, color=DarkWhite)
fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(
3, 2, figsize=(21, 13), facecolor=DarkBlue
)
plt.tick_params()
setSubFig(ax1, self.meanRewards, "meanRewards")
setSubFig(ax2, self.discrimLosses, "discrimLosses")
setSubFig(ax3, self.demoAccs, "demoAccs")
setSubFig(ax4, self.actorLosses, "actorLosses")
setSubFig(ax5, self.agentAccs, "agentAccs")
setSubFig(ax6, self.criticLosses, "criticLosses")
plt.show()
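A minimal usage sketch of the new history class (assuming the class lives in GAILHistory.py; the numbers are invented for illustration):

from GAILHistory import GAILHistory

gailHis = GAILHistory()
# per-iteration records: the three loss arguments are lists (one entry per
# training epoch), while the reward and the two accuracies are single scalars
gailHis.saveHis(rewards=1.5, dLosses=[0.70, 0.66], aLosses=[0.21], cLosses=[0.43], demoAcc=0.82, agentAcc=0.35)
gailHis.drawHis()  # draws the 3x2 dark-themed matplotlib figure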

View File

@ -61,7 +61,7 @@ class GAILMem(object):
dir (str): file direction
"""
self.clearMem()
memFile = np.load(dir)
memFile = np.load(dir, allow_pickle=True)
self.states = memFile["states"].tolist()
self.actorProbs = memFile["actorProbs"].tolist()
self.actions = memFile["actions"].tolist()
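allow_pickle=True matters because the expert .npz stores object-dtype arrays (the ragged action lists that show up later in the notebook); a small repro sketch with a hypothetical file name:

import numpy as np

# object-dtype array, analogous to the ragged "actions" in the expert data
ragged = np.array([[0, 1, [0.5]], [1, 0, [0.2]]], dtype=object)
np.savez("demo.npz", actions=ragged)

# np.load("demo.npz")["actions"]  # ValueError: Object arrays cannot be loaded when allow_pickle=False
actions = np.load("demo.npz", allow_pickle=True)["actions"]  # loads fine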

View File

@ -68,7 +68,28 @@
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n"
]
},
{
"ename": "AttributeError",
"evalue": "module 'numpy' has no attribute 'aa'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_40408/576030716.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[0mACTSPEC\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mACTION_SPEC\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloadDir\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetSteps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0maa\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 13\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 14\u001b[0m agent = PPO(\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\__init__.py\u001b[0m in \u001b[0;36m__getattr__\u001b[1;34m(attr)\u001b[0m\n\u001b[0;32m 313\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mTester\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 314\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 315\u001b[1;33m raise AttributeError(\"module {!r} has no attribute \"\n\u001b[0m\u001b[0;32m 316\u001b[0m \"{!r}\".format(__name__, attr))\n\u001b[0;32m 317\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mAttributeError\u001b[0m: module 'numpy' has no attribute 'aa'"
]
}
],
"source": [
"# initialize enviroment & buffer class\n",
"env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n",
@ -81,6 +102,7 @@
"CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n",
"ACTSPEC = env.ACTION_SPEC\n",
"_, _, _, loadDir, _ = env.getSteps()\n",
"np.aa\n",
"\n",
"agent = PPO(\n",
" stateSize=STATE_SIZE,\n",

View File

@ -343,8 +343,8 @@ class PPO(object):
totalALoss += continuousAloss
totalActionNum += 1.0
lastConAct += self.muSigSize
loss = tf.divide(totalALoss, totalActionNum)
return loss
# loss = tf.divide(totalALoss, totalActionNum)
return totalALoss
return loss
@ -358,7 +358,7 @@ class PPO(object):
Returns:
np.array:
actions,
actions list,2dims like [[0],[1],[1.5]]
actions list,1dims like [0,1,1.5]
predictResult,
actor NN predict Result output
"""
@ -392,8 +392,9 @@ class PPO(object):
if math.isnan(thisMu) or math.isnan(thisSig):
# check mu or sigma is nan
print("chooseAction:mu or sigma is nan")
print(predictResult)
thisDist = np.random.normal(loc=thisMu, scale=thisSig)
actions.append(np.clip(thisDist, -self.conActRange, self.conActRange))
actions.append(np.clip(thisDist, -self.conActRange, self.conActRange)[0])
lastConAct += 2
return actions, predictResult
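The trailing [0] is presumably needed because the sliced mu/sigma keep a length-1 axis, so np.random.normal returns a one-element array rather than a scalar; a minimal sketch under that assumption (conActRange=10.0 is an assumed value):

import numpy as np

thisMu = np.array([0.3])                                 # mu sliced from the actor output, shape (1,)
thisSig = np.array([0.1])                                # sigma, shape (1,)
sample = np.random.normal(loc=thisMu, scale=thisSig)     # shape (1,), not a plain float
clipped = np.clip(sample, -10.0, 10.0)                   # still shape (1,)
action = clipped[0]                                      # plain float, keeps the actions list 1-D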
@ -534,7 +535,7 @@ class PPO(object):
)
scorefile = open(score_dir, "w")
scorefile.close()
print("Model's Weights Saved")
print("PPO Model's Weights Saved")
def loadWeightToModels(self, loadDir: str):
"""load NN Model. Use "models.load_weights()" method.

View File

@ -74,7 +74,7 @@ class makeEnv(object):
continuousActions = np.asanyarray([[0.0]])
else:
# create continuous actions from actions list
continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE :])
continuousActions = np.asanyarray([actions[self.DISCRETE_SIZE :]])
if behaviorName is None:
behaviorName = self.BEHA_NAME
@ -154,7 +154,7 @@ class makeEnv(object):
self.statesBuffer[-1] = state
# return stacked states
return self.statesBuffer[self.STACK_INDEX]
return np.reshape(self.statesBuffer[self.STACK_INDEX], (self.STATE_SIZE))
def render(self):
"""render enviroment"""

View File

@ -431,32 +431,178 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"[0]\n"
]
"data": {
"text/plain": [
"array([0, 1, 2, 0, 1, 2, 0, 1, 2])"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from collections import deque\n",
"import numpy as np\n",
"\n",
"ss = 1\n",
"si = 0\n",
"buffersize = ss + ((ss-1)*si)\n",
"print(buffersize)\n",
"stackedStates = deque([[0.0] * 10]*6, maxlen=3)\n",
"stackedStates.append([1.0]*10)\n",
"ssnp = stackedStates\n",
"aa = np.array([range(0,3)]*5)\n",
"np.reshape(aa[[0,1,2]],(9))"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'int'>\n",
"<class 'float'>\n",
"<class 'list'>\n",
"300\n",
"256.1\n",
"[300, 256.1]\n",
"300\n",
"256.1\n"
]
}
],
"source": [
"# 変数を設定\n",
"ringo_int = 300\n",
"ringo_float = 256.1\n",
"ringo_list = [ringo_int, ringo_float]\n",
"\n",
"aa = list(range(0,buffersize,si+1))\n",
"print(aa)"
"# 型を確認\n",
"print(type(ringo_int))\n",
"print(type(ringo_float))\n",
"print(type(ringo_list))\n",
"\n",
"# 値を表示\n",
"print(ringo_int)\n",
"print(ringo_float)\n",
"print(ringo_list)\n",
"\n",
"# 配列から要素を取り出す\n",
"print(ringo_list[0]) # ここでエラーになるという。どぼじで???\n",
"print(ringo_list[1])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"dirrr = \"GAIL-Expert-Data/1014-1302/pack-24957.npz\"\n",
"\n",
"memFile = np.load(dirrr, allow_pickle=True)\n",
"states = memFile[\"states\"].tolist()\n",
"actorProbs = memFile[\"actorProbs\"].tolist()\n",
"actions = memFile[\"actions\"].tolist()\n",
"rewards = memFile[\"rewards\"].tolist()\n",
"dones = memFile[\"dones\"].tolist()\n",
"memNum = len(states)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\UCUNI\\AppData\\Local\\Temp/ipykernel_39608/3742051961.py:3: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
" npact = np.array(actions)\n"
]
}
],
"source": [
"states = np.reshape(states, (24957, 90))\n",
"\n",
"npact = np.array(actions)\n",
"\n",
"last = npact[:,3]\n",
"newlast = []\n",
"last[2][0]\n",
"for i in range(len(last)):\n",
" newlast.append(last[i][0])\n",
"\n",
"#print(newlast)\n",
"npact[:,3] = newlast\n",
"\n",
"statesNP = np.asarray(states)\n",
"actorProbsNP = np.asarray(actorProbs)\n",
"actionsNP = np.asarray(npact)\n",
"rewardsNP = np.asarray(rewards)\n",
"donesNP = np.asarray(dones)\n",
"thisSaveDir = \"GAIL-Expert-Data/1014-1302/pack-24957-RE.npz\"\n",
"\n",
"np.savez(\n",
" thisSaveDir,\n",
" states=statesNP,\n",
" actorProbs=actorProbsNP,\n",
" actions=actionsNP,\n",
" rewards=rewardsNP,\n",
" dones=donesNP,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(actions)\n",
"npact = np.array(actions)\n",
"\n",
"last = npact[:,3]\n",
"newlast = []\n",
"last[2][0]\n",
"for i in range(len(last)):\n",
" newlast.append(last[i][0])\n",
"\n",
"#print(newlast)\n",
"npact[:,3] = newlast\n",
"print(npact)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[1. 2. 3. 1. 2. 1.]\n",
" [2. 2. 3. 2. 2. 1.]]\n",
"tf.Tensor(\n",
"[[1 2 3 1 2 1]\n",
" [2 2 3 2 2 1]], shape=(2, 6), dtype=int32)\n"
]
}
],
"source": [
"from matplotlib.pyplot import axis\n",
"import tensorflow as tf\n",
"import numpy as np\n",
"\n",
"aa = np.array([[1,2,3],[2,2,3]])\n",
"bb = np.array([[1,2,1.],[2,2,1.]])\n",
"print(np.append(aa,bb,axis=1))\n",
"print(tf.concat([aa,bb],axis=1))"
]
}
],