Aimbot-PPO/Aimbot-PPO-Python/Tensorflow/PPO-mian.ipynb

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import aimBotEnv\n",
"import PPO\n",
"import numpy as np\n",
"\n",
"import tensorflow as tf\n",
"import time\n",
"import datetime\n",
"\n",
"from PPO import PPO\n",
"from PPOBuffer import PPOBuffer\n",
"from PPOConfig import PPOConfig\n",
"from PPOHistoryRecorder import PPOHistory\n",
"from IPython.display import clear_output"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Attempts to allocate only the GPU memory needed for allocation\n",
"physical_devices = tf.config.list_physical_devices(\"GPU\")\n",
"tf.config.experimental.set_memory_growth(physical_devices[0], True)\n",
"tf.random.set_seed(9331)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Env\n",
"ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"\n",
"MAX_EP = 1000\n",
"EP_LENGTH = 100000\n",
"BATCH = 256 # learning step\n",
"ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps\n",
"\n",
"\n",
"TRAIN = True\n",
"SAVE_DIR = \"PPO-Model/\" + datetime.datetime.now().strftime(\"%m%d%H%M\") + \"/\"\n",
"LOAD_DIR = None\n",
"\n",
"CTN_ACTION_RANGE = 10\n",
"\n",
"ppoConfig = PPOConfig(\n",
" NNShape=[512, 512],\n",
" actorLR=2e-3,\n",
" criticLR=2e-3,\n",
" gamma=0.99,\n",
" lmbda=0.95,\n",
" clipRange=0.20,\n",
" entropyWeight=1e-2,\n",
" trainEpochs=5,\n",
" saveDir=\"PPO-Model/\" + datetime.datetime.now().strftime(\"%m%d-%H%M\") + \"/\",\n",
" loadModelDir=None,\n",
")\n"
]
},
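{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only: how clipRange and entropyWeight are typically combined\n",
"# in a PPO clipped-surrogate actor loss. The real update lives in PPO.trainCritcActor;\n",
"# clippedSurrogateLoss and its arguments are hypothetical names, not the PPO.py API.\n",
"def clippedSurrogateLoss(ratio, advantage, entropy, clipRange=0.20, entropyWeight=1e-2):\n",
"    # ratio = pi_new(a|s) / pi_old(a|s); advantage = advantage estimate (tf tensors)\n",
"    unclipped = ratio * advantage\n",
"    clipped = tf.clip_by_value(ratio, 1.0 - clipRange, 1.0 + clipRange) * advantage\n",
"    # maximize the surrogate and the entropy bonus, i.e. minimize their negation\n",
"    return -tf.reduce_mean(tf.minimum(unclipped, clipped)) - entropyWeight * tf.reduce_mean(entropy)"
]
},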
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"---------thisPPO Params---------\n",
"self.stateSize = 31\n",
"self.disActShape = [3, 3, 2]\n",
"self.disActSize 3\n",
"self.disOutputSize 8\n",
"self.conActSize = 1\n",
"self.conActRange = 10\n",
"self.conOutputSize = 2\n",
"---------thisPPO config---------\n",
"self.NNShape = [512, 512, 256]\n",
"self.criticLR = 0.002\n",
"self.actorLR = 0.002\n",
"self.gamma = 0.99\n",
"self.lmbda = 0.95\n",
"self.clipRange = 0.2\n",
"self.entropyWeight = 0.01\n",
"self.trainEpochs = 5\n",
"self.saveDir = GAIL-Model/1023-2324/\n",
"self.loadModelDir = None\n",
"---------Actor Model Create Success---------\n",
"Model: \"model_1\"\n",
"__________________________________________________________________________________________________\n",
" Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
" stateInput (InputLayer) [(None, 31)] 0 [] \n",
" \n",
" dense0 (Dense) (None, 512) 16384 ['stateInput[0][0]'] \n",
" \n",
" dense1 (Dense) (None, 512) 262656 ['dense0[0][0]'] \n",
" \n",
" dense2 (Dense) (None, 256) 131328 ['dense1[0][0]'] \n",
" \n",
" muOut (Dense) (None, 1) 257 ['dense2[0][0]'] \n",
" \n",
" sigmaOut (Dense) (None, 1) 257 ['dense2[0][0]'] \n",
" \n",
" disAct0 (Dense) (None, 3) 771 ['dense2[0][0]'] \n",
" \n",
" disAct1 (Dense) (None, 3) 771 ['dense2[0][0]'] \n",
" \n",
" disAct2 (Dense) (None, 2) 514 ['dense2[0][0]'] \n",
" \n",
" tf.math.multiply (TFOpLambda) (None, 1) 0 ['muOut[0][0]'] \n",
" \n",
" tf.math.add (TFOpLambda) (None, 1) 0 ['sigmaOut[0][0]'] \n",
" \n",
" totalOut (Concatenate) (None, 10) 0 ['disAct0[0][0]', \n",
" 'disAct1[0][0]', \n",
" 'disAct2[0][0]', \n",
" 'tf.math.multiply[0][0]', \n",
" 'tf.math.add[0][0]'] \n",
" \n",
"==================================================================================================\n",
"Total params: 412,938\n",
"Trainable params: 412,938\n",
"Non-trainable params: 0\n",
"__________________________________________________________________________________________________\n",
"---------Critic Model Create Success---------\n",
"Model: \"model\"\n",
"_________________________________________________________________\n",
" Layer (type) Output Shape Param # \n",
"=================================================================\n",
" stateInput (InputLayer) [(None, 31)] 0 \n",
" \n",
" dense0 (Dense) (None, 512) 16384 \n",
" \n",
" dense1 (Dense) (None, 512) 262656 \n",
" \n",
" dense2 (Dense) (None, 256) 131328 \n",
" \n",
" dense (Dense) (None, 1) 257 \n",
" \n",
"=================================================================\n",
"Total params: 410,625\n",
"Trainable params: 410,625\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"No loadDir specified,Create a New Model\n",
"CONTINUOUS_SIZE 1\n",
"DISCRETE_SIZE 3\n",
"STATE_SIZE 31\n"
]
}
],
"source": [
"# initialize enviroment & buffer class\n",
"env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n",
"ppoBuffer = PPOBuffer()\n",
"ppoHistory = PPOHistory()\n",
"\n",
"STATE_SIZE = env.STATE_SIZE\n",
"DISCRETE_SHAPE = env.DISCRETE_SHAPE\n",
"DISCRETE_SIZE = env.DISCRETE_SIZE\n",
"CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n",
"ACTSPEC = env.ACTION_SPEC\n",
"_, _, _, loadDir, _ = env.getSteps()\n",
"\n",
"agent = PPO(\n",
" stateSize=STATE_SIZE,\n",
" disActShape=DISCRETE_SHAPE,\n",
" conActSize=CONTINUOUS_SIZE,\n",
" conActRange=CTN_ACTION_RANGE,\n",
" PPOConfig=ppoConfig,\n",
")\n",
"\n",
"# check load model or not\n",
"if np.any(loadDir == 0):\n",
" # create a new model\n",
" print(\"No loadDir specified,Create a New Model\")\n",
" LOAD_DIR = None\n",
"else:\n",
" # load model\n",
" loadDirDateSTR = str(int(loadDir[0]))\n",
" loadDirTimeSTR = str(int(loadDir[1]))\n",
" if len(loadDirDateSTR) != 8:\n",
" # fill lost 0 while converse float to string\n",
" for _ in range(8 - len(loadDirDateSTR)):\n",
" loadDirDateSTR = \"0\" + loadDirDateSTR\n",
" if len(loadDirTimeSTR) != 6:\n",
" # fill lost 0 while converse float to string\n",
" for _ in range(6 - len(loadDirTimeSTR)):\n",
" loadDirTimeSTR = \"0\" + loadDirTimeSTR\n",
" LOAD_DIR = \"PPO-Model/\" + loadDirDateSTR + \"/\" + loadDirTimeSTR\n",
" print(\"Load Model:\")\n",
" print(LOAD_DIR)\n",
"\n",
"print(\"CONTINUOUS_SIZE\", CONTINUOUS_SIZE)\n",
"print(\"DISCRETE_SIZE\", DISCRETE_SIZE)\n",
"print(\"STATE_SIZE\", STATE_SIZE)\n",
"\n",
"disActShape = [3, 3, 2]\n"
]
},
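{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only: sampling from the hybrid actor head shown in the model\n",
"# summary above (totalOut = three discrete heads of sizes 3/3/2, then mu and sigma\n",
"# for the single continuous action). agent.chooseAction is the real entry point;\n",
"# sampleHybridAction is a hypothetical helper and assumes the discrete heads are\n",
"# already softmax probabilities.\n",
"def sampleHybridAction(totalOut, disActShape=(3, 3, 2)):\n",
"    disActions = []\n",
"    offset = 0\n",
"    for size in disActShape:\n",
"        probs = totalOut[0, offset : offset + size]\n",
"        disActions.append(np.random.choice(size, p=probs / np.sum(probs)))\n",
"        offset += size\n",
"    mu, sigma = totalOut[0, offset], totalOut[0, offset + 1]\n",
"    conAction = np.clip(np.random.normal(mu, sigma), -CTN_ACTION_RANGE, CTN_ACTION_RANGE)\n",
"    return disActions + [conAction]"
]
},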
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABMcAAALyCAYAAADe9LNzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAADMfklEQVR4nOzddXyV9d/H8feKltxGlwIjVBSkRKSku7ubAYp0bqNTZHSO7m7pkBaQ7o7BxgYMGIzV/Qe6n7tFRRffs53X8/G4H/fOORdnLy732758doWNjbNLuAAAAAAAAAArZGs6AAAAAAAAADCF4RgAAAAAAACsFsMxAAAAAAAAWC2GYwAAAAAAALBaDMcAAAAAAABgtRiOAQAAAAAAwGoxHAOAGJA+bVod2blNdrZ8mwUAAAAAS2ZvOgAA/q1BvXqofJlSCg4JUXBwiC5fvarxk6fq9t17ptMAAAAQTdYumq8RP/yo4ydPmU4BEM9xSAOAOGnRilUqU62WqjdsIt/HfhrQo7uxFo4OAwAAiH2swQBEF76bAPhX1i6aryb162rRzGnas3Gd+vfortQpU2rCiKHatWGNJo0ZqQ+SJZMk5cuTWzMn/qAd61Zp4YypKpD/04j3qVKhnJbNmaldG9Zo9UIv1axSOeK1Avk/1YalC9W4bm1tWblMm5YvUZUK5d7ZE/TmjXbu26+cH30U8ZxjmtQa6TZQW1ct05qF81S/Zg1JUgIHB+3dvF4pkieXJLVs3FA//7RZSZIkkSS1b9lc33XqIEn6skhhzZ8+WbvWr9b6JQvVtnnTiPf//ZTJahUraN2SBZo8brRsbW3VtX1bbVu9XKsXeunLIoUjdVYpX06rF3pp14Y1WrNwniqUKf2f/xsAAADEJX+1NmvbvKmGDeqvwX16ateGNVoye4Zy58opSXLr00tpnZ00dqi7dm9cq6b1675zDWZjY6NWTRpp7eL52rJymQb36amkSd+u7X7fvkaVStq4bLE2LV+ixvXqSJJSp0qlvZvWKXnyDyI6XXLk0NZVy2RnZ6dMGdJr6vgx2rl+tbatXq5hA/vF8l4DEJs4rRLAv1a6RHF1691PdnZ2mj99ilxyfKTh4yfo1u07+mHEUNWvVUMbtmzTD8OHyH3UWB05/osKff6ZRroNVINW7fT02TM9efpMPQa66b63tz7/9BNNGDFUFy9f0eVr1yRJqVOnVtKkSVWtYRMVLlhAIwcP0P6Dh/X8xYtILYkSJVT50qV078EDSZKNjY3GDfXQ/kOHNWj4KDk7OWrSmJG6fe+ejv5yQhcvX1GB/J9oz4GD+vzTT/Tw0SPlz5dXh4//os8//UTLVq+VJL1+/VpDRo/TjVu39VG2bPIcM0JXrl3X/kOHIz735/k/UcPW7RQeFq4alSuqeNEiat7RVa9fv9ZIt0GRGr937ahWrt/qzr17SpM6tZJ/8IEAAADiu79bm0lSiWJF1dd9qIaN/UEdWrVQz66d1bZrd3mMHqvPPvk40mmV6dOmlRR5DValQjlVLl9Orj376MmTp3Lr00s9u7jKY/TYiIaC+fOrXsvWypg+vSaPHaWr12/o+MlTOnn6jL4p+bXWbNwsSapYrox27t2n0NBQtW/ZQsdOnJRrzz5ysLdXHpdcsbznAMQmjhwD8K+tXLtB/k+fytfPT6fPndP5S5d05dp1vQkO1r6Dh5Qrx0eq+E0ZHTp6XIePHVd4eLiOnTyli1eu6svChSRJh44e031vb0nSqTNndfTESeX/JF/E5wgJCdHchYsVGhqqw8eOK/DVa2XJnCni9cb16mjHulXavWGt8n+cTx6j3i6A8rrkUsqUKTR30RKFhITogfdDrd+yTeVKlYz4XJ9/+qnsbG2V48PsWrF2vT7P/4kSODgoj0sunTp7VpJ08vQZXb95S+Hh4bp286a2796rz/N/Emk/zF6wSK9fBynozRuVLfm1lq9ZKx/fxwp4/kLzly6PtG1YeLg+yp5VCRMkkJ+/v27evh3N/1UAAAAszz+tzc6cO6/Dx44rLCxMW3fsUo4PP/zH9/zjGqxCmdJatnqNHng/1KvXrzV1jpfKlS4Z6ZTLOQvfbn/95i1t/mmHypUuJUnasmOnKpYtI0mytbVV+dKltHXHLklSSGiI0qV1lmOaNHoTHKzT585H634BYFk4cgzAv+b/9GnEx0FBb+T/JPLjJIkTK11aZ5UpWUJfFSsS8Zq9nb1O/HpaklSs0Bdq07yJMmfMJFtbGyVKmFDXb96K2DYgIEChYWF/eN8gJUmcOOLxkpWrNcNrvtI6O+nHkcOUJXMmXbt5M2IRs2PdqohtbW1tdfrsOUlvh2Pfdmwvl5w5dP3mLR07cUoDenbXx3nz6N6DBwoIeC5JypfbRZ3bttaH2bLKwcFBDg4O2r3vQKT98MjHN+JjxzRp9Mj3ccTjh498Ij5+/TpIA4eNVJN6ddS/R3edOX9BntNncgMBAAAQ7/3d2uyhj4/8/J9EPB8UFKRECRPKztY20jrw//vjGswpTRp5/2Hd9fDRI9nb2yt1qlT/2973f9t7P/LRR9mzSZL2Hzys3t92Vfp0aZU1cya9eBmoC5evSJImz5yjDq2aa+7kiXr+4oWWrFqtTdu2//cdAcCiMRwDECMe+T7Wtp27NPKHiX96zcHBQSPdBspj9DjtP3RYoaGhGu0xWDY2Nv/+8/j4asKU6RrUu6cOHjmqRz6P5e39UPVatnnn9mfOX1CWTBlV8qviOnXmrG7duaO0zk76snAhnTp9NmI7j/59tGr9RnXvN1BvgoP1XacOSpkiReQ3Cw+P+NDP319pnRwjHqdzdoq06dFfTujoLyeUMEECdWjVQv2+/04du/f8139fAACAuOTv1mZ/vKbru4T/Ya31/16I+NDXz0/p0zpHPE7r7KyQkBD5P3kiZ6e367G0Tk4Rv5RM5+wkXz9/SdKb4GDt2ndAFcuWVbYsmbRt566I9/F/8iRiHZv/43zyHDNSv545q3sPvN/jbw0gruG0SgAxYtvOXfqqaBEV+aKgbG1tlcDBQQXyfyonR0c52NvLwcFBT589U2hoqIoV+kJFChb4z5/r2MlTeuznpxpVKunC5csKfPVKzRrUU8IECWRra6sPs2WNuE5EUFCQLl29prrVq+rk6TOSpLPnL6pW1co6deZ/w7EkiZMoIOC53gQHK69LLpX/hwvo79q3X/Vr1ZCTo6M+SJZMzRrWj3gtdcqUKvFlUSVKlFBvgoMV+OqVwv7mt6EAAADxxT+tzf6O/9MnypA+3d9us2PPPjWsXUvp06VV4kSJ1KlNS+3cuz/SkWetmzZWwoQJlT1rVlWpUF479+6LeG3rjp2qUuEbfVWsaMQplZJU5usScnJ8+4vPgOcvFB4errC/GtYBiPM4cgxAjPDxfazegz3k2q6NhvTvq7CwUF24fEVjJk5S4KtX+mHKdA0f1F8ODg76+fBRHTh8JEqfb/HKVerWsb3WbtqiHgMHq1uH9lqzaJ4cHBx05949zfBaELHtqTNnlSvHRxGHzZ86c0ZlS5bQr38Yjo31n
KxuHdupR9fOOnXmrHbt2x9xF853Wb95qzJnyqhFM6fq5ctALV65WoUKfC5JsrG1VaM6teXWp5fCw8N19foNjZk4OUp/XwAAgLggLCzsH9dmf2X+0uXq0aWzurRrI6/FS7Vn/89/2mbjtp/kmCa1pv8wTgkSOOjoLyc1fvLUSNucPHNWq+bPla2tjZasXKVjJ05GvHbm/AWFh4Xr8tVreujzv9Mz87rk0nedOyhZ0qTyf/JEE6ZO1wPvh1HYEwAsmY2NswvjbwAAAABAvJI+bVqtXTxfxctX/ttrmE0eO0rbd+/Vhq3bYrEOgCXhtEoAAAAAgFXK45JLLjlzRDrVEoD14bRKAAAAAIDVGdS7h0oW/1ITpkxX4KtXpnMAGMRplQAAAIg2RQsVVPfOnWRra6sNW7dp4bIVkV7/tlN7FcyfX5KUKFFCpUqZUuVq1jWRCgAAIIkjxwAAABBNbG1t1bOrq7r16S8f38fymuKpA4eO6NadOxHbTJw2M+LjejWrK1eOj0ykAgAAROCaYwAAAIg
"text/plain": [
"<Figure size 1512x936 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"bestScore = 200.0\n",
"\n",
"maxTotalReward = -99999999999\n",
"\n",
"for ep in range(MAX_EP):\n",
" print(\"EP \", ep, \" START\")\n",
" # first time run game\n",
" s, _, _, _, _ = env.reset()\n",
" if ep == 0:\n",
" s = s.reshape([STATE_SIZE])\n",
" step = 0\n",
" done = False\n",
"\n",
" # save weight immediately?\n",
" saveNow = 0\n",
"\n",
" epTotalReward = 0\n",
" entropys = []\n",
"\n",
" while not done:\n",
" step += 1\n",
"\n",
" actions, predictResult = agent.chooseAction(s)\n",
" avrEntropy, _, _ = agent.getAverageEntropy(predictResult)\n",
" nextState, thisReward, done, _, saveNow = env.step(actions=actions)\n",
"\n",
" entropys.append(avrEntropy)\n",
" ppoBuffer.saveBuffers(\n",
" state=s, actorProb=predictResult, action=actions, reward=thisReward, done=done\n",
" )\n",
" epTotalReward += thisReward\n",
"\n",
" nextState = nextState.reshape([STATE_SIZE])\n",
" s = nextState\n",
"\n",
" if done:\n",
" print(\"EP OVER!\")\n",
" if saveNow != 0:\n",
" print(\"SAVENOW!\")\n",
" saveNow = 0\n",
" agent.saveWeights()\n",
" # update PPO after Batch step or GameOver\n",
" if (step + 1) % BATCH == 0 or done:\n",
" if TRAIN:\n",
" actorLosses, criticLosses = agent.trainCritcActor(\n",
" states=ppoBuffer.getStates(),\n",
" oldActorResult=ppoBuffer.getActorProbs(),\n",
" actions=ppoBuffer.getActions(),\n",
" rewards=ppoBuffer.getRewards(),\n",
" dones=ppoBuffer.getDones(),\n",
" nextState=nextState,\n",
" )\n",
" clear_output()\n",
" ppoBuffer.clearBuffer()\n",
" ppoHistory.saveHis(epTotalReward, np.mean(entropys), actorLosses, criticLosses)\n",
" ppoHistory.drawHis()\n",
" if epTotalReward > maxTotalReward and epTotalReward != 0:\n",
" maxTotalReward = epTotalReward\n",
" agent.saveWeights(epTotalReward)\n",
" print(\"New Record! Save NN\", epTotalReward)\n",
" epTotalReward = 0\n"
]
}
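,
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only: generalized advantage estimation (GAE) with the gamma\n",
"# and lmbda values from ppoConfig. The actual computation happens inside\n",
"# agent.trainCritcActor / PPOBuffer; computeGAE is a hypothetical helper.\n",
"def computeGAE(rewards, values, dones, nextValue, gamma=0.99, lmbda=0.95):\n",
"    advantages = np.zeros(len(rewards), dtype=np.float32)\n",
"    gae = 0.0\n",
"    for t in reversed(range(len(rewards))):\n",
"        mask = 1.0 - float(dones[t])  # stop bootstrapping across episode ends\n",
"        delta = rewards[t] + gamma * nextValue * mask - values[t]\n",
"        gae = delta + gamma * lmbda * mask * gae\n",
"        advantages[t] = gae\n",
"        nextValue = values[t]\n",
"    return advantages"
]
}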
],
"metadata": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
},
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}