{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import aimBotEnv\n", "import PPO\n", "import buffer\n", "import numpy as np\n", "\n", "import tensorflow as tf\n", "import time\n", "import datetime" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Attempts to allocate only the GPU memory needed for allocation\n", "physical_devices = tf.config.list_physical_devices('GPU')\n", "tf.config.experimental.set_memory_growth(physical_devices[0], True)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "ENV_PATH = './Build-MultiScene-WithLoad/Aimbot-PPO'\n", "WORKER_ID = 1\n", "BASE_PORT = 200\n", "\n", "MAX_EP = 1000\n", "EP_LENGTH = 100000\n", "GAMMA = 0.99 # discount future reward (UP?)\n", "EPSILON = 0.2 # clip Ratio range[1-EPSILON,1+EPSILON]\n", "ACTOR_LR = 1e-5 # LR\n", "CRITIC_LR = 2e-5 # LR\n", "BATCH = 512 # learning step\n", "ACTOR_EPOCH = 15 # epoch\n", "CRITIC_EPOCH = 15 # epoch\n", "ENTROPY_WHEIGHT = 0.01 # sigma's entropy in Actor loss\n", "ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps\n", "\n", "\n", "TRAIN = True\n", "SAVE_DIR = \"PPO-Model/\"+datetime.datetime.now().strftime(\"%m%d%H%M\")+\"/\"\n", "LOAD_DIR = None\n", "\n", "CTN_ACTION_RANGE = 10" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "√√√√√Enviroment Initialized Success√√√√√\n", "√√√√√Buffer Initialized Success√√√√√\n", "No loadDir specified,Create a New Model\n", "CONTINUOUS_SIZE 1\n", "DISCRETE_SIZE 5\n", "STATE_SIZE 29\n" ] } ], "source": [ "# initialize enviroment & buffer class\n", "env = aimBotEnv.makeEnv(envPath = ENV_PATH,\n", " workerID = WORKER_ID,\n", " basePort = BASE_PORT)\n", "epBuffer = buffer.buffer()\n", "\n", "STATE_SIZE = env.STATE_SIZE\n", "CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n", "DISCRETE_SIZE = env.DISCRETE_SIZE\n", "_,_,_,loadDir,_ = env.getSteps()\n", "\n", "# check load model or not\n", "if(np.any(loadDir == 0)):\n", " # create a new model\n", " print(\"No loadDir specified,Create a New Model\")\n", " LOAD_DIR = None\n", "else:\n", " # load model\n", " loadDirDateSTR = str(int(loadDir[0]))\n", " loadDirTimeSTR = str(int(loadDir[1]))\n", " if len(loadDirDateSTR)!=8:\n", " # fill lost 0 while converse float to string\n", " for _ in range(8 - len(loadDirDateSTR)):\n", " loadDirDateSTR = \"0\" + loadDirDateSTR\n", " if len(loadDirTimeSTR)!=6:\n", " # fill lost 0 while converse float to string\n", " for _ in range(6 - len(loadDirTimeSTR)):\n", " loadDirTimeSTR = \"0\" + loadDirTimeSTR\n", " LOAD_DIR = \"PPO-Model/\"+loadDirDateSTR+\"/\"+loadDirTimeSTR\n", " print(\"Load Model:\")\n", " print(LOAD_DIR)\n", "\n", "print(\"CONTINUOUS_SIZE\",CONTINUOUS_SIZE)\n", "print(\"DISCRETE_SIZE\",DISCRETE_SIZE)\n", "print(\"STATE_SIZE\",STATE_SIZE)\n", "\n", "disActShape = [3,3,2]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def actToKey(disAct1,disAct2,disAct3,conAct):\n", " kW = 0\n", " kS = 0\n", " kA = 0\n", " kD = 0\n", " mouseShoot = 0\n", " if disAct1 == 0:\n", " kW = 0\n", " kS = 1\n", " elif disAct1 == 1:\n", " kW = 0\n", " kS = 0\n", " elif disAct1 == 2:\n", " kW = 1\n", " kS = 0\n", " if disAct2 == 0:\n", " kA = 0\n", " kD = 1\n", " elif disAct2 == 1:\n", " kA = 0\n", " kD = 0\n", " elif disAct2 == 2:\n", " kA = 1\n", " kD = 0\n", " mouseShoot = disAct3\n", " return kW,kS,kA,kD,mouseShoot,conAct" ] }, { 
"cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "EP 0 START\n", "√√√√√Buffer Initialized Success√√√√√\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\fromnumeric.py:3474: RuntimeWarning: Mean of empty slice.\n", " return _methods._mean(a, axis=axis, dtype=dtype,\n", "c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars\n", " ret = ret.dtype.type(ret / rcount)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "SAVENOW!\n", "Model's Weights Saved\n", "A_Loss: 9210259745450.666 C_Loss: 7842064320569890.0\n", "SAVENOW!\n", "Model's Weights Saved\n", "EP OVER!\n", "A_Loss: 4103941.316666667 C_Loss: 410607418692949.3\n" ] }, { "ename": "FileNotFoundError", "evalue": "[Errno 2] No such file or directory: 'PPO-Model/09052116/211645/-53'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_15440/420232317.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 86\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mepTotalReward\u001b[0m \u001b[1;33m>\u001b[0m \u001b[0mmaxTotalReward\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mepTotalReward\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 87\u001b[0m \u001b[0mmaxTotalReward\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mepTotalReward\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 88\u001b[1;33m \u001b[0magent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msaveWeights\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mepTotalReward\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 89\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"New Record! 
Save NN\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mepTotalReward\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 90\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mc:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\PPO.py\u001b[0m in \u001b[0;36msaveWeights\u001b[1;34m(self, score)\u001b[0m\n\u001b[0;32m 403\u001b[0m \u001b[1;31m# create an empty file named as score to recored score\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 404\u001b[0m \u001b[0mscore_dir\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msaveDir\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstrftime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"%H%M%S\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m\"/\"\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mscore\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 405\u001b[1;33m \u001b[0mscorefile\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mscore_dir\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'w'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 406\u001b[0m \u001b[0mscorefile\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 407\u001b[0m \u001b[0mactor_save_dir\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msaveDir\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstrftime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"%H%M%S\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m\"/actor/\"\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m\"actor.ckpt\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'PPO-Model/09052116/211645/-53'" ] } ], "source": [ "bestScore = 200.\n", "stopTrainCounter = 0\n", "\n", "totalRewardHis = []\n", "totalActorLossHis = []\n", "totalCriticLossHis = []\n", "epHis = []\n", "maxTotalReward = -99999999999\n", "\n", "for ep in range(MAX_EP):\n", " print(\"EP \",ep,\" START\")\n", " # first time run game\n", " s,_,_,_,_ = env.reset()\n", " if (ep == 0):\n", " epBuffer = buffer.buffer()\n", " s = s.reshape([STATE_SIZE])\n", " agent = PPO.PPO(stateSize=STATE_SIZE,\n", " disActShape=disActShape,\n", " conActSize=1,\n", " conActRange=CTN_ACTION_RANGE,\n", " criticLR=CRITIC_LR,\n", " actorLR=ACTOR_LR,\n", " gamma=GAMMA,\n", " epsilon=EPSILON,\n", " entropyWeight=ENTROPY_WHEIGHT,\n", " saveDir=SAVE_DIR,\n", " loadModelDir=LOAD_DIR)\n", " step = 0\n", " done = False\n", " stopTrainCounter -= 1\n", " epHis.append(ep)\n", " \n", " # reset total reward\n", " epTotalReward = 0\n", " \n", " # Recorder list\n", " epStepHis = []\n", " epRewardHis = []\n", " epActorLossHis = []\n", " epCriticLossHis = []\n", " \n", " # save weight immediately?\n", " saveNow = 0;\n", "\n", " 
while not done:\n", " step += 1\n", " if step % ACTION_INTERVAL == 0: # take action every ACTION_INTERVAL steps\n", " epStepHis.append(step)\n", " disAct1,disAct2,disAct3,conAct,predictResult = agent.chooseAction(s)\n", " kW, kS, kA, kD, mouseShoot, mouseMove = actToKey(disAct1,disAct2,disAct3,conAct)\n", " \n", " nextState,thisReward,done,_,saveNow = env.step(discreteActions=np.array([[kW, kS, kA, kD, mouseShoot]]),continuousActions=np.array([[mouseMove]]))\n", "\n", " epTotalReward += thisReward\n", " epBuffer.saveBuffers(s,[disAct1,disAct2,disAct3,conAct],thisReward)\n", " else:\n", " disActs = np.array([[0,0,0,0,0]])\n", " conActs = np.array([[0]])\n", "\n", " nextState,thisReward,done,_,saveNow = env.step(discreteActions=disActs,continuousActions=conActs)\n", " epTotalReward += thisReward\n", " nextState = nextState.reshape([STATE_SIZE])\n", " s = nextState\n", " \n", " if done:\n", " print(\"EP OVER!\")\n", " if saveNow != 0:\n", " print(\"SAVENOW!\")\n", " saveNow = 0\n", " agent.saveWeights()\n", " # update PPO after Batch step or GameOver\n", " if (step+1)%BATCH == 0 or done:\n", " bs = epBuffer.getStates()\n", " ba = epBuffer.getActions()\n", " br = epBuffer.getRewards()\n", " epBuffer.clearBuffer()\n", " if TRAIN:\n", " epActorLoss,epCriticLoss = agent.trainCritcActor(bs,ba,br,s,CRITIC_EPOCH,ACTOR_EPOCH)\n", " epActorLossHis.append(epActorLoss)\n", " epCriticLossHis.append(epCriticLoss)\n", " # update History Recorder\n", " totalActorLossHis.append(np.mean(epActorLossHis))\n", " totalCriticLossHis.append(np.mean(epCriticLossHis))\n", " totalRewardHis.append(epTotalReward)\n", " \n", " if (epTotalReward > maxTotalReward and epTotalReward != 0):\n", " maxTotalReward = epTotalReward\n", " agent.saveWeights(epTotalReward)\n", " print(\"New Record! Save NN\",epTotalReward)\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "aaa = 0\n", "aaa = 1\n", "aaa = 2\n", "aaa = 3\n", "aaa = 4\n", "aaa = 5\n", "aaa = 6\n", "aaa = 7\n", "aaa = 8\n", "aaa = 9\n" ] } ], "source": [ "aaa = 0\n", "while aaa<10:\n", " print(\"aaa = \",aaa)\n", " aaa+=1" ] } ], "metadata": { "interpreter": { "hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32" }, "kernelspec": { "display_name": "Python 3.9.7 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }