{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\requests\\__init__.py:102: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (5.0.0)/charset_normalizer (2.0.6) doesn't match a supported version!\n",
" warnings.warn(\"urllib3 ({}) or chardet ({})/charset_normalizer ({}) doesn't match a supported \"\n"
]
}
],
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"import time\n",
"import datetime\n",
"import aimBotEnv\n",
"\n",
"from GAIL import GAIL\n",
"from GAILConfig import GAILConfig\n",
"from PPOConfig import PPOConfig\n",
"from GAILMem import GAILMem\n",
"from GAILHistory import GAILHistory\n",
"from IPython.display import clear_output\n",
"from tqdm.notebook import tqdm as tqdm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Attempts to allocate only the GPU memory needed for allocation\n",
"physical_devices = tf.config.list_physical_devices(\"GPU\")\n",
"tf.config.experimental.set_memory_growth(physical_devices[0], True)\n",
"tf.random.set_seed(9331)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"---------thisPPO Params---------\n",
"self.stateSize = 93\n",
"self.disActShape = [3, 3, 2]\n",
"self.disActSize 3\n",
"self.disOutputSize 8\n",
"self.conActSize = 1\n",
"self.conActRange = 10\n",
"self.conOutputSize = 2\n",
"---------thisPPO config---------\n",
"self.NNShape = [512, 512, 256]\n",
"self.criticLR = 0.002\n",
"self.actorLR = 0.002\n",
"self.gamma = 0.99\n",
"self.lmbda = 0.95\n",
"self.clipRange = 0.2\n",
"self.entropyWeight = 0.005\n",
"self.trainEpochs = 5\n",
"self.saveDir = GAIL-Model/1027-2240/\n",
"self.loadModelDir = None\n",
"---------Actor Model Create Success---------\n",
"Model: \"model_1\"\n",
"__________________________________________________________________________________________________\n",
" Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
" stateInput (InputLayer) [(None, 93)] 0 [] \n",
" \n",
" dense0 (Dense) (None, 512) 48128 ['stateInput[0][0]'] \n",
" \n",
" dense1 (Dense) (None, 512) 262656 ['dense0[0][0]'] \n",
" \n",
" dense2 (Dense) (None, 256) 131328 ['dense1[0][0]'] \n",
" \n",
" muOut (Dense) (None, 1) 257 ['dense2[0][0]'] \n",
" \n",
" sigmaOut (Dense) (None, 1) 257 ['dense2[0][0]'] \n",
" \n",
" disAct0 (Dense) (None, 3) 771 ['dense2[0][0]'] \n",
" \n",
" disAct1 (Dense) (None, 3) 771 ['dense2[0][0]'] \n",
" \n",
" disAct2 (Dense) (None, 2) 514 ['dense2[0][0]'] \n",
" \n",
" tf.math.multiply (TFOpLambda) (None, 1) 0 ['muOut[0][0]'] \n",
" \n",
" tf.math.add (TFOpLambda) (None, 1) 0 ['sigmaOut[0][0]'] \n",
" \n",
" totalOut (Concatenate) (None, 10) 0 ['disAct0[0][0]', \n",
" 'disAct1[0][0]', \n",
" 'disAct2[0][0]', \n",
" 'tf.math.multiply[0][0]', \n",
" 'tf.math.add[0][0]'] \n",
" \n",
"==================================================================================================\n",
"Total params: 444,682\n",
"Trainable params: 444,682\n",
"Non-trainable params: 0\n",
"__________________________________________________________________________________________________\n",
"---------Critic Model Create Success---------\n",
"Model: \"model\"\n",
"_________________________________________________________________\n",
" Layer (type) Output Shape Param # \n",
"=================================================================\n",
" stateInput (InputLayer) [(None, 93)] 0 \n",
" \n",
" dense0 (Dense) (None, 512) 48128 \n",
" \n",
" dense1 (Dense) (None, 512) 262656 \n",
" \n",
" dense2 (Dense) (None, 256) 131328 \n",
" \n",
" dense (Dense) (None, 1) 257 \n",
" \n",
"=================================================================\n",
"Total params: 442,369\n",
"Trainable params: 442,369\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"ENV_PATH = \"../Build-CloseEnemyCut/Aimbot-PPO\"\n",
"EXPERT_DIR = \"../GAIL-Expert-Data/1015-0148/pack-53518.npz\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"MAX_BUFFER_SIZE = 256\n",
"\n",
"MAX_EP = 1000000000\n",
"STACKSTATESSIZE = 3\n",
"STACKINTERCE = 29\n",
"\n",
"env = aimBotEnv.makeEnv(\n",
" envPath=ENV_PATH,\n",
" workerID=WORKER_ID,\n",
" basePort=BASE_PORT,\n",
" stackSize=STACKSTATESSIZE,\n",
" stackIntercal=STACKINTERCE,\n",
")\n",
"\n",
"STATE_SIZE = env.STATE_SIZE\n",
"DISACT_SHAPE = env.DISCRETE_SHAPE\n",
"CONACT_SIZE = env.CONTINUOUS_SIZE\n",
"CONACT_RANGE = 10\n",
"\n",
"ppoConf = PPOConfig(\n",
" NNShape=[512, 512, 256],\n",
" actorLR=2e-3,\n",
" criticLR=2e-3,\n",
" gamma=0.99,\n",
" lmbda=0.95,\n",
" clipRange=0.20,\n",
" entropyWeight=5e-3,\n",
" trainEpochs=5,\n",
" saveDir=\"GAIL-Model/\" + datetime.datetime.now().strftime(\"%m%d-%H%M\") + \"/\",\n",
" loadModelDir=None,\n",
")\n",
"gailConf = GAILConfig(\n",
" discrimNNShape=[256, 128],\n",
" discrimLR=1e-4,\n",
" discrimTrainEpochs=5,\n",
" discrimSaveDir=\"GAIL-Model/\" + datetime.datetime.now().strftime(\"%m%d-%H%M\") + \"/\",\n",
" ppoConfig=ppoConf\n",
")\n",
"\n",
"agentMem = GAILMem()\n",
"expertMem = GAILMem()\n",
"expertMem.loadMemFile(EXPERT_DIR)\n",
"gailHis = GAILHistory()\n",
"gail = GAIL(\n",
" stateSize=STATE_SIZE,\n",
" disActShape=DISACT_SHAPE,\n",
" conActSize=CONACT_SIZE,\n",
" conActRange=CONACT_RANGE,\n",
" gailConfig=gailConf,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d1f8428c32f04f8da4a43a905fd9481c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/256 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"ename": "ValueError",
"evalue": "not enough values to unpack (expected 5, got 2)",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_2500\\815726880.py\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[0mdiscrimRewards\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgail\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minference\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0magentStates\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentActions\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m10.0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[1;31m# train agentPPO\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 35\u001b[1;33m actorLosses, criticLosses, averageEntropy, discreteEntropys, continuousEntropys = gail.trainPPO(\n\u001b[0m\u001b[0;32m 36\u001b[0m \u001b[0magentStates\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentActorProbs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentActions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdiscrimRewards\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentDones\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 37\u001b[0m )\n",
"\u001b[1;31mValueError\u001b[0m: not enough values to unpack (expected 5, got 2)"
]
}
],
"source": [
"bestReward = 0\n",
"for ep in range(MAX_EP):\n",
" # get sample\n",
" state, _, _, _, _ = env.reset()\n",
" totalRewards = []\n",
" totalReward = 0\n",
" saveNow = 0\n",
" for step in tqdm(range(MAX_BUFFER_SIZE)):\n",
" actions, predictResult = gail.getActions(state)\n",
" nextState, reward, done, _, saveNow = env.step(actions)\n",
" agentMem.saveMems(\n",
" state=state, actorProb=predictResult, action=actions, reward=reward, done=done\n",
" )\n",
" state = nextState\n",
" totalReward += reward\n",
" if done:\n",
" totalRewards.append(totalReward)\n",
" totalReward = 0\n",
" state, _, _, _, _ = env.reset()\n",
" # add reward to history\n",
" totalRewards.append(totalReward)\n",
" # get all memory data\n",
" demoStates, _, demoActions, _, _ = expertMem.getRandomSample(MAX_BUFFER_SIZE)\n",
" agentStates = agentMem.getStates()\n",
" agentActions = agentMem.getActions()\n",
" agentActorProbs = agentMem.getActorProbs()\n",
" agentDones = agentMem.getDones()\n",
" # train discriminator\n",
" discrimLosses, demoAcc, agentAcc = gail.trainDiscriminator(\n",
" demoStates, demoActions, agentStates, agentActions\n",
" )\n",
" # get disriminator predict rewards\n",
" discrimRewards = gail.inference(agentStates, agentActions) * 10.0\n",
" # train agentPPO\n",
" actorLosses, criticLosses, averageEntropy = gail.trainPPO(\n",
" agentStates, agentActorProbs, agentActions, discrimRewards, agentDones, nextState\n",
" )\n",
" gailHis.saveHis(\n",
" np.mean(totalRewards),\n",
" discrimLosses,\n",
" actorLosses,\n",
" criticLosses,\n",
" demoAcc,\n",
" agentAcc,\n",
" averageEntropy,\n",
" discrimRewards,\n",
" )\n",
" clear_output()\n",
" gailHis.drawHis()\n",
" # got best reward?\n",
" if np.mean(totalRewards) >= bestReward:\n",
" bestReward = np.mean(totalRewards)\n",
" gail.saveWeights(np.mean(totalRewards))\n",
" agentMem.clearMem()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}