Aimbot-PPO/Aimbot-PPO-Python/Tensorflow/GAIL-Main.ipynb

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\requests\\__init__.py:102: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (5.0.0)/charset_normalizer (2.0.6) doesn't match a supported version!\n",
" warnings.warn(\"urllib3 ({}) or chardet ({})/charset_normalizer ({}) doesn't match a supported \"\n"
]
}
],
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"import time\n",
"import datetime\n",
"import aimBotEnv\n",
"\n",
"from GAIL import GAIL\n",
"from GAILConfig import GAILConfig\n",
"from PPOConfig import PPOConfig\n",
"from GAILMem import GAILMem\n",
"from GAILHistory import GAILHistory\n",
"from IPython.display import clear_output\n",
"from tqdm.notebook import tqdm as tqdm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Attempts to allocate only the GPU memory needed for allocation\n",
"physical_devices = tf.config.list_physical_devices(\"GPU\")\n",
"tf.config.experimental.set_memory_growth(physical_devices[0], True)\n",
"tf.random.set_seed(9331)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"---------thisPPO Params---------\n",
"self.stateSize = 93\n",
"self.disActShape = [3, 3, 2]\n",
"self.disActSize 3\n",
"self.disOutputSize 8\n",
"self.conActSize = 1\n",
"self.conActRange = 10\n",
"self.conOutputSize = 2\n",
"---------thisPPO config---------\n",
"self.NNShape = [512, 512, 256]\n",
"self.criticLR = 0.002\n",
"self.actorLR = 0.002\n",
"self.gamma = 0.99\n",
"self.lmbda = 0.95\n",
"self.clipRange = 0.2\n",
"self.entropyWeight = 0.005\n",
"self.trainEpochs = 5\n",
"self.saveDir = GAIL-Model/1027-2240/\n",
"self.loadModelDir = None\n",
"---------Actor Model Create Success---------\n",
"Model: \"model_1\"\n",
"__________________________________________________________________________________________________\n",
" Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
" stateInput (InputLayer) [(None, 93)] 0 [] \n",
" \n",
" dense0 (Dense) (None, 512) 48128 ['stateInput[0][0]'] \n",
" \n",
" dense1 (Dense) (None, 512) 262656 ['dense0[0][0]'] \n",
" \n",
" dense2 (Dense) (None, 256) 131328 ['dense1[0][0]'] \n",
" \n",
" muOut (Dense) (None, 1) 257 ['dense2[0][0]'] \n",
" \n",
" sigmaOut (Dense) (None, 1) 257 ['dense2[0][0]'] \n",
" \n",
" disAct0 (Dense) (None, 3) 771 ['dense2[0][0]'] \n",
" \n",
" disAct1 (Dense) (None, 3) 771 ['dense2[0][0]'] \n",
" \n",
" disAct2 (Dense) (None, 2) 514 ['dense2[0][0]'] \n",
" \n",
" tf.math.multiply (TFOpLambda) (None, 1) 0 ['muOut[0][0]'] \n",
" \n",
" tf.math.add (TFOpLambda) (None, 1) 0 ['sigmaOut[0][0]'] \n",
" \n",
" totalOut (Concatenate) (None, 10) 0 ['disAct0[0][0]', \n",
" 'disAct1[0][0]', \n",
" 'disAct2[0][0]', \n",
" 'tf.math.multiply[0][0]', \n",
" 'tf.math.add[0][0]'] \n",
" \n",
"==================================================================================================\n",
"Total params: 444,682\n",
"Trainable params: 444,682\n",
"Non-trainable params: 0\n",
"__________________________________________________________________________________________________\n",
"---------Critic Model Create Success---------\n",
"Model: \"model\"\n",
"_________________________________________________________________\n",
" Layer (type) Output Shape Param # \n",
"=================================================================\n",
" stateInput (InputLayer) [(None, 93)] 0 \n",
" \n",
" dense0 (Dense) (None, 512) 48128 \n",
" \n",
" dense1 (Dense) (None, 512) 262656 \n",
" \n",
" dense2 (Dense) (None, 256) 131328 \n",
" \n",
" dense (Dense) (None, 1) 257 \n",
" \n",
"=================================================================\n",
"Total params: 442,369\n",
"Trainable params: 442,369\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"ENV_PATH = \"../Build-CloseEnemyCut/Aimbot-PPO\"\n",
"EXPERT_DIR = \"../GAIL-Expert-Data/1015-0148/pack-53518.npz\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"MAX_BUFFER_SIZE = 256\n",
"\n",
"MAX_EP = 1000000000\n",
"STACKSTATESSIZE = 3\n",
"STACKINTERCE = 29\n",
"\n",
"env = aimBotEnv.makeEnv(\n",
" envPath=ENV_PATH,\n",
" workerID=WORKER_ID,\n",
" basePort=BASE_PORT,\n",
" stackSize=STACKSTATESSIZE,\n",
" stackIntercal=STACKINTERCE,\n",
")\n",
"\n",
"STATE_SIZE = env.STATE_SIZE\n",
"DISACT_SHAPE = env.DISCRETE_SHAPE\n",
"CONACT_SIZE = env.CONTINUOUS_SIZE\n",
"CONACT_RANGE = 10\n",
"\n",
"ppoConf = PPOConfig(\n",
" NNShape=[512, 512, 256],\n",
" actorLR=2e-3,\n",
" criticLR=2e-3,\n",
" gamma=0.99,\n",
" lmbda=0.95,\n",
" clipRange=0.20,\n",
" entropyWeight=5e-3,\n",
" trainEpochs=5,\n",
" saveDir=\"GAIL-Model/\" + datetime.datetime.now().strftime(\"%m%d-%H%M\") + \"/\",\n",
" loadModelDir=None,\n",
")\n",
"gailConf = GAILConfig(\n",
" discrimNNShape=[256, 128],\n",
" discrimLR=1e-4,\n",
" discrimTrainEpochs=5,\n",
" discrimSaveDir=\"GAIL-Model/\" + datetime.datetime.now().strftime(\"%m%d-%H%M\") + \"/\",\n",
" ppoConfig=ppoConf\n",
")\n",
"\n",
"agentMem = GAILMem()\n",
"expertMem = GAILMem()\n",
"expertMem.loadMemFile(EXPERT_DIR)\n",
"gailHis = GAILHistory()\n",
"gail = GAIL(\n",
" stateSize=STATE_SIZE,\n",
" disActShape=DISACT_SHAPE,\n",
" conActSize=CONACT_SIZE,\n",
" conActRange=CONACT_RANGE,\n",
" gailConfig=gailConf,\n",
")\n"
]
},
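{
"cell_type": "markdown",
"metadata": {},
"source": [
"The training loop below uses the discriminator output as the PPO reward signal (`gail.inference(...) * 10.0`). The exact mapping is implemented in `GAIL.py` and is not shown in this notebook; as a rough sketch only (assuming the discriminator outputs the probability that a state-action pair came from the expert data), a common GAIL-style reward is `-log(1 - D(s, a))`, which grows as agent behavior becomes harder to distinguish from the expert's."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical sketch only -- not the GAIL.py implementation.\n",
"# Maps a discriminator output D(s, a) in (0, 1) to the surrogate reward -log(1 - D),\n",
"# so samples that the discriminator mistakes for expert data earn a larger reward.\n",
"def sketchGailReward(discrimOutput, eps=1e-8):\n",
"    discrimOutput = tf.clip_by_value(discrimOutput, eps, 1.0 - eps)\n",
"    return -tf.math.log(1.0 - discrimOutput)\n",
"\n",
"# example: the more expert-like the sample looks to the discriminator, the larger the reward\n",
"print(sketchGailReward(tf.constant([0.1, 0.5, 0.9])).numpy())"
]
},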
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d1f8428c32f04f8da4a43a905fd9481c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/256 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"ename": "ValueError",
"evalue": "not enough values to unpack (expected 5, got 2)",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_2500\\815726880.py\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[0mdiscrimRewards\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgail\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minference\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0magentStates\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentActions\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m10.0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[1;31m# train agentPPO\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 35\u001b[1;33m actorLosses, criticLosses, averageEntropy, discreteEntropys, continuousEntropys = gail.trainPPO(\n\u001b[0m\u001b[0;32m 36\u001b[0m \u001b[0magentStates\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentActorProbs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentActions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdiscrimRewards\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentDones\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 37\u001b[0m )\n",
"\u001b[1;31mValueError\u001b[0m: not enough values to unpack (expected 5, got 2)"
]
}
],
"source": [
"bestReward = 0\n",
"for ep in range(MAX_EP):\n",
" # get sample\n",
" state, _, _, _, _ = env.reset()\n",
" totalRewards = []\n",
" totalReward = 0\n",
" saveNow = 0\n",
" for step in tqdm(range(MAX_BUFFER_SIZE)):\n",
" actions, predictResult = gail.getActions(state)\n",
" nextState, reward, done, _, saveNow = env.step(actions)\n",
" agentMem.saveMems(\n",
" state=state, actorProb=predictResult, action=actions, reward=reward, done=done\n",
" )\n",
" state = nextState\n",
" totalReward += reward\n",
" if done:\n",
" totalRewards.append(totalReward)\n",
" totalReward = 0\n",
" state, _, _, _, _ = env.reset()\n",
" # add reward to history\n",
" totalRewards.append(totalReward)\n",
" # get all memory data\n",
" demoStates, _, demoActions, _, _ = expertMem.getRandomSample(MAX_BUFFER_SIZE)\n",
" agentStates = agentMem.getStates()\n",
" agentActions = agentMem.getActions()\n",
" agentActorProbs = agentMem.getActorProbs()\n",
" agentDones = agentMem.getDones()\n",
" # train discriminatorQ\n",
" discrimLosses, demoAcc, agentAcc = gail.trainDiscriminator(\n",
" demoStates, demoActions, agentStates, agentActions\n",
" )\n",
" # get disriminator predict rewards\n",
" discrimRewards = gail.inference(agentStates, agentActions) * 10.0\n",
" # train agentPPO\n",
" actorLosses, criticLosses, averageEntropy = gail.trainPPO(\n",
" agentStates, agentActorProbs, agentActions, discrimRewards, agentDones, nextState\n",
" )\n",
" gailHis.saveHis(\n",
" np.mean(totalRewards),\n",
" discrimLosses,\n",
" actorLosses,\n",
" criticLosses,\n",
" demoAcc,\n",
" agentAcc,\n",
" averageEntropy,\n",
" discrimRewards,\n",
" )\n",
" clear_output()\n",
" gailHis.drawHis()\n",
" # got best reward?\n",
" if np.mean(totalRewards) >= bestReward:\n",
" bestReward = np.mean(totalRewards)\n",
" gail.saveWeights(np.mean(totalRewards))\n",
" agentMem.clearMem()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}