Aimbot-PPO/Aimbot-PPO-Python/Tensorflow/PPO-mian.ipynb
Koha9 742529ccd7 Archive all tensorflow agents and env
archive all TF py&ipynb
turn face to pytorch.
2022-10-26 03:15:37 +09:00

324 lines
82 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import aimBotEnv\n",
"import PPO\n",
"import numpy as np\n",
"\n",
"import tensorflow as tf\n",
"import time\n",
"import datetime\n",
"\n",
"from PPO import PPO\n",
"from PPOBuffer import PPOBuffer\n",
"from PPOConfig import PPOConfig\n",
"from PPOHistoryRecorder import PPOHistory\n",
"from IPython.display import clear_output"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Attempt to allocate only as much GPU memory as is needed (memory growth).\n",
"# Looping over the device list (instead of indexing [0]) keeps this cell runnable\n",
"# when TensorFlow reports no GPU: the list is empty and the loop is a no-op.\n",
"physical_devices = tf.config.list_physical_devices(\"GPU\")\n",
"for gpu_device in physical_devices:\n",
"    tf.config.experimental.set_memory_growth(gpu_device, True)\n",
"\n",
"# Fixed seed for TensorFlow's random ops.\n",
"tf.random.set_seed(9331)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Env\n",
"ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"\n",
"MAX_EP = 1000\n",
"EP_LENGTH = 100000\n",
"BATCH = 256  # learning step: the training loop updates PPO every BATCH steps\n",
"ACTION_INTERVAL = 1  # take action every ACTION_INTERVAL steps\n",
"\n",
"\n",
"TRAIN = True\n",
"# Single source of truth for the model directories handed to PPOConfig below.\n",
"# The timestamp is taken once, so the constant and the config can never disagree\n",
"# (previously the config built its own path from a second now() call with a\n",
"# different format string).\n",
"SAVE_DIR = \"PPO-Model/\" + datetime.datetime.now().strftime(\"%m%d-%H%M\") + \"/\"\n",
"LOAD_DIR = None\n",
"\n",
"CTN_ACTION_RANGE = 10\n",
"\n",
"# Hyper-parameters passed to the PPO agent.\n",
"ppoConfig = PPOConfig(\n",
"    NNShape=[512, 512],\n",
"    actorLR=2e-3,\n",
"    criticLR=2e-3,\n",
"    gamma=0.99,\n",
"    lmbda=0.95,\n",
"    clipRange=0.20,\n",
"    entropyWeight=1e-2,\n",
"    trainEpochs=5,\n",
"    saveDir=SAVE_DIR,\n",
"    loadModelDir=LOAD_DIR,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"---------thisPPO Params---------\n",
"self.stateSize = 31\n",
"self.disActShape = [3, 3, 2]\n",
"self.disActSize 3\n",
"self.disOutputSize 8\n",
"self.conActSize = 1\n",
"self.conActRange = 10\n",
"self.conOutputSize = 2\n",
"---------thisPPO config---------\n",
"self.NNShape = [512, 512, 256]\n",
"self.criticLR = 0.002\n",
"self.actorLR = 0.002\n",
"self.gamma = 0.99\n",
"self.lmbda = 0.95\n",
"self.clipRange = 0.2\n",
"self.entropyWeight = 0.01\n",
"self.trainEpochs = 5\n",
"self.saveDir = GAIL-Model/1023-2324/\n",
"self.loadModelDir = None\n",
"---------Actor Model Create Success---------\n",
"Model: \"model_1\"\n",
"__________________________________________________________________________________________________\n",
" Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
" stateInput (InputLayer) [(None, 31)] 0 [] \n",
" \n",
" dense0 (Dense) (None, 512) 16384 ['stateInput[0][0]'] \n",
" \n",
" dense1 (Dense) (None, 512) 262656 ['dense0[0][0]'] \n",
" \n",
" dense2 (Dense) (None, 256) 131328 ['dense1[0][0]'] \n",
" \n",
" muOut (Dense) (None, 1) 257 ['dense2[0][0]'] \n",
" \n",
" sigmaOut (Dense) (None, 1) 257 ['dense2[0][0]'] \n",
" \n",
" disAct0 (Dense) (None, 3) 771 ['dense2[0][0]'] \n",
" \n",
" disAct1 (Dense) (None, 3) 771 ['dense2[0][0]'] \n",
" \n",
" disAct2 (Dense) (None, 2) 514 ['dense2[0][0]'] \n",
" \n",
" tf.math.multiply (TFOpLambda) (None, 1) 0 ['muOut[0][0]'] \n",
" \n",
" tf.math.add (TFOpLambda) (None, 1) 0 ['sigmaOut[0][0]'] \n",
" \n",
" totalOut (Concatenate) (None, 10) 0 ['disAct0[0][0]', \n",
" 'disAct1[0][0]', \n",
" 'disAct2[0][0]', \n",
" 'tf.math.multiply[0][0]', \n",
" 'tf.math.add[0][0]'] \n",
" \n",
"==================================================================================================\n",
"Total params: 412,938\n",
"Trainable params: 412,938\n",
"Non-trainable params: 0\n",
"__________________________________________________________________________________________________\n",
"---------Critic Model Create Success---------\n",
"Model: \"model\"\n",
"_________________________________________________________________\n",
" Layer (type) Output Shape Param # \n",
"=================================================================\n",
" stateInput (InputLayer) [(None, 31)] 0 \n",
" \n",
" dense0 (Dense) (None, 512) 16384 \n",
" \n",
" dense1 (Dense) (None, 512) 262656 \n",
" \n",
" dense2 (Dense) (None, 256) 131328 \n",
" \n",
" dense (Dense) (None, 1) 257 \n",
" \n",
"=================================================================\n",
"Total params: 410,625\n",
"Trainable params: 410,625\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"No loadDir specified,Create a New Model\n",
"CONTINUOUS_SIZE 1\n",
"DISCRETE_SIZE 3\n",
"STATE_SIZE 31\n"
]
}
],
"source": [
"# Initialize the environment, the rollout buffer and the history recorder.\n",
"env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n",
"ppoBuffer = PPOBuffer()\n",
"ppoHistory = PPOHistory()\n",
"\n",
"STATE_SIZE = env.STATE_SIZE\n",
"DISCRETE_SHAPE = env.DISCRETE_SHAPE\n",
"DISCRETE_SIZE = env.DISCRETE_SIZE\n",
"CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n",
"ACTSPEC = env.ACTION_SPEC\n",
"# The 4th value returned by getSteps() is used below as the load request:\n",
"# element 0 is read as a date number, element 1 as a time number.\n",
"_, _, _, loadDir, _ = env.getSteps()\n",
"\n",
"# Resolve the load directory BEFORE the agent is built; resolving it afterwards\n",
"# (as this cell used to) meant the result never reached the agent.\n",
"if np.any(loadDir == 0):\n",
"    # create a new model\n",
"    print(\"No loadDir specified,Create a New Model\")\n",
"    LOAD_DIR = None\n",
"else:\n",
"    # load model\n",
"    # int() -> str() drops leading zeros, so pad back to the fixed widths\n",
"    # (8 digits for the date part, 6 digits for the time part).\n",
"    loadDirDateSTR = str(int(loadDir[0])).zfill(8)\n",
"    loadDirTimeSTR = str(int(loadDir[1])).zfill(6)\n",
"    LOAD_DIR = \"PPO-Model/\" + loadDirDateSTR + \"/\" + loadDirTimeSTR\n",
"    print(\"Load Model:\")\n",
"    print(LOAD_DIR)\n",
"\n",
"# NOTE(review): assumes PPOConfig keeps the `loadModelDir` constructor argument as an\n",
"# attribute of the same name and that PPO reads it when constructed - confirm in PPO.py.\n",
"ppoConfig.loadModelDir = LOAD_DIR\n",
"\n",
"agent = PPO(\n",
"    stateSize=STATE_SIZE,\n",
"    disActShape=DISCRETE_SHAPE,\n",
"    conActSize=CONTINUOUS_SIZE,\n",
"    conActRange=CTN_ACTION_RANGE,\n",
"    PPOConfig=ppoConfig,\n",
")\n",
"\n",
"print(\"CONTINUOUS_SIZE\", CONTINUOUS_SIZE)\n",
"print(\"DISCRETE_SIZE\", DISCRETE_SIZE)\n",
"print(\"STATE_SIZE\", STATE_SIZE)\n",
"\n",
"disActShape = [3, 3, 2]\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1512x936 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"bestScore = 200.0\n",
"\n",
"# Best reward seen so far; weights are saved whenever it is beaten.\n",
"maxTotalReward = -99999999999\n",
"\n",
"for ep in range(MAX_EP):\n",
"    print(\"EP \", ep, \" START\")\n",
"    # Flatten the reset observation on EVERY episode (not only the first one) so the\n",
"    # state always has the same shape as the reshaped nextState used inside the loop.\n",
"    s, _, _, _, _ = env.reset()\n",
"    s = s.reshape([STATE_SIZE])\n",
"    step = 0\n",
"    done = False\n",
"\n",
"    # save weight immediately?\n",
"    saveNow = 0\n",
"\n",
"    epTotalReward = 0\n",
"    entropys = []\n",
"\n",
"    while not done:\n",
"        step += 1\n",
"\n",
"        actions, predictResult = agent.chooseAction(s)\n",
"        avrEntropy, _, _ = agent.getAverageEntropy(predictResult)\n",
"        nextState, thisReward, done, _, saveNow = env.step(actions=actions)\n",
"\n",
"        entropys.append(avrEntropy)\n",
"        ppoBuffer.saveBuffers(\n",
"            state=s, actorProb=predictResult, action=actions, reward=thisReward, done=done\n",
"        )\n",
"        epTotalReward += thisReward\n",
"\n",
"        nextState = nextState.reshape([STATE_SIZE])\n",
"        s = nextState\n",
"\n",
"        if done:\n",
"            print(\"EP OVER!\")\n",
"        if saveNow != 0:\n",
"            # the environment asked for an immediate checkpoint\n",
"            print(\"SAVENOW!\")\n",
"            saveNow = 0\n",
"            agent.saveWeights()\n",
"        # Update PPO after every BATCH steps or on game over.\n",
"        # `step` is already incremented at the top of the loop, so `step % BATCH`\n",
"        # (not `step + 1`) fires after exactly BATCH buffered transitions.\n",
"        if step % BATCH == 0 or done:\n",
"            if TRAIN:\n",
"                actorLosses, criticLosses = agent.trainCritcActor(\n",
"                    states=ppoBuffer.getStates(),\n",
"                    oldActorResult=ppoBuffer.getActorProbs(),\n",
"                    actions=ppoBuffer.getActions(),\n",
"                    rewards=ppoBuffer.getRewards(),\n",
"                    dones=ppoBuffer.getDones(),\n",
"                    nextState=nextState,\n",
"                )\n",
"            clear_output()\n",
"            ppoBuffer.clearBuffer()\n",
"            if TRAIN:\n",
"                # Losses only exist after a training step; recording them with\n",
"                # TRAIN = False used to raise NameError.\n",
"                ppoHistory.saveHis(epTotalReward, np.mean(entropys), actorLosses, criticLosses)\n",
"                ppoHistory.drawHis()\n",
"            if epTotalReward > maxTotalReward and epTotalReward != 0:\n",
"                maxTotalReward = epTotalReward\n",
"                agent.saveWeights(epTotalReward)\n",
"                print(\"New Record! Save NN\", epTotalReward)\n",
"            # the reward counter restarts after every update, so the recorded value\n",
"            # covers the steps since the previous update\n",
"            epTotalReward = 0\n"
]
}
],
"metadata": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
},
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}