Update PPO class, add Python human control

Python:
Update PPO class
add Python human control
Unity: 
add FP/TP camera selection button
This commit is contained in:
Koha9 2022-10-11 06:40:15 +09:00
parent de066f3a65
commit ae8a1ba8e2
26 changed files with 3639 additions and 990 deletions

View File

@ -0,0 +1,8 @@
fileFormatVersion: 2
guid: d65d9ca7ae1253341b6790f3a23e3a11
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@ -0,0 +1,10 @@
fileFormatVersion: 2
guid: 39a127fc79ed92d4e88aec711f545d5f
ScriptedImporter:
internalIDToNameTable: []
externalObjects: {}
serializedVersion: 2
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

View File

@ -1 +1 @@
{"count":1,"self":33.6679968,"total":34.5046305,"children":{"InitializeActuators":{"count":2,"self":0.0010002,"total":0.0010002,"children":null},"InitializeSensors":{"count":2,"self":0.0010004,"total":0.0010004,"children":null},"AgentSendState":{"count":1489,"self":0.011503399999999999,"total":0.2010688,"children":{"CollectObservations":{"count":1489,"self":0.1780647,"total":0.1780647,"children":null},"WriteActionMask":{"count":1488,"self":0.0019993999999999997,"total":0.0019993999999999997,"children":null},"RequestDecision":{"count":1488,"self":0.009501299999999999,"total":0.009501299999999999,"children":null}}},"DecideAction":{"count":1488,"self":0.0117408,"total":0.0117408,"children":null},"AgentAct":{"count":1488,"self":0.6208231,"total":0.6208231,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1663089804","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-UCUNI -hubSessionId 4cf980b0-326c-11ed-87c2-a7333acffe7c -accessToken j61gZPw8-vc4ZH7TJMvrSAAPQLV9SK6U72z_dek2xhw00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.0","scene_name":"InGame","end_time_seconds":"1663089838"}} {"count":1,"self":42.3855296,"total":42.4020608,"children":{"InitializeActuators":{"count":2,"self":0.0015155,"total":0.0015155,"children":null},"InitializeSensors":{"count":2,"self":0.0015017,"total":0.0015017,"children":null},"AgentSendState":{"count":1898,"self":0.0025031999999999997,"total":0.0025031999999999997,"children":null},"DecideAction":{"count":1898,"self":0.0070091999999999993,"total":0.0070091999999999993,"children":null},"AgentAct":{"count":1898,"self":0.0030023,"total":0.0030023,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1665414279","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-UCUNI -hubSessionId 39022900-48a5-11ed-b848-09be5949a456 -accessToken _47qt9I_MF3bhL7JS735Xdmfj8A4dGBOdRNKR0X2L_w00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.0","scene_name":"InGame","end_time_seconds":"1665414322"}}

View File

@ -1 +1 @@
{"count":1,"self":114.25904639999999,"total":114.62062499999999,"children":{"InitializeActuators":{"count":2,"self":0.0010000999999999999,"total":0.0010000999999999999,"children":null},"InitializeSensors":{"count":2,"self":0.0010002,"total":0.0010002,"children":null},"AgentSendState":{"count":1382,"self":0.0080028,"total":0.0195053,"children":{"CollectObservations":{"count":1382,"self":0.0070022999999999995,"total":0.0070022999999999995,"children":null},"WriteActionMask":{"count":1382,"self":0.0004994,"total":0.0004994,"children":null},"RequestDecision":{"count":1382,"self":0.0040008,"total":0.0040008,"children":null}}},"DecideAction":{"count":1382,"self":0.0110034,"total":0.0110034,"children":null},"AgentAct":{"count":1382,"self":0.3290731,"total":0.3290731,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1662500099","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-UCUNI -hubSessionId 209fdf30-2c1f-11ed-916f-33e85f4223cc -accessToken 78EBbrn-dg5kE__h3rNOqQVTDU3b1xUmmwWF1c5sFLc00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.0","scene_name":"Start","end_time_seconds":"1662500214"}} {"count":1,"self":100.7007424,"total":102.0526476,"children":{"InitializeActuators":{"count":2,"self":0.0015004999999999999,"total":0.0015004999999999999,"children":null},"InitializeSensors":{"count":2,"self":0.0010015,"total":0.0010015,"children":null},"AgentSendState":{"count":2851,"self":0.0227973,"total":0.3594312,"children":{"CollectObservations":{"count":2851,"self":0.3230326,"total":0.3230326,"children":null},"WriteActionMask":{"count":2850,"self":0.0040877,"total":0.0040877,"children":null},"RequestDecision":{"count":2850,"self":0.0095135999999999988,"total":0.0095135999999999988,"children":null}}},"DecideAction":{"count":2850,"self":0.0184923,"total":0.0184923,"children":null},"AgentAct":{"count":2850,"self":0.971482,"total":0.971482,"children":null}},"gauges":{"AKMAgent.CumulativeReward":{"count":1,"max":0,"min":0,"runningAverage":0,"value":0,"weightedAverage":0}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1665340408","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-8AgJBC01I23iOtjIDvezn -hubSessionId a2bff0f0-47ee-11ed-98ba-e72fca9de6f1 -accessToken VHkJOvWIH11sBEzC18rl6YA9y6y2sRMQj2zrOyZdNeE00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.0","scene_name":"Start","end_time_seconds":"1665340510"}}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -21,6 +21,8 @@ public class AgentWithGun : Agent
public Camera thisCam; public Camera thisCam;
public CharacterController PlayerController; public CharacterController PlayerController;
public GameObject enemyPrefab; public GameObject enemyPrefab;
public GameObject cameraChangerOBJ;
[Header("Rewards")] [Header("Rewards")]
[Tooltip("Nothing happened reward")] [Tooltip("Nothing happened reward")]
@ -76,10 +78,13 @@ public class AgentWithGun : Agent
private string LoadDirTime; private string LoadDirTime;
private float LoadDirDateF; private float LoadDirDateF;
private float loadDirTimeF; private float loadDirTimeF;
public bool defaultTPCamera = true;
private StartSeneData DataTransfer; private StartSeneData DataTransfer;
private UIController UICon; private UIController UICon;
private HistoryRecorder HistoryRec; private HistoryRecorder HistoryRec;
private RaySensors rayScript; private RaySensors rayScript;
private CameraChange camChanger;
[System.NonSerialized] public float nonReward; [System.NonSerialized] public float nonReward;
[System.NonSerialized] public float shootReward; [System.NonSerialized] public float shootReward;
@ -118,6 +123,8 @@ public class AgentWithGun : Agent
killRewardDefault = DataTransfer.killReward; killRewardDefault = DataTransfer.killReward;
winRewardDefault = DataTransfer.winReward; winRewardDefault = DataTransfer.winReward;
loseRewardDefault = DataTransfer.loseReward; loseRewardDefault = DataTransfer.loseReward;
lockMouse = DataTransfer.lockMouse;
defaultTPCamera = DataTransfer.defaultTPCamera;
// change Decision Period & Take Actions Between Decisions // change Decision Period & Take Actions Between Decisions
transform.GetComponent<DecisionRequester>().DecisionPeriod = DataTransfer.DecisionPeriod; transform.GetComponent<DecisionRequester>().DecisionPeriod = DataTransfer.DecisionPeriod;
@ -156,6 +163,7 @@ public class AgentWithGun : Agent
UICon = transform.GetComponent<UIController>(); UICon = transform.GetComponent<UIController>();
HistoryRec = transform.GetComponent<HistoryRecorder>(); HistoryRec = transform.GetComponent<HistoryRecorder>();
rayScript = GetComponent<RaySensors>(); rayScript = GetComponent<RaySensors>();
camChanger = cameraChangerOBJ.GetComponent<CameraChange>();
// give default Reward to Reward value will be used. // give default Reward to Reward value will be used.
nonReward = nonRewardDefault; nonReward = nonRewardDefault;
@ -167,6 +175,15 @@ public class AgentWithGun : Agent
killReward = killRewardDefault; killReward = killRewardDefault;
//initialize remainTime //initialize remainTime
remainTime = (int)(timeLimit - Time.time + startTime); remainTime = (int)(timeLimit - Time.time + startTime);
// change default camera view
if (defaultTPCamera)
{
camChanger.ShowTPSView();
}
else
{
camChanger.ShowFPSView();
}
} }
} }
@ -203,27 +220,9 @@ public class AgentWithGun : Agent
// ------------action handling-------------- // ------------action handling--------------
// moveAgent simulates Input.GetAxis-style movement // moveAgent simulates Input.GetAxis-style movement
public void moveAgent(int kW, int kS,int kA,int kD) public void moveAgent(int vertical, int horizontal)
{ {
Vector3 thisMovement; Vector3 thisMovement;
int horizontal = 0;
int vertical = 0;
if (kW==1 && kS != 1)
{
vertical = 1;
}
else if (kS==1 && kW!=1)
{
vertical = -1;
}
if (kD==1 && kA!=1)
{
horizontal = 1;
}
else if (kA ==1 && kD!=1)
{
horizontal = -1;
}
if (horizontal != 0)// a horizontal key is pressed if (horizontal != 0)// a horizontal key is pressed
{ {
@ -506,7 +505,7 @@ public class AgentWithGun : Agent
} }
if (lockMouse) if (lockMouse)
{ {
Cursor.lockState = CursorLockMode.Locked; // 隐藏并且锁定鼠标 Cursor.lockState = CursorLockMode.Locked; // hide and lock the mouse
} }
//iniCharts(); //iniCharts();
thisAgentObj.name = thisAgentObj.GetInstanceID().ToString(); thisAgentObj.name = thisAgentObj.GetInstanceID().ToString();
@ -549,35 +548,26 @@ public class AgentWithGun : Agent
public override void OnActionReceived(ActionBuffers actionBuffers) public override void OnActionReceived(ActionBuffers actionBuffers)
{ {
// get the inputs // get the inputs
int kW = actionBuffers.DiscreteActions[0]; int vertical = actionBuffers.DiscreteActions[0];
int kS = actionBuffers.DiscreteActions[1]; int horizontal = actionBuffers.DiscreteActions[1];
int kA = actionBuffers.DiscreteActions[2]; int mouseShoot = actionBuffers.DiscreteActions[2];
int kD = actionBuffers.DiscreteActions[3];
int mouseShoot = actionBuffers.DiscreteActions[4];
float Mouse_X = actionBuffers.ContinuousActions[0]; float Mouse_X = actionBuffers.ContinuousActions[0];
//float Mouse_Y = actionBuffers.ContinuousActions[1]; if (vertical == 2) vertical = -1;
//int timeLimitControl = (int)actionBuffers.ContinuousActions[2]; if (horizontal == 2) horizontal = -1;
//float nonRewardIn = actionBuffers.ContinuousActions[1];
//float shootRewardIn = actionBuffers.ContinuousActions[2];
//float shootWithoutReadyRewardIn = actionBuffers.ContinuousActions[3];
//float hitRewardIn = actionBuffers.ContinuousActions[4];
//float winRewardIn = actionBuffers.ContinuousActions[5];
// loseRewardIn = actionBuffers.ContinuousActions[6];
//float killRewardIn = actionBuffers.ContinuousActions[7];
//Rewards Update
remainTime = (int)(timeLimit - Time.time + startTime); remainTime = (int)(timeLimit - Time.time + startTime);
// apply the inputs // apply the inputs
shoot = mouseShoot; shoot = mouseShoot;
HistoryRec.realTimeKeyCounter(kW, kS, kA, kD, shoot); HistoryRec.realTimeKeyCounter(vertical, horizontal, shoot);
(int kWCount, int kSCount, int kACount, int kDCount, int shootCount) = HistoryRec.getKeyCount(); (int kWCount, int kSCount, int kACount, int kDCount, int shootCount) = HistoryRec.getKeyCount();
UICon.updateRemainTime(remainTime); UICon.updateRemainTime(remainTime);
UICon.updateWASDKeyViewer(kW, kS, kA, kD); UICon.updateRemainEnemy(enemyNum);
UICon.updateWASDKeyViewer(vertical, horizontal);
UICon.updateKeyCounterChart(kWCount, kSCount, kACount, kDCount, shootCount); UICon.updateKeyCounterChart(kWCount, kSCount, kACount, kDCount, shootCount);
UICon.updateMouseMovementViewer(Mouse_X); UICon.updateMouseMovementViewer(Mouse_X);
UICon.updateRewardViewer(nonReward, shootReward, shootWithoutReadyReward, hitReward, winReward, loseReward, killReward); UICon.updateRewardViewer(nonReward, shootReward, shootWithoutReadyReward, hitReward, winReward, loseReward, killReward);
cameraControl(Mouse_X, 0); cameraControl(Mouse_X, 0);
moveAgent(kW, kS, kA, kD); moveAgent(vertical, horizontal);
float thisRoundReward = rewardCalculate(); float thisRoundReward = rewardCalculate();
// check whether the episode is over // check whether the episode is over
@ -628,37 +618,45 @@ public class AgentWithGun : Agent
ActionSegment<float> continuousActions = actionsOut.ContinuousActions; ActionSegment<float> continuousActions = actionsOut.ContinuousActions;
ActionSegment<int> discreteActions = actionsOut.DiscreteActions; ActionSegment<int> discreteActions = actionsOut.DiscreteActions;
int kW = 0; int vertical = 0;
int kS = 0; int horizontal = 0;
int kA = 0; if (Input.GetKey(KeyCode.W) && !Input.GetKey(KeyCode.S))
int kD = 0;
if (Input.GetKey(KeyCode.W))
{ {
kW = 1; vertical = 1;
} }
if (Input.GetKey(KeyCode.S)) else if (Input.GetKey(KeyCode.S) && !Input.GetKey(KeyCode.W))
{ {
kS = 1; vertical = -1;
} }
if (Input.GetKey(KeyCode.A)) else
{ {
kA = 1; vertical = 0;
} }
if (Input.GetKey(KeyCode.D)) if (Input.GetKey(KeyCode.D) && !Input.GetKey(KeyCode.A))
{ {
kD = 1; horizontal = 1;
}
else if (Input.GetKey(KeyCode.A) && !Input.GetKey(KeyCode.D))
{
horizontal = -1;
}
else
{
horizontal = 0;
} }
discreteActions[0] = kW;
discreteActions[1] = kS;
discreteActions[2] = kA;
discreteActions[3] = kD;
if (Input.GetMouseButton(0)) if (Input.GetMouseButton(0))
{ {
// Debug.Log("mousebuttonhit"); // Debug.Log("mousebuttonhit");
shoot = 1; shoot = 1;
} }
discreteActions[4] = shoot; else
{
shoot = 0;
}
discreteActions[0] = vertical;
discreteActions[1] = horizontal;
discreteActions[2] = shoot;
//^^^^^^^^^^^^^^^^^^^^^discrete-Control^^^^^^^^^^^^^^^^^^^^^^ //^^^^^^^^^^^^^^^^^^^^^discrete-Control^^^^^^^^^^^^^^^^^^^^^^
//vvvvvvvvvvvvvvvvvvvvvvvvvvvvvcontinuous-Controlvvvvvvvvvvvvvvvvvvvvvv //vvvvvvvvvvvvvvvvvvvvvvvvvvvvvcontinuous-Controlvvvvvvvvvvvvvvvvvvvvvv
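
The refactored OnActionReceived and Heuristic above collapse the five W/S/A/D/shoot branches into three discrete branches (vertical, horizontal, shoot), with a branch value of 2 standing in for -1. A minimal sketch of the same decoding on the Python side (the function and names here are illustrative, not part of the commit):

```python
# Decode the agent's 3-branch discrete action (branch sizes assumed to be [3, 3, 2]):
# vertical/horizontal in {0, 1, 2} with 2 meaning -1, shoot in {0, 1}.
def decode_discrete_action(vertical: int, horizontal: int, shoot: int):
    signed = {0: 0, 1: 1, 2: -1}
    return signed[vertical], signed[horizontal], shoot

# Example: W + A pressed, no shot -> (1, -1, 0)
print(decode_discrete_action(1, 2, 0))
```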

View File

@ -31,24 +31,24 @@ public class HistoryRecorder : MonoBehaviour
{ {
EPTotalShootCount.Add(TotalShootCount); EPTotalShootCount.Add(TotalShootCount);
} }
public void realTimeKeyCounter(int kW, int kS, int kA, int kD, int shoot) public void realTimeKeyCounter(int vertical, int horizontal, int shoot)
{ {
if (kW == 1) if (vertical == 1)
{ {
realTimeWKeyCount += 1; realTimeWKeyCount += 1;
} }
if (kS == 1) else if (vertical == -1)
{ {
realTimeSKeyCount += 1; realTimeSKeyCount += 1;
} }
if (kA == 1) if (horizontal == 1)
{
realTimeAKeyCount += 1;
}
if (kD == 1)
{ {
realTimeDKeyCount += 1; realTimeDKeyCount += 1;
} }
else if (horizontal == -1)
{
realTimeAKeyCount += 1;
}
if (shoot == 1) if (shoot == 1)
{ {
realTimeShootCount += 1; realTimeShootCount += 1;

View File

@ -69,39 +69,37 @@ public class UIController : MonoBehaviour
} }
//------------Key Viewer---------- //------------Key Viewer----------
public void updateWASDKeyViewer(int kW,int kS,int kA,int kD) public void updateWASDKeyViewer(int vertical,int horizontal)
{ {
if (kW == 1) if (vertical == 1)
{ {
upText.color = Color.red; upText.color = Color.red;
downText.color = Color.black;
} }
else else if (vertical == -1)
{
upText.color = Color.black;
}
if (kS == 1)
{ {
downText.color = Color.red; downText.color = Color.red;
upText.color = Color.black;
} }
else else
{ {
downText.color = Color.black; downText.color = Color.black;
upText.color = Color.black;
} }
if(kA == 1) if (horizontal == 1)
{
leftText.color = Color.red;
}
else
{
leftText.color = Color.black;
}
if( kD == 1)
{ {
rightText.color = Color.red; rightText.color = Color.red;
leftText.color = Color.black;
}
else if (horizontal == -1)
{
leftText.color = Color.red;
rightText.color = Color.black;
} }
else else
{ {
rightText.color = Color.black; downText.color = Color.black;
upText.color = Color.black;
} }
} }
public void updateShootKeyViewer(int shoot,bool isGunReady) public void updateShootKeyViewer(int shoot,bool isGunReady)

View File

@ -0,0 +1,27 @@
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
public class gameFlowController : MonoBehaviour
{
public GameObject Agent;
AgentWithGun agentWithGun;
// Start is called before the first frame update
void Start()
{
agentWithGun = Agent.GetComponent<AgentWithGun>();
}
// Update is called once per frame
void Update()
{
if (Input.GetKey(KeyCode.Escape))
{
Application.Quit();
}
if (Input.GetKey(KeyCode.L))
{
agentWithGun.lockMouse = !agentWithGun.lockMouse;
}
}
}

View File

@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 9a8fb4d12d4b8fc4784f3e142e7fdcf8
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@ -19,6 +19,21 @@ public class EnvArgsChanger : MonoBehaviour
public Text DecisionPeriodDataText; public Text DecisionPeriodDataText;
public Toggle TakeActionsBetweenDecisionsToggle; public Toggle TakeActionsBetweenDecisionsToggle;
[Header("Lock Mouse")]
public Toggle LockMouseToggle;
[Header("Default Camera")]
public Toggle FPToggle;
public Text FPText;
public Toggle TPToggle;
public Text TPText;
private StartSeneData startSeneData;
private void Start()
{
startSeneData = DataTransfer.GetComponent<StartSeneData>();
}
public void onEnemynumValueChanged() public void onEnemynumValueChanged()
{ {
@ -30,7 +45,7 @@ public class EnvArgsChanger : MonoBehaviour
else else
{ {
EnemyNumText.color = Color.yellow; EnemyNumText.color = Color.yellow;
DataTransfer.GetComponent<StartSeneData>().EnemyNum = Math.Abs(int.Parse(EnemyNumInput.GetComponent<InputField>().text)); startSeneData.EnemyNum = Math.Abs(int.Parse(EnemyNumInput.GetComponent<InputField>().text));
} }
} }
@ -44,19 +59,48 @@ public class EnvArgsChanger : MonoBehaviour
else else
{ {
TimeLimText.color = Color.yellow; TimeLimText.color = Color.yellow;
DataTransfer.GetComponent<StartSeneData>().Timelim = Math.Abs(int.Parse(TimelimInput.GetComponent<InputField>().text)); startSeneData.Timelim = Math.Abs(int.Parse(TimelimInput.GetComponent<InputField>().text));
} }
} }
public void onDPSlideValueChanged() public void onDPSlideValueChanged()
{ {
// DecisionPeriod(DP) value Control // DecisionPeriod(DP) value Control
DataTransfer.GetComponent<StartSeneData>().DecisionPeriod = (int)(DecisionPeriodSlide.GetComponent<Slider>().value); startSeneData.DecisionPeriod = (int)(DecisionPeriodSlide.GetComponent<Slider>().value);
DecisionPeriodDataText.text = DataTransfer.GetComponent<StartSeneData>().DecisionPeriod.ToString(); DecisionPeriodDataText.text = startSeneData.DecisionPeriod.ToString();
} }
public void onABDToggleChanged() public void onABDToggleChanged()
{ {
// Actions Between Decisions(ABD) Toggle Control // Actions Between Decisions(ABD) Toggle Control
DataTransfer.GetComponent<StartSeneData>().ActionsBetweenDecisions = TakeActionsBetweenDecisionsToggle.isOn; startSeneData.ActionsBetweenDecisions = TakeActionsBetweenDecisionsToggle.isOn;
}
public void onLockMouseToggleChanged()
{
// lock mouse or not
startSeneData.lockMouse = LockMouseToggle.isOn;
}
public void onTPCamToggleChanged()
{
startSeneData.defaultTPCamera = true;
FPToggle.interactable = true;
FPToggle.SetIsOnWithoutNotify(false);
FPText.color = Color.gray;
TPToggle.SetIsOnWithoutNotify(true);
TPToggle.interactable = false;
TPText.color = Color.green;
}
public void onFPCameToggleChanged()
{
startSeneData.defaultTPCamera = false;
TPToggle.interactable = true;
TPToggle.SetIsOnWithoutNotify(false);
TPText.color = Color.gray;
FPToggle.SetIsOnWithoutNotify(true);
FPToggle.interactable = false;
FPText.color = Color.green;
} }
} }

View File

@ -13,6 +13,8 @@ public class StartSeneData : MonoBehaviour
public float killRewardDefault = 10.0f; public float killRewardDefault = 10.0f;
public float winRewardDefault = 20.0f; public float winRewardDefault = 20.0f;
public float loseRewardDefault = -10.0f; public float loseRewardDefault = -10.0f;
public bool lockMouse = false;
public bool defaultTPCamera = true;
// LoadDir // LoadDir
[System.NonSerialized]public string LoadDirDate = "0"; [System.NonSerialized]public string LoadDirDate = "0";

View File

@ -0,0 +1,90 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n"
]
}
],
"source": [
"import time\n",
"import aimBotEnv\n",
"from HumanAction import HumanActions\n",
"\n",
"# Env\n",
"ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"\n",
"MOUSEDISCOUNT = 8.0\n",
"MAX_EP = 10000000\n",
"\n",
"env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "UnityCommunicatorStoppedException",
"evalue": "Communicator has exited.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mUnityCommunicatorStoppedException\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_37248/645561173.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mactions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdemoAct\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetHumanActions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32mc:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, actions, behaviorName, trackedAgent)\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[1;31m# take action to env\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 73\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_actions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehavior_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mthisActionTuple\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 74\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 75\u001b[0m \u001b[1;31m# get nextState & reward & done after this action\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 76\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloadDir\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msaveNow\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetSteps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrackedAgent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\timers.py\u001b[0m in \u001b[0;36mwrapped\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 304\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mhierarchical_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__qualname__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 305\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 306\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 307\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapped\u001b[0m \u001b[1;31m# type: ignore\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\environment.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 333\u001b[0m \u001b[0moutputs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_communicator\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexchange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstep_input\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_poll_process\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 334\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0moutputs\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 335\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mUnityCommunicatorStoppedException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Communicator has exited.\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 336\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_update_behavior_specs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 337\u001b[0m \u001b[0mrl_output\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0moutputs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrl_output\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mUnityCommunicatorStoppedException\u001b[0m: Communicator has exited."
]
}
],
"source": [
"done = False\n",
"env.reset()\n",
"demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n",
"for ep in range(MAX_EP):\n",
" while not done:\n",
" actions = demoAct.getHumanActions()\n",
" env.step(actions=actions)6\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
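
The demo cell above never refreshes `done` from the environment, so the episode boundary is lost. A hedged sketch of a human-control loop that also consumes the step return, assuming `env.step(...)` returns `(nextState, reward, done, loadDir, saveNow)` as suggested by the aimBotEnv.py frames in the traceback above:

```python
# Hedged sketch of a human-control episode loop; assumes env.step(actions=...)
# returns (nextState, reward, done, loadDir, saveNow) as in aimBotEnv.py above.
import aimBotEnv
from HumanAction import HumanActions

env = aimBotEnv.makeEnv(envPath="./Build-CloseEnemyCut/Aimbot-PPO", workerID=1, basePort=200)
demoAct = HumanActions(mouseDiscount=8.0)

for ep in range(10):
    env.reset()
    done = False
    while not done:
        actions = demoAct.getHumanActions()
        nextState, reward, done, loadDir, saveNow = env.step(actions=actions)
```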

View File

@ -0,0 +1,51 @@
import keyboard
import mouse
class HumanActions:
    def __init__(self, mouseDiscount: float = 10, screenW: int = 1920, screenH: int = 1080):
        def multiPressed():
            pass

        keyboard.add_hotkey("w+a", multiPressed)
        keyboard.add_hotkey("w+d", multiPressed)
        keyboard.add_hotkey("s+a", multiPressed)
        keyboard.add_hotkey("s+d", multiPressed)
        self.screenW = screenW
        self.screenH = screenH
        self.MOUSEDISCOUNT = mouseDiscount

    def getHumanActions(self):
        x, _ = mouse.get_position()
        xMovement = (x - self.screenW / 2) / self.MOUSEDISCOUNT
        ws = 0
        ad = 0
        click = 0
        if keyboard.is_pressed("w"):
            ws = 1
        elif keyboard.is_pressed("s"):
            ws = 2
        if keyboard.is_pressed("d"):
            ad = 1
        elif keyboard.is_pressed("a"):
            ad = 2
        if keyboard.is_pressed("w+d"):
            ws = 1
            ad = 1
        elif keyboard.is_pressed("w+a"):
            ws = 1
            ad = 2
        elif keyboard.is_pressed("s+d"):
            ws = 2
            ad = 1
        elif keyboard.is_pressed("s+a"):
            ws = 2
            ad = 2
        if mouse.is_pressed(button="left"):
            click = 1
        actions = [ws, ad, click, [xMovement]]
        mouse.move(self.screenW / 2, self.screenH / 2)
        return actions
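
getHumanActions recenters the cursor on every call, so xMovement is the per-step horizontal displacement scaled down by mouseDiscount, and the returned list [ws, ad, click, [xMovement]] lines up with the agent's three discrete branches plus the single continuous mouse-X action. A hedged sketch of turning that list into an ML-Agents action, assuming the standard mlagents_envs ActionTuple layout of one row per agent (aimBotEnv.step presumably does this packing internally):

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple

def to_action_tuple(human_action):
    # [ws, ad, click, [xMovement]] -> one discrete row of 3 branches + one continuous row.
    ws, ad, click, (x_movement,) = human_action
    discrete = np.array([[ws, ad, click]], dtype=np.int32)
    continuous = np.array([[x_movement]], dtype=np.float32)
    return ActionTuple(continuous=continuous, discrete=discrete)
```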

File diff suppressed because it is too large

View File

@ -1,108 +1,213 @@
import tensorflow as tf import tensorflow as tf
from tensorflow.python.ops.numpy_ops import ndarray
import tensorflow_probability as tfp import tensorflow_probability as tfp
import numpy as np import numpy as np
import time
import math import math
import copy
import datetime import datetime
import os from PPOConfig import PPOConfig
from tensorflow import keras from tensorflow import keras
from tensorflow.keras import layers from tensorflow.keras import layers
from tensorflow.keras import optimizers from tensorflow.keras import optimizers
from keras_radam import RAdam
EPS = 1e-10
class PPO(object): class PPO(object):
"""Create PPO Agent def __init__(
self,
stateSize: int,
disActShape: list,
conActSize: int,
conActRange: float,
PPOConfig: PPOConfig,
):
"""initialize PPO
Args:
stateSize (int): enviroment state size
disActShape (numpy): discrete Action shape.
just like [3,2],means 2 type of dis actions,each act include 3 and 2 types
if no discrete action output then use [0].
conActSize (int): continuous Action Size. if no continuous action output then use 0.
conActRange (float): continuous action range. -conActRange to +conActRange
PPOConfig (PPOConfig): PPO configuration
""" """
# check use dis action or not.
def __init__(self, stateSize, disActShape, conActSize, conActRange, criticLR, actorLR, gamma, epsilon, entropyWeight, saveDir, loadModelDir): if disActShape == [0]:
# non dis action output
# check disActShape is correct(greater than 1) self.disActSize = 0
self.disOutputSize = 0
else:
# make sure disActShape greater than 1
try: try:
if np.any(np.array(disActShape) <= 1): if np.any(np.array(disActShape) <= 1):
raise ValueError("disActShape error,disActShape should greater than 1 but get",disActShape) raise ValueError(
except ValueError as e: "disActShape error,disActShape should greater than 1 but get", disActShape
)
except ValueError:
raise raise
self.disActSize = len(disActShape)
self.disOutputSize = sum(disActShape)
self.stateSize = stateSize self.stateSize = stateSize
# self.actionSize = actionSize self.disActShape = disActShape
self.disActShape = disActShape # shape of discrete action output. like [3,3,2]
self.disActSize = len(disActShape)
self.conActSize = conActSize self.conActSize = conActSize
self.conActRange = conActRange self.conActRange = conActRange
self.criticLR = criticLR self.muSigSize = 2
self.actorLR = actorLR self.conOutputSize = conActSize * self.muSigSize
self.GAMMA = gamma
self.EPSILON = epsilon
self.saveDir = saveDir
self.entropyWeight = entropyWeight
self.disOutputSize = sum(disActShape) # config
self.conOutputSize = conActSize * 2 self.NNShape = PPOConfig.NNShape
self.criticLR = PPOConfig.criticLR
self.actorLR = PPOConfig.actorLR
self.gamma = PPOConfig.gamma
self.lmbda = PPOConfig.lmbda
self.clipRange = PPOConfig.clipRange
self.entropyWeight = PPOConfig.entropyWeight
self.trainEpochs = PPOConfig.trainEpochs
self.saveDir = PPOConfig.saveDir
self.loadModelDir = PPOConfig.loadModelDir
print("---------thisPPO Params---------")
print("self.stateSize = ", self.stateSize)
print("self.disActShape = ", self.disActShape)
print("self.disActSize", self.disActSize)
print("self.disOutputSize", self.disOutputSize)
print("self.conActSize = ", self.conActSize)
print("self.conActRange = ", self.conActRange)
print("self.conOutputSize = ", self.conOutputSize)
if loadModelDir == None: # config
print("---------thisPPO config---------")
print("self.NNShape = ", self.NNShape)
print("self.criticLR = ", self.criticLR)
print("self.actorLR = ", self.actorLR)
print("self.gamma = ", self.gamma)
print("self.lmbda = ", self.lmbda)
print("self.clipRange = ", self.clipRange)
print("self.entropyWeight = ", self.entropyWeight)
print("self.trainEpochs = ", self.trainEpochs)
print("self.saveDir = ", self.saveDir)
print("self.loadModelDir = ", self.loadModelDir)
# load NN or not
if self.loadModelDir is None:
# critc NN # critc NN
self.critic = self.buildCriticNet(self.stateSize, 1, compileModel=True) self.critic = self.buildCriticNet(self.stateSize, 1, compileModel=True)
# actor NN # actor NN
self.actor = self.buildActorNet(self.stateSize, self.conActRange, compileModel = True) self.actor = self.buildActorNet(self.stateSize, compileModel=True)
print("---------Actor Model Create Success---------")
self.actor.summary()
print("---------Critic Model Create Success---------")
self.critic.summary()
else: else:
# critc NN # critc NN
self.critic = self.buildCriticNet(self.stateSize, 1, compileModel=True) self.critic = self.buildCriticNet(self.stateSize, 1, compileModel=True)
# actor NN # actor NN
self.actor = self.buildActorNet(self.stateSize, self.conActRange, compileModel=True) self.actor = self.buildActorNet(self.stateSize, compileModel=True)
# load weight to Critic&Actor NN # load weight to Critic&Actor NN
self.loadWeightToModels(loadModelDir) self.loadWeightToModels(self.loadModelDir)
print("---------Actor Model Load Success---------")
self.actor.summary()
print("---------Critic Model Load Success---------")
self.critic.summary()
# Build Net # Build Net
def buildActorNet(self, inputSize, continuousActionRange,compileModel): def buildActorNet(self, inputSize: int, compileModel: bool):
"""build Actor Nueral Net and compile.Output:[disAct1,disAct2,disAct3,mu,sigma] """build Actor Nueral Net and compile.Output:[disAct1,disAct2,disAct3,mu,sigma]
Args: Args:
inputSize (int): InputLayer Nueral size. inputSize (int): InputLayer Nueral size.
continuousActionRange (foat): continuous Action's max Range. compileModel (bool): compile Model or not.
Returns: Returns:
keras.Model: return Actor NN keras.Model: return Actor NN
""" """
stateInput = layers.Input(shape=(inputSize,), name='stateInput') # -----------Input Layers-----------
dense0 = layers.Dense(500, activation='relu',name='dense0',)(stateInput) stateInput = layers.Input(shape=(inputSize,), name="stateInput")
dense1 = layers.Dense(200, activation='relu',name='dense1',)(dense0)
dense2 = layers.Dense(100, activation='relu', name='dense2')(dense1)
disAct1 = layers.Dense(3, activation='softmax',name='WSAction')(dense2) # WS # -------Intermediate layers--------
disAct2 = layers.Dense(3, activation='softmax',name='ADAction')(dense2) # AD interLayers = []
disAct3 = layers.Dense(2, activation='softmax',name='ShootAction')(dense2) # Mouse shoot interLayersIndex = 0
mu = continuousActionRange * layers.Dense(1, activation='tanh', name='muOut')(dense2) # mu, the normal distribution's mean for neuralUnit in self.NNShape:
sigma = 1e-8 + layers.Dense(1, activation='softplus',name='sigmaOut')(dense2) # sigma, the normal distribution's sigma thisLayerName = "dense" + str(interLayersIndex)
# musig = layers.concatenate([mu,sigma],name = 'musig') if interLayersIndex == 0:
totalOut = layers.concatenate( interLayers.append(
[disAct1, disAct2, disAct3, mu, sigma], name='totalOut') # package layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(stateInput)
)
else:
interLayers.append(
layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(interLayers[-1])
)
interLayersIndex += 1
# ----------Output Layers-----------
outputLayersList = []
if self.disActSize != 0:
# while NN have discrete action output.
disActIndex = 0
for thisDisActDepth in self.disActShape:
thisDisActName = "disAct" + str(disActIndex)
outputLayersList.append(
layers.Dense(thisDisActDepth, activation="softmax", name=thisDisActName)(
interLayers[-1]
)
)
disActIndex += 1
if self.conActSize != 0:
# while NN have continuous action output.
mu = tf.multiply(
layers.Dense(1, activation="tanh", name="muOut")(interLayers[-1]), self.conActRange
) # mu, the normal distribution's location parameter
sigma = tf.add(
layers.Dense(1, activation="softplus", name="sigmaOut")(interLayers[-1]), EPS
) # sigma, the normal distribution's scale parameter
outputLayersList.append(mu)
outputLayersList.append(sigma)
totalOut = layers.concatenate(outputLayersList, name="totalOut") # package
# ----------Model Compile-----------
model = keras.Model(inputs=stateInput, outputs=totalOut) model = keras.Model(inputs=stateInput, outputs=totalOut)
#actorOPT = optimizers.Adam(learning_rate = self.actorLR) if compileModel: # Compile Model
if compileModel: actorOPT = optimizers.Adam(learning_rate=self.actorLR)
actorOPT = RAdam(self.actorLR)
model.compile(optimizer=actorOPT, loss=self.aLoss()) model.compile(optimizer=actorOPT, loss=self.aLoss())
return model return model
def buildCriticNet(self, inputSize, outputSize,compileModel): def buildCriticNet(self, inputSize: int, outputSize: int, compileModel: bool):
"""build Critic Nueral Net and compile.Output:[Q] """build Critic Nueral Net and compile.Output:[Q]
Args: Args:
inputSize (int): InputLayer Neural Size inputSize (int): input size
outputSize (float): Q size outputSize (int): output size
compileModel (bool): compile Model or not.
Returns: Returns:
keras.Model: return Critic NN keras.Model: return Critic NN
""" """
stateInput = keras.Input(shape=(inputSize,)) # -----------Input Layers-----------
dense0 = layers.Dense(500, activation='relu', stateInput = keras.Input(shape=(inputSize,), name="stateInput")
name='dense0',)(stateInput)
dense1 = layers.Dense(200, activation='relu')(dense0) # -------Intermediate layers--------
dense2 = layers.Dense(100, activation='relu')(dense1) interLayers = []
output = layers.Dense(outputSize)(dense2) interLayersIndex = 0
for neuralUnit in self.NNShape:
thisLayerName = "dense" + str(interLayersIndex)
if interLayersIndex == 0:
interLayers.append(
layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(stateInput)
)
else:
interLayers.append(
layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(interLayers[-1])
)
interLayersIndex += 1
# ----------Output Layers-----------
output = layers.Dense(outputSize, activation=None)(interLayers[-1])
# ----------Model Compile-----------
model = keras.Model(inputs=stateInput, outputs=output) model = keras.Model(inputs=stateInput, outputs=output)
if compileModel: if compileModel:
criticOPT = optimizers.Adam(learning_rate=self.criticLR) criticOPT = optimizers.Adam(learning_rate=self.criticLR)
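
In the hunk above, both buildActorNet and buildCriticNet now derive their hidden stack from PPOConfig.NNShape instead of the hard-coded 500/200/100 layers. A minimal standalone sketch of that pattern (the state size and layer widths below are placeholder values):

```python
from tensorflow import keras
from tensorflow.keras import layers

def build_mlp_body(state_size: int, nn_shape):
    # Shared NNShape-driven MLP body used by both the actor and critic heads.
    state_input = layers.Input(shape=(state_size,), name="stateInput")
    x = state_input
    for i, units in enumerate(nn_shape):
        x = layers.Dense(units, activation="relu", name="dense" + str(i))(x)
    return state_input, x

# Example: a critic head on top of the shared body.
state_input, body = build_mlp_body(state_size=30, nn_shape=[500, 200, 100])
critic = keras.Model(inputs=state_input, outputs=layers.Dense(1, activation=None)(body))
```
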
@ -110,36 +215,50 @@ class PPO(object):
return model return model
# loss Function # loss Function
# critic loss
def cLoss(self): def cLoss(self):
"""Critic Loss function """Critic Loss function"""
"""
def loss(y_true, y_pred): def loss(y_true, y_pred):
# y_true: discountedR # y_true: discountedR
# y_pred: critcV = model.predict(states) # y_pred: critcV = model.predict(states)
advantage = y_true - y_pred # TD error adv = y_true - y_pred # TD error
loss = tf.reduce_mean(tf.square(advantage)) loss = tf.reduce_mean(tf.square(adv))
return loss
return loss return loss
return loss
# actor loss
def aLoss(self): def aLoss(self):
def getDiscreteALoss(nowProbs,oldProbs,advantage): """Actor Loss function"""
def getDiscreteALoss(nowProbs, oldProbs, disOneHotAct, actShape, advantage):
"""get Discrete Action Loss """get Discrete Action Loss
Args: Args:
nowProbs (tf.constant): (length,actionSize) nowProbs (tf.constant): (length,actionProbSize)
oldProbs (tf.constant): (length,actionSize) oldProbs (tf.constant): (length,actionProbSize)
advantage (tf.constant): (length,) advantage (tf.constant): (length,)
Returns: Returns:
tf.constant: (length,) tf.constant: (length,)
""" """
entropy = tf.reduce_mean(tf.math.multiply(nowProbs,tf.math.log(nowProbs+1e-6))) entropy = tf.negative(
ratio = tf.math.divide(nowProbs,oldProbs+1e-6) tf.reduce_mean(tf.math.multiply(nowProbs, tf.math.log(nowProbs + EPS)))
value = tf.math.multiply(ratio,tf.expand_dims(advantage,axis = 1)) )
clipRatio = tf.clip_by_value(ratio,1. - self.EPSILON,1.+self.EPSILON) nowSingleProbs = tf.reduce_mean(tf.multiply(nowProbs, disOneHotAct), axis=1)
clipValue = tf.math.multiply(clipRatio,tf.expand_dims(advantage,axis = 1)) nowSingleProbs = tf.multiply(nowSingleProbs, actShape)
loss = -tf.reduce_mean(tf.math.minimum(value,clipValue)) + self.entropyWeight * entropy oldSingleProbs = tf.reduce_mean(tf.multiply(oldProbs, disOneHotAct), axis=1)
oldSingleProbs = tf.multiply(oldSingleProbs, actShape)
ratio = tf.math.divide(nowSingleProbs, oldSingleProbs + EPS)
value = tf.math.multiply(ratio, advantage)
clipRatio = tf.clip_by_value(ratio, 1.0 - self.clipRange, 1.0 + self.clipRange)
clipValue = tf.math.multiply(clipRatio, advantage)
loss = tf.math.negative(
tf.reduce_mean(tf.math.minimum(value, clipValue))
- tf.multiply(self.entropyWeight, entropy)
)
return loss return loss
def getContinuousALoss(musig, actions, oldProbs, advantage): def getContinuousALoss(musig, actions, oldProbs, advantage):
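
getDiscreteALoss now masks out the probability of the action actually taken (via the one-hot columns) before forming the PPO ratio and clipping it to [1 - clipRange, 1 + clipRange]. A small numeric sketch of that clipped-surrogate term with made-up values (entropy bonus omitted):

```python
import numpy as np

clip_range = 0.2
now_prob = np.array([0.60, 0.10])   # pi_new(a|s) for two samples
old_prob = np.array([0.50, 0.40])   # pi_old(a|s)
advantage = np.array([1.0, -1.0])

ratio = now_prob / (old_prob + 1e-10)
clipped_ratio = np.clip(ratio, 1.0 - clip_range, 1.0 + clip_range)
surrogate = np.minimum(ratio * advantage, clipped_ratio * advantage)
loss = -np.mean(surrogate)
print(ratio, surrogate, loss)   # ratio ~[1.2, 0.25], surrogate [1.2, -0.8], loss -0.2
```
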
@ -159,103 +278,286 @@ class PPO(object):
dist = tfp.distributions.Normal(mu, sigma) dist = tfp.distributions.Normal(mu, sigma)
nowProbs = dist.prob(actions) nowProbs = dist.prob(actions)
ratio = tf.math.divide(nowProbs,oldProbs+1e-6)
entropy = tf.reduce_mean(dist.entropy()) entropy = tf.reduce_mean(dist.entropy())
value = tf.math.multiply(ratio,tf.expand_dims(advantage,axis = 1)) ratio = tf.math.divide(nowProbs, oldProbs + EPS)
clipValue = tf.clip_by_value(ratio,1. - self.EPSILON,1.+self.EPSILON) * advantage value = tf.math.multiply(ratio, advantage)
loss = -tf.reduce_mean(tf.math.minimum(value,clipValue)) + self.entropyWeight * entropy clipRatio = tf.clip_by_value(ratio, 1.0 - self.clipRange, 1.0 + self.clipRange)
clipValue = tf.math.multiply(clipRatio, advantage)
loss = tf.negative(
tf.reduce_mean(tf.math.minimum(value, clipValue))
- tf.multiply(self.entropyWeight, entropy)
)
return loss return loss
def loss(y_true, y_pred): def loss(y_true, y_pred):
# y_true: [[disAct1, disAct2, disAct3, mu, sigma]] # y_true: [[disActProb..., conActProbs..., disOneHotActs..., conAct..., advantage]]
# y_pred: muSigma = self.actor(state) = # y_pred: [[disActProb..., mu, sigma...]]
# [[disAct1, disAct2, disAct3, mu, sigma]] totalALoss = 0
oldDisProbs = y_true[:,0:self.disOutputSize]
oldConMusigs = y_true[:,self.disOutputSize:self.disOutputSize+self.conActSize]
conActions = y_true[:,self.disOutputSize+self.conActSize:self.disOutputSize+(self.conActSize*2)]
advantage = y_true[:,-1]
nowDisProbs = y_pred[:,0:self.disOutputSize] # [disAct1, disAct2, disAct3]
nowConMusigs = y_pred[:,self.disOutputSize:] #[musig1,musig2]
totalALoss = tf.constant([0.])
totalActionNum = 0 totalActionNum = 0
advantage = tf.expand_dims(y_true[:, -1], axis=1)
# for nowProb,oldProb in zip(tf.transpose(nowDisProbs,perm=[1,0,2]),tf.transpose(oldDisProbs,perm=[1,0,2])): if self.disActSize != 0:
# while NN have discrete action output.
oldDisProbs = y_true[:, 0 : self.disOutputSize]
nowDisProbs = y_pred[:, 0 : self.disOutputSize] # [disAct1, disAct2, disAct3]
disOneHotActs = y_true[
:,
self.disOutputSize
+ self.conActSize : self.disOutputSize
+ self.conActSize
+ self.disOutputSize,
]
lastDisActShape = 0 lastDisActShape = 0
for shape in self.disActShape: for thisShape in self.disActShape:
thisNowDisProbs = nowDisProbs[:,lastDisActShape:lastDisActShape+shape] thisNowDisProbs = nowDisProbs[:, lastDisActShape : lastDisActShape + thisShape]
thisOldDisProbs = oldDisProbs[:,lastDisActShape:lastDisActShape+shape] thisOldDisProbs = oldDisProbs[:, lastDisActShape : lastDisActShape + thisShape]
discreteALoss = getDiscreteALoss(thisNowDisProbs,thisOldDisProbs,advantage) thisDisOneHotActs = disOneHotActs[
lastDisActShape += shape :, lastDisActShape : lastDisActShape + thisShape
]
discreteALoss = getDiscreteALoss(
thisNowDisProbs, thisOldDisProbs, thisDisOneHotActs, thisShape, advantage
)
lastDisActShape += thisShape
totalALoss += discreteALoss totalALoss += discreteALoss
totalActionNum += 1 totalActionNum += 1.0
# for nowConMusig,conAction,oldPiProb in zip(tf.transpose(nowConMusigs,perm=[1,0,2]),conActions,oldPiProbs): if self.conActSize != 0:
# while NN have continuous action output.
oldConProbs = y_true[:, self.disOutputSize : self.disOutputSize + self.conActSize]
conActions = y_true[
:,
self.disOutputSize
+ self.conActSize : self.disOutputSize
+ self.conActSize
+ self.conActSize,
]
nowConMusigs = y_pred[:, self.disOutputSize :] # [musig1,musig2]
lastConAct = 0 lastConAct = 0
for act in range(self.conActSize): for conAct in range(self.conActSize):
thisNowConMusig = nowConMusigs[:,lastConAct:lastConAct+((act+1)*2)] thisNowConMusig = nowConMusigs[:, lastConAct : lastConAct + self.muSigSize]
thisOldConMusig = oldConMusigs[:,lastConAct:lastConAct+((act+1)*2)] thisOldConProb = oldConProbs[:, conAct : conAct + 1]
thisConAction = conActions[:,act] thisConAction = conActions[:, conAct]
continuousAloss = getContinuousALoss(thisNowConMusig,thisConAction,thisOldConMusig,advantage) continuousAloss = getContinuousALoss(
thisNowConMusig, thisConAction, thisOldConProb, advantage
)
totalALoss += continuousAloss totalALoss += continuousAloss
totalActionNum += 1 totalActionNum += 1.0
lastConAct += self.muSigSize
loss = tf.divide(totalALoss, totalActionNum) loss = tf.divide(totalALoss, totalActionNum)
return loss return loss
return loss return loss
# get Action&V # get Actions&values
def chooseAction(self, state): def chooseAction(self, state: ndarray):
"""Agent choose action to take """Agent choose action to take
Args: Args:
state (np.array): enviroment state state (ndarray): enviroment state
Returns: Returns:
np.array: np.array:
disAct1, actions,
discreteAction1 actions list,2dims like [[0],[1],[1.5]]
disAct2,
discreteAction2
disAct3,
discreteAction3
conAction,
continuousAction
predictResult, predictResult,
actor NN predict Result output actor NN predict Result output
""" """
# let actor choose action,use the normal distribution # let actor choose action,use the normal distribution
# state = np.expand_dims(state,0) # state = np.expand_dims(state,0)
# check state dimension is [1,statesize] # check state dimension is [stateNum,statesize]
if state.ndim != 2: if state.ndim != 2:
state = state.reshape([1,self.stateSize]) stateNum = int(len(state) / self.stateSize)
state = state.reshape([stateNum, self.stateSize])
predictResult = self.actor(state) # get predict result [[disAct1, disAct2, disAct3, musig]] predictResult = self.actor(state) # get predict result [[disAct1, disAct2, disAct3, musig]]
predictResult = predictResult.numpy() # print("predictResult",predictResult)
disAct1Prob = predictResult[0][0:3] # predictResult = predictResult.numpy()
disAct2Prob = predictResult[0][3:6] actions = []
disAct3Prob = predictResult[0][6:8] if self.disActSize != 0:
mu = predictResult[0][8] # while NN have discrete action output.
sigma = predictResult[0][9] lastDisActShape = 0
if math.isnan(mu) or math.isnan(sigma): for shape in self.disActShape:
thisDisActProbs = predictResult[:, lastDisActShape : lastDisActShape + shape]
dist = tfp.distributions.Categorical(probs=thisDisActProbs, dtype=tf.float32)
action = int(dist.sample().numpy()[0])
# action = np.argmax(thisDisActProbs)
actions.append(action)
lastDisActShape += shape
if self.conActSize != 0:
# while NN have continuous action output.
lastConAct = 0
for actIndex in range(self.conActSize):
thisMu = predictResult[:, self.disOutputSize + lastConAct]
thisSig = predictResult[:, self.disOutputSize + lastConAct + 1]
if math.isnan(thisMu) or math.isnan(thisSig):
# check mu or sigma is nan # check mu or sigma is nan
print("mu or sigma is nan") print("chooseAction:mu or sigma is nan")
thisDist = np.random.normal(loc=thisMu, scale=thisSig)
actions.append(np.clip(thisDist, -self.conActRange, self.conActRange))
lastConAct += 2
return actions, predictResult
disAct1 = np.argmax(disAct1Prob) # WS 0 or 1 or 2 def trainCritcActor(
disAct2 = np.argmax(disAct2Prob) # AD 0 or 1 or 2 self,
disAct3 = np.argmax(disAct3Prob) # mouse shoot 0 or 1 states: ndarray,
normDist = np.random.normal(loc=mu, scale=sigma) # normalDistribution oldActorResult: ndarray,
conAction = np.clip(normDist, -self.conActRange, actions: ndarray,
self.conActRange) # randomly sample an action from the normal distribution rewards: ndarray,
return disAct1, disAct2, disAct3, conAction, predictResult dones: ndarray,
nextState: ndarray,
epochs: int = None,
):
"""train critic&actor use PPO ways
def getCriticV(self, state): Args:
states (ndarray): states
oldActorResult (ndarray): actor predict result
actions (ndarray): predicted actions include both discrete actions and continuous actions
rewards (ndarray): rewards from enviroment
dones (ndarray): dones from enviroment
nextState (ndarray): next state from enviroment
epochs (int, optional): train epochs,default to ppoConfig. Defaults to None.
Returns:
tf.constant: criticLoss, actorLoss
"""
if epochs == None:
epochs = self.trainEpochs
criticValues = self.getCriticV(state=states)
discountedR = self.discountReward(nextState, criticValues, dones, rewards)
advantage = self.getGAE(discountedR, criticValues)
criticLoss = self.trainCritic(states, discountedR, epochs)
actorLoss = self.trainActor(states, oldActorResult, actions, advantage, epochs)
# print("A_Loss:", actorLoss, "C_Loss:", criticLoss)
return criticLoss, actorLoss
def trainCritic(self, states: ndarray, discountedR: ndarray, epochs: int = None):
"""critic NN trainning function
Args:
states (ndarray): states
discountedR (ndarray): discounted rewards
epochs (int, optional): train epochs,default to ppoConfig. Defaults to None.
Returns:
tf.constant: all critic losses
"""
if epochs == None:
epochs = self.trainEpochs
his = self.critic.fit(x=states, y=discountedR, epochs=epochs, verbose=0)
return his.history["loss"]
def trainActor(
self,
states: ndarray,
oldActorResult: ndarray,
actions: ndarray,
advantage: ndarray,
epochs: int = None,
):
"""actor NN trainning function
Args:
states (ndarray): states
oldActorResult (ndarray): actor predict results
actions (ndarray): acotor predict actions
advantage (ndarray): GAE advantage
epochs (int, optional): train epochs,default to ppoConfig. Defaults to None.
Returns:
tf.constant: all actor losses
"""
# Trian Actor
# states: Buffer States
# actions: Buffer Actions
# discountedR: Discounted Rewards
# Epochs: just Epochs
if epochs == None:
epochs = self.trainEpochs
actions = np.asarray(actions, dtype=np.float32)
disActions = actions[:, 0 : self.disActSize]
conActions = actions[:, self.disActSize :]
oldDisProbs = oldActorResult[:, 0 : self.disOutputSize] # [disAct1, disAct2, disAct3]
oldConMusigs = oldActorResult[:, self.disOutputSize :] # [musig1,musig2]
if self.disActSize != 0:
disOneHotActs = self.getOneHotActs(disActions)
if self.conActSize != 0:
# while NN have discrete6 & continuous actions output.
oldPiProbs = self.conProb(oldConMusigs[:, 0], oldConMusigs[:, 1], conActions)
# pack [oldDisProbs,oldPiProbs,conActions,advantage] as y_true
y_true = np.hstack((oldDisProbs, oldPiProbs, disOneHotActs, conActions, advantage))
else:
# while NN have only discrete actions output.
# pack [oldDisProbs,advantage] as y_true
y_true = np.hstack((oldDisProbs, disOneHotActs, advantage))
else:
if self.conActSize != 0:
# while NN have only continuous action output.
oldPiProbs = self.conProb(oldConMusigs[:, 0], oldConMusigs[:, 1], conActions)
# pack [oldPiProbs,conActions,advantage] as y_true
y_true = np.hstack((oldPiProbs, conActions, advantage))
else:
print("trainActor:disActSize & conActSize error")
time.sleep(999999)
# assembly Actions history
# train start
if np.any(tf.math.is_nan(y_true)):
print("y_true got nan")
print("y_true", y_true)
his = self.actor.fit(x=states, y=y_true, epochs=epochs, verbose=0)
if np.any(tf.math.is_nan(his.history["loss"])):
print("his.history['loss'] is nan!")
print(his.history["loss"])
return his.history["loss"]
def saveWeights(self, score: float):
"""save now NN's Weight. Use "models.save_weights" method.
Save as "tf" format "ckpt" file.
Args:
score (float): now score
"""
actor_save_dir = (
self.saveDir + datetime.datetime.now().strftime("%H%M%S") + "/actor/" + "actor.ckpt"
)
critic_save_dir = (
self.saveDir + datetime.datetime.now().strftime("%H%M%S") + "/critic/" + "critic.ckpt"
)
self.actor.save_weights(actor_save_dir, save_format="tf")
self.critic.save_weights(critic_save_dir, save_format="tf")
# create an empty file named as score to recored score
score_dir = (
self.saveDir + datetime.datetime.now().strftime("%H%M%S") + "/" + str(round(score))
)
scorefile = open(score_dir, "w")
scorefile.close()
print("Model's Weights Saved")
def loadWeightToModels(self, loadDir: str):
"""load NN Model. Use "models.load_weights()" method.
Load "tf" format "ckpt" file.
Args:
loadDir (str): Model dir
"""
actorDir = loadDir + "/actor/" + "actor.ckpt"
criticDir = loadDir + "/critic/" + "critic.ckpt"
self.actor.load_weights(actorDir)
self.critic.load_weights(criticDir)
print("++++++++++++++++++++++++++++++++++++")
print("++++++++++++Model Loaded++++++++++++")
print(loadDir)
print("++++++++++++++++++++++++++++++++++++")
def getCriticV(self, state: ndarray):
"""get Critic predict V value """get Critic predict V value
Args: Args:
state (np.array): Env state state (ndarray): Env state
Returns: Returns:
tensor: retrun Critic predict result tensor: retrun Critic predict result
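
In the hunk above, trainActor packs everything the custom aLoss needs into one y_true matrix: old discrete probabilities, old continuous action probabilities, one-hot discrete actions, continuous actions, and the advantage column. A shape-check sketch for this project's disActShape of [3, 3, 2] with one continuous action (random placeholder values):

```python
import numpy as np

N = 4                                    # batch size
dis_output_size = 3 + 3 + 2              # sum(disActShape)
old_dis_probs = np.random.rand(N, dis_output_size)
old_con_probs = np.random.rand(N, 1)     # probability of the taken continuous action
dis_onehot_acts = np.random.rand(N, dis_output_size)
con_actions = np.random.rand(N, 1)
advantage = np.random.rand(N, 1)

y_true = np.hstack((old_dis_probs, old_con_probs, dis_onehot_acts, con_actions, advantage))
print(y_true.shape)                      # (4, 19) = 8 + 1 + 8 + 1 + 1 columns
```
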
@ -263,41 +565,84 @@ class PPO(object):
# if state.ndim < 2: # if state.ndim < 2:
# state = np.expand_dims(state,0) # state = np.expand_dims(state,0)
if state.ndim != 2: if state.ndim != 2:
state = state.reshape([1,self.stateSize]) stateNum = int(len(state) / self.stateSize)
state = state.reshape([stateNum, self.stateSize])
return self.critic.predict(state) return self.critic.predict(state)
def discountReward(self, nextState, rewards): def discountReward(self, nextState: ndarray, values: ndarray, dones: ndarray, rewards: ndarray):
"""Discount future rewards """Discount future rewards
Args: Args:
nextState (np.array): next Env state nextState (ndarray): next Env state
rewards (np.array): reward list of this episode values (ndarray): critic predict values
dones (ndarray): dones from enviroment
rewards (ndarray): reward list of this episode
Returns: Returns:
np.array: discounted rewards list,same shape as rewards that input ndarray: discounted rewards list,same shape as rewards that input
"""
""" """
# discount future rewards
nextV = self.getCriticV(nextState) nextV = self.getCriticV(nextState)
dones = 1 - dones
discountedRewards = [] discountedRewards = []
for r in rewards[::-1]: for i in reversed(range(len(rewards))):
nextV = r + self.GAMMA*nextV nextV = rewards[i] + dones[i] * self.gamma * nextV
discountedRewards.append(nextV) discountedRewards.append(nextV)
discountedRewards.reverse() # \ESREVER/ discountedRewards.reverse() # reverse
discountedRewards = np.squeeze(discountedRewards) discountedRewards = np.squeeze(discountedRewards)
discountedRewards = np.expand_dims(discountedRewards, axis=1) discountedRewards = np.expand_dims(discountedRewards, axis=1)
# discountedRewards = np.array(discountedRewards)[:, np.newaxis] # discountedRewards = np.array(discountedRewards)[:, np.newaxis]
return discountedRewards return discountedRewards
"""
"""
nextV = self.getCriticV(nextState)
discountedRewards = []
for r in rewards[::-1]:
nextV = r + self.gamma * nextV
discountedRewards.append(nextV)
discountedRewards.reverse() # reverse
discountedRewards = np.squeeze(discountedRewards)
discountedRewards = np.expand_dims(discountedRewards, axis=1)
# discountedRewards = np.array(discountedRewards)[:, np.newaxis]
print(discountedRewards)
return discountedRewards
"""
g = 0
discountedRewards = []
lastValue = self.getCriticV(nextState)
values = np.append(values, lastValue, axis=0)
dones = 1 - dones
for i in reversed(range(len(rewards))):
delta = rewards[i] + self.gamma * values[i + 1] * dones[i] - values[i]
g = delta + self.gamma * self.lmbda * dones[i] * g
discountedRewards.append(g + values[i])
discountedRewards.reverse()
return np.asarray(discountedRewards)
def conProb(self, mu, sig, x): def getGAE(self, discountedRewards: ndarray, values: ndarray):
"""compute GAE adcantage
Args:
discountedRewards (ndarray): discounted rewards
values (ndarray): critic predict values
Returns:
ndarray: GAE advantage
"""
advantage = discountedRewards - values
advantage = (advantage - np.mean(advantage)) / (np.std(advantage) + EPS)
return advantage
def conProb(self, mu: ndarray, sig: ndarray, x: ndarray):
"""calculate probability when x in Normal distribution(mu,sigma) """calculate probability when x in Normal distribution(mu,sigma)
Args: Args:
mu (np,array): mu mu (ndarray): mu
sig (np.array): sigma sig (ndarray): sigma
x (np.array): x x (ndarray): x
Returns: Returns:
np.array: probabilities ndarray: probability
""" """
# 获取在正态分布mu,sig下当取x值时的概率 # 获取在正态分布mu,sig下当取x值时的概率
# return shape : (length,1) # return shape : (length,1)
@ -313,116 +658,58 @@ class PPO(object):
# prob = dist*tf.exp(-tf.square(x-mu)/(2.*tf.square(sig)))
return prob
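The body of conProb is elided by the hunk above; for reference, the density of x under Normal(mu, sigma) is usually computed as in the plain-NumPy sketch below (an illustration, not necessarily the exact TensorFlow implementation used in this class):

import numpy as np

def normalPdf(mu, sig, x, eps=1e-8):
    # 1 / (sqrt(2*pi) * sigma) * exp(-(x - mu)^2 / (2 * sigma^2))
    coeff = 1.0 / (np.sqrt(2.0 * np.pi) * sig + eps)
    return coeff * np.exp(-np.square(x - mu) / (2.0 * np.square(sig) + eps))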
def getOneHotActs(self, disActions):
"""one hot action encoder
Args:
disActions (ndarray): discrete actions
Returns:
ndarray: one hot actions
"""
actIndex = 0
for thisShape in self.disActShape:
thisActs = disActions[:, actIndex]
thisOneHotAct = tf.squeeze(tf.one_hot(thisActs, thisShape)).numpy()
if actIndex == 0:
oneHotActs = thisOneHotAct
else:
oneHotActs = np.append(oneHotActs, thisOneHotAct, axis=1)
actIndex += 1
return oneHotActs
def getAverageEntropy(self, probs: ndarray):
"""get average discrete & continuous action entropies
Args:
probs (ndarray): actor NN predict result
Returns:
float: average total entropy
list: discrete entropies
list: continuous entropies
"""
discreteEntropys = []
continuousEntropys = []
if self.disActSize != 0:
disProbs = probs[:, 0 : self.disOutputSize]
lastDisActIndex = 0
for actShape in self.disActShape:
thisDisProbs = disProbs[:, lastDisActIndex : lastDisActIndex + actShape]
lastDisActIndex += actShape
discreteEntropys.append(
tf.negative(
tf.reduce_mean(
tf.math.multiply(thisDisProbs, tf.math.log(thisDisProbs + EPS))
)
)
)
if self.conActSize != 0:
conProbs = probs[:, self.disOutputSize :]
conActIndex = 0
for i in range(self.conActSize):
thisConProbs = conProbs[:, conActIndex : conActIndex + 2]
conActIndex += 2
continuousEntropys.append(tf.reduce_mean(thisConProbs[:, 1]))
averageEntropy = np.mean([np.mean(discreteEntropys), np.mean(continuousEntropys)])
return averageEntropy, discreteEntropys, continuousEntropys

View File

@ -0,0 +1,65 @@
import numpy as np
class PPOBuffer(object):
def __init__(self):
self.states = []
self.actorProbs = []
self.actions = []
self.rewards = []
self.dones = []
print("√√√√√Buffer Initialized Success√√√√√")
def clearBuffer(self):
self.states = []
self.actorProbs = []
self.actions = []
self.rewards = []
self.dones = []
def getStates(self):
return self.standDims(np.asarray(self.states))
def getActorProbs(self):
return self.standDims(np.asarray(self.actorProbs))
def getActions(self):
return self.standDims(np.asarray(self.actions))
def getRewards(self):
return self.standDims(np.asarray(self.rewards))
def getDones(self):
return self.standDims(np.asarray(self.dones))
def saveState(self, state):
self.states.append(state)
def saveAction(self, action):
self.actions.append(action)
def saveReward(self, reward):
self.rewards.append(reward)
def standDims(self, data):
# standardize the data's dimensions
if np.ndim(data) > 2:
return np.squeeze(data, axis=1)
elif np.ndim(data) < 2:
return np.expand_dims(data, axis=1)
else:
return np.asarray(data)
def saveBuffers(self, state, actorProb, action, reward, done):
self.states.append(state)
self.actorProbs.append(actorProb)
self.actions.append(action)
self.rewards.append(reward)
self.dones.append(done)
"""
print("self.states", self.states)
print("self.actions", self.actions)
print("self.rewards", self.rewards)
print("self.dones", self.dones)
print("self.values", self.values)
"""

View File

@ -0,0 +1,15 @@
import datetime
from typing import NamedTuple, Optional
class PPOConfig(NamedTuple):
NNShape: list = [256, 256, 128]
actorLR: float = 2e-3 # Actor Net Learning Rate
criticLR: float = 2e-3 # Critic Net Learning Rate
gamma: float = 0.99
lmbda: float = 0.95
clipRange: float = 0.20
entropyWeight: float = 1e-2
trainEpochs: int = 8
saveDir: str = "PPO-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/"
loadModelDir: Optional[str] = None
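Because PPOConfig is a NamedTuple, individual hyperparameters can be overridden at construction time, e.g. (a minimal sketch, assuming PPOConfig is imported from this module):

config = PPOConfig(actorLR=1e-4, trainEpochs=10)
print(config.gamma)    # 0.99, unchanged default
print(config.saveDir)  # e.g. "PPO-Model/1011-0640/", fixed when the module is imported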

View File

@ -0,0 +1,58 @@
import matplotlib.pyplot as plt
class PPOHistory(object):
def __init__(self):
self.meanRewards = []
self.entropys = []
self.actorLosses = []
self.criticLosses = []
def saveHis(self, rewards, entropys, aLosses, cLosses):
self.meanRewards.extend([rewards])
self.entropys.extend([entropys])
self.actorLosses.extend(aLosses)
self.criticLosses.extend(cLosses)
def drawHis(self):
plt.figure(figsize=(21, 13), facecolor="#011627")
ax = plt.subplot(2, 2, 1)
ax.set_facecolor("#011627")
ax.spines["top"].set_color("#c9d2df")
ax.spines["bottom"].set_color("#c9d2df")
ax.spines["left"].set_color("#c9d2df")
ax.spines["right"].set_color("#c9d2df")
ax.plot(
range(len(self.meanRewards)), self.meanRewards, color="#c9d2df", label="AverageRewards"
)
ax.set_title("meanRewards", color="#c9d2df")
ax = plt.subplot(2, 2, 2)
ax.set_facecolor("#011627")
ax.spines["top"].set_color("#c9d2df")
ax.spines["bottom"].set_color("#c9d2df")
ax.spines["left"].set_color("#c9d2df")
ax.spines["right"].set_color("#c9d2df")
ax.plot(range(len(self.entropys)), self.entropys, color="#c9d2df", label="AverageEntropys")
ax.set_title("entropys", color="#c9d2df")
ax = plt.subplot(2, 2, 3)
ax.set_facecolor("#011627")
ax.spines["top"].set_color("#c9d2df")
ax.spines["bottom"].set_color("#c9d2df")
ax.spines["left"].set_color("#c9d2df")
ax.spines["right"].set_color("#c9d2df")
ax.plot(
range(len(self.actorLosses)), self.actorLosses, color="#c9d2df", label="actorLosses"
)
ax.set_title("actorLosses", color="#c9d2df")
ax = plt.subplot(2, 2, 4)
ax.set_facecolor("#011627")
ax.spines["top"].set_color("#c9d2df")
ax.spines["bottom"].set_color("#c9d2df")
ax.spines["left"].set_color("#c9d2df")
ax.spines["right"].set_color("#c9d2df")
ax.plot(
range(len(self.criticLosses)), self.criticLosses, color="#c9d2df", label="criticLosses"
)
ax.set_title("criticLosses", color="#c9d2df")
plt.show()
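The four subplots repeat the same styling; one optional way to factor it out (a refactor sketch, not part of this commit) would be:

import matplotlib.pyplot as plt

def styledAxes(index, title, fg="#c9d2df", bg="#011627"):
    # one subplot with the shared dark theme used by drawHis
    ax = plt.subplot(2, 2, index)
    ax.set_facecolor(bg)
    for side in ("top", "bottom", "left", "right"):
        ax.spines[side].set_color(fg)
    ax.set_title(title, color=fg)
    return ax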

View File

@ -1,8 +1,8 @@
from mlagents_envs.base_env import ActionTuple
from mlagents_envs.environment import UnityEnvironment
import numpy as np
from numpy import ndarray
class makeEnv(object):
@ -22,69 +22,71 @@ class makeEnv(object):
self.BEHA_SPECS = self.env.behavior_specs
self.BEHA_NAME = list(self.BEHA_SPECS)[0]
self.SPEC = self.BEHA_SPECS[self.BEHA_NAME]
self.OBSERVATION_SPECS = self.SPEC.observation_specs[0]  # observation spec
self.ACTION_SPEC = self.SPEC.action_spec  # action specs
self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size  # size of the discrete action branch
self.DISCRETE_SHAPE = list(self.ACTION_SPEC.discrete_branches)
self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size  # size of the continuous actions
self.STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE  # number of environment observation values
print("√√√√√Enviroment Initialized Success√√√√√")
def step(
self,
actions: list,
behaviorName: ndarray = None,
trackedAgent: ndarray = None,
):
"""convert the actions list to an ActionTuple, then send it to the environment
Args:
actions (list): PPO chooseAction output action list
behaviorName (ndarray, optional): behaviorName. Defaults to None.
trackedAgent (ndarray, optional): trackedAgent ID. Defaults to None.
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
# take the action in the environment
# return nextState, reward, done
if self.DISCRETE_SIZE == 0:
# create an empty discrete action
discreteActions = np.asarray([[0]])
else:
# create discrete actions from the actions list
discreteActions = np.asanyarray([actions[0:self.DISCRETE_SIZE]])
if self.CONTINUOUS_SIZE == 0:
# create an empty continuous action
continuousActions = np.asanyarray([[0.0]])
else:
# create continuous actions from the actions list
continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE:])
if behaviorName is None:
behaviorName = self.BEHA_NAME
if trackedAgent is None:
trackedAgent = self.TRACKED_AGENT
# create the ActionTuple
thisActionTuple = ActionTuple(continuous=continuousActions, discrete=discreteActions)
# send the action to the env
self.env.set_actions(behavior_name=behaviorName, action=thisActionTuple)
self.env.step()
# get nextState & reward & done after this action
nextState, reward, done, loadDir, saveNow = self.getSteps(behaviorName, trackedAgent)
return nextState, reward, done, loadDir, saveNow
def getSteps(self, behaviorName=None, trackedAgent=None):
"""get the environment's current observations:
State, Reward, Done, LoadDir, SaveNow
Args:
behaviorName (_type_, optional): behaviorName. Defaults to None.
trackedAgent (_type_, optional): trackedAgent. Defaults to None.
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
# get nextState & reward & done
if behaviorName is None:
behaviorName = self.BEHA_NAME
@ -94,25 +96,17 @@ class makeEnv(object):
if trackedAgent is None:
trackedAgent = self.TRACKED_AGENT
if trackedAgent in decisionSteps:  # while the episode is still running, the state is stored in decision_steps
nextState = decisionSteps[trackedAgent].obs[0]
nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
saveNow = nextState[0][-1]
loadDir = nextState[0][-3:-1]
nextState = nextState[0][:-3]
reward = decisionSteps[trackedAgent].reward
done = False
if trackedAgent in terminalSteps:  # when the episode has ended, the state is stored in terminal_steps
nextState = terminalSteps[trackedAgent].obs[0]
nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
saveNow = nextState[0][-1]
loadDir = nextState[0][-3:-1]
nextState = nextState[0][:-3]
@ -121,9 +115,16 @@ class makeEnv(object):
return nextState, reward, done, loadDir, saveNow
def reset(self):
"""reset the environment and get the initial observations
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
self.env.reset()
nextState, reward, done, loadDir, saveNow = self.getSteps()
return nextState, reward, done, loadDir, saveNow
def render(self):
"""render the environment"""
self.env.render()
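A minimal driving loop for the wrapper (build path, ports and the random action choice are placeholders for illustration):

import numpy as np
import aimBotEnv

env = aimBotEnv.makeEnv(envPath="./Build/Aimbot-PPO", workerID=1, basePort=200)
state, reward, done, loadDir, saveNow = env.reset()

while not done:
    # actions = [discrete branch choices..., continuous values...]
    actions = list(np.random.randint(0, 2, env.DISCRETE_SIZE)) + list(
        np.random.uniform(-1.0, 1.0, env.CONTINUOUS_SIZE)
    )
    state, reward, done, loadDir, saveNow = env.step(actions)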

View File

@ -1,29 +0,0 @@
import numpy as np
class buffer(object):
def __init__(self):
self.states = []
self.actions = []
self.rewards = []
print("√√√√√Buffer Initialized Success√√√√√")
def clearBuffer(self):
self.states = []
self.actions = []
self.rewards = []
def getStates(self):
return np.asarray(self.states)
def getActions(self):
return np.asarray(self.actions)
def getRewards(self):
return np.asarray(self.rewards)
def saveState(self,state):
self.states.append(state)
def saveAction(self,action):
self.actions.append(action)
def saveReward(self,reward):
self.rewards.append(reward)
def saveBuffers(self,state,action,reward):
self.states.append(state)
self.actions.append(action)
self.rewards.append(reward)

View File

@ -1,356 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import aimBotEnv\n",
"import PPO\n",
"import buffer\n",
"import numpy as np\n",
"\n",
"import tensorflow as tf\n",
"import time\n",
"import datetime\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Attempts to allocate only the GPU memory needed for allocation\n",
"physical_devices = tf.config.list_physical_devices('GPU')\n",
"tf.config.experimental.set_memory_growth(physical_devices[0], True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Env\n",
"ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"\n",
"MAX_EP = 1000\n",
"EP_LENGTH = 100000\n",
"GAMMA = 0.99 # discount future reward (UP?)\n",
"EPSILON = 0.2 # clip Ratio range[1-EPSILON,1+EPSILON]\n",
"ACTOR_LR = 1e-5 # LR\n",
"CRITIC_LR = 2e-5 # LR\n",
"BATCH = 256 # learning step\n",
"ACTOR_EPOCH = 15 # epoch\n",
"CRITIC_EPOCH = 15 # epoch\n",
"ENTROPY_WHEIGHT = 0.001 # sigma's entropy in Actor loss\n",
"ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps\n",
"\n",
"\n",
"TRAIN = True\n",
"SAVE_DIR = \"PPO-Model/\" + datetime.datetime.now().strftime(\"%m%d%H%M\") + \"/\"\n",
"LOAD_DIR = None\n",
"\n",
"CTN_ACTION_RANGE = 10\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"No loadDir specified,Create a New Model\n",
"CONTINUOUS_SIZE 1\n",
"DISCRETE_SIZE 5\n",
"STATE_SIZE 30\n"
]
}
],
"source": [
"# initialize enviroment & buffer class\n",
"env = aimBotEnv.makeEnv(\n",
" envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT\n",
")\n",
"epBuffer = buffer.buffer()\n",
"\n",
"STATE_SIZE = env.STATE_SIZE\n",
"CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n",
"DISCRETE_SIZE = env.DISCRETE_SIZE\n",
"_, _, _, loadDir, _ = env.getSteps()\n",
"\n",
"# check load model or not\n",
"if np.any(loadDir == 0):\n",
" # create a new model\n",
" print(\"No loadDir specified,Create a New Model\")\n",
" LOAD_DIR = None\n",
"else:\n",
" # load model\n",
" loadDirDateSTR = str(int(loadDir[0]))\n",
" loadDirTimeSTR = str(int(loadDir[1]))\n",
" if len(loadDirDateSTR) != 8:\n",
" # fill lost 0 while converse float to string\n",
" for _ in range(8 - len(loadDirDateSTR)):\n",
" loadDirDateSTR = \"0\" + loadDirDateSTR\n",
" if len(loadDirTimeSTR) != 6:\n",
" # fill lost 0 while converse float to string\n",
" for _ in range(6 - len(loadDirTimeSTR)):\n",
" loadDirTimeSTR = \"0\" + loadDirTimeSTR\n",
" LOAD_DIR = \"PPO-Model/\" + loadDirDateSTR + \"/\" + loadDirTimeSTR\n",
" print(\"Load Model:\")\n",
" print(LOAD_DIR)\n",
"\n",
"print(\"CONTINUOUS_SIZE\", CONTINUOUS_SIZE)\n",
"print(\"DISCRETE_SIZE\", DISCRETE_SIZE)\n",
"print(\"STATE_SIZE\", STATE_SIZE)\n",
"\n",
"disActShape = [3, 3, 2]\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def actToKey(disAct1,disAct2,disAct3,conAct):\n",
" kW = 0\n",
" kS = 0\n",
" kA = 0\n",
" kD = 0\n",
" mouseShoot = 0\n",
" if disAct1 == 0:\n",
" kW = 0\n",
" kS = 1\n",
" elif disAct1 == 1:\n",
" kW = 0\n",
" kS = 0\n",
" elif disAct1 == 2:\n",
" kW = 1\n",
" kS = 0\n",
" if disAct2 == 0:\n",
" kA = 0\n",
" kD = 1\n",
" elif disAct2 == 1:\n",
" kA = 0\n",
" kD = 0\n",
" elif disAct2 == 2:\n",
" kA = 1\n",
" kD = 0\n",
" mouseShoot = disAct3\n",
" return kW,kS,kA,kD,mouseShoot,conAct"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"EP 0 START\n",
"√√√√√Buffer Initialized Success√√√√√\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\fromnumeric.py:3474: RuntimeWarning: Mean of empty slice.\n",
" return _methods._mean(a, axis=axis, dtype=dtype,\n",
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars\n",
" ret = ret.dtype.type(ret / rcount)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"A_Loss: 0.4477495511372884 C_Loss: 3.155759557088216\n",
"A_Loss: 0.14549287557601928 C_Loss: 0.5123071213563283\n",
"A_Loss: 0.055241942902406055 C_Loss: 0.13002794484297434\n",
"A_Loss: 0.057325509190559384 C_Loss: 0.11068039039770762\n",
"A_Loss: 0.04376962607105573 C_Loss: 0.03923700377345085\n"
]
}
],
"source": [
"bestScore = 200.0\n",
"stopTrainCounter = 0\n",
"\n",
"totalRewardHis = []\n",
"totalActorLossHis = []\n",
"totalCriticLossHis = []\n",
"epHis = []\n",
"maxTotalReward = -99999999999\n",
"\n",
"for ep in range(MAX_EP):\n",
" print(\"EP \", ep, \" START\")\n",
" # first time run game\n",
" s, _, _, _, _ = env.reset()\n",
" if ep == 0:\n",
" epBuffer = buffer.buffer()\n",
" s = s.reshape([STATE_SIZE])\n",
" agent = PPO.PPO(\n",
" stateSize=STATE_SIZE,\n",
" disActShape=disActShape,\n",
" conActSize=1,\n",
" conActRange=CTN_ACTION_RANGE,\n",
" criticLR=CRITIC_LR,\n",
" actorLR=ACTOR_LR,\n",
" gamma=GAMMA,\n",
" epsilon=EPSILON,\n",
" entropyWeight=ENTROPY_WHEIGHT,\n",
" saveDir=SAVE_DIR,\n",
" loadModelDir=LOAD_DIR,\n",
" )\n",
" step = 0\n",
" done = False\n",
" stopTrainCounter -= 1\n",
" epHis.append(ep)\n",
"\n",
" # reset total reward\n",
" epTotalReward = 0\n",
"\n",
" # Recorder list\n",
" epStepHis = []\n",
" epRewardHis = []\n",
" epActorLossHis = []\n",
" epCriticLossHis = []\n",
"\n",
" # save weight immediately?\n",
" saveNow = 0\n",
"\n",
" while not done:\n",
" step += 1\n",
" if (\n",
" step % ACTION_INTERVAL == 0\n",
" ): # take action every ACTION_INTERVAL steps\n",
" epStepHis.append(step)\n",
" (\n",
" disAct1,\n",
" disAct2,\n",
" disAct3,\n",
" conAct,\n",
" predictResult,\n",
" ) = agent.chooseAction(s)\n",
" kW, kS, kA, kD, mouseShoot, mouseMove = actToKey(\n",
" disAct1, disAct2, disAct3, conAct\n",
" )\n",
"\n",
" nextState, thisReward, done, _, saveNow = env.step(\n",
" discreteActions=np.array([[kW, kS, kA, kD, mouseShoot]]),\n",
" continuousActions=np.array([[mouseMove]]),\n",
" )\n",
"\n",
" epTotalReward += thisReward\n",
" epBuffer.saveBuffers(\n",
" s, [disAct1, disAct2, disAct3, conAct], thisReward\n",
" )\n",
" else:\n",
" disActs = np.array([[0, 0, 0, 0, 0]])\n",
" conActs = np.array([[0]])\n",
"\n",
" nextState, thisReward, done, _, saveNow = env.step(\n",
" discreteActions=disActs, continuousActions=conActs\n",
" )\n",
" epTotalReward += thisReward\n",
" nextState = nextState.reshape([STATE_SIZE])\n",
" s = nextState\n",
"\n",
" if done:\n",
" print(\"EP OVER!\")\n",
" if saveNow != 0:\n",
" print(\"SAVENOW!\")\n",
" saveNow = 0\n",
" agent.saveWeights()\n",
" # update PPO after Batch step or GameOver\n",
" if (step + 1) % BATCH == 0 or done:\n",
" bs = epBuffer.getStates()\n",
" ba = epBuffer.getActions()\n",
" br = epBuffer.getRewards()\n",
" epBuffer.clearBuffer()\n",
" if TRAIN:\n",
" epActorLoss, epCriticLoss = agent.trainCritcActor(\n",
" bs, ba, br, s, CRITIC_EPOCH, ACTOR_EPOCH\n",
" )\n",
" epActorLossHis.append(epActorLoss)\n",
" epCriticLossHis.append(epCriticLoss)\n",
" # update History Recorder\n",
" totalActorLossHis.append(np.mean(epActorLossHis))\n",
" totalCriticLossHis.append(np.mean(epCriticLossHis))\n",
" totalRewardHis.append(epTotalReward)\n",
"\n",
" if epTotalReward > maxTotalReward and epTotalReward != 0:\n",
" maxTotalReward = epTotalReward\n",
" agent.saveWeights(epTotalReward)\n",
" print(\"New Record! Save NN\", epTotalReward)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"aaa = 0\n",
"aaa = 1\n",
"aaa = 2\n",
"aaa = 3\n",
"aaa = 4\n",
"aaa = 5\n",
"aaa = 6\n",
"aaa = 7\n",
"aaa = 8\n",
"aaa = 9\n"
]
}
],
"source": [
"aaa = 0\n",
"while aaa<10:\n",
" print(\"aaa = \",aaa)\n",
" aaa+=1"
]
}
],
"metadata": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
},
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -1,25 +0,0 @@
import aimBotEnv
import PPO
ENV_PATH = './Build/Aimbot-PPO'
WORKER_ID = 100
MAX_EP = 1000
EP_LENGTH = 400
GAMMA = 0.99 # discount future reward (UP?)
EPSILON = 0.2 # clip Ratio range[1-EPSILON,1+EPSILON]
ACTOR_LR = 1e-5 # LR
CRITIC_LR = 2e-5 # LR
BATCH = 32 # learning step
ACTOR_EPOCH = 10 # epoch
CRITIC_EPOCH = 10 # epoch
ENTROPY_WHEIGHT = 0.01 # sigma's entropy in Actor loss
ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps
TRAIN = True
env = aimBotEnv.makeEnv(envPath = ENV_PATH,workerID = WORKER_ID)
STATE_SIZE = env.STATE_SIZE
CONTINUOUS_SIZE = env.CONTINUOUS_SIZE
DISCRETE_SIZE = env.DISCRETE_SIZE
CTN_ACTION_RANGE = 2

View File

@ -29,7 +29,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -73,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -118,7 +118,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -182,7 +182,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -205,7 +205,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -240,16 +240,16 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.]])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@ -259,28 +259,119 @@
"\n",
"a = np.array([10, 20, 30, 0])\n",
"\n",
"np.asarray([[0.]])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.5"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"\n",
"asd = [1,2,3,np.array([0.5]),np.array([0.5])]\n",
"\n",
"asd[3:]\n",
"len(asd)\n",
"\n",
"np.mean([1,2])"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.0\n",
"0.0\n"
]
}
],
"source": [
"import time\n",
"import pyautogui as pag\n",
"\n",
"from pynput.mouse import Button, Controller\n",
"\n",
"w = pag.size().width\n",
"h = pag.size().height\n",
"mouse = Controller()\n",
"\n",
"nowt = time.time()\n",
"\n",
"middletime = time.time() - nowt\n",
"print(middletime)\n",
"# print(nowPos-(w/2))\n",
"\n",
"print(time.time() - middletime - nowt)\n",
"while True:\n",
" x,_ = mouse.position\n",
" #print(mouse.press)\n",
" #print(mouse.position)\n",
" \n",
" mouse.position = (w / 2, h / 2)\n",
" time.sleep(1/60)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"import pyautogui as pag\n",
"\n",
"import mouse\n",
"\n",
"w = pag.size().width\n",
"h = pag.size().height\n",
"\n",
"nowt = time.time()\n",
"\n",
"middletime = time.time() - nowt\n",
"print(middletime)\n",
"# print(nowPos-(w/2))\n",
"\n",
"print(time.time() - middletime - nowt)\n",
"while True:\n",
" x = mouse.get_position()\n",
" print(x)\n",
" #print(mouse.position)\n",
" \n",
" mouse.move(w / 2, h / 2)\n",
" time.sleep(1/60)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import keyboard\n",
"\n",
"while True:\n",
" if keyboard.is_pressed(\"w\"):\n",
" print(\"w\")\n",
" elif keyboard.is_pressed(\"s\"):\n",
" print(\"s\")"
]
}
],