Update PPO class, add Python human control
Python: update the PPO class and add Python human (keyboard/mouse) control. Unity: add an FP/TP camera choose button.
parent de066f3a65
commit ae8a1ba8e2
8  Aimbot-PPO-MultiScene/Assets/Demonstrations.meta  Normal file
@@ -0,0 +1,8 @@
fileFormatVersion: 2
guid: d65d9ca7ae1253341b6790f3a23e3a11
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:
BIN  Aimbot-PPO-MultiScene/Assets/Demonstrations/AKMAgent.demo  Normal file
Binary file not shown.
@@ -0,0 +1,10 @@
fileFormatVersion: 2
guid: 39a127fc79ed92d4e88aec711f545d5f
ScriptedImporter:
  internalIDToNameTable: []
  externalObjects: {}
  serializedVersion: 2
  userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
  assetBundleName:
  assetBundleVariant:
  script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}
@@ -1 +1 @@
{"count":1,"self":33.6679968,"total":34.5046305,"children":{"InitializeActuators":{"count":2,"self":0.0010002,"total":0.0010002,"children":null},"InitializeSensors":{"count":2,"self":0.0010004,"total":0.0010004,"children":null},"AgentSendState":{"count":1489,"self":0.011503399999999999,"total":0.2010688,"children":{"CollectObservations":{"count":1489,"self":0.1780647,"total":0.1780647,"children":null},"WriteActionMask":{"count":1488,"self":0.0019993999999999997,"total":0.0019993999999999997,"children":null},"RequestDecision":{"count":1488,"self":0.009501299999999999,"total":0.009501299999999999,"children":null}}},"DecideAction":{"count":1488,"self":0.0117408,"total":0.0117408,"children":null},"AgentAct":{"count":1488,"self":0.6208231,"total":0.6208231,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1663089804","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-UCUNI -hubSessionId 4cf980b0-326c-11ed-87c2-a7333acffe7c -accessToken j61gZPw8-vc4ZH7TJMvrSAAPQLV9SK6U72z_dek2xhw00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.0","scene_name":"InGame","end_time_seconds":"1663089838"}}
{"count":1,"self":42.3855296,"total":42.4020608,"children":{"InitializeActuators":{"count":2,"self":0.0015155,"total":0.0015155,"children":null},"InitializeSensors":{"count":2,"self":0.0015017,"total":0.0015017,"children":null},"AgentSendState":{"count":1898,"self":0.0025031999999999997,"total":0.0025031999999999997,"children":null},"DecideAction":{"count":1898,"self":0.0070091999999999993,"total":0.0070091999999999993,"children":null},"AgentAct":{"count":1898,"self":0.0030023,"total":0.0030023,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1665414279","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-UCUNI -hubSessionId 39022900-48a5-11ed-b848-09be5949a456 -accessToken _47qt9I_MF3bhL7JS735Xdmfj8A4dGBOdRNKR0X2L_w00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.0","scene_name":"InGame","end_time_seconds":"1665414322"}}
@@ -1 +1 @@
{"count":1,"self":114.25904639999999,"total":114.62062499999999,"children":{"InitializeActuators":{"count":2,"self":0.0010000999999999999,"total":0.0010000999999999999,"children":null},"InitializeSensors":{"count":2,"self":0.0010002,"total":0.0010002,"children":null},"AgentSendState":{"count":1382,"self":0.0080028,"total":0.0195053,"children":{"CollectObservations":{"count":1382,"self":0.0070022999999999995,"total":0.0070022999999999995,"children":null},"WriteActionMask":{"count":1382,"self":0.0004994,"total":0.0004994,"children":null},"RequestDecision":{"count":1382,"self":0.0040008,"total":0.0040008,"children":null}}},"DecideAction":{"count":1382,"self":0.0110034,"total":0.0110034,"children":null},"AgentAct":{"count":1382,"self":0.3290731,"total":0.3290731,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1662500099","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-UCUNI -hubSessionId 209fdf30-2c1f-11ed-916f-33e85f4223cc -accessToken 78EBbrn-dg5kE__h3rNOqQVTDU3b1xUmmwWF1c5sFLc00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.0","scene_name":"Start","end_time_seconds":"1662500214"}}
{"count":1,"self":100.7007424,"total":102.0526476,"children":{"InitializeActuators":{"count":2,"self":0.0015004999999999999,"total":0.0015004999999999999,"children":null},"InitializeSensors":{"count":2,"self":0.0010015,"total":0.0010015,"children":null},"AgentSendState":{"count":2851,"self":0.0227973,"total":0.3594312,"children":{"CollectObservations":{"count":2851,"self":0.3230326,"total":0.3230326,"children":null},"WriteActionMask":{"count":2850,"self":0.0040877,"total":0.0040877,"children":null},"RequestDecision":{"count":2850,"self":0.0095135999999999988,"total":0.0095135999999999988,"children":null}}},"DecideAction":{"count":2850,"self":0.0184923,"total":0.0184923,"children":null},"AgentAct":{"count":2850,"self":0.971482,"total":0.971482,"children":null}},"gauges":{"AKMAgent.CumulativeReward":{"count":1,"max":0,"min":0,"runningAverage":0,"value":0,"weightedAverage":0}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1665340408","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-8AgJBC01I23iOtjIDvezn -hubSessionId a2bff0f0-47ee-11ed-98ba-e72fca9de6f1 -accessToken VHkJOvWIH11sBEzC18rl6YA9y6y2sRMQj2zrOyZdNeE00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.0","scene_name":"Start","end_time_seconds":"1665340510"}}
File diff suppressed because it is too large
@@ -21,6 +21,8 @@ public class AgentWithGun : Agent
     public Camera thisCam;
     public CharacterController PlayerController;
     public GameObject enemyPrefab;
+    public GameObject cameraChangerOBJ;
+

     [Header("Rewards")]
     [Tooltip("Nothing happened reward")]
@@ -76,10 +78,13 @@ public class AgentWithGun : Agent
     private string LoadDirTime;
     private float LoadDirDateF;
     private float loadDirTimeF;
+    public bool defaultTPCamera = true;
    private StartSeneData DataTransfer;
     private UIController UICon;
     private HistoryRecorder HistoryRec;
     private RaySensors rayScript;
+    private CameraChange camChanger;
+

     [System.NonSerialized] public float nonReward;
     [System.NonSerialized] public float shootReward;
@@ -118,6 +123,8 @@ public class AgentWithGun : Agent
         killRewardDefault = DataTransfer.killReward;
         winRewardDefault = DataTransfer.winReward;
         loseRewardDefault = DataTransfer.loseReward;
+        lockMouse = DataTransfer.lockMouse;
+        defaultTPCamera = DataTransfer.defaultTPCamera;

         // change Decision Period & Take Actions Between Decisions
         transform.GetComponent<DecisionRequester>().DecisionPeriod = DataTransfer.DecisionPeriod;
@@ -156,6 +163,7 @@ public class AgentWithGun : Agent
         UICon = transform.GetComponent<UIController>();
         HistoryRec = transform.GetComponent<HistoryRecorder>();
         rayScript = GetComponent<RaySensors>();
+        camChanger = cameraChangerOBJ.GetComponent<CameraChange>();

         // give default Reward to Reward value will be used.
         nonReward = nonRewardDefault;
@@ -167,6 +175,15 @@ public class AgentWithGun : Agent
         killReward = killRewardDefault;
         //initialize remainTime
         remainTime = (int)(timeLimit - Time.time + startTime);
+        // change default camera view
+        if (defaultTPCamera)
+        {
+            camChanger.ShowTPSView();
+        }
+        else
+        {
+            camChanger.ShowFPSView();
+        }
        }
    }

@@ -203,27 +220,9 @@ public class AgentWithGun : Agent

    // ------------ action handling ------------
    // moveAgent simulates Input.GetAxis-style movement
-    public void moveAgent(int kW, int kS, int kA, int kD)
+    public void moveAgent(int vertical, int horizontal)
    {
        Vector3 thisMovement;
-        int horizontal = 0;
-        int vertical = 0;
-        if (kW == 1 && kS != 1)
-        {
-            vertical = 1;
-        }
-        else if (kS == 1 && kW != 1)
-        {
-            vertical = -1;
-        }
-        if (kD == 1 && kA != 1)
-        {
-            horizontal = 1;
-        }
-        else if (kA == 1 && kD != 1)
-        {
-            horizontal = -1;
-        }

        if (horizontal != 0) // when a horizontal movement key is pressed
        {
@@ -506,7 +505,7 @@ public class AgentWithGun : Agent
        }
        if (lockMouse)
        {
-            Cursor.lockState = CursorLockMode.Locked; // 隐藏并且锁定鼠标
+            Cursor.lockState = CursorLockMode.Locked; // hide and lock the mouse
        }
        //iniCharts();
        thisAgentObj.name = thisAgentObj.GetInstanceID().ToString();
@@ -549,35 +548,26 @@ public class AgentWithGun : Agent
    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        // get inputs
-        int kW = actionBuffers.DiscreteActions[0];
-        int kS = actionBuffers.DiscreteActions[1];
-        int kA = actionBuffers.DiscreteActions[2];
-        int kD = actionBuffers.DiscreteActions[3];
-        int mouseShoot = actionBuffers.DiscreteActions[4];
+        int vertical = actionBuffers.DiscreteActions[0];
+        int horizontal = actionBuffers.DiscreteActions[1];
+        int mouseShoot = actionBuffers.DiscreteActions[2];
        float Mouse_X = actionBuffers.ContinuousActions[0];
-        //float Mouse_Y = actionBuffers.ContinuousActions[1];
-        //int timeLimitControl = (int)actionBuffers.ContinuousActions[2];
-        //float nonRewardIn = actionBuffers.ContinuousActions[1];
-        //float shootRewardIn = actionBuffers.ContinuousActions[2];
-        //float shootWithoutReadyRewardIn = actionBuffers.ContinuousActions[3];
-        //float hitRewardIn = actionBuffers.ContinuousActions[4];
-        //float winRewardIn = actionBuffers.ContinuousActions[5];
-        // loseRewardIn = actionBuffers.ContinuousActions[6];
-        //float killRewardIn = actionBuffers.ContinuousActions[7];
-        //Rewards Update
+        if (vertical == 2) vertical = -1;
+        if (horizontal == 2) horizontal = -1;
        remainTime = (int)(timeLimit - Time.time + startTime);

        // apply inputs
        shoot = mouseShoot;
-        HistoryRec.realTimeKeyCounter(kW, kS, kA, kD, shoot);
+        HistoryRec.realTimeKeyCounter(vertical, horizontal, shoot);
        (int kWCount, int kSCount, int kACount, int kDCount, int shootCount) = HistoryRec.getKeyCount();
        UICon.updateRemainTime(remainTime);
-        UICon.updateWASDKeyViewer(kW, kS, kA, kD);
+        UICon.updateRemainEnemy(enemyNum);
+        UICon.updateWASDKeyViewer(vertical, horizontal);
        UICon.updateKeyCounterChart(kWCount, kSCount, kACount, kDCount, shootCount);
        UICon.updateMouseMovementViewer(Mouse_X);
        UICon.updateRewardViewer(nonReward, shootReward, shootWithoutReadyReward, hitReward, winReward, loseReward, killReward);
        cameraControl(Mouse_X, 0);
-        moveAgent(kW, kS, kA, kD);
+        moveAgent(vertical, horizontal);
        float thisRoundReward = rewardCalculate();

        // check whether the episode has ended
@@ -628,37 +618,45 @@ public class AgentWithGun : Agent
        ActionSegment<float> continuousActions = actionsOut.ContinuousActions;
        ActionSegment<int> discreteActions = actionsOut.DiscreteActions;

-        int kW = 0;
-        int kS = 0;
-        int kA = 0;
-        int kD = 0;
-        if (Input.GetKey(KeyCode.W))
+        int vertical = 0;
+        int horizontal = 0;
+        if (Input.GetKey(KeyCode.W) && !Input.GetKey(KeyCode.S))
        {
-            kW = 1;
+            vertical = 1;
        }
-        if (Input.GetKey(KeyCode.S))
+        else if (Input.GetKey(KeyCode.S) && !Input.GetKey(KeyCode.W))
        {
-            kS = 1;
+            vertical = -1;
        }
-        if (Input.GetKey(KeyCode.A))
+        else
        {
-            kA = 1;
+            vertical = 0;
        }
-        if (Input.GetKey(KeyCode.D))
+        if (Input.GetKey(KeyCode.D) && !Input.GetKey(KeyCode.A))
        {
-            kD = 1;
+            horizontal = 1;
+        }
+        else if (Input.GetKey(KeyCode.A) && !Input.GetKey(KeyCode.D))
+        {
+            horizontal = -1;
+        }
+        else
+        {
+            horizontal = 0;
        }
-        discreteActions[0] = kW;
-        discreteActions[1] = kS;
-        discreteActions[2] = kA;
-        discreteActions[3] = kD;

        if (Input.GetMouseButton(0))
        {
            // Debug.Log("mousebuttonhit");
            shoot = 1;
        }
-        discreteActions[4] = shoot;
+        else
+        {
+            shoot = 0;
+        }
+        discreteActions[0] = vertical;
+        discreteActions[1] = horizontal;
+        discreteActions[2] = shoot;
        //^^^^^^^^^^^^^^^^^^^^^discrete-Control^^^^^^^^^^^^^^^^^^^^^^

        //vvvvvvvvvvvvvvvvvvvvvvvvvvvvvcontinuous-Controlvvvvvvvvvvvvvvvvvvvvvv
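After this refactor the agent exposes three discrete action branches (vertical, horizontal, shoot) plus one continuous mouse-X axis, and OnActionReceived decodes the value 2 as the negative direction. For reference, a minimal Python sketch of packing an action in that layout; pack_action is a hypothetical helper and is not part of the commit.

import numpy as np

def pack_action(w, s, a, d, shoot, mouse_x):
    # branch 0 = vertical, branch 1 = horizontal, branch 2 = shoot;
    # the value 2 is decoded by OnActionReceived as the negative direction.
    vertical = 1 if (w and not s) else 2 if (s and not w) else 0
    horizontal = 1 if (d and not a) else 2 if (a and not d) else 0
    discrete = np.array([[vertical, horizontal, int(shoot)]], dtype=np.int32)
    continuous = np.array([[mouse_x]], dtype=np.float32)
    return discrete, continuous

discrete, continuous = pack_action(w=True, s=False, a=False, d=True, shoot=False, mouse_x=3.5)
print(discrete, continuous)  # -> [[1 1 0]] [[3.5]]
# on the mlagents_envs side these two arrays would typically be wrapped as
# ActionTuple(continuous=continuous, discrete=discrete)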
@@ -31,24 +31,24 @@ public class HistoryRecorder : MonoBehaviour
    {
        EPTotalShootCount.Add(TotalShootCount);
    }
-    public void realTimeKeyCounter(int kW, int kS, int kA, int kD, int shoot)
+    public void realTimeKeyCounter(int vertical, int horizontal, int shoot)
    {
-        if (kW == 1)
+        if (vertical == 1)
        {
            realTimeWKeyCount += 1;
        }
-        if (kS == 1)
+        else if (vertical == -1)
        {
            realTimeSKeyCount += 1;
        }
-        if (kA == 1)
-        {
-            realTimeAKeyCount += 1;
-        }
-        if (kD == 1)
+        if (horizontal == 1)
        {
            realTimeDKeyCount += 1;
        }
+        else if (horizontal == -1)
+        {
+            realTimeAKeyCount += 1;
+        }
        if (shoot == 1)
        {
            realTimeShootCount += 1;
@@ -69,39 +69,37 @@ public class UIController : MonoBehaviour
    }

    //------------Key Viewer----------
-    public void updateWASDKeyViewer(int kW,int kS,int kA,int kD)
+    public void updateWASDKeyViewer(int vertical, int horizontal)
    {
-        if (kW == 1)
+        if (vertical == 1)
        {
            upText.color = Color.red;
+            downText.color = Color.black;
        }
-        else
-        {
-            upText.color = Color.black;
-        }
-        if (kS == 1)
+        else if (vertical == -1)
        {
            downText.color = Color.red;
+            upText.color = Color.black;
        }
        else
        {
            downText.color = Color.black;
+            upText.color = Color.black;
        }
-        if(kA == 1)
-        {
-            leftText.color = Color.red;
-        }
-        else
-        {
-            leftText.color = Color.black;
-        }
-        if( kD == 1)
+        if (horizontal == 1)
        {
            rightText.color = Color.red;
+            leftText.color = Color.black;
+        }
+        else if (horizontal == -1)
+        {
+            leftText.color = Color.red;
+            rightText.color = Color.black;
        }
        else
        {
-            rightText.color = Color.black;
+            downText.color = Color.black;
+            upText.color = Color.black;
        }
    }
    public void updateShootKeyViewer(int shoot, bool isGunReady)
@@ -0,0 +1,27 @@
using System.Collections;
using System.Collections.Generic;
using UnityEngine;

public class gameFlowController : MonoBehaviour
{
    public GameObject Agent;
    AgentWithGun agentWithGun;
    // Start is called before the first frame update
    void Start()
    {
        agentWithGun = Agent.GetComponent<AgentWithGun>();
    }

    // Update is called once per frame
    void Update()
    {
        if (Input.GetKey(KeyCode.Escape))
        {
            Application.Quit();
        }
        if (Input.GetKey(KeyCode.L))
        {
            agentWithGun.lockMouse = !agentWithGun.lockMouse;
        }
    }
}
@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 9a8fb4d12d4b8fc4784f3e142e7fdcf8
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
@@ -19,6 +19,21 @@ public class EnvArgsChanger : MonoBehaviour
    public Text DecisionPeriodDataText;
    public Toggle TakeActionsBetweenDecisionsToggle;

+    [Header("Lock Mouse")]
+    public Toggle LockMouseToggle;
+
+    [Header("Default Camera")]
+    public Toggle FPToggle;
+    public Text FPText;
+    public Toggle TPToggle;
+    public Text TPText;
+
+    private StartSeneData startSeneData;
+    private void Start()
+    {
+        startSeneData = DataTransfer.GetComponent<StartSeneData>();
+    }
+
    public void onEnemynumValueChanged()
    {
@@ -30,7 +45,7 @@ public class EnvArgsChanger : MonoBehaviour
        else
        {
            EnemyNumText.color = Color.yellow;
-            DataTransfer.GetComponent<StartSeneData>().EnemyNum = Math.Abs(int.Parse(EnemyNumInput.GetComponent<InputField>().text));
+            startSeneData.EnemyNum = Math.Abs(int.Parse(EnemyNumInput.GetComponent<InputField>().text));
        }
    }

@@ -44,19 +59,48 @@ public class EnvArgsChanger : MonoBehaviour
        else
        {
            TimeLimText.color = Color.yellow;
-            DataTransfer.GetComponent<StartSeneData>().Timelim = Math.Abs(int.Parse(TimelimInput.GetComponent<InputField>().text));
+            startSeneData.Timelim = Math.Abs(int.Parse(TimelimInput.GetComponent<InputField>().text));
        }
    }

    public void onDPSlideValueChanged()
    {
        // DecisionPeriod(DP) value Control
-        DataTransfer.GetComponent<StartSeneData>().DecisionPeriod = (int)(DecisionPeriodSlide.GetComponent<Slider>().value);
-        DecisionPeriodDataText.text = DataTransfer.GetComponent<StartSeneData>().DecisionPeriod.ToString();
+        startSeneData.DecisionPeriod = (int)(DecisionPeriodSlide.GetComponent<Slider>().value);
+        DecisionPeriodDataText.text = startSeneData.DecisionPeriod.ToString();
    }
    public void onABDToggleChanged()
    {
        // Actions Between Decisions(ABD) Toggle Control
-        DataTransfer.GetComponent<StartSeneData>().ActionsBetweenDecisions = TakeActionsBetweenDecisionsToggle.isOn;
+        startSeneData.ActionsBetweenDecisions = TakeActionsBetweenDecisionsToggle.isOn;
+    }
+    public void onLockMouseToggleChanged()
+    {
+        // lock mouse or not
+        startSeneData.lockMouse = LockMouseToggle.isOn;
+    }
+    public void onTPCamToggleChanged()
+    {
+        startSeneData.defaultTPCamera = true;
+
+        FPToggle.interactable = true;
+        FPToggle.SetIsOnWithoutNotify(false);
+        FPText.color = Color.gray;
+
+        TPToggle.SetIsOnWithoutNotify(true);
+        TPToggle.interactable = false;
+        TPText.color = Color.green;
+    }
+    public void onFPCameToggleChanged()
+    {
+        startSeneData.defaultTPCamera = false;
+
+        TPToggle.interactable = true;
+        TPToggle.SetIsOnWithoutNotify(false);
+        TPText.color = Color.gray;
+
+        FPToggle.SetIsOnWithoutNotify(true);
+        FPToggle.interactable = false;
+        FPText.color = Color.green;
    }
}
@@ -13,6 +13,8 @@ public class StartSeneData : MonoBehaviour
    public float killRewardDefault = 10.0f;
    public float winRewardDefault = 20.0f;
    public float loseRewardDefault = -10.0f;
+    public bool lockMouse = false;
+    public bool defaultTPCamera = true;

    // LoadDir
    [System.NonSerialized] public string LoadDirDate = "0";
90  Aimbot-PPO-Python/DemoRecorder.ipynb  Normal file
@@ -0,0 +1,90 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "√√√√√Enviroment Initialized Success√√√√√\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "import aimBotEnv\n",
    "from HumanAction import HumanActions\n",
    "\n",
    "# Env\n",
    "ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n",
    "WORKER_ID = 1\n",
    "BASE_PORT = 200\n",
    "\n",
    "MOUSEDISCOUNT = 8.0\n",
    "MAX_EP = 10000000\n",
    "\n",
    "env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "ename": "UnityCommunicatorStoppedException",
     "evalue": "Communicator has exited.",
     "output_type": "error",
     "traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mUnityCommunicatorStoppedException\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_37248/645561173.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mactions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdemoAct\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetHumanActions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32mc:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, actions, behaviorName, trackedAgent)\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[1;31m# take action to env\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 73\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_actions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehavior_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mthisActionTuple\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 74\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 75\u001b[0m \u001b[1;31m# get nextState & reward & done after this action\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 76\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloadDir\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msaveNow\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetSteps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrackedAgent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\timers.py\u001b[0m in \u001b[0;36mwrapped\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 304\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mhierarchical_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__qualname__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 305\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 306\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 307\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapped\u001b[0m \u001b[1;31m# type: ignore\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\environment.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 333\u001b[0m \u001b[0moutputs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_communicator\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexchange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstep_input\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_poll_process\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 334\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0moutputs\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 335\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mUnityCommunicatorStoppedException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Communicator has exited.\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 336\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_update_behavior_specs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 337\u001b[0m \u001b[0mrl_output\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0moutputs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrl_output\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mUnityCommunicatorStoppedException\u001b[0m: Communicator has exited."
     ]
    }
   ],
   "source": [
    "done = False\n",
    "env.reset()\n",
    "demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n",
    "for ep in range(MAX_EP):\n",
    "    while not done:\n",
    "        actions = demoAct.getHumanActions()\n",
    "        env.step(actions=actions)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.7 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
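The second cell above ends with UnityCommunicatorStoppedException once the Unity build is closed. A slightly more defensive version of the recording loop, as a sketch only: it assumes aimBotEnv.makeEnv and HumanActions keep the signatures visible above and that env.step returns a (nextState, reward, done, ...) tuple, which is not confirmed by this diff.

import aimBotEnv
from HumanAction import HumanActions
from mlagents_envs.exception import UnityCommunicatorStoppedException

env = aimBotEnv.makeEnv(envPath="./Build-CloseEnemyCut/Aimbot-PPO", workerID=1, basePort=200)
demoAct = HumanActions(mouseDiscount=8.0)

try:
    while True:
        env.reset()
        done = False
        while not done:
            actions = demoAct.getHumanActions()
            done = env.step(actions=actions)[2]  # assumed (nextState, reward, done, ...) ordering
except UnityCommunicatorStoppedException:
    # raised when the Unity build window is closed, as in the cell output above
    print("Unity environment closed, recording stopped.")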
51  Aimbot-PPO-Python/HumanAction.py  Normal file
@@ -0,0 +1,51 @@
import keyboard
import mouse


class HumanActions:
    def __init__(self, mouseDiscount: float = 10, screenW: int = 1920, screenH: int = 1080):
        def multiPressed():
            pass

        keyboard.add_hotkey("w+a", multiPressed)
        keyboard.add_hotkey("w+d", multiPressed)
        keyboard.add_hotkey("s+a", multiPressed)
        keyboard.add_hotkey("s+d", multiPressed)
        self.screenW = screenW
        self.screenH = screenH
        self.MOUSEDISCOUNT = mouseDiscount

    def getHumanActions(self):
        x, _ = mouse.get_position()
        xMovement = (x - self.screenW / 2) / self.MOUSEDISCOUNT

        ws = 0
        ad = 0
        click = 0
        if keyboard.is_pressed("w"):
            ws = 1
        elif keyboard.is_pressed("s"):
            ws = 2
        if keyboard.is_pressed("d"):
            ad = 1
        elif keyboard.is_pressed("a"):
            ad = 2
        if keyboard.is_pressed("w+d"):
            ws = 1
            ad = 1
        elif keyboard.is_pressed("w+a"):
            ws = 1
            ad = 2
        elif keyboard.is_pressed("s+d"):
            ws = 2
            ad = 1
        elif keyboard.is_pressed("s+a"):
            ws = 2
            ad = 2
        if mouse.is_pressed(button="left"):
            click = 1

        actions = [ws, ad, click, [xMovement]]

        mouse.move(self.screenW / 2, self.screenH / 2)
        return actions
1147  Aimbot-PPO-Python/PPO-mian.ipynb  Normal file
File diff suppressed because it is too large
@@ -1,108 +1,213 @@
 import tensorflow as tf
+from tensorflow.python.ops.numpy_ops import ndarray
 import tensorflow_probability as tfp
 import numpy as np
+import time
 import math
-import copy
 import datetime
-import os
+from PPOConfig import PPOConfig

 from tensorflow import keras
 from tensorflow.keras import layers
 from tensorflow.keras import optimizers
-from keras_radam import RAdam
+
+EPS = 1e-10
+

 class PPO(object):
-    """Create PPO Agent
-    """
-    def __init__(self, stateSize, disActShape, conActSize, conActRange, criticLR, actorLR, gamma, epsilon, entropyWeight, saveDir, loadModelDir):
-        # check disActShape is correct(greater than 1)
-        try:
-            if np.any(np.array(disActShape) <= 1):
-                raise ValueError("disActShape error,disActShape should greater than 1 but get",disActShape)
-        except ValueError as e:
-            raise
+    def __init__(
+        self,
+        stateSize: int,
+        disActShape: list,
+        conActSize: int,
+        conActRange: float,
+        PPOConfig: PPOConfig,
+    ):
+        """initialize PPO
+
+        Args:
+            stateSize (int): enviroment state size
+            disActShape (numpy): discrete Action shape.
+                just like [3,2],means 2 type of dis actions,each act include 3 and 2 types
+                if no discrete action output then use [0].
+            conActSize (int): continuous Action Size. if no continuous action output then use 0.
+            conActRange (float): continuous action range. -conActRange to +conActRange
+            PPOConfig (PPOConfig): PPO configuration
+        """
+        # check use dis action or not.
+        if disActShape == [0]:
+            # non dis action output
+            self.disActSize = 0
+            self.disOutputSize = 0
+        else:
+            # make sure disActShape greater than 1
+            try:
+                if np.any(np.array(disActShape) <= 1):
+                    raise ValueError(
+                        "disActShape error,disActShape should greater than 1 but get", disActShape
+                    )
+            except ValueError:
+                raise
+            self.disActSize = len(disActShape)
+            self.disOutputSize = sum(disActShape)

         self.stateSize = stateSize
-        # self.actionSize = actionSize
-        self.disActShape = disActShape  # shape of discrete action output. like [3,3,2]
-        self.disActSize = len(disActShape)
+        self.disActShape = disActShape
         self.conActSize = conActSize
         self.conActRange = conActRange
-        self.criticLR = criticLR
-        self.actorLR = actorLR
-        self.GAMMA = gamma
-        self.EPSILON = epsilon
-        self.saveDir = saveDir
-        self.entropyWeight = entropyWeight
-
-        self.disOutputSize = sum(disActShape)
-        self.conOutputSize = conActSize * 2
+        self.muSigSize = 2
+        self.conOutputSize = conActSize * self.muSigSize

+        # config
+        self.NNShape = PPOConfig.NNShape
+        self.criticLR = PPOConfig.criticLR
+        self.actorLR = PPOConfig.actorLR
+        self.gamma = PPOConfig.gamma
+        self.lmbda = PPOConfig.lmbda
+        self.clipRange = PPOConfig.clipRange
+        self.entropyWeight = PPOConfig.entropyWeight
+        self.trainEpochs = PPOConfig.trainEpochs
+        self.saveDir = PPOConfig.saveDir
+        self.loadModelDir = PPOConfig.loadModelDir
+        print("---------thisPPO Params---------")
+        print("self.stateSize = ", self.stateSize)
+        print("self.disActShape = ", self.disActShape)
+        print("self.disActSize", self.disActSize)
+        print("self.disOutputSize", self.disOutputSize)
+        print("self.conActSize = ", self.conActSize)
+        print("self.conActRange = ", self.conActRange)
+        print("self.conOutputSize = ", self.conOutputSize)
+
+        # config
+        print("---------thisPPO config---------")
+        print("self.NNShape = ", self.NNShape)
+        print("self.criticLR = ", self.criticLR)
+        print("self.actorLR = ", self.actorLR)
+        print("self.gamma = ", self.gamma)
+        print("self.lmbda = ", self.lmbda)
+        print("self.clipRange = ", self.clipRange)
+        print("self.entropyWeight = ", self.entropyWeight)
+        print("self.trainEpochs = ", self.trainEpochs)
+        print("self.saveDir = ", self.saveDir)
+        print("self.loadModelDir = ", self.loadModelDir)

-        if loadModelDir == None:
+        # load NN or not
+        if self.loadModelDir is None:
             # critc NN
             self.critic = self.buildCriticNet(self.stateSize, 1, compileModel=True)
             # actor NN
-            self.actor = self.buildActorNet(self.stateSize, self.conActRange, compileModel = True)
+            self.actor = self.buildActorNet(self.stateSize, compileModel=True)
+            print("---------Actor Model Create Success---------")
+            self.actor.summary()
+            print("---------Critic Model Create Success---------")
+            self.critic.summary()
         else:
             # critc NN
             self.critic = self.buildCriticNet(self.stateSize, 1, compileModel=True)
             # actor NN
-            self.actor = self.buildActorNet(self.stateSize, self.conActRange, compileModel=True)
+            self.actor = self.buildActorNet(self.stateSize, compileModel=True)
             # load weight to Critic&Actor NN
-            self.loadWeightToModels(loadModelDir)
+            self.loadWeightToModels(self.loadModelDir)
+            print("---------Actor Model Load Success---------")
+            self.actor.summary()
+            print("---------Critic Model Load Success---------")
+            self.critic.summary()

     # Build Net
-    def buildActorNet(self, inputSize, continuousActionRange, compileModel):
+    def buildActorNet(self, inputSize: int, compileModel: bool):
         """build Actor Nueral Net and compile.Output:[disAct1,disAct2,disAct3,mu,sigma]

         Args:
             inputSize (int): InputLayer Nueral size.
-            continuousActionRange (foat): continuous Action's max Range.
+            compileModel (bool): compile Model or not.

         Returns:
             keras.Model: return Actor NN
         """
-        stateInput = layers.Input(shape=(inputSize,), name='stateInput')
-        dense0 = layers.Dense(500, activation='relu', name='dense0',)(stateInput)
-        dense1 = layers.Dense(200, activation='relu', name='dense1',)(dense0)
-        dense2 = layers.Dense(100, activation='relu', name='dense2')(dense1)
+        # -----------Input Layers-----------
+        stateInput = layers.Input(shape=(inputSize,), name="stateInput")

-        disAct1 = layers.Dense(3, activation='softmax', name='WSAction')(dense2)  # WS
-        disAct2 = layers.Dense(3, activation='softmax', name='ADAction')(dense2)  # AD
-        disAct3 = layers.Dense(2, activation='softmax', name='ShootAction')(dense2)  # Mouse shoot
-        mu = continuousActionRange * layers.Dense(1, activation='tanh', name='muOut')(dense2)  # mu, i.e. the normal distribution's mean
-        sigma = 1e-8 + layers.Dense(1, activation='softplus', name='sigmaOut')(dense2)  # sigma, i.e. the normal distribution's scale
-        # musig = layers.concatenate([mu,sigma],name = 'musig')
-        totalOut = layers.concatenate(
-            [disAct1, disAct2, disAct3, mu, sigma], name='totalOut')  # package
+        # -------Intermediate layers--------
+        interLayers = []
+        interLayersIndex = 0
+        for neuralUnit in self.NNShape:
+            thisLayerName = "dense" + str(interLayersIndex)
+            if interLayersIndex == 0:
+                interLayers.append(
+                    layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(stateInput)
+                )
+            else:
+                interLayers.append(
+                    layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(interLayers[-1])
+                )
+            interLayersIndex += 1
+
+        # ----------Output Layers-----------
+        outputLayersList = []
+        if self.disActSize != 0:
+            # while NN have discrete action output.
+            disActIndex = 0
+            for thisDisActDepth in self.disActShape:
+                thisDisActName = "disAct" + str(disActIndex)
+                outputLayersList.append(
+                    layers.Dense(thisDisActDepth, activation="softmax", name=thisDisActName)(
+                        interLayers[-1]
+                    )
+                )
+                disActIndex += 1
+        if self.conActSize != 0:
+            # while NN have continuous action output.
+            mu = tf.multiply(
+                layers.Dense(1, activation="tanh", name="muOut")(interLayers[-1]), self.conActRange
+            )  # mu, the location parameter of the normal distribution
+            sigma = tf.add(
+                layers.Dense(1, activation="softplus", name="sigmaOut")(interLayers[-1]), EPS
+            )  # sigma, the scale parameter of the normal distribution
+            outputLayersList.append(mu)
+            outputLayersList.append(sigma)
+        totalOut = layers.concatenate(outputLayersList, name="totalOut")  # package

+        # ----------Model Compile-----------
         model = keras.Model(inputs=stateInput, outputs=totalOut)
-        #actorOPT = optimizers.Adam(learning_rate = self.actorLR)
-        if compileModel:
-            actorOPT = RAdam(self.actorLR)
+        if compileModel:  # Compile Model
+            actorOPT = optimizers.Adam(learning_rate=self.actorLR)
             model.compile(optimizer=actorOPT, loss=self.aLoss())
         return model

-    def buildCriticNet(self, inputSize, outputSize, compileModel):
+    def buildCriticNet(self, inputSize: int, outputSize: int, compileModel: bool):
         """build Critic Nueral Net and compile.Output:[Q]

         Args:
-            inputSize (int): InputLayer Neural Size
-            outputSize (float): Q size
+            inputSize (int): input size
+            outputSize (int): output size
+            compileModel (bool): compile Model or not.

         Returns:
             keras.Model: return Critic NN
         """
-        stateInput = keras.Input(shape=(inputSize,))
-        dense0 = layers.Dense(500, activation='relu',
-                              name='dense0',)(stateInput)
-        dense1 = layers.Dense(200, activation='relu')(dense0)
-        dense2 = layers.Dense(100, activation='relu')(dense1)
-        output = layers.Dense(outputSize)(dense2)
+        # -----------Input Layers-----------
+        stateInput = keras.Input(shape=(inputSize,), name="stateInput")
+
+        # -------Intermediate layers--------
+        interLayers = []
+        interLayersIndex = 0
+        for neuralUnit in self.NNShape:
+            thisLayerName = "dense" + str(interLayersIndex)
+            if interLayersIndex == 0:
+                interLayers.append(
+                    layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(stateInput)
+                )
+            else:
+                interLayers.append(
+                    layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(interLayers[-1])
+                )
+            interLayersIndex += 1
+
+        # ----------Output Layers-----------
+        output = layers.Dense(outputSize, activation=None)(interLayers[-1])
+
+        # ----------Model Compile-----------
         model = keras.Model(inputs=stateInput, outputs=output)
         if compileModel:
             criticOPT = optimizers.Adam(learning_rate=self.criticLR)
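The constructor now reads every hyper-parameter from a PPOConfig object (NNShape, criticLR, actorLR, gamma, lmbda, clipRange, entropyWeight, trainEpochs, saveDir, loadModelDir). PPOConfig itself is not shown in this commit, so the construction sketch below uses a stand-in dataclass with those attributes; all numeric values are placeholders, only NNShape mirrors the old hard-coded 500/200/100 layers and disActShape [3, 3, 2] mirrors the old WS/AD/Shoot heads.

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class StandInPPOConfig:  # hypothetical stand-in for the real PPOConfig class
    NNShape: List[int] = field(default_factory=lambda: [500, 200, 100])
    criticLR: float = 1e-3
    actorLR: float = 1e-4
    gamma: float = 0.99
    lmbda: float = 0.95
    clipRange: float = 0.2
    entropyWeight: float = 0.01
    trainEpochs: int = 5
    saveDir: str = "./PPO-Model/"
    loadModelDir: Optional[str] = None

cfg = StandInPPOConfig()
print(cfg)
# agent = PPO(stateSize=..., disActShape=[3, 3, 2], conActSize=1, conActRange=10, PPOConfig=cfg)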
@@ -110,36 +215,50 @@ class PPO(object):
         return model

     # loss Function
+    # critic loss
     def cLoss(self):
-        """Critic Loss function
-        """
+        """Critic Loss function"""
+
         def loss(y_true, y_pred):
             # y_true: discountedR
             # y_pred: critcV = model.predict(states)
-            advantage = y_true - y_pred  # TD error
-            loss = tf.reduce_mean(tf.square(advantage))
+
+            adv = y_true - y_pred  # TD error
+            loss = tf.reduce_mean(tf.square(adv))
             return loss
+
         return loss

+    # actor loss
     def aLoss(self):
-        def getDiscreteALoss(nowProbs,oldProbs,advantage):
+        """Actor Loss function"""
+
+        def getDiscreteALoss(nowProbs, oldProbs, disOneHotAct, actShape, advantage):
             """get Discrete Action Loss

             Args:
-                nowProbs (tf.constant): (length,actionSize)
-                oldProbs (tf.constant): (length,actionSize)
+                nowProbs (tf.constant): (length,actionProbSize)
+                oldProbs (tf.constant): (length,actionProbSize)
                 advantage (tf.constant): (length,)

             Returns:
                 tf.constant: (length,)
             """
-            entropy = tf.reduce_mean(tf.math.multiply(nowProbs, tf.math.log(nowProbs + 1e-6)))
-            ratio = tf.math.divide(nowProbs, oldProbs + 1e-6)
-            value = tf.math.multiply(ratio, tf.expand_dims(advantage, axis=1))
-            clipRatio = tf.clip_by_value(ratio, 1. - self.EPSILON, 1. + self.EPSILON)
-            clipValue = tf.math.multiply(clipRatio, tf.expand_dims(advantage, axis=1))
-            loss = -tf.reduce_mean(tf.math.minimum(value, clipValue)) + self.entropyWeight * entropy
+            entropy = tf.negative(
+                tf.reduce_mean(tf.math.multiply(nowProbs, tf.math.log(nowProbs + EPS)))
+            )
+            nowSingleProbs = tf.reduce_mean(tf.multiply(nowProbs, disOneHotAct), axis=1)
+            nowSingleProbs = tf.multiply(nowSingleProbs, actShape)
+            oldSingleProbs = tf.reduce_mean(tf.multiply(oldProbs, disOneHotAct), axis=1)
+            oldSingleProbs = tf.multiply(oldSingleProbs, actShape)
+            ratio = tf.math.divide(nowSingleProbs, oldSingleProbs + EPS)
+            value = tf.math.multiply(ratio, advantage)
+            clipRatio = tf.clip_by_value(ratio, 1.0 - self.clipRange, 1.0 + self.clipRange)
+            clipValue = tf.math.multiply(clipRatio, advantage)
+            loss = tf.math.negative(
+                tf.reduce_mean(tf.math.minimum(value, clipValue))
+                - tf.multiply(self.entropyWeight, entropy)
+            )
             return loss

         def getContinuousALoss(musig, actions, oldProbs, advantage):
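getDiscreteALoss now selects the probability of the action actually taken (the reduce_mean times actShape pair is just a one-hot masked sum) and applies the clipped PPO ratio to it. The numpy sketch below reproduces that computation with made-up numbers to show the shapes involved; clipRange and entropyWeight here are placeholders, not values taken from PPOConfig.

import numpy as np

EPS = 1e-10
clip_range = 0.2        # placeholder for PPOConfig.clipRange
entropy_weight = 0.01   # placeholder for PPOConfig.entropyWeight

now_probs = np.array([[0.2, 0.5, 0.3]])     # current policy output for one sample
old_probs = np.array([[0.25, 0.45, 0.30]])  # policy output when the action was taken
one_hot_act = np.array([[0.0, 1.0, 0.0]])   # action actually taken
advantage = np.array([[1.5]])

entropy = -np.mean(now_probs * np.log(now_probs + EPS))
now_p = np.sum(now_probs * one_hot_act, axis=1, keepdims=True)  # same as reduce_mean * actShape
old_p = np.sum(old_probs * one_hot_act, axis=1, keepdims=True)
ratio = now_p / (old_p + EPS)
clipped = np.clip(ratio, 1.0 - clip_range, 1.0 + clip_range)
loss = -(np.mean(np.minimum(ratio * advantage, clipped * advantage)) - entropy_weight * entropy)
print(loss)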
@ -159,103 +278,286 @@ class PPO(object):
|
|||||||
dist = tfp.distributions.Normal(mu, sigma)
|
dist = tfp.distributions.Normal(mu, sigma)
|
||||||
|
|
||||||
nowProbs = dist.prob(actions)
|
nowProbs = dist.prob(actions)
|
||||||
ratio = tf.math.divide(nowProbs,oldProbs+1e-6)
|
|
||||||
entropy = tf.reduce_mean(dist.entropy())
|
entropy = tf.reduce_mean(dist.entropy())
|
||||||
|
|
||||||
value = tf.math.multiply(ratio,tf.expand_dims(advantage,axis = 1))
|
ratio = tf.math.divide(nowProbs, oldProbs + EPS)
|
||||||
clipValue = tf.clip_by_value(ratio,1. - self.EPSILON,1.+self.EPSILON) * advantage
|
value = tf.math.multiply(ratio, advantage)
|
||||||
loss = -tf.reduce_mean(tf.math.minimum(value,clipValue)) + self.entropyWeight * entropy
|
clipRatio = tf.clip_by_value(ratio, 1.0 - self.clipRange, 1.0 + self.clipRange)
|
||||||
|
clipValue = tf.math.multiply(clipRatio, advantage)
|
||||||
|
loss = tf.negative(
|
||||||
|
tf.reduce_mean(tf.math.minimum(value, clipValue))
|
||||||
|
- tf.multiply(self.entropyWeight, entropy)
|
||||||
|
)
|
||||||
return loss
|
return loss
|
||||||
|
|
||||||
def loss(y_true, y_pred):
|
def loss(y_true, y_pred):
|
||||||
# y_true: [[disAct1, disAct2, disAct3, mu, sigma]]
|
# y_true: [[disActProb..., conActProbs..., disOneHotActs..., conAct..., advantage]]
|
||||||
# y_pred: muSigma = self.actor(state) =
|
# y_pred: [[disActProb..., mu, sigma...]]
|
||||||
# [[disAct1, disAct2, disAct3, mu, sigma]]
|
totalALoss = 0
|
||||||
oldDisProbs = y_true[:,0:self.disOutputSize]
|
|
||||||
oldConMusigs = y_true[:,self.disOutputSize:self.disOutputSize+self.conActSize]
|
|
||||||
conActions = y_true[:,self.disOutputSize+self.conActSize:self.disOutputSize+(self.conActSize*2)]
|
|
||||||
advantage = y_true[:,-1]
|
|
||||||
|
|
||||||
nowDisProbs = y_pred[:,0:self.disOutputSize] # [disAct1, disAct2, disAct3]
|
|
||||||
nowConMusigs = y_pred[:,self.disOutputSize:] #[musig1,musig2]
|
|
||||||
|
|
||||||
totalALoss = tf.constant([0.])
|
|
||||||
totalActionNum = 0
|
totalActionNum = 0
|
||||||
|
advantage = tf.expand_dims(y_true[:, -1], axis=1)
|
||||||
|
|
||||||
# for nowProb,oldProb in zip(tf.transpose(nowDisProbs,perm=[1,0,2]),tf.transpose(oldDisProbs,perm=[1,0,2])):
|
if self.disActSize != 0:
|
||||||
|
# while NN have discrete action output.
|
||||||
|
oldDisProbs = y_true[:, 0 : self.disOutputSize]
|
||||||
|
nowDisProbs = y_pred[:, 0 : self.disOutputSize] # [disAct1, disAct2, disAct3]
|
||||||
|
disOneHotActs = y_true[
|
||||||
|
:,
|
||||||
|
self.disOutputSize
|
||||||
|
+ self.conActSize : self.disOutputSize
|
||||||
|
+ self.conActSize
|
||||||
|
+ self.disOutputSize,
|
||||||
|
]
|
||||||
lastDisActShape = 0
|
lastDisActShape = 0
|
||||||
for shape in self.disActShape:
|
for thisShape in self.disActShape:
|
||||||
thisNowDisProbs = nowDisProbs[:,lastDisActShape:lastDisActShape+shape]
|
thisNowDisProbs = nowDisProbs[:, lastDisActShape : lastDisActShape + thisShape]
|
||||||
thisOldDisProbs = oldDisProbs[:,lastDisActShape:lastDisActShape+shape]
|
thisOldDisProbs = oldDisProbs[:, lastDisActShape : lastDisActShape + thisShape]
|
||||||
discreteALoss = getDiscreteALoss(thisNowDisProbs,thisOldDisProbs,advantage)
|
thisDisOneHotActs = disOneHotActs[
|
||||||
lastDisActShape += shape
|
:, lastDisActShape : lastDisActShape + thisShape
|
||||||
|
]
|
||||||
|
discreteALoss = getDiscreteALoss(
|
||||||
|
thisNowDisProbs, thisOldDisProbs, thisDisOneHotActs, thisShape, advantage
|
||||||
|
)
|
||||||
|
lastDisActShape += thisShape
|
||||||
totalALoss += discreteALoss
|
totalALoss += discreteALoss
|
||||||
totalActionNum += 1
|
totalActionNum += 1.0
|
||||||
# for nowConMusig,conAction,oldPiProb in zip(tf.transpose(nowConMusigs,perm=[1,0,2]),conActions,oldPiProbs):
|
if self.conActSize != 0:
|
||||||
|
# while NN have continuous action output.
|
||||||
|
oldConProbs = y_true[:, self.disOutputSize : self.disOutputSize + self.conActSize]
|
||||||
|
conActions = y_true[
|
||||||
|
:,
|
||||||
|
self.disOutputSize
|
||||||
|
+ self.conActSize : self.disOutputSize
|
||||||
|
+ self.conActSize
|
||||||
|
+ self.conActSize,
|
||||||
|
]
|
||||||
|
nowConMusigs = y_pred[:, self.disOutputSize :] # [musig1,musig2]
|
||||||
lastConAct = 0
|
lastConAct = 0
|
||||||
for act in range(self.conActSize):
|
for conAct in range(self.conActSize):
|
||||||
thisNowConMusig = nowConMusigs[:,lastConAct:lastConAct+((act+1)*2)]
|
thisNowConMusig = nowConMusigs[:, lastConAct : lastConAct + self.muSigSize]
|
||||||
thisOldConMusig = oldConMusigs[:,lastConAct:lastConAct+((act+1)*2)]
|
thisOldConProb = oldConProbs[:, conAct : conAct + 1]
|
||||||
thisConAction = conActions[:,act]
|
thisConAction = conActions[:, conAct]
|
||||||
continuousAloss = getContinuousALoss(thisNowConMusig,thisConAction,thisOldConMusig,advantage)
|
continuousAloss = getContinuousALoss(
|
||||||
|
thisNowConMusig, thisConAction, thisOldConProb, advantage
|
||||||
|
)
|
||||||
totalALoss += continuousAloss
|
totalALoss += continuousAloss
|
||||||
totalActionNum += 1
|
totalActionNum += 1.0
|
||||||
|
lastConAct += self.muSigSize
|
||||||
loss = tf.divide(totalALoss, totalActionNum)
|
loss = tf.divide(totalALoss, totalActionNum)
|
||||||
return loss
|
return loss
|
||||||
|
|
||||||
return loss
|
return loss
|
||||||
|
|
||||||
# get Action&V
|
# get Actions&values
|
||||||
def chooseAction(self, state):
|
def chooseAction(self, state: ndarray):
|
||||||
"""Agent choose action to take
|
"""Agent choose action to take
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
state (np.array): enviroment state
|
state (ndarray): enviroment state
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.array:
|
np.array:
|
||||||
disAct1,
|
actions,
|
||||||
discreteAction1
|
actions list,2dims like [[0],[1],[1.5]]
|
||||||
disAct2,
|
|
||||||
discreteAction2
|
|
||||||
disAct3,
|
|
||||||
discreteAction3
|
|
||||||
conAction,
|
|
||||||
continuousAction
|
|
||||||
predictResult,
|
predictResult,
|
||||||
actor NN predict Result output
|
actor NN predict Result output
|
||||||
"""
|
"""
|
||||||
# let actor choose action,use the normal distribution
|
# let actor choose action,use the normal distribution
|
||||||
# state = np.expand_dims(state,0)
|
# state = np.expand_dims(state,0)
|
||||||
|
|
||||||
# check state dimension is [1,statesize]
|
# check state dimension is [stateNum,statesize]
|
||||||
if state.ndim != 2:
|
if state.ndim != 2:
|
||||||
state = state.reshape([1,self.stateSize])
|
stateNum = int(len(state) / self.stateSize)
|
||||||
|
state = state.reshape([stateNum, self.stateSize])
|
||||||
predictResult = self.actor(state) # get predict result [[disAct1, disAct2, disAct3, musig]]
|
predictResult = self.actor(state) # get predict result [[disAct1, disAct2, disAct3, musig]]
|
||||||
predictResult = predictResult.numpy()
|
# print("predictResult",predictResult)
|
||||||
disAct1Prob = predictResult[0][0:3]
|
# predictResult = predictResult.numpy()
|
||||||
disAct2Prob = predictResult[0][3:6]
|
actions = []
|
||||||
disAct3Prob = predictResult[0][6:8]
|
if self.disActSize != 0:
|
||||||
mu = predictResult[0][8]
|
# when the NN has discrete action output.
|
||||||
sigma = predictResult[0][9]
|
lastDisActShape = 0
|
||||||
if math.isnan(mu) or math.isnan(sigma):
|
for shape in self.disActShape:
|
||||||
|
thisDisActProbs = predictResult[:, lastDisActShape : lastDisActShape + shape]
|
||||||
|
dist = tfp.distributions.Categorical(probs=thisDisActProbs, dtype=tf.float32)
|
||||||
|
action = int(dist.sample().numpy()[0])
|
||||||
|
# action = np.argmax(thisDisActProbs)
|
||||||
|
actions.append(action)
|
||||||
|
lastDisActShape += shape
|
||||||
|
if self.conActSize != 0:
|
||||||
|
# when the NN has continuous action output.
|
||||||
|
lastConAct = 0
|
||||||
|
for actIndex in range(self.conActSize):
|
||||||
|
thisMu = predictResult[:, self.disOutputSize + lastConAct]
|
||||||
|
thisSig = predictResult[:, self.disOutputSize + lastConAct + 1]
|
||||||
|
if math.isnan(thisMu) or math.isnan(thisSig):
|
||||||
# check mu or sigma is nan
|
# check mu or sigma is nan
|
||||||
print("mu or sigma is nan")
|
print("chooseAction:mu or sigma is nan")
|
||||||
|
thisDist = np.random.normal(loc=thisMu, scale=thisSig)
|
||||||
|
actions.append(np.clip(thisDist, -self.conActRange, self.conActRange))
|
||||||
|
lastConAct += 2
|
||||||
|
return actions, predictResult
|
||||||
|
|
||||||
disAct1 = np.argmax(disAct1Prob) # WS 0 or 1 or 2
|
def trainCritcActor(
|
||||||
disAct2 = np.argmax(disAct2Prob) # AD 0 or 1 or 2
|
self,
|
||||||
disAct3 = np.argmax(disAct3Prob) # mouse shoot 0 or 1
|
states: ndarray,
|
||||||
normDist = np.random.normal(loc=mu, scale=sigma) # normalDistribution
|
oldActorResult: ndarray,
|
||||||
conAction = np.clip(normDist, -self.conActRange,
|
actions: ndarray,
|
||||||
self.conActRange) # 在正态分布中随机get一个action
|
rewards: ndarray,
|
||||||
return disAct1, disAct2, disAct3, conAction, predictResult
|
dones: ndarray,
|
||||||
|
nextState: ndarray,
|
||||||
|
epochs: int = None,
|
||||||
|
):
|
||||||
|
"""train critic&actor use PPO ways
|
||||||
|
|
||||||
def getCriticV(self, state):
|
Args:
|
||||||
|
states (ndarray): states
|
||||||
|
oldActorResult (ndarray): actor predict result
|
||||||
|
actions (ndarray): predicted actions, including both discrete and continuous actions
|
||||||
|
rewards (ndarray): rewards from environment
|
||||||
|
dones (ndarray): dones from environment
|
||||||
|
nextState (ndarray): next state from environment
|
||||||
|
epochs (int, optional): training epochs; defaults to the ppoConfig value when None.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tf.constant: criticLoss, actorLoss
|
||||||
|
"""
|
||||||
|
|
||||||
|
if epochs == None:
|
||||||
|
epochs = self.trainEpochs
|
||||||
|
criticValues = self.getCriticV(state=states)
|
||||||
|
discountedR = self.discountReward(nextState, criticValues, dones, rewards)
|
||||||
|
advantage = self.getGAE(discountedR, criticValues)
|
||||||
|
|
||||||
|
criticLoss = self.trainCritic(states, discountedR, epochs)
|
||||||
|
actorLoss = self.trainActor(states, oldActorResult, actions, advantage, epochs)
|
||||||
|
# print("A_Loss:", actorLoss, "C_Loss:", criticLoss)
|
||||||
|
return criticLoss, actorLoss
|
||||||
|
|
||||||
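A minimal usage sketch for trainCritcActor, assuming a filled PPOBuffer (added later in this commit); the agent/buffer variable names are illustrative:

# agent = PPO.PPO(...), buffer = PPOBuffer(), nextState from the latest env.step()
criticLoss, actorLoss = agent.trainCritcActor(
    states=buffer.getStates(),
    oldActorResult=buffer.getActorProbs(),
    actions=buffer.getActions(),
    rewards=buffer.getRewards(),
    dones=buffer.getDones(),
    nextState=nextState,
)
buffer.clearBuffer()  # start collecting the next batch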
|
def trainCritic(self, states: ndarray, discountedR: ndarray, epochs: int = None):
|
||||||
|
"""critic NN trainning function
|
||||||
|
|
||||||
|
Args:
|
||||||
|
states (ndarray): states
|
||||||
|
discountedR (ndarray): discounted rewards
|
||||||
|
epochs (int, optional): training epochs; defaults to the ppoConfig value when None.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tf.constant: all critic losses
|
||||||
|
"""
|
||||||
|
if epochs == None:
|
||||||
|
epochs = self.trainEpochs
|
||||||
|
his = self.critic.fit(x=states, y=discountedR, epochs=epochs, verbose=0)
|
||||||
|
return his.history["loss"]
|
||||||
|
|
||||||
|
def trainActor(
|
||||||
|
self,
|
||||||
|
states: ndarray,
|
||||||
|
oldActorResult: ndarray,
|
||||||
|
actions: ndarray,
|
||||||
|
advantage: ndarray,
|
||||||
|
epochs: int = None,
|
||||||
|
):
|
||||||
|
"""actor NN trainning function
|
||||||
|
|
||||||
|
Args:
|
||||||
|
states (ndarray): states
|
||||||
|
oldActorResult (ndarray): actor predict results
|
||||||
|
actions (ndarray): actions predicted by the actor
|
||||||
|
advantage (ndarray): GAE advantage
|
||||||
|
epochs (int, optional): training epochs; defaults to the ppoConfig value when None.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tf.constant: all actor losses
|
||||||
|
"""
|
||||||
|
# Train Actor
|
||||||
|
# states: Buffer States
|
||||||
|
# actions: Buffer Actions
|
||||||
|
# discountedR: Discounted Rewards
|
||||||
|
# Epochs: just Epochs
|
||||||
|
if epochs == None:
|
||||||
|
epochs = self.trainEpochs
|
||||||
|
actions = np.asarray(actions, dtype=np.float32)
|
||||||
|
|
||||||
|
disActions = actions[:, 0 : self.disActSize]
|
||||||
|
conActions = actions[:, self.disActSize :]
|
||||||
|
oldDisProbs = oldActorResult[:, 0 : self.disOutputSize] # [disAct1, disAct2, disAct3]
|
||||||
|
oldConMusigs = oldActorResult[:, self.disOutputSize :] # [musig1,musig2]
|
||||||
|
if self.disActSize != 0:
|
||||||
|
disOneHotActs = self.getOneHotActs(disActions)
|
||||||
|
if self.conActSize != 0:
|
||||||
|
# when the NN has both discrete & continuous action outputs.
|
||||||
|
oldPiProbs = self.conProb(oldConMusigs[:, 0], oldConMusigs[:, 1], conActions)
|
||||||
|
# pack [oldDisProbs,oldPiProbs,conActions,advantage] as y_true
|
||||||
|
y_true = np.hstack((oldDisProbs, oldPiProbs, disOneHotActs, conActions, advantage))
|
||||||
|
else:
|
||||||
|
# when the NN has only discrete action outputs.
|
||||||
|
# pack [oldDisProbs,advantage] as y_true
|
||||||
|
y_true = np.hstack((oldDisProbs, disOneHotActs, advantage))
|
||||||
|
else:
|
||||||
|
if self.conActSize != 0:
|
||||||
|
# when the NN has only continuous action output.
|
||||||
|
oldPiProbs = self.conProb(oldConMusigs[:, 0], oldConMusigs[:, 1], conActions)
|
||||||
|
# pack [oldPiProbs,conActions,advantage] as y_true
|
||||||
|
y_true = np.hstack((oldPiProbs, conActions, advantage))
|
||||||
|
else:
|
||||||
|
print("trainActor:disActSize & conActSize error")
|
||||||
|
time.sleep(999999)
|
||||||
|
# assembly Actions history
|
||||||
|
# train start
|
||||||
|
if np.any(tf.math.is_nan(y_true)):
|
||||||
|
print("y_true got nan")
|
||||||
|
print("y_true", y_true)
|
||||||
|
his = self.actor.fit(x=states, y=y_true, epochs=epochs, verbose=0)
|
||||||
|
if np.any(tf.math.is_nan(his.history["loss"])):
|
||||||
|
print("his.history['loss'] is nan!")
|
||||||
|
print(his.history["loss"])
|
||||||
|
return his.history["loss"]
|
||||||
|
|
||||||
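For reference, with both action types enabled trainActor packs y_true as np.hstack((oldDisProbs, oldPiProbs, disOneHotActs, conActions, advantage)); using the disActShape = [3, 3, 2] and conActSize = 1 setup that appears elsewhere in this project, the column budget works out as follows (the custom actor loss slices these columns back out with disOutputSize/conActSize offsets):

# oldDisProbs   : disOutputSize = 3 + 3 + 2 = 8 columns (old actor's softmax outputs)
# oldPiProbs    : conActSize    = 1 column  (old Normal density of the taken continuous action)
# disOneHotActs : 8 columns     (one-hot of the taken action per branch, concatenated)
# conActions    : 1 column      (the taken continuous action)
# advantage     : 1 column      (normalized GAE advantage)
# total         : 19 columns per sample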
|
def saveWeights(self, score: float):
|
||||||
|
"""save now NN's Weight. Use "models.save_weights" method.
|
||||||
|
Save as "tf" format "ckpt" file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
score (float): now score
|
||||||
|
"""
|
||||||
|
actor_save_dir = (
|
||||||
|
self.saveDir + datetime.datetime.now().strftime("%H%M%S") + "/actor/" + "actor.ckpt"
|
||||||
|
)
|
||||||
|
critic_save_dir = (
|
||||||
|
self.saveDir + datetime.datetime.now().strftime("%H%M%S") + "/critic/" + "critic.ckpt"
|
||||||
|
)
|
||||||
|
self.actor.save_weights(actor_save_dir, save_format="tf")
|
||||||
|
self.critic.save_weights(critic_save_dir, save_format="tf")
|
||||||
|
# create an empty file named after the score to record it
|
||||||
|
score_dir = (
|
||||||
|
self.saveDir + datetime.datetime.now().strftime("%H%M%S") + "/" + str(round(score))
|
||||||
|
)
|
||||||
|
scorefile = open(score_dir, "w")
|
||||||
|
scorefile.close()
|
||||||
|
print("Model's Weights Saved")
|
||||||
|
|
||||||
|
def loadWeightToModels(self, loadDir: str):
|
||||||
|
"""load NN Model. Use "models.load_weights()" method.
|
||||||
|
Load "tf" format "ckpt" file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
loadDir (str): Model dir
|
||||||
|
"""
|
||||||
|
actorDir = loadDir + "/actor/" + "actor.ckpt"
|
||||||
|
criticDir = loadDir + "/critic/" + "critic.ckpt"
|
||||||
|
self.actor.load_weights(actorDir)
|
||||||
|
self.critic.load_weights(criticDir)
|
||||||
|
|
||||||
|
print("++++++++++++++++++++++++++++++++++++")
|
||||||
|
print("++++++++++++Model Loaded++++++++++++")
|
||||||
|
print(loadDir)
|
||||||
|
print("++++++++++++++++++++++++++++++++++++")
|
||||||
|
|
||||||
|
def getCriticV(self, state: ndarray):
|
||||||
"""get Critic predict V value
|
"""get Critic predict V value
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
state (np.array): Env state
|
state (ndarray): Env state
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tensor: retrun Critic predict result
|
tensor: Critic predict result
|
||||||
@ -263,41 +565,84 @@ class PPO(object):
|
|||||||
# if state.ndim < 2:
|
# if state.ndim < 2:
|
||||||
# state = np.expand_dims(state,0)
|
# state = np.expand_dims(state,0)
|
||||||
if state.ndim != 2:
|
if state.ndim != 2:
|
||||||
state = state.reshape([1,self.stateSize])
|
stateNum = int(len(state) / self.stateSize)
|
||||||
|
state = state.reshape([stateNum, self.stateSize])
|
||||||
return self.critic.predict(state)
|
return self.critic.predict(state)
|
||||||
|
|
||||||
def discountReward(self, nextState, rewards):
|
def discountReward(self, nextState: ndarray, values: ndarray, dones: ndarray, rewards: ndarray):
|
||||||
"""Discount future rewards
|
"""Discount future rewards
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
nextState (np.array): next Env state
|
nextState (ndarray): next Env state
|
||||||
rewards (np.array): reward list of this episode
|
values (ndarray): critic predict values
|
||||||
|
dones (ndarray): dones from enviroment
|
||||||
|
rewards (ndarray): reward list of this episode
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.array: discounted rewards list,same shape as rewards that input
|
ndarray: discounted rewards list, same shape as the input rewards
|
||||||
|
"""
|
||||||
"""
|
"""
|
||||||
# 降低未来的rewards
|
|
||||||
nextV = self.getCriticV(nextState)
|
nextV = self.getCriticV(nextState)
|
||||||
|
dones = 1 - dones
|
||||||
discountedRewards = []
|
discountedRewards = []
|
||||||
for r in rewards[::-1]:
|
for i in reversed(range(len(rewards))):
|
||||||
nextV = r + self.GAMMA*nextV
|
nextV = rewards[i] + dones[i] * self.gamma * nextV
|
||||||
discountedRewards.append(nextV)
|
discountedRewards.append(nextV)
|
||||||
discountedRewards.reverse() # \ESREVER/
|
discountedRewards.reverse() # reverse
|
||||||
discountedRewards = np.squeeze(discountedRewards)
|
discountedRewards = np.squeeze(discountedRewards)
|
||||||
discountedRewards = np.expand_dims(discountedRewards, axis=1)
|
discountedRewards = np.expand_dims(discountedRewards, axis=1)
|
||||||
# discountedRewards = np.array(discountedRewards)[:, np.newaxis]
|
# discountedRewards = np.array(discountedRewards)[:, np.newaxis]
|
||||||
return discountedRewards
|
return discountedRewards
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
nextV = self.getCriticV(nextState)
|
||||||
|
discountedRewards = []
|
||||||
|
for r in rewards[::-1]:
|
||||||
|
nextV = r + self.gamma * nextV
|
||||||
|
discountedRewards.append(nextV)
|
||||||
|
discountedRewards.reverse() # reverse
|
||||||
|
discountedRewards = np.squeeze(discountedRewards)
|
||||||
|
discountedRewards = np.expand_dims(discountedRewards, axis=1)
|
||||||
|
# discountedRewards = np.array(discountedRewards)[:, np.newaxis]
|
||||||
|
print(discountedRewards)
|
||||||
|
return discountedRewards
|
||||||
|
"""
|
||||||
|
g = 0
|
||||||
|
discountedRewards = []
|
||||||
|
lastValue = self.getCriticV(nextState)
|
||||||
|
values = np.append(values, lastValue, axis=0)
|
||||||
|
dones = 1 - dones
|
||||||
|
for i in reversed(range(len(rewards))):
|
||||||
|
delta = rewards[i] + self.gamma * values[i + 1] * dones[i] - values[i]
|
||||||
|
g = delta + self.gamma * self.lmbda * dones[i] * g
|
||||||
|
discountedRewards.append(g + values[i])
|
||||||
|
discountedRewards.reverse()
|
||||||
|
return np.asarray(discountedRewards)
|
||||||
|
|
||||||
def conProb(self, mu, sig, x):
|
def getGAE(self, discountedRewards: ndarray, values: ndarray):
|
||||||
|
"""compute GAE adcantage
|
||||||
|
|
||||||
|
Args:
|
||||||
|
discountedRewards (ndarray): discounted rewards
|
||||||
|
values (ndarray): critic predict values
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ndarray: GAE advantage
|
||||||
|
"""
|
||||||
|
advantage = discountedRewards - values
|
||||||
|
advantage = (advantage - np.mean(advantage)) / (np.std(advantage) + EPS)
|
||||||
|
return advantage
|
||||||
|
|
||||||
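In equation form, the gamma/lambda recursion at the end of discountReward builds the return target R_t, and getGAE converts it into a normalized advantage (d_t is the raw done flag, EPS the small constant used above):

\delta_t = r_t + \gamma (1 - d_t) V(s_{t+1}) - V(s_t)
\hat{A}_t = \delta_t + \gamma \lambda (1 - d_t) \hat{A}_{t+1}
R_t = \hat{A}_t + V(s_t)
\mathrm{advantage}_t = \frac{(R_t - V(s_t)) - \mu_{\mathrm{batch}}}{\sigma_{\mathrm{batch}} + \mathrm{EPS}}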
|
def conProb(self, mu: ndarray, sig: ndarray, x: ndarray):
|
||||||
"""calculate probability when x in Normal distribution(mu,sigma)
|
"""calculate probability when x in Normal distribution(mu,sigma)
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
mu (np,array): mu
|
mu (ndarray): mu
|
||||||
sig (np.array): sigma
|
sig (ndarray): sigma
|
||||||
x (np.array): x
|
x (ndarray): x
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.array: probabilities
|
ndarray: probability
|
||||||
"""
|
"""
|
||||||
# 获取在正态分布mu,sig下当取x值时的概率
|
# probability of x under the Normal distribution (mu, sig)
|
||||||
# return shape : (length,1)
|
# return shape : (length,1)
|
||||||
@ -313,116 +658,58 @@ class PPO(object):
|
|||||||
# prob = dist*tf.exp(-tf.square(x-mu)/(2.*tf.square(sig)))
|
# prob = dist*tf.exp(-tf.square(x-mu)/(2.*tf.square(sig)))
|
||||||
return prob
|
return prob
|
||||||
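conProb evaluates the Normal density of the taken continuous action under (mu, sig); the commented-out line above is the same formula with the 1/(sqrt(2*pi)*sigma) factor pulled out as dist:

p(x \mid \mu, \sigma) = \frac{1}{\sqrt{2\pi}\,\sigma} \exp\!\left( -\frac{(x - \mu)^2}{2\sigma^2} \right)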
|
|
||||||
def trainCritcActor(self, states, actions, rewards, nextState, criticEpochs, actorEpochs):
|
def getOneHotActs(self, disActions):
|
||||||
# Train ActorNN and CriticNN
|
"""one hot action encoder
|
||||||
# states: Buffer States
|
|
||||||
# actions: Buffer Actions
|
|
||||||
# rewards: Buffer Rewards,没有Discount处理
|
|
||||||
# nextState: 下一个单独state
|
|
||||||
# criticEpochs: just criticNN'Epochs
|
|
||||||
# acotrEpochs: just acotrNN'Epochs
|
|
||||||
discountedR = self.discountReward(nextState, rewards)
|
|
||||||
|
|
||||||
criticMeanLoss = self.trainCritic(states, discountedR, criticEpochs)
|
|
||||||
actorMeanLoss = self.trainActor(
|
|
||||||
states, actions, discountedR, actorEpochs)
|
|
||||||
print("A_Loss:", actorMeanLoss, "C_Loss:", criticMeanLoss)
|
|
||||||
return actorMeanLoss, criticMeanLoss
|
|
||||||
|
|
||||||
def trainCritic(self, states, discountedR, epochs):
|
|
||||||
# Trian Critic
|
|
||||||
# states: Buffer States
|
|
||||||
# discountedR: Discounted Rewards
|
|
||||||
# Epochs: just Epochs
|
|
||||||
|
|
||||||
# IDK why this should be list...It just work...
|
|
||||||
# If discountR in np.array type it will throw 'Failed to find data adapter that can handle'
|
|
||||||
# discountedR = discountedR.tolist()
|
|
||||||
his = self.critic.fit(x=states, y=discountedR,
|
|
||||||
epochs=epochs, verbose=0)
|
|
||||||
return np.mean(his.history['loss'])
|
|
||||||
|
|
||||||
def trainActor(self, states, actions, discountedR, epochs):
|
|
||||||
"""Actor NN trainning function
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
states (np.array): Env states
|
disActions (ndarray): discrete actions
|
||||||
actions (np.array): action history
|
|
||||||
discountedR (np.array): discountedR
|
|
||||||
epochs (int): epochs,how many time NN learning
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Average actor loss: this learning round's average actor loss
|
ndarray: one hot actions
|
||||||
"""
|
"""
|
||||||
# Trian Actor
|
actIndex = 0
|
||||||
# states: Buffer States
|
for thisShape in self.disActShape:
|
||||||
# actions: Buffer Actions
|
thisActs = disActions[:, actIndex]
|
||||||
# discountedR: Discounted Rewards
|
thisOneHotAct = tf.squeeze(tf.one_hot(thisActs, thisShape)).numpy()
|
||||||
# Epochs: just Epochs
|
if actIndex == 0:
|
||||||
|
oneHotActs = thisOneHotAct
|
||||||
|
else:
|
||||||
|
oneHotActs = np.append(oneHotActs, thisOneHotAct, axis=1)
|
||||||
|
actIndex += 1
|
||||||
|
return oneHotActs
|
||||||
|
|
||||||
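A small, hedged example of the concatenated one-hot layout getOneHotActs produces for the disActShape = [3, 3, 2] used in this project (the action values are illustrative):

import numpy as np
import tensorflow as tf

disActShape = [3, 3, 2]
disActions = np.array([[2, 0, 1],   # one row per buffered step: WS, AD, shoot
                       [1, 2, 0]])
oneHot = np.hstack(
    [tf.one_hot(disActions[:, i], depth).numpy() for i, depth in enumerate(disActShape)]
)
print(oneHot.shape)  # (2, 8): 3 + 3 + 2 one-hot columns, concatenated
# first row -> [0, 0, 1,  1, 0, 0,  0, 1]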
states = np.asarray(states)
|
def getAverageEntropy(self, probs: ndarray):
|
||||||
actions = np.asarray(actions, dtype=np.float32)
|
"""get average dis&con ACT Entropys
|
||||||
# predict with old Actor NN
|
|
||||||
oldActorResult = self.actor.predict(states)
|
|
||||||
|
|
||||||
# assembly Actions history
|
|
||||||
disActions = actions[:,0:self.disActSize]
|
|
||||||
conActions = actions[:,self.disActSize:]
|
|
||||||
# assembly predictResult as old Actor's Result
|
|
||||||
oldDisProbs = oldActorResult[:,0:self.disOutputSize] # [disAct1, disAct2, disAct3]
|
|
||||||
oldConMusigs = oldActorResult[:,self.disOutputSize:] # [musig1,musig2]
|
|
||||||
oldPiProbs = self.conProb(oldConMusigs[:, 0], oldConMusigs[:, 1], conActions)
|
|
||||||
|
|
||||||
criticV = self.critic.predict(states)
|
|
||||||
advantage = copy.deepcopy(discountedR - criticV)
|
|
||||||
|
|
||||||
# pack [oldDisProbs,oldPiProbs,conActions,advantage] as y_true
|
|
||||||
y_true = np.hstack((oldDisProbs,oldPiProbs,conActions,advantage))
|
|
||||||
|
|
||||||
# train start
|
|
||||||
if np.any(tf.math.is_nan(y_true)):
|
|
||||||
print("y_true got nan")
|
|
||||||
print("oldConMusigs",oldConMusigs)
|
|
||||||
print("oldPiProbs",oldPiProbs)
|
|
||||||
print("conActions",conActions)
|
|
||||||
print("oldConMusigs",oldConMusigs)
|
|
||||||
his = self.actor.fit(x=states, y=y_true, epochs=epochs, verbose=0)
|
|
||||||
if np.any(tf.math.is_nan(his.history['loss'])):
|
|
||||||
print("his.history['loss'] is nan!")
|
|
||||||
print(his.history['loss'])
|
|
||||||
return np.mean(his.history['loss'])
|
|
||||||
|
|
||||||
def saveWeights(self,score = None):
|
|
||||||
"""save now NN's Weight. Use "models.save_weights" method.
|
|
||||||
Save as "tf" format "ckpt" file.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
score (int): now score
|
probs (ndarray): actor NN predict result
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
float: average total entropy
|
||||||
|
list: discrete entropies
|
||||||
|
list: continuous entropies
|
||||||
"""
|
"""
|
||||||
actor_save_dir = self.saveDir+datetime.datetime.now().strftime("%H%M%S") + "/actor/" + "actor.ckpt"
|
discreteEntropys = []
|
||||||
critic_save_dir = self.saveDir+datetime.datetime.now().strftime("%H%M%S") + "/critic/" + "critic.ckpt"
|
continuousEntropys = []
|
||||||
self.actor.save_weights(actor_save_dir, save_format="tf")
|
if self.disActSize != 0:
|
||||||
self.critic.save_weights(critic_save_dir, save_format="tf")
|
disProbs = probs[:, 0 : self.disOutputSize]
|
||||||
if score != None:
|
lastDisActIndex = 0
|
||||||
# create an empty file named as score to recored score
|
for actShape in self.disActShape:
|
||||||
score_dir = self.saveDir+datetime.datetime.now().strftime("%H%M%S") + "/" + str(round(score))
|
thisDisProbs = disProbs[:, lastDisActIndex : lastDisActIndex + actShape]
|
||||||
scorefile = open(score_dir,'w')
|
lastDisActIndex += actShape
|
||||||
scorefile.close()
|
discreteEntropys.append(
|
||||||
print("Model's Weights Saved")
|
tf.negative(
|
||||||
|
tf.reduce_mean(
|
||||||
def loadWeightToModels(self,loadDir):
|
tf.math.multiply(thisDisProbs, tf.math.log(thisDisProbs + EPS))
|
||||||
"""load NN Model. Use "models.load_weights()" method.
|
)
|
||||||
Load "tf" format "ckpt" file.
|
)
|
||||||
|
)
|
||||||
Args:
|
if self.conActSize != 0:
|
||||||
loadDir (string): Model dir
|
conProbs = probs[:, self.disOutputSize :]
|
||||||
"""
|
conActIndex = 0
|
||||||
actorDir = loadDir + "/actor/" + "actor.ckpt"
|
for i in range(self.conActSize):
|
||||||
criticDir = loadDir + "/critic/" + "critic.ckpt"
|
thisConProbs = conProbs[:, conActIndex : conActIndex + 2]
|
||||||
self.actor.load_weights(actorDir)
|
conActIndex += 2
|
||||||
self.critic.load_weights(criticDir)
|
continuousEntropys.append(tf.reduce_mean(thisConProbs[:, 1]))
|
||||||
|
averageEntropy = np.mean([np.mean(discreteEntropys), np.mean(continuousEntropys)])
|
||||||
print("++++++++++++++++++++++++++++++++++++")
|
return averageEntropy, discreteEntropys, continuousEntropys
|
||||||
print("++++++++++++Model Loaded++++++++++++")
|
|
||||||
print(loadDir)
|
|
||||||
print("++++++++++++++++++++++++++++++++++++")
|
|
||||||
|
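For getAverageEntropy above: the discrete part is the usual -sum(p*log p) term, except that tf.reduce_mean averages over every element rather than summing per row first (a constant factor of the branch size), and the continuous part simply averages the predicted sigma column. A textbook per-branch version, as a hedged sketch (the EPS value is assumed):

import numpy as np

EPS = 1e-8  # assumed; plays the role of the EPS constant used in PPO.py

def discreteEntropy(probs):
    # probs: (batch, nActions) softmax output of one discrete branch
    return float(-np.mean(np.sum(probs * np.log(probs + EPS), axis=1)))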
65
Aimbot-PPO-Python/PPOBuffer.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
class PPOBuffer(object):
|
||||||
|
def __init__(self):
|
||||||
|
self.states = []
|
||||||
|
self.actorProbs = []
|
||||||
|
self.actions = []
|
||||||
|
self.rewards = []
|
||||||
|
self.dones = []
|
||||||
|
print("√√√√√Buffer Initialized Success√√√√√")
|
||||||
|
|
||||||
|
def clearBuffer(self):
|
||||||
|
self.states = []
|
||||||
|
self.actorProbs = []
|
||||||
|
self.actions = []
|
||||||
|
self.rewards = []
|
||||||
|
self.dones = []
|
||||||
|
|
||||||
|
def getStates(self):
|
||||||
|
return self.standDims(np.asarray(self.states))
|
||||||
|
|
||||||
|
def getActorProbs(self):
|
||||||
|
return self.standDims(np.asarray(self.actorProbs))
|
||||||
|
|
||||||
|
def getActions(self):
|
||||||
|
return self.standDims(np.asarray(self.actions))
|
||||||
|
|
||||||
|
def getRewards(self):
|
||||||
|
return self.standDims(np.asarray(self.rewards))
|
||||||
|
|
||||||
|
def getDones(self):
|
||||||
|
return self.standDims(np.asarray(self.dones))
|
||||||
|
|
||||||
|
def saveState(self, state):
|
||||||
|
self.states.append(state)
|
||||||
|
|
||||||
|
def saveAction(self, action):
|
||||||
|
self.actions.append(action)
|
||||||
|
|
||||||
|
def saveReward(self, reward):
|
||||||
|
self.rewards.append(reward)
|
||||||
|
|
||||||
|
def standDims(self, data):
|
||||||
|
# standardize the data's dimensions
|
||||||
|
if np.ndim(data) > 2:
|
||||||
|
return np.squeeze(data, axis=1)
|
||||||
|
elif np.ndim(data) < 2:
|
||||||
|
return np.expand_dims(data, axis=1)
|
||||||
|
else:
|
||||||
|
return np.asarray(data)
|
||||||
|
|
||||||
|
def saveBuffers(self, state, actorProb, action, reward, done):
|
||||||
|
self.states.append(state)
|
||||||
|
self.actorProbs.append(actorProb)
|
||||||
|
self.actions.append(action)
|
||||||
|
self.rewards.append(reward)
|
||||||
|
self.dones.append(done)
|
||||||
|
"""
|
||||||
|
print("self.states", self.states)
|
||||||
|
print("self.actions", self.actions)
|
||||||
|
print("self.rewards", self.rewards)
|
||||||
|
print("self.dones", self.dones)
|
||||||
|
print("self.values", self.values)
|
||||||
|
"""
|
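A short usage sketch for the new PPOBuffer (agent/env variable names are illustrative):

buffer = PPOBuffer()
# inside the environment loop:
actions, actorProb = agent.chooseAction(state)
nextState, reward, done, loadDir, saveNow = env.step(actions)
buffer.saveBuffers(state, actorProb, actions, reward, done)
# when a batch is full or the episode ends:
states = buffer.getStates()  # standDims() guarantees 2-D arrays
buffer.clearBuffer()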
15
Aimbot-PPO-Python/PPOConfig.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
import datetime
|
||||||
|
from typing import NamedTuple, Optional
|
||||||
|
|
||||||
|
|
||||||
|
class PPOConfig(NamedTuple):
|
||||||
|
NNShape: list = [256, 256, 128]
|
||||||
|
actorLR: float = 2e-3 # Actor Net Learning
|
||||||
|
criticLR: float = 2e-3 # Critic Net Learning
|
||||||
|
gamma: float = 0.99
|
||||||
|
lmbda: float = 0.95
|
||||||
|
clipRange: float = 0.20
|
||||||
|
entropyWeight: float = 1e-2
|
||||||
|
trainEpochs: int = 8
|
||||||
|
saveDir: str = "PPO-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/"
|
||||||
|
loadModelDir: Optional[str] = None
|
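PPOConfig is a NamedTuple of defaults, so individual fields can be overridden per run (the printed values follow from the defaults above; the example saveDir timestamp is illustrative):

config = PPOConfig(actorLR=1e-5, criticLR=2e-5, trainEpochs=15)
print(config.gamma, config.lmbda, config.clipRange)  # 0.99 0.95 0.2
# config.saveDir encodes the launch time, e.g. "PPO-Model/1010-1530/"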
58
Aimbot-PPO-Python/PPOHistoryRecorder.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
class PPOHistory(object):
|
||||||
|
def __init__(self):
|
||||||
|
self.meanRewards = []
|
||||||
|
self.entropys = []
|
||||||
|
self.actorLosses = []
|
||||||
|
self.criticLosses = []
|
||||||
|
|
||||||
|
def saveHis(self, rewards, entropys, aLosses, cLosses):
|
||||||
|
self.meanRewards.extend([rewards])
|
||||||
|
self.entropys.extend([entropys])
|
||||||
|
self.actorLosses.extend(aLosses)
|
||||||
|
self.criticLosses.extend(cLosses)
|
||||||
|
|
||||||
|
def drawHis(self):
|
||||||
|
plt.figure(figsize=(21, 13), facecolor="#011627")
|
||||||
|
ax = plt.subplot(2, 2, 1)
|
||||||
|
ax.set_facecolor("#011627")
|
||||||
|
ax.spines["top"].set_color("#c9d2df")
|
||||||
|
ax.spines["bottom"].set_color("#c9d2df")
|
||||||
|
ax.spines["left"].set_color("#c9d2df")
|
||||||
|
ax.spines["right"].set_color("#c9d2df")
|
||||||
|
ax.plot(
|
||||||
|
range(len(self.meanRewards)), self.meanRewards, color="#c9d2df", label="AverageRewards"
|
||||||
|
)
|
||||||
|
ax.set_title("meanRewards", color="#c9d2df")
|
||||||
|
ax = plt.subplot(2, 2, 2)
|
||||||
|
ax.set_facecolor("#011627")
|
||||||
|
ax.spines["top"].set_color("#c9d2df")
|
||||||
|
ax.spines["bottom"].set_color("#c9d2df")
|
||||||
|
ax.spines["left"].set_color("#c9d2df")
|
||||||
|
ax.spines["right"].set_color("#c9d2df")
|
||||||
|
ax.plot(range(len(self.entropys)), self.entropys, color="#c9d2df", label="AverageEntropys")
|
||||||
|
ax.set_title("entropys", color="#c9d2df")
|
||||||
|
ax = plt.subplot(2, 2, 3)
|
||||||
|
ax.set_facecolor("#011627")
|
||||||
|
ax.spines["top"].set_color("#c9d2df")
|
||||||
|
ax.spines["bottom"].set_color("#c9d2df")
|
||||||
|
ax.spines["left"].set_color("#c9d2df")
|
||||||
|
ax.spines["right"].set_color("#c9d2df")
|
||||||
|
ax.plot(
|
||||||
|
range(len(self.actorLosses)), self.actorLosses, color="#c9d2df", label="actorLosses"
|
||||||
|
)
|
||||||
|
ax.set_title("actorLosses", color="#c9d2df")
|
||||||
|
ax = plt.subplot(2, 2, 4)
|
||||||
|
ax.set_facecolor("#011627")
|
||||||
|
ax.spines["top"].set_color("#c9d2df")
|
||||||
|
ax.spines["bottom"].set_color("#c9d2df")
|
||||||
|
ax.spines["left"].set_color("#c9d2df")
|
||||||
|
ax.spines["right"].set_color("#c9d2df")
|
||||||
|
ax.plot(
|
||||||
|
range(len(self.criticLosses)), self.criticLosses, color="#c9d2df", label="criticLosses"
|
||||||
|
)
|
||||||
|
ax.set_title("criticLosses", color="#c9d2df")
|
||||||
|
plt.show()
|
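PPOHistory just accumulates per-update statistics and plots them on a dark 2x2 figure; typical use (variable names illustrative):

history = PPOHistory()
# after each training update:
history.saveHis(meanReward, averageEntropy, actorLosses, criticLosses)
# during or after training:
history.drawHis()  # panels: meanRewards, entropys, actorLosses, criticLosses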
@ -1,8 +1,8 @@
|
|||||||
import mlagents_envs
|
|
||||||
from mlagents_envs.base_env import ActionTuple
|
from mlagents_envs.base_env import ActionTuple
|
||||||
from mlagents_envs.environment import UnityEnvironment
|
from mlagents_envs.environment import UnityEnvironment
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from numpy import ndarray
|
||||||
|
|
||||||
|
|
||||||
class makeEnv(object):
|
class makeEnv(object):
|
||||||
@ -22,69 +22,71 @@ class makeEnv(object):
|
|||||||
self.BEHA_SPECS = self.env.behavior_specs
|
self.BEHA_SPECS = self.env.behavior_specs
|
||||||
self.BEHA_NAME = list(self.BEHA_SPECS)[0]
|
self.BEHA_NAME = list(self.BEHA_SPECS)[0]
|
||||||
self.SPEC = self.BEHA_SPECS[self.BEHA_NAME]
|
self.SPEC = self.BEHA_SPECS[self.BEHA_NAME]
|
||||||
self.OBSERVATION_SPECS = self.SPEC.observation_specs[
|
self.OBSERVATION_SPECS = self.SPEC.observation_specs[0] # observation spec
|
||||||
0
|
|
||||||
] # observation spec
|
|
||||||
self.ACTION_SPEC = self.SPEC.action_spec # action specs
|
self.ACTION_SPEC = self.SPEC.action_spec # action specs
|
||||||
|
|
||||||
self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size # 連続的な動作のSize
|
self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size # number of discrete actions
|
||||||
|
self.DISCRETE_SHAPE = list(self.ACTION_SPEC.discrete_branches)
|
||||||
self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size # 離散的な動作のSize
|
self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size # number of continuous actions
|
||||||
self.STATE_SIZE = (
|
self.STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE # number of environment observation values
|
||||||
self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE
|
|
||||||
) # 環境観測データ数
|
|
||||||
print("√√√√√Enviroment Initialized Success√√√√√")
|
print("√√√√√Enviroment Initialized Success√√√√√")
|
||||||
|
|
||||||
def step(
|
def step(
|
||||||
self,
|
self,
|
||||||
discreteActions=None,
|
actions: list,
|
||||||
continuousActions=None,
|
behaviorName: ndarray = None,
|
||||||
behaviorName=None,
|
trackedAgent: ndarray = None,
|
||||||
trackedAgent=None,
|
|
||||||
):
|
):
|
||||||
|
"""change ations list to ActionTuple then send it to enviroment
|
||||||
|
|
||||||
|
Args:
|
||||||
|
actions (list): PPO chooseAction output action list
|
||||||
|
behaviorName (ndarray, optional): behaviorName. Defaults to None.
|
||||||
|
trackedAgent (ndarray, optional): trackedAgentID. Defaults to None.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ndarray: nextState, reward, done, loadDir, saveNow
|
||||||
|
"""
|
||||||
# take action to enviroment
|
# take action to enviroment
|
||||||
# return mextState,reward,done
|
# return nextState, reward, done
|
||||||
|
if self.DISCRETE_SIZE == 0:
|
||||||
|
# create empty discrete action
|
||||||
|
discreteActions = np.asarray([[0]])
|
||||||
|
else:
|
||||||
|
# create discrete action from actions list
|
||||||
|
discreteActions = np.asanyarray([actions[0:self.DISCRETE_SIZE]])
|
||||||
|
if self.CONTINUOUS_SIZE == 0:
|
||||||
|
# create empty continuous action
|
||||||
|
continuousActions = np.asanyarray([[0.0]])
|
||||||
|
else:
|
||||||
|
# create continuous actions from actions list
|
||||||
|
continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE:])
|
||||||
|
|
||||||
# check if arg is include None or IS None
|
|
||||||
try:
|
|
||||||
isDisNone = discreteActions.any() is None
|
|
||||||
if discreteActions.all() is None:
|
|
||||||
print("step() Error!:discreteActions include None")
|
|
||||||
except:
|
|
||||||
isDisNone = True
|
|
||||||
try:
|
|
||||||
isConNone = continuousActions.any() is None
|
|
||||||
if continuousActions.all() is None:
|
|
||||||
print("step() Error!:continuousActions include None")
|
|
||||||
except:
|
|
||||||
isConNone = True
|
|
||||||
|
|
||||||
if isDisNone:
|
|
||||||
# if discreteActions is enpty just give nothing[[0]] to Enviroment
|
|
||||||
discreteActions = np.array([[0]], dtype=np.int)
|
|
||||||
if isConNone:
|
|
||||||
# if continuousActions is enpty just give nothing[[0]] to Enviroment
|
|
||||||
continuousActions = np.array([[0]], dtype=np.float)
|
|
||||||
if behaviorName is None:
|
if behaviorName is None:
|
||||||
behaviorName = self.BEHA_NAME
|
behaviorName = self.BEHA_NAME
|
||||||
if trackedAgent is None:
|
if trackedAgent is None:
|
||||||
trackedAgent = self.TRACKED_AGENT
|
trackedAgent = self.TRACKED_AGENT
|
||||||
|
|
||||||
# create actionTuple
|
# create actionTuple
|
||||||
thisActionTuple = ActionTuple(
|
thisActionTuple = ActionTuple(continuous=continuousActions, discrete=discreteActions)
|
||||||
continuous=continuousActions, discrete=discreteActions
|
|
||||||
)
|
|
||||||
# take action to env
|
# take action to env
|
||||||
self.env.set_actions(
|
self.env.set_actions(behavior_name=behaviorName, action=thisActionTuple)
|
||||||
behavior_name=behaviorName, action=thisActionTuple
|
|
||||||
)
|
|
||||||
self.env.step()
|
self.env.step()
|
||||||
# get nextState & reward & done after this action
|
# get nextState & reward & done after this action
|
||||||
nextState, reward, done, loadDir, saveNow = self.getSteps(
|
nextState, reward, done, loadDir, saveNow = self.getSteps(behaviorName, trackedAgent)
|
||||||
behaviorName, trackedAgent
|
|
||||||
)
|
|
||||||
return nextState, reward, done, loadDir, saveNow
|
return nextState, reward, done, loadDir, saveNow
|
||||||
|
|
||||||
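With the new signature, step() takes the flat action list returned by chooseAction and splits it by DISCRETE_SIZE / CONTINUOUS_SIZE itself; for example (action values illustrative):

actions = [2, 0, 1, 3.5]  # e.g. [WS, AD, shoot, mouseX] from agent.chooseAction(state)
nextState, reward, done, loadDir, saveNow = env.step(actions)
# internally the list is split exactly as in the code above:
#   discreteActions   = np.asanyarray([actions[0:DISCRETE_SIZE]])
#   continuousActions = np.asanyarray(actions[DISCRETE_SIZE:])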
def getSteps(self, behaviorName=None, trackedAgent=None):
|
def getSteps(self, behaviorName=None, trackedAgent=None):
|
||||||
|
"""get enviroment now observations.
|
||||||
|
Include State, Reward, Done, LoadDir, SaveNow
|
||||||
|
|
||||||
|
Args:
|
||||||
|
behaviorName (_type_, optional): behaviorName. Defaults to None.
|
||||||
|
trackedAgent (_type_, optional): trackedAgent. Defaults to None.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ndarray: nextState, reward, done, loadDir, saveNow
|
||||||
|
"""
|
||||||
# get nextState & reward & done
|
# get nextState & reward & done
|
||||||
if behaviorName is None:
|
if behaviorName is None:
|
||||||
behaviorName = self.BEHA_NAME
|
behaviorName = self.BEHA_NAME
|
||||||
@ -94,25 +96,17 @@ class makeEnv(object):
|
|||||||
if trackedAgent is None:
|
if trackedAgent is None:
|
||||||
trackedAgent = self.TRACKED_AGENT
|
trackedAgent = self.TRACKED_AGENT
|
||||||
|
|
||||||
if (
|
if trackedAgent in decisionSteps: # if the episode has not ended, the environment state is stored in decision_steps
|
||||||
trackedAgent in decisionSteps
|
|
||||||
): # ゲーム終了していない場合、環境状態がdecision_stepsに保存される
|
|
||||||
nextState = decisionSteps[trackedAgent].obs[0]
|
nextState = decisionSteps[trackedAgent].obs[0]
|
||||||
nextState = np.reshape(
|
nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
|
||||||
nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
|
|
||||||
)
|
|
||||||
saveNow = nextState[0][-1]
|
saveNow = nextState[0][-1]
|
||||||
loadDir = nextState[0][-3:-1]
|
loadDir = nextState[0][-3:-1]
|
||||||
nextState = nextState[0][:-3]
|
nextState = nextState[0][:-3]
|
||||||
reward = decisionSteps[trackedAgent].reward
|
reward = decisionSteps[trackedAgent].reward
|
||||||
done = False
|
done = False
|
||||||
if (
|
if trackedAgent in terminalSteps: # if the episode has ended, the environment state is stored in terminal_steps
|
||||||
trackedAgent in terminalSteps
|
|
||||||
): # ゲーム終了した場合、環境状態がterminal_stepsに保存される
|
|
||||||
nextState = terminalSteps[trackedAgent].obs[0]
|
nextState = terminalSteps[trackedAgent].obs[0]
|
||||||
nextState = np.reshape(
|
nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
|
||||||
nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
|
|
||||||
)
|
|
||||||
saveNow = nextState[0][-1]
|
saveNow = nextState[0][-1]
|
||||||
loadDir = nextState[0][-3:-1]
|
loadDir = nextState[0][-3:-1]
|
||||||
nextState = nextState[0][:-3]
|
nextState = nextState[0][:-3]
|
||||||
@ -121,9 +115,16 @@ class makeEnv(object):
|
|||||||
return nextState, reward, done, loadDir, saveNow
|
return nextState, reward, done, loadDir, saveNow
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
|
"""reset enviroment and get observations
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ndarray: nextState, reward, done, loadDir, saveNow
|
||||||
|
"""
|
||||||
self.env.reset()
|
self.env.reset()
|
||||||
nextState, reward, done, loadDir, saveNow = self.getSteps()
|
nextState, reward, done, loadDir, saveNow = self.getSteps()
|
||||||
return nextState, reward, done, loadDir, saveNow
|
return nextState, reward, done, loadDir, saveNow
|
||||||
|
|
||||||
def render(self):
|
def render(self):
|
||||||
|
"""render enviroment
|
||||||
|
"""
|
||||||
self.env.render()
|
self.env.render()
|
||||||
|
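Putting the pieces of this commit together, a minimal training-loop sketch; the PPO constructor arguments are assumptions carried over from the older notebook and may differ from the updated class, and the build path, ports, and batch size are placeholders:

import aimBotEnv
import PPO
from PPOBuffer import PPOBuffer

env = aimBotEnv.makeEnv(envPath="./Build/Aimbot-PPO", workerID=1, basePort=200)
agent = PPO.PPO(
    stateSize=env.STATE_SIZE,
    disActShape=env.DISCRETE_SHAPE,
    conActSize=env.CONTINUOUS_SIZE,
    conActRange=10,
)  # assumed signature; remaining hyperparameters fall back to PPOConfig defaults
buffer = PPOBuffer()

state, *_ = env.reset()
for step in range(100000):
    actions, actorProb = agent.chooseAction(state)
    nextState, reward, done, loadDir, saveNow = env.step(actions)
    buffer.saveBuffers(state, actorProb, actions, reward, done)
    state = nextState
    if done or (step + 1) % 256 == 0:
        agent.trainCritcActor(
            buffer.getStates(), buffer.getActorProbs(), buffer.getActions(),
            buffer.getRewards(), buffer.getDones(), nextState,
        )
        buffer.clearBuffer()
        if done:
            state, *_ = env.reset()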
@ -1,29 +0,0 @@
|
|||||||
import numpy as np
|
|
||||||
|
|
||||||
class buffer(object):
|
|
||||||
def __init__(self):
|
|
||||||
self.states = []
|
|
||||||
self.actions = []
|
|
||||||
self.rewards = []
|
|
||||||
print("√√√√√Buffer Initialized Success√√√√√")
|
|
||||||
def clearBuffer(self):
|
|
||||||
self.states = []
|
|
||||||
self.actions = []
|
|
||||||
self.rewards = []
|
|
||||||
def getStates(self):
|
|
||||||
return np.asarray(self.states)
|
|
||||||
def getActions(self):
|
|
||||||
return np.asarray(self.actions)
|
|
||||||
def getRewards(self):
|
|
||||||
return np.asarray(self.rewards)
|
|
||||||
|
|
||||||
def saveState(self,state):
|
|
||||||
self.states.append(state)
|
|
||||||
def saveAction(self,action):
|
|
||||||
self.actions.append(action)
|
|
||||||
def saveReward(self,reward):
|
|
||||||
self.rewards.append(reward)
|
|
||||||
def saveBuffers(self,state,action,reward):
|
|
||||||
self.states.append(state)
|
|
||||||
self.actions.append(action)
|
|
||||||
self.rewards.append(reward)
|
|
@ -1,356 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import aimBotEnv\n",
|
|
||||||
"import PPO\n",
|
|
||||||
"import buffer\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"\n",
|
|
||||||
"import tensorflow as tf\n",
|
|
||||||
"import time\n",
|
|
||||||
"import datetime\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Attempts to allocate only the GPU memory needed for allocation\n",
|
|
||||||
"physical_devices = tf.config.list_physical_devices('GPU')\n",
|
|
||||||
"tf.config.experimental.set_memory_growth(physical_devices[0], True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Env\n",
|
|
||||||
"ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n",
|
|
||||||
"WORKER_ID = 1\n",
|
|
||||||
"BASE_PORT = 200\n",
|
|
||||||
"\n",
|
|
||||||
"MAX_EP = 1000\n",
|
|
||||||
"EP_LENGTH = 100000\n",
|
|
||||||
"GAMMA = 0.99 # discount future reward (UP?)\n",
|
|
||||||
"EPSILON = 0.2 # clip Ratio range[1-EPSILON,1+EPSILON]\n",
|
|
||||||
"ACTOR_LR = 1e-5 # LR\n",
|
|
||||||
"CRITIC_LR = 2e-5 # LR\n",
|
|
||||||
"BATCH = 256 # learning step\n",
|
|
||||||
"ACTOR_EPOCH = 15 # epoch\n",
|
|
||||||
"CRITIC_EPOCH = 15 # epoch\n",
|
|
||||||
"ENTROPY_WHEIGHT = 0.001 # sigma's entropy in Actor loss\n",
|
|
||||||
"ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"TRAIN = True\n",
|
|
||||||
"SAVE_DIR = \"PPO-Model/\" + datetime.datetime.now().strftime(\"%m%d%H%M\") + \"/\"\n",
|
|
||||||
"LOAD_DIR = None\n",
|
|
||||||
"\n",
|
|
||||||
"CTN_ACTION_RANGE = 10\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"√√√√√Enviroment Initialized Success√√√√√\n",
|
|
||||||
"√√√√√Buffer Initialized Success√√√√√\n",
|
|
||||||
"No loadDir specified,Create a New Model\n",
|
|
||||||
"CONTINUOUS_SIZE 1\n",
|
|
||||||
"DISCRETE_SIZE 5\n",
|
|
||||||
"STATE_SIZE 30\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# initialize enviroment & buffer class\n",
|
|
||||||
"env = aimBotEnv.makeEnv(\n",
|
|
||||||
" envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT\n",
|
|
||||||
")\n",
|
|
||||||
"epBuffer = buffer.buffer()\n",
|
|
||||||
"\n",
|
|
||||||
"STATE_SIZE = env.STATE_SIZE\n",
|
|
||||||
"CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n",
|
|
||||||
"DISCRETE_SIZE = env.DISCRETE_SIZE\n",
|
|
||||||
"_, _, _, loadDir, _ = env.getSteps()\n",
|
|
||||||
"\n",
|
|
||||||
"# check load model or not\n",
|
|
||||||
"if np.any(loadDir == 0):\n",
|
|
||||||
" # create a new model\n",
|
|
||||||
" print(\"No loadDir specified,Create a New Model\")\n",
|
|
||||||
" LOAD_DIR = None\n",
|
|
||||||
"else:\n",
|
|
||||||
" # load model\n",
|
|
||||||
" loadDirDateSTR = str(int(loadDir[0]))\n",
|
|
||||||
" loadDirTimeSTR = str(int(loadDir[1]))\n",
|
|
||||||
" if len(loadDirDateSTR) != 8:\n",
|
|
||||||
" # fill lost 0 while converse float to string\n",
|
|
||||||
" for _ in range(8 - len(loadDirDateSTR)):\n",
|
|
||||||
" loadDirDateSTR = \"0\" + loadDirDateSTR\n",
|
|
||||||
" if len(loadDirTimeSTR) != 6:\n",
|
|
||||||
" # fill lost 0 while converse float to string\n",
|
|
||||||
" for _ in range(6 - len(loadDirTimeSTR)):\n",
|
|
||||||
" loadDirTimeSTR = \"0\" + loadDirTimeSTR\n",
|
|
||||||
" LOAD_DIR = \"PPO-Model/\" + loadDirDateSTR + \"/\" + loadDirTimeSTR\n",
|
|
||||||
" print(\"Load Model:\")\n",
|
|
||||||
" print(LOAD_DIR)\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"CONTINUOUS_SIZE\", CONTINUOUS_SIZE)\n",
|
|
||||||
"print(\"DISCRETE_SIZE\", DISCRETE_SIZE)\n",
|
|
||||||
"print(\"STATE_SIZE\", STATE_SIZE)\n",
|
|
||||||
"\n",
|
|
||||||
"disActShape = [3, 3, 2]\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def actToKey(disAct1,disAct2,disAct3,conAct):\n",
|
|
||||||
" kW = 0\n",
|
|
||||||
" kS = 0\n",
|
|
||||||
" kA = 0\n",
|
|
||||||
" kD = 0\n",
|
|
||||||
" mouseShoot = 0\n",
|
|
||||||
" if disAct1 == 0:\n",
|
|
||||||
" kW = 0\n",
|
|
||||||
" kS = 1\n",
|
|
||||||
" elif disAct1 == 1:\n",
|
|
||||||
" kW = 0\n",
|
|
||||||
" kS = 0\n",
|
|
||||||
" elif disAct1 == 2:\n",
|
|
||||||
" kW = 1\n",
|
|
||||||
" kS = 0\n",
|
|
||||||
" if disAct2 == 0:\n",
|
|
||||||
" kA = 0\n",
|
|
||||||
" kD = 1\n",
|
|
||||||
" elif disAct2 == 1:\n",
|
|
||||||
" kA = 0\n",
|
|
||||||
" kD = 0\n",
|
|
||||||
" elif disAct2 == 2:\n",
|
|
||||||
" kA = 1\n",
|
|
||||||
" kD = 0\n",
|
|
||||||
" mouseShoot = disAct3\n",
|
|
||||||
" return kW,kS,kA,kD,mouseShoot,conAct"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 6,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"EP 0 START\n",
|
|
||||||
"√√√√√Buffer Initialized Success√√√√√\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\fromnumeric.py:3474: RuntimeWarning: Mean of empty slice.\n",
|
|
||||||
" return _methods._mean(a, axis=axis, dtype=dtype,\n",
|
|
||||||
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars\n",
|
|
||||||
" ret = ret.dtype.type(ret / rcount)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"A_Loss: 0.4477495511372884 C_Loss: 3.155759557088216\n",
|
|
||||||
"A_Loss: 0.14549287557601928 C_Loss: 0.5123071213563283\n",
|
|
||||||
"A_Loss: 0.055241942902406055 C_Loss: 0.13002794484297434\n",
|
|
||||||
"A_Loss: 0.057325509190559384 C_Loss: 0.11068039039770762\n",
|
|
||||||
"A_Loss: 0.04376962607105573 C_Loss: 0.03923700377345085\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"bestScore = 200.0\n",
|
|
||||||
"stopTrainCounter = 0\n",
|
|
||||||
"\n",
|
|
||||||
"totalRewardHis = []\n",
|
|
||||||
"totalActorLossHis = []\n",
|
|
||||||
"totalCriticLossHis = []\n",
|
|
||||||
"epHis = []\n",
|
|
||||||
"maxTotalReward = -99999999999\n",
|
|
||||||
"\n",
|
|
||||||
"for ep in range(MAX_EP):\n",
|
|
||||||
" print(\"EP \", ep, \" START\")\n",
|
|
||||||
" # first time run game\n",
|
|
||||||
" s, _, _, _, _ = env.reset()\n",
|
|
||||||
" if ep == 0:\n",
|
|
||||||
" epBuffer = buffer.buffer()\n",
|
|
||||||
" s = s.reshape([STATE_SIZE])\n",
|
|
||||||
" agent = PPO.PPO(\n",
|
|
||||||
" stateSize=STATE_SIZE,\n",
|
|
||||||
" disActShape=disActShape,\n",
|
|
||||||
" conActSize=1,\n",
|
|
||||||
" conActRange=CTN_ACTION_RANGE,\n",
|
|
||||||
" criticLR=CRITIC_LR,\n",
|
|
||||||
" actorLR=ACTOR_LR,\n",
|
|
||||||
" gamma=GAMMA,\n",
|
|
||||||
" epsilon=EPSILON,\n",
|
|
||||||
" entropyWeight=ENTROPY_WHEIGHT,\n",
|
|
||||||
" saveDir=SAVE_DIR,\n",
|
|
||||||
" loadModelDir=LOAD_DIR,\n",
|
|
||||||
" )\n",
|
|
||||||
" step = 0\n",
|
|
||||||
" done = False\n",
|
|
||||||
" stopTrainCounter -= 1\n",
|
|
||||||
" epHis.append(ep)\n",
|
|
||||||
"\n",
|
|
||||||
" # reset total reward\n",
|
|
||||||
" epTotalReward = 0\n",
|
|
||||||
"\n",
|
|
||||||
" # Recorder list\n",
|
|
||||||
" epStepHis = []\n",
|
|
||||||
" epRewardHis = []\n",
|
|
||||||
" epActorLossHis = []\n",
|
|
||||||
" epCriticLossHis = []\n",
|
|
||||||
"\n",
|
|
||||||
" # save weight immediately?\n",
|
|
||||||
" saveNow = 0\n",
|
|
||||||
"\n",
|
|
||||||
" while not done:\n",
|
|
||||||
" step += 1\n",
|
|
||||||
" if (\n",
|
|
||||||
" step % ACTION_INTERVAL == 0\n",
|
|
||||||
" ): # take action every ACTION_INTERVAL steps\n",
|
|
||||||
" epStepHis.append(step)\n",
|
|
||||||
" (\n",
|
|
||||||
" disAct1,\n",
|
|
||||||
" disAct2,\n",
|
|
||||||
" disAct3,\n",
|
|
||||||
" conAct,\n",
|
|
||||||
" predictResult,\n",
|
|
||||||
" ) = agent.chooseAction(s)\n",
|
|
||||||
" kW, kS, kA, kD, mouseShoot, mouseMove = actToKey(\n",
|
|
||||||
" disAct1, disAct2, disAct3, conAct\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
" nextState, thisReward, done, _, saveNow = env.step(\n",
|
|
||||||
" discreteActions=np.array([[kW, kS, kA, kD, mouseShoot]]),\n",
|
|
||||||
" continuousActions=np.array([[mouseMove]]),\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
" epTotalReward += thisReward\n",
|
|
||||||
" epBuffer.saveBuffers(\n",
|
|
||||||
" s, [disAct1, disAct2, disAct3, conAct], thisReward\n",
|
|
||||||
" )\n",
|
|
||||||
" else:\n",
|
|
||||||
" disActs = np.array([[0, 0, 0, 0, 0]])\n",
|
|
||||||
" conActs = np.array([[0]])\n",
|
|
||||||
"\n",
|
|
||||||
" nextState, thisReward, done, _, saveNow = env.step(\n",
|
|
||||||
" discreteActions=disActs, continuousActions=conActs\n",
|
|
||||||
" )\n",
|
|
||||||
" epTotalReward += thisReward\n",
|
|
||||||
" nextState = nextState.reshape([STATE_SIZE])\n",
|
|
||||||
" s = nextState\n",
|
|
||||||
"\n",
|
|
||||||
" if done:\n",
|
|
||||||
" print(\"EP OVER!\")\n",
|
|
||||||
" if saveNow != 0:\n",
|
|
||||||
" print(\"SAVENOW!\")\n",
|
|
||||||
" saveNow = 0\n",
|
|
||||||
" agent.saveWeights()\n",
|
|
||||||
" # update PPO after Batch step or GameOver\n",
|
|
||||||
" if (step + 1) % BATCH == 0 or done:\n",
|
|
||||||
" bs = epBuffer.getStates()\n",
|
|
||||||
" ba = epBuffer.getActions()\n",
|
|
||||||
" br = epBuffer.getRewards()\n",
|
|
||||||
" epBuffer.clearBuffer()\n",
|
|
||||||
" if TRAIN:\n",
|
|
||||||
" epActorLoss, epCriticLoss = agent.trainCritcActor(\n",
|
|
||||||
" bs, ba, br, s, CRITIC_EPOCH, ACTOR_EPOCH\n",
|
|
||||||
" )\n",
|
|
||||||
" epActorLossHis.append(epActorLoss)\n",
|
|
||||||
" epCriticLossHis.append(epCriticLoss)\n",
|
|
||||||
" # update History Recorder\n",
|
|
||||||
" totalActorLossHis.append(np.mean(epActorLossHis))\n",
|
|
||||||
" totalCriticLossHis.append(np.mean(epCriticLossHis))\n",
|
|
||||||
" totalRewardHis.append(epTotalReward)\n",
|
|
||||||
"\n",
|
|
||||||
" if epTotalReward > maxTotalReward and epTotalReward != 0:\n",
|
|
||||||
" maxTotalReward = epTotalReward\n",
|
|
||||||
" agent.saveWeights(epTotalReward)\n",
|
|
||||||
" print(\"New Record! Save NN\", epTotalReward)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"aaa = 0\n",
|
|
||||||
"aaa = 1\n",
|
|
||||||
"aaa = 2\n",
|
|
||||||
"aaa = 3\n",
|
|
||||||
"aaa = 4\n",
|
|
||||||
"aaa = 5\n",
|
|
||||||
"aaa = 6\n",
|
|
||||||
"aaa = 7\n",
|
|
||||||
"aaa = 8\n",
|
|
||||||
"aaa = 9\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"aaa = 0\n",
|
|
||||||
"while aaa<10:\n",
|
|
||||||
" print(\"aaa = \",aaa)\n",
|
|
||||||
" aaa+=1"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"interpreter": {
|
|
||||||
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
|
|
||||||
},
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.9.7 64-bit",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.9.7"
|
|
||||||
},
|
|
||||||
"orig_nbformat": 4
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
@ -1,25 +0,0 @@
|
|||||||
import aimBotEnv
|
|
||||||
import PPO
|
|
||||||
|
|
||||||
ENV_PATH = './Build/Aimbot-PPO'
|
|
||||||
WORKER_ID = 100
|
|
||||||
|
|
||||||
MAX_EP = 1000
|
|
||||||
EP_LENGTH = 400
|
|
||||||
GAMMA = 0.99 # discount future reward (UP?)
|
|
||||||
EPSILON = 0.2 # clip Ratio range[1-EPSILON,1+EPSILON]
|
|
||||||
ACTOR_LR = 1e-5 # LR
|
|
||||||
CRITIC_LR = 2e-5 # LR
|
|
||||||
BATCH = 32 # learning step
|
|
||||||
ACTOR_EPOCH = 10 # epoch
|
|
||||||
CRITIC_EPOCH = 10 # epoch
|
|
||||||
ENTROPY_WHEIGHT = 0.01 # sigma's entropy in Actor loss
|
|
||||||
ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps
|
|
||||||
TRAIN = True
|
|
||||||
|
|
||||||
env = aimBotEnv.makeEnv(envPath = ENV_PATH,workerID = WORKER_ID)
|
|
||||||
STATE_SIZE = env.STATE_SIZE
|
|
||||||
CONTINUOUS_SIZE = env.CONTINUOUS_SIZE
|
|
||||||
DISCRETE_SIZE = env.DISCRETE_SIZE
|
|
||||||
|
|
||||||
CTN_ACTION_RANGE = 2
|
|
@ -29,7 +29,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -73,7 +73,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 16,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -118,7 +118,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -182,7 +182,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -205,7 +205,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -240,16 +240,16 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 20,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"True"
|
"array([[0.]])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 20,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -259,28 +259,119 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"a = np.array([10, 20, 30, 0])\n",
|
"a = np.array([10, 20, 30, 0])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"np.any(a == 0)\n"
|
"np.asarray([[0.]])\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"4"
|
"1.5"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 1,
|
"execution_count": 11,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"asd = \"adsf\"\n",
|
"import numpy as np\n",
|
||||||
"len(asd)"
|
"\n",
|
||||||
|
"asd = [1,2,3,np.array([0.5]),np.array([0.5])]\n",
|
||||||
|
"\n",
|
||||||
|
"asd[3:]\n",
|
||||||
|
"len(asd)\n",
|
||||||
|
"\n",
|
||||||
|
"np.mean([1,2])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"0.0\n",
|
||||||
|
"0.0\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import time\n",
|
||||||
|
"import pyautogui as pag\n",
|
||||||
|
"\n",
|
||||||
|
"from pynput.mouse import Button, Controller\n",
|
||||||
|
"\n",
|
||||||
|
"w = pag.size().width\n",
|
||||||
|
"h = pag.size().height\n",
|
||||||
|
"mouse = Controller()\n",
|
||||||
|
"\n",
|
||||||
|
"nowt = time.time()\n",
|
||||||
|
"\n",
|
||||||
|
"middletime = time.time() - nowt\n",
|
||||||
|
"print(middletime)\n",
|
||||||
|
"# print(nowPos-(w/2))\n",
|
||||||
|
"\n",
|
||||||
|
"print(time.time() - middletime - nowt)\n",
|
||||||
|
"while True:\n",
|
||||||
|
" x,_ = mouse.position\n",
|
||||||
|
" #print(mouse.press)\n",
|
||||||
|
" #print(mouse.position)\n",
|
||||||
|
" \n",
|
||||||
|
" mouse.position = (w / 2, h / 2)\n",
|
||||||
|
" time.sleep(1/60)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import time\n",
|
||||||
|
"import pyautogui as pag\n",
|
||||||
|
"\n",
|
||||||
|
"import mouse\n",
|
||||||
|
"\n",
|
||||||
|
"w = pag.size().width\n",
|
||||||
|
"h = pag.size().height\n",
|
||||||
|
"\n",
|
||||||
|
"nowt = time.time()\n",
|
||||||
|
"\n",
|
||||||
|
"middletime = time.time() - nowt\n",
|
||||||
|
"print(middletime)\n",
|
||||||
|
"# print(nowPos-(w/2))\n",
|
||||||
|
"\n",
|
||||||
|
"print(time.time() - middletime - nowt)\n",
|
||||||
|
"while True:\n",
|
||||||
|
" x = mouse.get_position()\n",
|
||||||
|
" print(x)\n",
|
||||||
|
" #print(mouse.position)\n",
|
||||||
|
" \n",
|
||||||
|
" mouse.move(w / 2, h / 2)\n",
|
||||||
|
" time.sleep(1/60)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import keyboard\n",
|
||||||
|
"\n",
|
||||||
|
"while True:\n",
|
||||||
|
" if keyboard.is_pressed(\"w\"):\n",
|
||||||
|
" print(\"w\")\n",
|
||||||
|
" elif keyboard.is_pressed(\"s\"):\n",
|
||||||
|
" print(\"s\")"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
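The commit also adds Python-side human control; the cells above probe pyautogui/pynput/mouse/keyboard latency and cursor re-centering. A hedged sketch of how keyboard and mouse polling could be turned into the same action list the agent produces (the key bindings, the shoot key, and the discrete encoding mirror actToKey above but are assumptions):

import time
import keyboard
import pyautogui as pag
from pynput.mouse import Controller

mouse = Controller()
w, h = pag.size().width, pag.size().height

def humanAction():
    # WS branch: 0 = back, 1 = stop, 2 = forward (same encoding actToKey decodes)
    ws = 2 if keyboard.is_pressed("w") else 0 if keyboard.is_pressed("s") else 1
    # AD branch: 0 = right, 1 = stop, 2 = left
    ad = 2 if keyboard.is_pressed("a") else 0 if keyboard.is_pressed("d") else 1
    shoot = 1 if keyboard.is_pressed("space") else 0  # assumed key binding
    # horizontal mouse delta from screen centre, then re-centre the cursor
    x, _ = mouse.position
    mouseX = x - w / 2
    mouse.position = (w / 2, h / 2)
    return [ws, ad, shoot, mouseX]

while True:
    actions = humanAction()
    # nextState, reward, done, loadDir, saveNow = env.step(actions)
    time.sleep(1 / 60)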
|