Update PPO class, add Python human control

Python:
Update PPO class
Add Python human control
Unity:
Add FP/TP camera selection button
This commit is contained in:
Koha9 2022-10-11 06:40:15 +09:00
parent de066f3a65
commit ae8a1ba8e2
26 changed files with 3639 additions and 990 deletions

View File

@ -0,0 +1,8 @@
fileFormatVersion: 2
guid: d65d9ca7ae1253341b6790f3a23e3a11
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@ -0,0 +1,10 @@
fileFormatVersion: 2
guid: 39a127fc79ed92d4e88aec711f545d5f
ScriptedImporter:
internalIDToNameTable: []
externalObjects: {}
serializedVersion: 2
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

View File

@ -1 +1 @@
{"count":1,"self":33.6679968,"total":34.5046305,"children":{"InitializeActuators":{"count":2,"self":0.0010002,"total":0.0010002,"children":null},"InitializeSensors":{"count":2,"self":0.0010004,"total":0.0010004,"children":null},"AgentSendState":{"count":1489,"self":0.011503399999999999,"total":0.2010688,"children":{"CollectObservations":{"count":1489,"self":0.1780647,"total":0.1780647,"children":null},"WriteActionMask":{"count":1488,"self":0.0019993999999999997,"total":0.0019993999999999997,"children":null},"RequestDecision":{"count":1488,"self":0.009501299999999999,"total":0.009501299999999999,"children":null}}},"DecideAction":{"count":1488,"self":0.0117408,"total":0.0117408,"children":null},"AgentAct":{"count":1488,"self":0.6208231,"total":0.6208231,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1663089804","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-UCUNI -hubSessionId 4cf980b0-326c-11ed-87c2-a7333acffe7c -accessToken j61gZPw8-vc4ZH7TJMvrSAAPQLV9SK6U72z_dek2xhw00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.0","scene_name":"InGame","end_time_seconds":"1663089838"}}
{"count":1,"self":42.3855296,"total":42.4020608,"children":{"InitializeActuators":{"count":2,"self":0.0015155,"total":0.0015155,"children":null},"InitializeSensors":{"count":2,"self":0.0015017,"total":0.0015017,"children":null},"AgentSendState":{"count":1898,"self":0.0025031999999999997,"total":0.0025031999999999997,"children":null},"DecideAction":{"count":1898,"self":0.0070091999999999993,"total":0.0070091999999999993,"children":null},"AgentAct":{"count":1898,"self":0.0030023,"total":0.0030023,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1665414279","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-UCUNI -hubSessionId 39022900-48a5-11ed-b848-09be5949a456 -accessToken _47qt9I_MF3bhL7JS735Xdmfj8A4dGBOdRNKR0X2L_w00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.0","scene_name":"InGame","end_time_seconds":"1665414322"}}

View File

@ -1 +1 @@
{"count":1,"self":114.25904639999999,"total":114.62062499999999,"children":{"InitializeActuators":{"count":2,"self":0.0010000999999999999,"total":0.0010000999999999999,"children":null},"InitializeSensors":{"count":2,"self":0.0010002,"total":0.0010002,"children":null},"AgentSendState":{"count":1382,"self":0.0080028,"total":0.0195053,"children":{"CollectObservations":{"count":1382,"self":0.0070022999999999995,"total":0.0070022999999999995,"children":null},"WriteActionMask":{"count":1382,"self":0.0004994,"total":0.0004994,"children":null},"RequestDecision":{"count":1382,"self":0.0040008,"total":0.0040008,"children":null}}},"DecideAction":{"count":1382,"self":0.0110034,"total":0.0110034,"children":null},"AgentAct":{"count":1382,"self":0.3290731,"total":0.3290731,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1662500099","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-UCUNI -hubSessionId 209fdf30-2c1f-11ed-916f-33e85f4223cc -accessToken 78EBbrn-dg5kE__h3rNOqQVTDU3b1xUmmwWF1c5sFLc00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.0","scene_name":"Start","end_time_seconds":"1662500214"}}
{"count":1,"self":100.7007424,"total":102.0526476,"children":{"InitializeActuators":{"count":2,"self":0.0015004999999999999,"total":0.0015004999999999999,"children":null},"InitializeSensors":{"count":2,"self":0.0010015,"total":0.0010015,"children":null},"AgentSendState":{"count":2851,"self":0.0227973,"total":0.3594312,"children":{"CollectObservations":{"count":2851,"self":0.3230326,"total":0.3230326,"children":null},"WriteActionMask":{"count":2850,"self":0.0040877,"total":0.0040877,"children":null},"RequestDecision":{"count":2850,"self":0.0095135999999999988,"total":0.0095135999999999988,"children":null}}},"DecideAction":{"count":2850,"self":0.0184923,"total":0.0184923,"children":null},"AgentAct":{"count":2850,"self":0.971482,"total":0.971482,"children":null}},"gauges":{"AKMAgent.CumulativeReward":{"count":1,"max":0,"min":0,"runningAverage":0,"value":0,"weightedAverage":0}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1665340408","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-8AgJBC01I23iOtjIDvezn -hubSessionId a2bff0f0-47ee-11ed-98ba-e72fca9de6f1 -accessToken VHkJOvWIH11sBEzC18rl6YA9y6y2sRMQj2zrOyZdNeE00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.0","scene_name":"Start","end_time_seconds":"1665340510"}}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -21,6 +21,8 @@ public class AgentWithGun : Agent
public Camera thisCam;
public CharacterController PlayerController;
public GameObject enemyPrefab;
public GameObject cameraChangerOBJ;
[Header("Rewards")]
[Tooltip("Nothing happened reward")]
@ -76,12 +78,15 @@ public class AgentWithGun : Agent
private string LoadDirTime;
private float LoadDirDateF;
private float loadDirTimeF;
public bool defaultTPCamera = true;
private StartSeneData DataTransfer;
private UIController UICon;
private HistoryRecorder HistoryRec;
private RaySensors rayScript;
private CameraChange camChanger;
[System.NonSerialized]public float nonReward;
[System.NonSerialized] public float nonReward;
[System.NonSerialized] public float shootReward;
[System.NonSerialized] public float shootWithoutReadyReward;
[System.NonSerialized] public float hitReward;
@ -118,6 +123,8 @@ public class AgentWithGun : Agent
killRewardDefault = DataTransfer.killReward;
winRewardDefault = DataTransfer.winReward;
loseRewardDefault = DataTransfer.loseReward;
lockMouse = DataTransfer.lockMouse;
defaultTPCamera = DataTransfer.defaultTPCamera;
// change Decision Period & Take Actions Between Decisions
transform.GetComponent<DecisionRequester>().DecisionPeriod = DataTransfer.DecisionPeriod;
@ -156,6 +163,7 @@ public class AgentWithGun : Agent
UICon = transform.GetComponent<UIController>();
HistoryRec = transform.GetComponent<HistoryRecorder>();
rayScript = GetComponent<RaySensors>();
camChanger = cameraChangerOBJ.GetComponent<CameraChange>();
// assign the default reward values to the reward fields that will be used.
nonReward = nonRewardDefault;
@ -167,6 +175,15 @@ public class AgentWithGun : Agent
killReward = killRewardDefault;
//initialize remainTime
remainTime = (int)(timeLimit - Time.time + startTime);
// change default camera view
if (defaultTPCamera)
{
camChanger.ShowTPSView();
}
else
{
camChanger.ShowFPSView();
}
}
}
@ -203,27 +220,9 @@ public class AgentWithGun : Agent
// ------------Action handling--------------
// moveAgent simulates Input.GetAxis-style movement
public void moveAgent(int kW, int kS,int kA,int kD)
public void moveAgent(int vertical, int horizontal)
{
Vector3 thisMovement;
int horizontal = 0;
int vertical = 0;
if (kW==1 && kS != 1)
{
vertical = 1;
}
else if (kS==1 && kW!=1)
{
vertical = -1;
}
if (kD==1 && kA!=1)
{
horizontal = 1;
}
else if (kA ==1 && kD!=1)
{
horizontal = -1;
}
if (horizontal != 0)// key pressed (horizontal direction)
{
@ -295,7 +294,7 @@ public class AgentWithGun : Agent
// ------------Action handling--------------
// cameraControl controls the Agent's camera rotation
public void cameraControl(float Mouse_X,float Mouse_Y)
public void cameraControl(float Mouse_X, float Mouse_Y)
{
//Mouse_X = Input.GetAxis("Mouse X") * MouseSensitivity * Time.deltaTime;
//Debug.Log(Input.GetAxis("Mouse X"));
@ -359,7 +358,7 @@ public class AgentWithGun : Agent
RaycastHit hit;
Debug.DrawRay(ray.origin, ray.direction * 100, Color.blue);
bool isGunReady = gunReady();
UICon.updateShootKeyViewer(shoot,isGunReady);
UICon.updateShootKeyViewer(shoot, isGunReady);
//left mouse button pressed
if (shoot != 0 && isGunReady == true)
{
@ -420,12 +419,12 @@ public class AgentWithGun : Agent
{
GameObject[] EnemyGameObjs;
EnemyGameObjs = GameObject.FindGameObjectsWithTag("Enemy");
if(EnemyGameObjs.Length <= 1)
if (EnemyGameObjs.Length <= 1)
{
//all Enemies killed successfully
return 1;
}
else if(Time.time - startTime >= timeLimit)
else if (Time.time - startTime >= timeLimit)
{
//timed out: lose
return 2;
@ -477,9 +476,9 @@ public class AgentWithGun : Agent
{
float epreward = 0f;
// kill reward check
if(enemyKillCount > 0)
if (enemyKillCount > 0)
{
for(int i = 0;i < enemyKillCount; i++)
for (int i = 0; i < enemyKillCount; i++)
{
epreward += killReward;
}
@ -506,7 +505,7 @@ public class AgentWithGun : Agent
}
if (lockMouse)
{
Cursor.lockState = CursorLockMode.Locked; // 隐藏并且锁定鼠标
Cursor.lockState = CursorLockMode.Locked; // hide and lock the mouse
}
//iniCharts();
thisAgentObj.name = thisAgentObj.GetInstanceID().ToString();
@ -549,35 +548,26 @@ public class AgentWithGun : Agent
public override void OnActionReceived(ActionBuffers actionBuffers)
{
//get inputs
int kW = actionBuffers.DiscreteActions[0];
int kS = actionBuffers.DiscreteActions[1];
int kA = actionBuffers.DiscreteActions[2];
int kD = actionBuffers.DiscreteActions[3];
int mouseShoot = actionBuffers.DiscreteActions[4];
int vertical = actionBuffers.DiscreteActions[0];
int horizontal = actionBuffers.DiscreteActions[1];
int mouseShoot = actionBuffers.DiscreteActions[2];
float Mouse_X = actionBuffers.ContinuousActions[0];
//float Mouse_Y = actionBuffers.ContinuousActions[1];
//int timeLimitControl = (int)actionBuffers.ContinuousActions[2];
//float nonRewardIn = actionBuffers.ContinuousActions[1];
//float shootRewardIn = actionBuffers.ContinuousActions[2];
//float shootWithoutReadyRewardIn = actionBuffers.ContinuousActions[3];
//float hitRewardIn = actionBuffers.ContinuousActions[4];
//float winRewardIn = actionBuffers.ContinuousActions[5];
// loseRewardIn = actionBuffers.ContinuousActions[6];
//float killRewardIn = actionBuffers.ContinuousActions[7];
//Rewards Update
if (vertical == 2) vertical = -1;
if (horizontal == 2) horizontal = -1;
remainTime = (int)(timeLimit - Time.time + startTime);
//apply inputs
shoot = mouseShoot;
HistoryRec.realTimeKeyCounter(kW, kS, kA, kD, shoot);
HistoryRec.realTimeKeyCounter(vertical, horizontal, shoot);
(int kWCount, int kSCount, int kACount, int kDCount, int shootCount) = HistoryRec.getKeyCount();
UICon.updateRemainTime(remainTime);
UICon.updateWASDKeyViewer(kW, kS, kA, kD);
UICon.updateRemainEnemy(enemyNum);
UICon.updateWASDKeyViewer(vertical, horizontal);
UICon.updateKeyCounterChart(kWCount, kSCount, kACount, kDCount, shootCount);
UICon.updateMouseMovementViewer(Mouse_X);
UICon.updateRewardViewer(nonReward, shootReward, shootWithoutReadyReward, hitReward, winReward, loseReward, killReward);
cameraControl(Mouse_X, 0);
moveAgent(kW, kS, kA, kD);
moveAgent(vertical, horizontal);
float thisRoundReward = rewardCalculate();
//check whether the episode has finished
@ -595,7 +585,7 @@ public class AgentWithGun : Agent
Debug.Log("reward = " + winReward);
EndEpisode();
}
else if(finished == 2)
else if (finished == 2)
{
//Lose Finished
HistoryRec.addRealTimeReward(loseReward);
@ -628,37 +618,45 @@ public class AgentWithGun : Agent
ActionSegment<float> continuousActions = actionsOut.ContinuousActions;
ActionSegment<int> discreteActions = actionsOut.DiscreteActions;
int kW = 0;
int kS = 0;
int kA = 0;
int kD = 0;
if (Input.GetKey(KeyCode.W))
int vertical = 0;
int horizontal = 0;
if (Input.GetKey(KeyCode.W) && !Input.GetKey(KeyCode.S))
{
kW = 1;
vertical = 1;
}
if (Input.GetKey(KeyCode.S))
else if (Input.GetKey(KeyCode.S) && !Input.GetKey(KeyCode.W))
{
kS = 1;
vertical = -1;
}
if (Input.GetKey(KeyCode.A))
else
{
kA = 1;
vertical = 0;
}
if (Input.GetKey(KeyCode.D))
if (Input.GetKey(KeyCode.D) && !Input.GetKey(KeyCode.A))
{
kD = 1;
horizontal = 1;
}
else if (Input.GetKey(KeyCode.A) && !Input.GetKey(KeyCode.D))
{
horizontal = -1;
}
else
{
horizontal = 0;
}
discreteActions[0] = kW;
discreteActions[1] = kS;
discreteActions[2] = kA;
discreteActions[3] = kD;
if (Input.GetMouseButton(0))
{
// Debug.Log("mousebuttonhit");
shoot = 1;
}
discreteActions[4] = shoot;
else
{
shoot = 0;
}
discreteActions[0] = vertical;
discreteActions[1] = horizontal;
discreteActions[2] = shoot;
//^^^^^^^^^^^^^^^^^^^^^discrete-Control^^^^^^^^^^^^^^^^^^^^^^
//vvvvvvvvvvvvvvvvvvvvvvvvvvvvvcontinuous-Controlvvvvvvvvvvvvvvvvvvvvvv
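
For reference, here is a small Python sketch (illustrative only, not part of this commit) of the collapsed discrete action layout the agent now uses: three branches [vertical, horizontal, shoot], where OnActionReceived maps a branch value of 2 to -1.

def _to_axis(v: int) -> int:
    # Mirror OnActionReceived: branch values {0, 1, 2} map to axis values {0, 1, -1}.
    return -1 if v == 2 else v

def decode_discrete(vertical: int, horizontal: int, shoot: int):
    """Decode the three discrete branches into movement axes plus the shoot flag."""
    return _to_axis(vertical), _to_axis(horizontal), shoot

# Example: W + A pressed, no shot -> branches (1, 2, 0) -> axes (1, -1, 0)
assert decode_discrete(1, 2, 0) == (1, -1, 0)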

View File

@ -31,24 +31,24 @@ public class HistoryRecorder : MonoBehaviour
{
EPTotalShootCount.Add(TotalShootCount);
}
public void realTimeKeyCounter(int kW, int kS, int kA, int kD, int shoot)
public void realTimeKeyCounter(int vertical, int horizontal, int shoot)
{
if (kW == 1)
if (vertical == 1)
{
realTimeWKeyCount += 1;
}
if (kS == 1)
else if (vertical == -1)
{
realTimeSKeyCount += 1;
}
if (kA == 1)
{
realTimeAKeyCount += 1;
}
if (kD == 1)
if (horizontal == 1)
{
realTimeDKeyCount += 1;
}
else if (horizontal == -1)
{
realTimeAKeyCount += 1;
}
if (shoot == 1)
{
realTimeShootCount += 1;

View File

@ -69,39 +69,37 @@ public class UIController : MonoBehaviour
}
//------------Key Viewer----------
public void updateWASDKeyViewer(int kW,int kS,int kA,int kD)
public void updateWASDKeyViewer(int vertical,int horizontal)
{
if (kW == 1)
if (vertical == 1)
{
upText.color = Color.red;
downText.color = Color.black;
}
else
{
upText.color = Color.black;
}
if (kS == 1)
else if (vertical == -1)
{
downText.color = Color.red;
upText.color = Color.black;
}
else
{
downText.color = Color.black;
upText.color = Color.black;
}
if(kA == 1)
{
leftText.color = Color.red;
}
else
{
leftText.color = Color.black;
}
if( kD == 1)
if (horizontal == 1)
{
rightText.color = Color.red;
leftText.color = Color.black;
}
else if (horizontal == -1)
{
leftText.color = Color.red;
rightText.color = Color.black;
}
else
{
rightText.color = Color.black;
downText.color = Color.black;
upText.color = Color.black;
}
}
public void updateShootKeyViewer(int shoot,bool isGunReady)

View File

@ -0,0 +1,27 @@
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
public class gameFlowController : MonoBehaviour
{
public GameObject Agent;
AgentWithGun agentWithGun;
// Start is called before the first frame update
void Start()
{
agentWithGun = Agent.GetComponent<AgentWithGun>();
}
// Update is called once per frame
void Update()
{
if (Input.GetKey(KeyCode.Escape))
{
Application.Quit();
}
if (Input.GetKey(KeyCode.L))
{
agentWithGun.lockMouse = !agentWithGun.lockMouse;
}
}
}

View File

@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 9a8fb4d12d4b8fc4784f3e142e7fdcf8
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@ -19,6 +19,21 @@ public class EnvArgsChanger : MonoBehaviour
public Text DecisionPeriodDataText;
public Toggle TakeActionsBetweenDecisionsToggle;
[Header("Lock Mouse")]
public Toggle LockMouseToggle;
[Header("Default Camera")]
public Toggle FPToggle;
public Text FPText;
public Toggle TPToggle;
public Text TPText;
private StartSeneData startSeneData;
private void Start()
{
startSeneData = DataTransfer.GetComponent<StartSeneData>();
}
public void onEnemynumValueChanged()
{
@ -30,7 +45,7 @@ public class EnvArgsChanger : MonoBehaviour
else
{
EnemyNumText.color = Color.yellow;
DataTransfer.GetComponent<StartSeneData>().EnemyNum = Math.Abs(int.Parse(EnemyNumInput.GetComponent<InputField>().text));
startSeneData.EnemyNum = Math.Abs(int.Parse(EnemyNumInput.GetComponent<InputField>().text));
}
}
@ -44,19 +59,48 @@ public class EnvArgsChanger : MonoBehaviour
else
{
TimeLimText.color = Color.yellow;
DataTransfer.GetComponent<StartSeneData>().Timelim = Math.Abs(int.Parse(TimelimInput.GetComponent<InputField>().text));
startSeneData.Timelim = Math.Abs(int.Parse(TimelimInput.GetComponent<InputField>().text));
}
}
public void onDPSlideValueChanged()
{
// DecisionPeriod(DP) value Control
DataTransfer.GetComponent<StartSeneData>().DecisionPeriod = (int)(DecisionPeriodSlide.GetComponent<Slider>().value);
DecisionPeriodDataText.text = DataTransfer.GetComponent<StartSeneData>().DecisionPeriod.ToString();
startSeneData.DecisionPeriod = (int)(DecisionPeriodSlide.GetComponent<Slider>().value);
DecisionPeriodDataText.text = startSeneData.DecisionPeriod.ToString();
}
public void onABDToggleChanged()
{
// Actions Between Decisions(ABD) Toggle Control
DataTransfer.GetComponent<StartSeneData>().ActionsBetweenDecisions = TakeActionsBetweenDecisionsToggle.isOn;
startSeneData.ActionsBetweenDecisions = TakeActionsBetweenDecisionsToggle.isOn;
}
public void onLockMouseToggleChanged()
{
// lock mouse or not
startSeneData.lockMouse = LockMouseToggle.isOn;
}
public void onTPCamToggleChanged()
{
startSeneData.defaultTPCamera = true;
FPToggle.interactable = true;
FPToggle.SetIsOnWithoutNotify(false);
FPText.color = Color.gray;
TPToggle.SetIsOnWithoutNotify(true);
TPToggle.interactable = false;
TPText.color = Color.green;
}
public void onFPCameToggleChanged()
{
startSeneData.defaultTPCamera = false;
TPToggle.interactable = true;
TPToggle.SetIsOnWithoutNotify(false);
TPText.color = Color.gray;
FPToggle.SetIsOnWithoutNotify(true);
FPToggle.interactable = false;
FPText.color = Color.green;
}
}

View File

@ -13,6 +13,8 @@ public class StartSeneData : MonoBehaviour
public float killRewardDefault = 10.0f;
public float winRewardDefault = 20.0f;
public float loseRewardDefault = -10.0f;
public bool lockMouse = false;
public bool defaultTPCamera = true;
// LoadDir
[System.NonSerialized]public string LoadDirDate = "0";

View File

@ -0,0 +1,90 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n"
]
}
],
"source": [
"import time\n",
"import aimBotEnv\n",
"from HumanAction import HumanActions\n",
"\n",
"# Env\n",
"ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"\n",
"MOUSEDISCOUNT = 8.0\n",
"MAX_EP = 10000000\n",
"\n",
"env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "UnityCommunicatorStoppedException",
"evalue": "Communicator has exited.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mUnityCommunicatorStoppedException\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_37248/645561173.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mactions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdemoAct\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetHumanActions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32mc:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, actions, behaviorName, trackedAgent)\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[1;31m# take action to env\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 73\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_actions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehavior_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mthisActionTuple\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 74\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 75\u001b[0m \u001b[1;31m# get nextState & reward & done after this action\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 76\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloadDir\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msaveNow\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetSteps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrackedAgent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\timers.py\u001b[0m in \u001b[0;36mwrapped\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 304\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mhierarchical_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__qualname__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 305\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 306\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 307\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapped\u001b[0m \u001b[1;31m# type: ignore\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\environment.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 333\u001b[0m \u001b[0moutputs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_communicator\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexchange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstep_input\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_poll_process\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 334\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0moutputs\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 335\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mUnityCommunicatorStoppedException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Communicator has exited.\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 336\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_update_behavior_specs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 337\u001b[0m \u001b[0mrl_output\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0moutputs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrl_output\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mUnityCommunicatorStoppedException\u001b[0m: Communicator has exited."
]
}
],
"source": [
"done = False\n",
"env.reset()\n",
"demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n",
"for ep in range(MAX_EP):\n",
" while not done:\n",
" actions = demoAct.getHumanActions()\n",
" env.step(actions=actions)6\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,51 @@
import keyboard
import mouse
class HumanActions:
def __init__(self, mouseDiscount: float = 10, screenW: int = 1920, screenH: int = 1080):
def multiPressed():
pass
keyboard.add_hotkey("w+a", multiPressed)
keyboard.add_hotkey("w+d", multiPressed)
keyboard.add_hotkey("s+a", multiPressed)
keyboard.add_hotkey("s+d", multiPressed)
self.screenW = screenW
self.screenH = screenH
self.MOUSEDISCOUNT = mouseDiscount
def getHumanActions(self):
x, _ = mouse.get_position()
xMovement = (x - self.screenW / 2) / self.MOUSEDISCOUNT
ws = 0
ad = 0
click = 0
if keyboard.is_pressed("w"):
ws = 1
elif keyboard.is_pressed("s"):
ws = 2
if keyboard.is_pressed("d"):
ad = 1
elif keyboard.is_pressed("a"):
ad = 2
if keyboard.is_pressed("w+d"):
ws = 1
ad = 1
elif keyboard.is_pressed("w+a"):
ws = 1
ad = 2
elif keyboard.is_pressed("s+d"):
ws = 2
ad = 1
elif keyboard.is_pressed("s+a"):
ws = 2
ad = 2
if mouse.is_pressed(button="left"):
click = 1
actions = [ws, ad, click, [xMovement]]
mouse.move(self.screenW / 2, self.screenH / 2)
return actions
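
Below is a hedged usage sketch of HumanActions, mirroring the notebook earlier in this commit; the tuple returned by env.step is inferred from the aimBotEnv traceback above and is an assumption, not a documented API.

import aimBotEnv
from HumanAction import HumanActions

# same arguments as the notebook above
env = aimBotEnv.makeEnv(envPath="./Build-CloseEnemyCut/Aimbot-PPO", workerID=1, basePort=200)
demoAct = HumanActions(mouseDiscount=8.0)

env.reset()
done = False
while not done:
    # actions = [ws, ad, click, [xMovement]]; ws/ad use 0/1/2, where 2 means S/A (the negative direction)
    actions = demoAct.getHumanActions()
    # assumption: step() returns (nextState, reward, done, loadDir, saveNow), as the traceback suggests
    nextState, reward, done, loadDir, saveNow = env.step(actions=actions)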

File diff suppressed because it is too large

View File

@ -1,108 +1,213 @@
import tensorflow as tf
from tensorflow.python.ops.numpy_ops import ndarray
import tensorflow_probability as tfp
import numpy as np
import time
import math
import copy
import datetime
import os
from PPOConfig import PPOConfig
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from keras_radam import RAdam
EPS = 1e-10
class PPO(object):
"""Create PPO Agent
"""
def __init__(
self,
stateSize: int,
disActShape: list,
conActSize: int,
conActRange: float,
PPOConfig: PPOConfig,
):
"""initialize PPO
def __init__(self, stateSize, disActShape, conActSize, conActRange, criticLR, actorLR, gamma, epsilon, entropyWeight, saveDir, loadModelDir):
# check disActShape is correct(greater than 1)
try:
if np.any(np.array(disActShape)<=1):
raise ValueError("disActShape error,disActShape should greater than 1 but get",disActShape)
except ValueError as e:
raise
Args:
stateSize (int): environment state size
disActShape (list): discrete action shape,
e.g. [3,2] means 2 discrete action branches with 3 and 2 choices each;
use [0] if there is no discrete action output.
conActSize (int): continuous action size; use 0 if there is no continuous action output.
conActRange (float): continuous action range. -conActRange to +conActRange
PPOConfig (PPOConfig): PPO configuration
"""
# check use dis action or not.
if disActShape == [0]:
# non dis action output
self.disActSize = 0
self.disOutputSize = 0
else:
# make sure disActShape greater than 1
try:
if np.any(np.array(disActShape) <= 1):
raise ValueError(
"disActShape error,disActShape should greater than 1 but get", disActShape
)
except ValueError:
raise
self.disActSize = len(disActShape)
self.disOutputSize = sum(disActShape)
self.stateSize = stateSize
# self.actionSize = actionSize
self.disActShape = disActShape # shape of discrete action output. like [3,3,2]
self.disActSize = len(disActShape)
self.disActShape = disActShape
self.conActSize = conActSize
self.conActRange = conActRange
self.criticLR = criticLR
self.actorLR = actorLR
self.GAMMA = gamma
self.EPSILON = epsilon
self.saveDir = saveDir
self.entropyWeight = entropyWeight
self.muSigSize = 2
self.conOutputSize = conActSize * self.muSigSize
self.disOutputSize = sum(disActShape)
self.conOutputSize = conActSize * 2
# config
self.NNShape = PPOConfig.NNShape
self.criticLR = PPOConfig.criticLR
self.actorLR = PPOConfig.actorLR
self.gamma = PPOConfig.gamma
self.lmbda = PPOConfig.lmbda
self.clipRange = PPOConfig.clipRange
self.entropyWeight = PPOConfig.entropyWeight
self.trainEpochs = PPOConfig.trainEpochs
self.saveDir = PPOConfig.saveDir
self.loadModelDir = PPOConfig.loadModelDir
print("---------thisPPO Params---------")
print("self.stateSize = ", self.stateSize)
print("self.disActShape = ", self.disActShape)
print("self.disActSize", self.disActSize)
print("self.disOutputSize", self.disOutputSize)
print("self.conActSize = ", self.conActSize)
print("self.conActRange = ", self.conActRange)
print("self.conOutputSize = ", self.conOutputSize)
if loadModelDir == None:
# config
print("---------thisPPO config---------")
print("self.NNShape = ", self.NNShape)
print("self.criticLR = ", self.criticLR)
print("self.actorLR = ", self.actorLR)
print("self.gamma = ", self.gamma)
print("self.lmbda = ", self.lmbda)
print("self.clipRange = ", self.clipRange)
print("self.entropyWeight = ", self.entropyWeight)
print("self.trainEpochs = ", self.trainEpochs)
print("self.saveDir = ", self.saveDir)
print("self.loadModelDir = ", self.loadModelDir)
# load NN or not
if self.loadModelDir is None:
# critc NN
self.critic = self.buildCriticNet(self.stateSize, 1, compileModel = True)
self.critic = self.buildCriticNet(self.stateSize, 1, compileModel=True)
# actor NN
self.actor = self.buildActorNet(self.stateSize, self.conActRange, compileModel = True)
self.actor = self.buildActorNet(self.stateSize, compileModel=True)
print("---------Actor Model Create Success---------")
self.actor.summary()
print("---------Critic Model Create Success---------")
self.critic.summary()
else:
# critc NN
self.critic = self.buildCriticNet(self.stateSize, 1, compileModel=True)
# actor NN
self.actor = self.buildActorNet(self.stateSize, self.conActRange, compileModel=True)
self.actor = self.buildActorNet(self.stateSize, compileModel=True)
# load weight to Critic&Actor NN
self.loadWeightToModels(loadModelDir)
self.loadWeightToModels(self.loadModelDir)
print("---------Actor Model Load Success---------")
self.actor.summary()
print("---------Critic Model Load Success---------")
self.critic.summary()
# Build Net
def buildActorNet(self, inputSize, continuousActionRange,compileModel):
def buildActorNet(self, inputSize: int, compileModel: bool):
"""build Actor Nueral Net and compile.Output:[disAct1,disAct2,disAct3,mu,sigma]
Args:
inputSize (int): InputLayer Neural size.
continuousActionRange (float): continuous Action's max Range.
compileModel (bool): compile Model or not.
Returns:
keras.Model: return Actor NN
"""
stateInput = layers.Input(shape=(inputSize,), name='stateInput')
dense0 = layers.Dense(500, activation='relu',name='dense0',)(stateInput)
dense1 = layers.Dense(200, activation='relu',name='dense1',)(dense0)
dense2 = layers.Dense(100, activation='relu', name='dense2')(dense1)
# -----------Input Layers-----------
stateInput = layers.Input(shape=(inputSize,), name="stateInput")
disAct1 = layers.Dense(3, activation='softmax',name='WSAction')(dense2) # WS
disAct2 = layers.Dense(3, activation='softmax',name='ADAction')(dense2) # AD
disAct3 = layers.Dense(2, activation='softmax',name='ShootAction')(dense2) # Mouse shoot
mu = continuousActionRange * layers.Dense(1, activation='tanh', name='muOut')(dense2) # mu, the mean of the normal distribution
sigma = 1e-8 + layers.Dense(1, activation='softplus',name='sigmaOut')(dense2) # sigma of the normal distribution
# musig = layers.concatenate([mu,sigma],name = 'musig')
totalOut = layers.concatenate(
[disAct1, disAct2, disAct3, mu, sigma], name='totalOut') # package
# -------Intermediate layers--------
interLayers = []
interLayersIndex = 0
for neuralUnit in self.NNShape:
thisLayerName = "dense" + str(interLayersIndex)
if interLayersIndex == 0:
interLayers.append(
layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(stateInput)
)
else:
interLayers.append(
layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(interLayers[-1])
)
interLayersIndex += 1
# ----------Output Layers-----------
outputLayersList = []
if self.disActSize != 0:
# while NN have discrete action output.
disActIndex = 0
for thisDisActDepth in self.disActShape:
thisDisActName = "disAct" + str(disActIndex)
outputLayersList.append(
layers.Dense(thisDisActDepth, activation="softmax", name=thisDisActName)(
interLayers[-1]
)
)
disActIndex += 1
if self.conActSize != 0:
# while NN have continuous action output.
mu = tf.multiply(
layers.Dense(1, activation="tanh", name="muOut")(interLayers[-1]), self.conActRange
) # mu, the location parameter of the normal distribution
sigma = tf.add(
layers.Dense(1, activation="softplus", name="sigmaOut")(interLayers[-1]), EPS
) # sigma, the scale parameter of the normal distribution
outputLayersList.append(mu)
outputLayersList.append(sigma)
totalOut = layers.concatenate(outputLayersList, name="totalOut") # package
# ----------Model Compile-----------
model = keras.Model(inputs=stateInput, outputs=totalOut)
#actorOPT = optimizers.Adam(learning_rate = self.actorLR)
if compileModel:
actorOPT = RAdam(self.actorLR)
if compileModel: # Compile Model
actorOPT = optimizers.Adam(learning_rate=self.actorLR)
model.compile(optimizer=actorOPT, loss=self.aLoss())
return model
def buildCriticNet(self, inputSize, outputSize,compileModel):
def buildCriticNet(self, inputSize: int, outputSize: int, compileModel: bool):
"""build Critic Nueral Net and compile.Output:[Q]
Args:
inputSize (int): InputLayer Neural Size
outputSize (float): Q size
inputSize (int): input size
outputSize (int): output size
compileModel (bool): compile Model or not.
Returns:
keras.Model: return Critic NN
"""
stateInput = keras.Input(shape=(inputSize,))
dense0 = layers.Dense(500, activation='relu',
name='dense0',)(stateInput)
dense1 = layers.Dense(200, activation='relu')(dense0)
dense2 = layers.Dense(100, activation='relu')(dense1)
output = layers.Dense(outputSize)(dense2)
# -----------Input Layers-----------
stateInput = keras.Input(shape=(inputSize,), name="stateInput")
# -------Intermediate layers--------
interLayers = []
interLayersIndex = 0
for neuralUnit in self.NNShape:
thisLayerName = "dense" + str(interLayersIndex)
if interLayersIndex == 0:
interLayers.append(
layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(stateInput)
)
else:
interLayers.append(
layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(interLayers[-1])
)
interLayersIndex += 1
# ----------Output Layers-----------
output = layers.Dense(outputSize, activation=None)(interLayers[-1])
# ----------Model Compile-----------
model = keras.Model(inputs=stateInput, outputs=output)
if compileModel:
criticOPT = optimizers.Adam(learning_rate=self.criticLR)
@ -110,39 +215,53 @@ class PPO(object):
return model
# loss Function
# critic loss
def cLoss(self):
"""Critic Loss function
"""
"""Critic Loss function"""
def loss(y_true, y_pred):
# y_true: discountedR
# y_pred: critcV = model.predict(states)
advantage = y_true - y_pred # TD error
loss = tf.reduce_mean(tf.square(advantage))
adv = y_true - y_pred # TD error
loss = tf.reduce_mean(tf.square(adv))
return loss
return loss
# actor loss
def aLoss(self):
def getDiscreteALoss(nowProbs,oldProbs,advantage):
"""Actor Loss function"""
def getDiscreteALoss(nowProbs, oldProbs, disOneHotAct, actShape, advantage):
"""get Discrete Action Loss
Args:
nowProbs (tf.constant): (length,actionSize)
oldProbs (tf.constant): (length,actionSize)
nowProbs (tf.constant): (length,actionProbSize)
oldProbs (tf.constant): (length,actionProbSize)
disOneHotAct (tf.constant): (length,actionProbSize) one-hot encoded discrete actions
actShape (int): number of choices in this discrete action branch
advantage (tf.constant): (length,)
Returns:
tf.constant: (length,)
"""
entropy = tf.reduce_mean(tf.math.multiply(nowProbs,tf.math.log(nowProbs+1e-6)))
ratio = tf.math.divide(nowProbs,oldProbs+1e-6)
value = tf.math.multiply(ratio,tf.expand_dims(advantage,axis = 1))
clipRatio = tf.clip_by_value(ratio,1. - self.EPSILON,1.+self.EPSILON)
clipValue = tf.math.multiply(clipRatio,tf.expand_dims(advantage,axis = 1))
loss = -tf.reduce_mean(tf.math.minimum(value,clipValue)) + self.entropyWeight * entropy
entropy = tf.negative(
tf.reduce_mean(tf.math.multiply(nowProbs, tf.math.log(nowProbs + EPS)))
)
nowSingleProbs = tf.reduce_mean(tf.multiply(nowProbs, disOneHotAct), axis=1)
nowSingleProbs = tf.multiply(nowSingleProbs, actShape)
oldSingleProbs = tf.reduce_mean(tf.multiply(oldProbs, disOneHotAct), axis=1)
oldSingleProbs = tf.multiply(oldSingleProbs, actShape)
ratio = tf.math.divide(nowSingleProbs, oldSingleProbs + EPS)
value = tf.math.multiply(ratio, advantage)
clipRatio = tf.clip_by_value(ratio, 1.0 - self.clipRange, 1.0 + self.clipRange)
clipValue = tf.math.multiply(clipRatio, advantage)
loss = tf.math.negative(
tf.reduce_mean(tf.math.minimum(value, clipValue))
- tf.multiply(self.entropyWeight, entropy)
)
return loss
def getContinuousALoss(musig,actions,oldProbs,advantage):
def getContinuousALoss(musig, actions, oldProbs, advantage):
"""get Continuous Action Loss
Args:
@ -154,150 +273,376 @@ class PPO(object):
Returns:
tf.constant: (length,)
"""
mu = musig[:,0]
sigma = musig[:,1]
dist = tfp.distributions.Normal(mu,sigma)
mu = musig[:, 0]
sigma = musig[:, 1]
dist = tfp.distributions.Normal(mu, sigma)
nowProbs = dist.prob(actions)
ratio = tf.math.divide(nowProbs,oldProbs+1e-6)
entropy = tf.reduce_mean(dist.entropy())
value = tf.math.multiply(ratio,tf.expand_dims(advantage,axis = 1))
clipValue = tf.clip_by_value(ratio,1. - self.EPSILON,1.+self.EPSILON) * advantage
loss = -tf.reduce_mean(tf.math.minimum(value,clipValue)) + self.entropyWeight * entropy
ratio = tf.math.divide(nowProbs, oldProbs + EPS)
value = tf.math.multiply(ratio, advantage)
clipRatio = tf.clip_by_value(ratio, 1.0 - self.clipRange, 1.0 + self.clipRange)
clipValue = tf.math.multiply(clipRatio, advantage)
loss = tf.negative(
tf.reduce_mean(tf.math.minimum(value, clipValue))
- tf.multiply(self.entropyWeight, entropy)
)
return loss
def loss(y_true, y_pred):
# y_true: [[disAct1, disAct2, disAct3, mu, sigma]]
# y_pred: muSigma = self.actor(state) =
# [[disAct1, disAct2, disAct3, mu, sigma]]
oldDisProbs = y_true[:,0:self.disOutputSize]
oldConMusigs = y_true[:,self.disOutputSize:self.disOutputSize+self.conActSize]
conActions = y_true[:,self.disOutputSize+self.conActSize:self.disOutputSize+(self.conActSize*2)]
advantage = y_true[:,-1]
nowDisProbs = y_pred[:,0:self.disOutputSize] # [disAct1, disAct2, disAct3]
nowConMusigs = y_pred[:,self.disOutputSize:] #[musig1,musig2]
totalALoss = tf.constant([0.])
# y_true: [[disActProb..., conActProbs..., disOneHotActs..., conAct..., advantage]]
# y_pred: [[disActProb..., mu, sigma...]]
totalALoss = 0
totalActionNum = 0
advantage = tf.expand_dims(y_true[:, -1], axis=1)
# for nowProb,oldProb in zip(tf.transpose(nowDisProbs,perm=[1,0,2]),tf.transpose(oldDisProbs,perm=[1,0,2])):
lastDisActShape = 0
for shape in self.disActShape:
thisNowDisProbs = nowDisProbs[:,lastDisActShape:lastDisActShape+shape]
thisOldDisProbs = oldDisProbs[:,lastDisActShape:lastDisActShape+shape]
discreteALoss = getDiscreteALoss(thisNowDisProbs,thisOldDisProbs,advantage)
lastDisActShape += shape
totalALoss += discreteALoss
totalActionNum += 1
# for nowConMusig,conAction,oldPiProb in zip(tf.transpose(nowConMusigs,perm=[1,0,2]),conActions,oldPiProbs):
lastConAct = 0
for act in range(self.conActSize):
thisNowConMusig = nowConMusigs[:,lastConAct:lastConAct+((act+1)*2)]
thisOldConMusig = oldConMusigs[:,lastConAct:lastConAct+((act+1)*2)]
thisConAction = conActions[:,act]
continuousAloss = getContinuousALoss(thisNowConMusig,thisConAction,thisOldConMusig,advantage)
totalALoss += continuousAloss
totalActionNum += 1
loss = tf.divide(totalALoss,totalActionNum)
if self.disActSize != 0:
# while NN have discrete action output.
oldDisProbs = y_true[:, 0 : self.disOutputSize]
nowDisProbs = y_pred[:, 0 : self.disOutputSize] # [disAct1, disAct2, disAct3]
disOneHotActs = y_true[
:,
self.disOutputSize
+ self.conActSize : self.disOutputSize
+ self.conActSize
+ self.disOutputSize,
]
lastDisActShape = 0
for thisShape in self.disActShape:
thisNowDisProbs = nowDisProbs[:, lastDisActShape : lastDisActShape + thisShape]
thisOldDisProbs = oldDisProbs[:, lastDisActShape : lastDisActShape + thisShape]
thisDisOneHotActs = disOneHotActs[
:, lastDisActShape : lastDisActShape + thisShape
]
discreteALoss = getDiscreteALoss(
thisNowDisProbs, thisOldDisProbs, thisDisOneHotActs, thisShape, advantage
)
lastDisActShape += thisShape
totalALoss += discreteALoss
totalActionNum += 1.0
if self.conActSize != 0:
# while NN have continuous action output.
oldConProbs = y_true[:, self.disOutputSize : self.disOutputSize + self.conActSize]
conActions = y_true[
:,
self.disOutputSize
+ self.conActSize : self.disOutputSize
+ self.conActSize
+ self.conActSize,
]
nowConMusigs = y_pred[:, self.disOutputSize :] # [musig1,musig2]
lastConAct = 0
for conAct in range(self.conActSize):
thisNowConMusig = nowConMusigs[:, lastConAct : lastConAct + self.muSigSize]
thisOldConProb = oldConProbs[:, conAct : conAct + 1]
thisConAction = conActions[:, conAct]
continuousAloss = getContinuousALoss(
thisNowConMusig, thisConAction, thisOldConProb, advantage
)
totalALoss += continuousAloss
totalActionNum += 1.0
lastConAct += self.muSigSize
loss = tf.divide(totalALoss, totalActionNum)
return loss
return loss
# get Action&V
def chooseAction(self, state):
# get Actions&values
def chooseAction(self, state: ndarray):
"""Agent choose action to take
Args:
state (np.array): enviroment state
state (ndarray): environment state
Returns:
np.array:
disAct1,
discreteAction1
disAct2,
discreteAction2
disAct3,
discreteAction3
conAction,
continuousAction
actions,
actions list, 2 dims like [[0],[1],[1.5]]
predictResult,
actor NN predict Result output
"""
# let actor choose action,use the normal distribution
# state = np.expand_dims(state,0)
# check state dimension is [1,statesize]
if state.ndim!=2:
state = state.reshape([1,self.stateSize])
# check state dimension is [stateNum,statesize]
if state.ndim != 2:
stateNum = int(len(state) / self.stateSize)
state = state.reshape([stateNum, self.stateSize])
predictResult = self.actor(state) # get predict result [[disAct1, disAct2, disAct3, musig]]
predictResult = predictResult.numpy()
disAct1Prob = predictResult[0][0:3]
disAct2Prob = predictResult[0][3:6]
disAct3Prob = predictResult[0][6:8]
mu = predictResult[0][8]
sigma = predictResult[0][9]
if math.isnan(mu) or math.isnan(sigma):
# check mu or sigma is nan
print("mu or sigma is nan")
# print("predictResult",predictResult)
# predictResult = predictResult.numpy()
actions = []
if self.disActSize != 0:
# while NN have discrete action output.
lastDisActShape = 0
for shape in self.disActShape:
thisDisActProbs = predictResult[:, lastDisActShape : lastDisActShape + shape]
dist = tfp.distributions.Categorical(probs=thisDisActProbs, dtype=tf.float32)
action = int(dist.sample().numpy()[0])
# action = np.argmax(thisDisActProbs)
actions.append(action)
lastDisActShape += shape
if self.conActSize != 0:
# while NN have continuous action output.
lastConAct = 0
for actIndex in range(self.conActSize):
thisMu = predictResult[:, self.disOutputSize + lastConAct]
thisSig = predictResult[:, self.disOutputSize + lastConAct + 1]
if math.isnan(thisMu) or math.isnan(thisSig):
# check mu or sigma is nan
print("chooseAction:mu or sigma is nan")
thisDist = np.random.normal(loc=thisMu, scale=thisSig)
actions.append(np.clip(thisDist, -self.conActRange, self.conActRange))
lastConAct += 2
return actions, predictResult
disAct1 = np.argmax(disAct1Prob) # WS 0 or 1 or 2
disAct2 = np.argmax(disAct2Prob) # AD 0 or 1 or 2
disAct3 = np.argmax(disAct3Prob) # mouse shoot 0 or 1
normDist = np.random.normal(loc=mu, scale=sigma) # normalDistribution
conAction = np.clip(normDist, -self.conActRange,
self.conActRange) # randomly sample an action from the normal distribution
return disAct1, disAct2, disAct3, conAction, predictResult
def trainCritcActor(
self,
states: ndarray,
oldActorResult: ndarray,
actions: ndarray,
rewards: ndarray,
dones: ndarray,
nextState: ndarray,
epochs: int = None,
):
"""train critic&actor use PPO ways
def getCriticV(self, state):
Args:
states (ndarray): states
oldActorResult (ndarray): actor predict result
actions (ndarray): predicted actions, including both discrete and continuous actions
rewards (ndarray): rewards from environment
dones (ndarray): dones from environment
nextState (ndarray): next state from environment
epochs (int, optional): train epochs; if None, uses the value from PPOConfig.
Returns:
tf.constant: criticLoss, actorLoss
"""
if epochs == None:
epochs = self.trainEpochs
criticValues = self.getCriticV(state=states)
discountedR = self.discountReward(nextState, criticValues, dones, rewards)
advantage = self.getGAE(discountedR, criticValues)
criticLoss = self.trainCritic(states, discountedR, epochs)
actorLoss = self.trainActor(states, oldActorResult, actions, advantage, epochs)
# print("A_Loss:", actorLoss, "C_Loss:", criticLoss)
return criticLoss, actorLoss
def trainCritic(self, states: ndarray, discountedR: ndarray, epochs: int = None):
"""critic NN trainning function
Args:
states (ndarray): states
discountedR (ndarray): discounted rewards
epochs (int, optional): train epochs,default to ppoConfig. Defaults to None.
Returns:
tf.constant: all critic losses
"""
if epochs == None:
epochs = self.trainEpochs
his = self.critic.fit(x=states, y=discountedR, epochs=epochs, verbose=0)
return his.history["loss"]
def trainActor(
self,
states: ndarray,
oldActorResult: ndarray,
actions: ndarray,
advantage: ndarray,
epochs: int = None,
):
"""actor NN trainning function
Args:
states (ndarray): states
oldActorResult (ndarray): actor predict results
actions (ndarray): acotor predict actions
advantage (ndarray): GAE advantage
epochs (int, optional): train epochs,default to ppoConfig. Defaults to None.
Returns:
tf.constant: all actor losses
"""
# Train Actor
# states: Buffer States
# actions: Buffer Actions
# discountedR: Discounted Rewards
# Epochs: just Epochs
if epochs == None:
epochs = self.trainEpochs
actions = np.asarray(actions, dtype=np.float32)
disActions = actions[:, 0 : self.disActSize]
conActions = actions[:, self.disActSize :]
oldDisProbs = oldActorResult[:, 0 : self.disOutputSize] # [disAct1, disAct2, disAct3]
oldConMusigs = oldActorResult[:, self.disOutputSize :] # [musig1,musig2]
if self.disActSize != 0:
disOneHotActs = self.getOneHotActs(disActions)
if self.conActSize != 0:
# while NN has both discrete & continuous action outputs.
oldPiProbs = self.conProb(oldConMusigs[:, 0], oldConMusigs[:, 1], conActions)
# pack [oldDisProbs,oldPiProbs,conActions,advantage] as y_true
y_true = np.hstack((oldDisProbs, oldPiProbs, disOneHotActs, conActions, advantage))
else:
# while NN have only discrete actions output.
# pack [oldDisProbs,advantage] as y_true
y_true = np.hstack((oldDisProbs, disOneHotActs, advantage))
else:
if self.conActSize != 0:
# while NN have only continuous action output.
oldPiProbs = self.conProb(oldConMusigs[:, 0], oldConMusigs[:, 1], conActions)
# pack [oldPiProbs,conActions,advantage] as y_true
y_true = np.hstack((oldPiProbs, conActions, advantage))
else:
print("trainActor:disActSize & conActSize error")
time.sleep(999999)
# assembly Actions history
# train start
if np.any(tf.math.is_nan(y_true)):
print("y_true got nan")
print("y_true", y_true)
his = self.actor.fit(x=states, y=y_true, epochs=epochs, verbose=0)
if np.any(tf.math.is_nan(his.history["loss"])):
print("his.history['loss'] is nan!")
print(his.history["loss"])
return his.history["loss"]
def saveWeights(self, score: float):
"""save now NN's Weight. Use "models.save_weights" method.
Save as "tf" format "ckpt" file.
Args:
score (float): now score
"""
actor_save_dir = (
self.saveDir + datetime.datetime.now().strftime("%H%M%S") + "/actor/" + "actor.ckpt"
)
critic_save_dir = (
self.saveDir + datetime.datetime.now().strftime("%H%M%S") + "/critic/" + "critic.ckpt"
)
self.actor.save_weights(actor_save_dir, save_format="tf")
self.critic.save_weights(critic_save_dir, save_format="tf")
# create an empty file named after the score to record it
score_dir = (
self.saveDir + datetime.datetime.now().strftime("%H%M%S") + "/" + str(round(score))
)
scorefile = open(score_dir, "w")
scorefile.close()
print("Model's Weights Saved")
def loadWeightToModels(self, loadDir: str):
"""load NN Model. Use "models.load_weights()" method.
Load "tf" format "ckpt" file.
Args:
loadDir (str): Model dir
"""
actorDir = loadDir + "/actor/" + "actor.ckpt"
criticDir = loadDir + "/critic/" + "critic.ckpt"
self.actor.load_weights(actorDir)
self.critic.load_weights(criticDir)
print("++++++++++++++++++++++++++++++++++++")
print("++++++++++++Model Loaded++++++++++++")
print(loadDir)
print("++++++++++++++++++++++++++++++++++++")
def getCriticV(self, state: ndarray):
"""get Critic predict V value
Args:
state (np.array): Env state
state (ndarray): Env state
Returns:
tensor: return Critic predict result
"""
# if state.ndim < 2:
# state = np.expand_dims(state,0)
if state.ndim!=2:
state = state.reshape([1,self.stateSize])
if state.ndim != 2:
stateNum = int(len(state) / self.stateSize)
state = state.reshape([stateNum, self.stateSize])
return self.critic.predict(state)
def discountReward(self, nextState, rewards):
def discountReward(self, nextState: ndarray, values: ndarray, dones: ndarray, rewards: ndarray):
"""Discount future rewards
Args:
nextState (np.array): next Env state
rewards (np.array): reward list of this episode
nextState (ndarray): next Env state
values (ndarray): critic predict values
dones (ndarray): dones from environment
rewards (ndarray): reward list of this episode
Returns:
np.array: discounted rewards list,same shape as rewards that input
ndarray: discounted rewards list, same shape as the input rewards
"""
"""
nextV = self.getCriticV(nextState)
dones = 1 - dones
discountedRewards = []
for i in reversed(range(len(rewards))):
nextV = rewards[i] + dones[i] * self.gamma * nextV
discountedRewards.append(nextV)
discountedRewards.reverse() # reverse
discountedRewards = np.squeeze(discountedRewards)
discountedRewards = np.expand_dims(discountedRewards, axis=1)
# discountedRewards = np.array(discountedRewards)[:, np.newaxis]
return discountedRewards
"""
"""
# discount future rewards
nextV = self.getCriticV(nextState)
discountedRewards = []
for r in rewards[::-1]:
nextV = r + self.GAMMA*nextV
nextV = r + self.gamma * nextV
discountedRewards.append(nextV)
discountedRewards.reverse() # \ESREVER/
discountedRewards.reverse() # reverse
discountedRewards = np.squeeze(discountedRewards)
discountedRewards = np.expand_dims(discountedRewards, axis=1)
#discountedRewards = np.array(discountedRewards)[:, np.newaxis]
# discountedRewards = np.array(discountedRewards)[:, np.newaxis]
print(discountedRewards)
return discountedRewards
"""
g = 0
discountedRewards = []
lastValue = self.getCriticV(nextState)
values = np.append(values, lastValue, axis=0)
dones = 1 - dones
for i in reversed(range(len(rewards))):
delta = rewards[i] + self.gamma * values[i + 1] * dones[i] - values[i]
g = delta + self.gamma * self.lmbda * dones[i] * g
discountedRewards.append(g + values[i])
discountedRewards.reverse()
return np.asarray(discountedRewards)
def conProb(self, mu, sig, x):
def getGAE(self, discountedRewards: ndarray, values: ndarray):
"""compute GAE adcantage
Args:
discountedRewards (ndarray): discounted rewards
values (ndarray): critic predict values
Returns:
ndarray: GAE advantage
"""
advantage = discountedRewards - values
advantage = (advantage - np.mean(advantage)) / (np.std(advantage) + EPS)
return advantage
def conProb(self, mu: ndarray, sig: ndarray, x: ndarray):
"""calculate probability when x in Normal distribution(mu,sigma)
Args:
mu (np,array): mu
sig (np.array): sigma
x (np.array): x
mu (ndarray): mu
sig (ndarray): sigma
x (ndarray): x
Returns:
np.array: probabilities
ndarray: probability
"""
# probability of taking value x under the normal distribution (mu, sig)
# return shape : (length,1)
@ -309,120 +654,62 @@ class PPO(object):
prob = dist.prob(x)
prob = np.reshape(prob, (np.size(x), 1))
#dist = 1./(tf.sqrt(2.*np.pi)*sig)
#prob = dist*tf.exp(-tf.square(x-mu)/(2.*tf.square(sig)))
# dist = 1./(tf.sqrt(2.*np.pi)*sig)
# prob = dist*tf.exp(-tf.square(x-mu)/(2.*tf.square(sig)))
return prob
def trainCritcActor(self, states, actions, rewards, nextState, criticEpochs, actorEpochs):
# Train ActorNN and CriticNN
# states: Buffer States
# actions: Buffer Actions
# rewards: Buffer Rewards, not yet discounted
# nextState: the single next state
# criticEpochs: just criticNN'Epochs
# acotrEpochs: just acotrNN'Epochs
discountedR = self.discountReward(nextState, rewards)
criticMeanLoss = self.trainCritic(states, discountedR, criticEpochs)
actorMeanLoss = self.trainActor(
states, actions, discountedR, actorEpochs)
print("A_Loss:", actorMeanLoss, "C_Loss:", criticMeanLoss)
return actorMeanLoss, criticMeanLoss
def trainCritic(self, states, discountedR, epochs):
# Trian Critic
# states: Buffer States
# discountedR: Discounted Rewards
# Epochs: just Epochs
# IDK why this should be list...It just work...
# If discountR in np.array type it will throw 'Failed to find data adapter that can handle'
# discountedR = discountedR.tolist()
his = self.critic.fit(x=states, y=discountedR,
epochs=epochs, verbose=0)
return np.mean(his.history['loss'])
def trainActor(self, states, actions, discountedR, epochs):
"""Actor NN trainning function
def getOneHotActs(self, disActions):
"""one hot action encoder
Args:
states (np.array): Env states
actions (np.array): action history
discountedR (np.array): discountedR
epochs (int): epochs,how many time NN learning
disActions (ndarray): discrete actions
Returns:
Average actor loss: this learning round's average actor loss
ndarray: one hot actions
"""
# Trian Actor
# states: Buffer States
# actions: Buffer Actions
# discountedR: Discounted Rewards
# Epochs: just Epochs
actIndex = 0
for thisShape in self.disActShape:
thisActs = disActions[:, actIndex]
thisOneHotAct = tf.squeeze(tf.one_hot(thisActs, thisShape)).numpy()
if actIndex == 0:
oneHotActs = thisOneHotAct
else:
oneHotActs = np.append(oneHotActs, thisOneHotAct, axis=1)
actIndex += 1
return oneHotActs
states = np.asarray(states)
actions = np.asarray(actions, dtype=np.float32)
# predict with old Actor NN
oldActorResult = self.actor.predict(states)
# assembly Actions history
disActions = actions[:,0:self.disActSize]
conActions = actions[:,self.disActSize:]
# assembly predictResult as old Actor's Result
oldDisProbs = oldActorResult[:,0:self.disOutputSize] # [disAct1, disAct2, disAct3]
oldConMusigs = oldActorResult[:,self.disOutputSize:] # [musig1,musig2]
oldPiProbs = self.conProb(oldConMusigs[:, 0], oldConMusigs[:, 1], conActions)
criticV = self.critic.predict(states)
advantage = copy.deepcopy(discountedR - criticV)
# pack [oldDisProbs,oldPiProbs,conActions,advantage] as y_true
y_true = np.hstack((oldDisProbs,oldPiProbs,conActions,advantage))
# train start
if np.any(tf.math.is_nan(y_true)):
print("y_true got nan")
print("oldConMusigs",oldConMusigs)
print("oldPiProbs",oldPiProbs)
print("conActions",conActions)
print("oldConMusigs",oldConMusigs)
his = self.actor.fit(x=states, y=y_true, epochs=epochs, verbose=0)
if np.any(tf.math.is_nan(his.history['loss'])):
print("his.history['loss'] is nan!")
print(his.history['loss'])
return np.mean(his.history['loss'])
def saveWeights(self,score = None):
"""save now NN's Weight. Use "models.save_weights" method.
Save as "tf" format "ckpt" file.
def getAverageEntropy(self, probs: ndarray):
"""get average dis&con ACT Entropys
Args:
score (int): now score
probs (ndarray): actor NN predict result
Returns:
float: average total entropy
list: discrete entropys
list: continuous entropys
"""
actor_save_dir = self.saveDir+datetime.datetime.now().strftime("%H%M%S") + "/actor/" + "actor.ckpt"
critic_save_dir = self.saveDir+datetime.datetime.now().strftime("%H%M%S") + "/critic/" + "critic.ckpt"
self.actor.save_weights(actor_save_dir, save_format="tf")
self.critic.save_weights(critic_save_dir, save_format="tf")
if score is not None:
# create an empty file named after the score to record it
score_dir = self.saveDir+datetime.datetime.now().strftime("%H%M%S") + "/" + str(round(score))
scorefile = open(score_dir,'w')
scorefile.close()
print("Model's Weights Saved")
def loadWeightToModels(self,loadDir):
"""load NN Model. Use "models.load_weights()" method.
Load "tf" format "ckpt" file.
Args:
loadDir (string): Model dir
"""
actorDir = loadDir + "/actor/" + "actor.ckpt"
criticDir = loadDir + "/critic/" + "critic.ckpt"
self.actor.load_weights(actorDir)
self.critic.load_weights(criticDir)
print("++++++++++++++++++++++++++++++++++++")
print("++++++++++++Model Loaded++++++++++++")
print(loadDir)
print("++++++++++++++++++++++++++++++++++++")
def getAverageEntropy(self, probs: ndarray):
"""get the average entropy of the discrete & continuous actions
Args:
probs (ndarray): actor NN prediction result
Returns:
float: average total entropy
list: discrete entropies
list: continuous entropies
"""
discreteEntropys = []
continuousEntropys = []
if self.disActSize != 0:
disProbs = probs[:, 0 : self.disOutputSize]
lastDisActIndex = 0
for actShape in self.disActShape:
thisDisProbs = disProbs[:, lastDisActIndex : lastDisActIndex + actShape]
lastDisActIndex += actShape
discreteEntropys.append(
tf.negative(
tf.reduce_mean(
tf.math.multiply(thisDisProbs, tf.math.log(thisDisProbs + EPS))
)
)
)
if self.conActSize != 0:
conProbs = probs[:, self.disOutputSize :]
conActIndex = 0
for i in range(self.conActSize):
thisConProbs = conProbs[:, conActIndex : conActIndex + 2]
conActIndex += 2
continuousEntropys.append(tf.reduce_mean(thisConProbs[:, 1]))
averageEntropy = np.mean([np.mean(discreteEntropys), np.mean(continuousEntropys)])
return averageEntropy, discreteEntropys, continuousEntropys
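
The discrete term above is the element-wise mean of -p*log(p + EPS) over a branch, mirroring the tf.reduce_mean call; a NumPy-only illustration with made-up probabilities (EPS is assumed to be 1e-8 here, the module defines its own):

    import numpy as np

    EPS = 1e-8                                   # assumed small constant to avoid log(0)
    probs = np.array([[0.7, 0.2, 0.1],
                      [0.4, 0.4, 0.2]])          # one discrete branch, batch of two
    print(-np.mean(probs * np.log(probs + EPS)))
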

View File

@ -0,0 +1,65 @@
import numpy as np
class PPOBuffer(object):
def __init__(self):
self.states = []
self.actorProbs = []
self.actions = []
self.rewards = []
self.dones = []
print("√√√√√Buffer Initialized Success√√√√√")
def clearBuffer(self):
self.states = []
self.actorProbs = []
self.actions = []
self.rewards = []
self.dones = []
def getStates(self):
return self.standDims(np.asarray(self.states))
def getActorProbs(self):
return self.standDims(np.asarray(self.actorProbs))
def getActions(self):
return self.standDims(np.asarray(self.actions))
def getRewards(self):
return self.standDims(np.asarray(self.rewards))
def getDones(self):
return self.standDims(np.asarray(self.dones))
def saveState(self, state):
self.states.append(state)
def saveAction(self, action):
self.actions.append(action)
def saveReward(self, reward):
self.rewards.append(reward)
def standDims(self, data):
# standardize the data's dimensions to (batch, feature)
if np.ndim(data) > 2:
return np.squeeze(data, axis=1)
elif np.ndim(data) < 2:
return np.expand_dims(data, axis=1)
else:
return np.asarray(data)
def saveBuffers(self, state, actorProb, action, reward, done):
self.states.append(state)
self.actorProbs.append(actorProb)
self.actions.append(action)
self.rewards.append(reward)
self.dones.append(done)
"""
print("self.states", self.states)
print("self.actions", self.actions)
print("self.rewards", self.rewards)
print("self.dones", self.dones)
print("self.values", self.values)
"""

View File

@ -0,0 +1,15 @@
import datetime
from typing import NamedTuple, Optional
class PPOConfig(NamedTuple):
NNShape: list = [256, 256, 128]
actorLR: float = 2e-3 # Actor net learning rate
criticLR: float = 2e-3 # Critic net learning rate
gamma: float = 0.99
lmbda: float = 0.95
clipRange: float = 0.20
entropyWeight: float = 1e-2
trainEpochs: int = 8
saveDir: str = "PPO-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/"
loadModelDir: Optional[str] = None
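
Since PPOConfig is a NamedTuple, defaults are overridden at construction (or with _replace); a minimal usage sketch, assuming the class defined above is in scope:

    config = PPOConfig(actorLR=1e-4, clipRange=0.1)   # override two fields, keep the other defaults
    print(config.NNShape, config.trainEpochs, config.saveDir)
    config = config._replace(trainEpochs=4)           # NamedTuples are immutable; _replace returns a copy
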

View File

@ -0,0 +1,58 @@
import matplotlib.pyplot as plt
class PPOHistory(object):
def __init__(self):
self.meanRewards = []
self.entropys = []
self.actorLosses = []
self.criticLosses = []
def saveHis(self, rewards, entropys, aLosses, cLosses):
self.meanRewards.extend([rewards])
self.entropys.extend([entropys])
self.actorLosses.extend(aLosses)
self.criticLosses.extend(cLosses)
def drawHis(self):
plt.figure(figsize=(21, 13), facecolor="#011627")
ax = plt.subplot(2, 2, 1)
ax.set_facecolor("#011627")
ax.spines["top"].set_color("#c9d2df")
ax.spines["bottom"].set_color("#c9d2df")
ax.spines["left"].set_color("#c9d2df")
ax.spines["right"].set_color("#c9d2df")
ax.plot(
range(len(self.meanRewards)), self.meanRewards, color="#c9d2df", label="AverageRewards"
)
ax.set_title("meanRewards", color="#c9d2df")
ax = plt.subplot(2, 2, 2)
ax.set_facecolor("#011627")
ax.spines["top"].set_color("#c9d2df")
ax.spines["bottom"].set_color("#c9d2df")
ax.spines["left"].set_color("#c9d2df")
ax.spines["right"].set_color("#c9d2df")
ax.plot(range(len(self.entropys)), self.entropys, color="#c9d2df", label="AverageEntropys")
ax.set_title("entropys", color="#c9d2df")
ax = plt.subplot(2, 2, 3)
ax.set_facecolor("#011627")
ax.spines["top"].set_color("#c9d2df")
ax.spines["bottom"].set_color("#c9d2df")
ax.spines["left"].set_color("#c9d2df")
ax.spines["right"].set_color("#c9d2df")
ax.plot(
range(len(self.actorLosses)), self.actorLosses, color="#c9d2df", label="actorLosses"
)
ax.set_title("actorLosses", color="#c9d2df")
ax = plt.subplot(2, 2, 4)
ax.set_facecolor("#011627")
ax.spines["top"].set_color("#c9d2df")
ax.spines["bottom"].set_color("#c9d2df")
ax.spines["left"].set_color("#c9d2df")
ax.spines["right"].set_color("#c9d2df")
ax.plot(
range(len(self.criticLosses)), self.criticLosses, color="#c9d2df", label="criticLosses"
)
ax.set_title("criticLosses", color="#c9d2df")
plt.show()
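
Typical per-episode usage, assuming the class above is in scope (the numbers are made up): saveHis takes the episode's mean reward and entropy as scalars and the per-update losses as lists.

    his = PPOHistory()
    his.saveHis(rewards=12.5, entropys=0.8, aLosses=[0.42, 0.31], cLosses=[1.2, 0.9])
    his.drawHis()   # shows the 2x2 matplotlib summary defined above
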

View File

@ -1,8 +1,8 @@
import mlagents_envs
from mlagents_envs.base_env import ActionTuple
from mlagents_envs.environment import UnityEnvironment
import numpy as np
from numpy import ndarray
class makeEnv(object):
@ -22,69 +22,71 @@ class makeEnv(object):
self.BEHA_SPECS = self.env.behavior_specs
self.BEHA_NAME = list(self.BEHA_SPECS)[0]
self.SPEC = self.BEHA_SPECS[self.BEHA_NAME]
self.OBSERVATION_SPECS = self.SPEC.observation_specs[0] # observation spec
self.ACTION_SPEC = self.SPEC.action_spec # action specs
self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size # number of discrete actions
self.DISCRETE_SHAPE = list(self.ACTION_SPEC.discrete_branches)
self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size # number of continuous actions
self.STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE # number of environment observation values
print("√√√√√Enviroment Initialized Success√√√√√")
def step(
self,
actions: list,
behaviorName: ndarray = None,
trackedAgent: ndarray = None,
):
"""change ations list to ActionTuple then send it to enviroment
Args:
actions (list): PPO chooseAction output action list
behaviorName (ndarray, optional): behaviorName. Defaults to None.
trackedAgent (ndarray, optional): trackedAgentID. Defaults to None.
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
# apply the action to the environment
# returns nextState, reward, done
if self.DISCRETE_SIZE == 0:
# create empty discrete action
discreteActions = np.asarray([[0]])
else:
# create discrete action from actions list
discreteActions = np.asanyarray([actions[0:self.DISCRETE_SIZE]])
if self.CONTINUOUS_SIZE == 0:
# create empty continuous action
continuousActions = np.asanyarray([[0.0]])
else:
# create continuous actions from actions list
continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE:])
# check whether either action array is None or contains None
try:
isDisNone = discreteActions.any() is None
if discreteActions.all() is None:
print("step() Error!:discreteActions include None")
except:
isDisNone = True
try:
isConNone = continuousActions.any() is None
if continuousActions.all() is None:
print("step() Error!:continuousActions include None")
except:
isConNone = True
if isDisNone:
# if discreteActions is empty, pass the placeholder [[0]] to the environment
discreteActions = np.array([[0]], dtype=np.int32)
if isConNone:
# if continuousActions is empty, pass the placeholder [[0]] to the environment
continuousActions = np.array([[0]], dtype=np.float32)
if behaviorName is None:
behaviorName = self.BEHA_NAME
if trackedAgent is None:
trackedAgent = self.TRACKED_AGENT
# create actionTuple
thisActionTuple = ActionTuple(continuous=continuousActions, discrete=discreteActions)
# take action to env
self.env.set_actions(behavior_name=behaviorName, action=thisActionTuple)
self.env.step()
# get nextState & reward & done after this action
nextState, reward, done, loadDir, saveNow = self.getSteps(behaviorName, trackedAgent)
return nextState, reward, done, loadDir, saveNow
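
The slicing above splits the flat chooseAction output into the two ActionTuple parts; as a standalone illustration with assumed sizes (3 discrete branches, 1 continuous action):

    import numpy as np

    DISCRETE_SIZE = 3
    actions = [2, 0, 1, 0.35]                                     # flat action list from the agent
    discreteActions = np.asanyarray([actions[0:DISCRETE_SIZE]])   # shape (1, 3)
    continuousActions = np.asanyarray(actions[DISCRETE_SIZE:])    # shape (1,)
    print(discreteActions, continuousActions)
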
def getSteps(self, behaviorName=None, trackedAgent=None):
"""get the environment's current observations.
Includes State, Reward, Done, LoadDir, SaveNow
Args:
behaviorName (_type_, optional): behaviorName. Defaults to None.
trackedAgent (_type_, optional): trackedAgent. Defaults to None.
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
# get nextState & reward & done
if behaviorName is None:
behaviorName = self.BEHA_NAME
@ -94,25 +96,17 @@ class makeEnv(object):
if trackedAgent is None:
trackedAgent = self.TRACKED_AGENT
if trackedAgent in decisionSteps: # if the episode has not ended, the environment state is stored in decision_steps
nextState = decisionSteps[trackedAgent].obs[0]
nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
saveNow = nextState[0][-1]
loadDir = nextState[0][-3:-1]
nextState = nextState[0][:-3]
reward = decisionSteps[trackedAgent].reward
done = False
if trackedAgent in terminalSteps: # if the episode has ended, the environment state is stored in terminal_steps
nextState = terminalSteps[trackedAgent].obs[0]
nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
saveNow = nextState[0][-1]
loadDir = nextState[0][-3:-1]
nextState = nextState[0][:-3]
@ -121,9 +115,16 @@ class makeEnv(object):
return nextState, reward, done, loadDir, saveNow
def reset(self):
"""reset enviroment and get observations
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
self.env.reset()
nextState, reward, done, loadDir, saveNow = self.getSteps()
return nextState, reward, done, loadDir, saveNow
def render(self):
"""render enviroment
"""
self.env.render()

View File

@ -1,29 +0,0 @@
import numpy as np
class buffer(object):
def __init__(self):
self.states = []
self.actions = []
self.rewards = []
print("√√√√√Buffer Initialized Success√√√√√")
def clearBuffer(self):
self.states = []
self.actions = []
self.rewards = []
def getStates(self):
return np.asarray(self.states)
def getActions(self):
return np.asarray(self.actions)
def getRewards(self):
return np.asarray(self.rewards)
def saveState(self,state):
self.states.append(state)
def saveAction(self,action):
self.actions.append(action)
def saveReward(self,reward):
self.rewards.append(reward)
def saveBuffers(self,state,action,reward):
self.states.append(state)
self.actions.append(action)
self.rewards.append(reward)

View File

@ -1,356 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import aimBotEnv\n",
"import PPO\n",
"import buffer\n",
"import numpy as np\n",
"\n",
"import tensorflow as tf\n",
"import time\n",
"import datetime\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Attempts to allocate only the GPU memory needed for allocation\n",
"physical_devices = tf.config.list_physical_devices('GPU')\n",
"tf.config.experimental.set_memory_growth(physical_devices[0], True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Env\n",
"ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"\n",
"MAX_EP = 1000\n",
"EP_LENGTH = 100000\n",
"GAMMA = 0.99 # discount future reward (UP?)\n",
"EPSILON = 0.2 # clip Ratio range[1-EPSILON,1+EPSILON]\n",
"ACTOR_LR = 1e-5 # LR\n",
"CRITIC_LR = 2e-5 # LR\n",
"BATCH = 256 # learning step\n",
"ACTOR_EPOCH = 15 # epoch\n",
"CRITIC_EPOCH = 15 # epoch\n",
"ENTROPY_WHEIGHT = 0.001 # sigma's entropy in Actor loss\n",
"ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps\n",
"\n",
"\n",
"TRAIN = True\n",
"SAVE_DIR = \"PPO-Model/\" + datetime.datetime.now().strftime(\"%m%d%H%M\") + \"/\"\n",
"LOAD_DIR = None\n",
"\n",
"CTN_ACTION_RANGE = 10\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"No loadDir specified,Create a New Model\n",
"CONTINUOUS_SIZE 1\n",
"DISCRETE_SIZE 5\n",
"STATE_SIZE 30\n"
]
}
],
"source": [
"# initialize enviroment & buffer class\n",
"env = aimBotEnv.makeEnv(\n",
" envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT\n",
")\n",
"epBuffer = buffer.buffer()\n",
"\n",
"STATE_SIZE = env.STATE_SIZE\n",
"CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n",
"DISCRETE_SIZE = env.DISCRETE_SIZE\n",
"_, _, _, loadDir, _ = env.getSteps()\n",
"\n",
"# check load model or not\n",
"if np.any(loadDir == 0):\n",
" # create a new model\n",
" print(\"No loadDir specified,Create a New Model\")\n",
" LOAD_DIR = None\n",
"else:\n",
" # load model\n",
" loadDirDateSTR = str(int(loadDir[0]))\n",
" loadDirTimeSTR = str(int(loadDir[1]))\n",
" if len(loadDirDateSTR) != 8:\n",
" # fill lost 0 while converse float to string\n",
" for _ in range(8 - len(loadDirDateSTR)):\n",
" loadDirDateSTR = \"0\" + loadDirDateSTR\n",
" if len(loadDirTimeSTR) != 6:\n",
" # fill lost 0 while converse float to string\n",
" for _ in range(6 - len(loadDirTimeSTR)):\n",
" loadDirTimeSTR = \"0\" + loadDirTimeSTR\n",
" LOAD_DIR = \"PPO-Model/\" + loadDirDateSTR + \"/\" + loadDirTimeSTR\n",
" print(\"Load Model:\")\n",
" print(LOAD_DIR)\n",
"\n",
"print(\"CONTINUOUS_SIZE\", CONTINUOUS_SIZE)\n",
"print(\"DISCRETE_SIZE\", DISCRETE_SIZE)\n",
"print(\"STATE_SIZE\", STATE_SIZE)\n",
"\n",
"disActShape = [3, 3, 2]\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def actToKey(disAct1,disAct2,disAct3,conAct):\n",
" kW = 0\n",
" kS = 0\n",
" kA = 0\n",
" kD = 0\n",
" mouseShoot = 0\n",
" if disAct1 == 0:\n",
" kW = 0\n",
" kS = 1\n",
" elif disAct1 == 1:\n",
" kW = 0\n",
" kS = 0\n",
" elif disAct1 == 2:\n",
" kW = 1\n",
" kS = 0\n",
" if disAct2 == 0:\n",
" kA = 0\n",
" kD = 1\n",
" elif disAct2 == 1:\n",
" kA = 0\n",
" kD = 0\n",
" elif disAct2 == 2:\n",
" kA = 1\n",
" kD = 0\n",
" mouseShoot = disAct3\n",
" return kW,kS,kA,kD,mouseShoot,conAct"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"EP 0 START\n",
"√√√√√Buffer Initialized Success√√√√√\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\fromnumeric.py:3474: RuntimeWarning: Mean of empty slice.\n",
" return _methods._mean(a, axis=axis, dtype=dtype,\n",
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars\n",
" ret = ret.dtype.type(ret / rcount)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"A_Loss: 0.4477495511372884 C_Loss: 3.155759557088216\n",
"A_Loss: 0.14549287557601928 C_Loss: 0.5123071213563283\n",
"A_Loss: 0.055241942902406055 C_Loss: 0.13002794484297434\n",
"A_Loss: 0.057325509190559384 C_Loss: 0.11068039039770762\n",
"A_Loss: 0.04376962607105573 C_Loss: 0.03923700377345085\n"
]
}
],
"source": [
"bestScore = 200.0\n",
"stopTrainCounter = 0\n",
"\n",
"totalRewardHis = []\n",
"totalActorLossHis = []\n",
"totalCriticLossHis = []\n",
"epHis = []\n",
"maxTotalReward = -99999999999\n",
"\n",
"for ep in range(MAX_EP):\n",
" print(\"EP \", ep, \" START\")\n",
" # first time run game\n",
" s, _, _, _, _ = env.reset()\n",
" if ep == 0:\n",
" epBuffer = buffer.buffer()\n",
" s = s.reshape([STATE_SIZE])\n",
" agent = PPO.PPO(\n",
" stateSize=STATE_SIZE,\n",
" disActShape=disActShape,\n",
" conActSize=1,\n",
" conActRange=CTN_ACTION_RANGE,\n",
" criticLR=CRITIC_LR,\n",
" actorLR=ACTOR_LR,\n",
" gamma=GAMMA,\n",
" epsilon=EPSILON,\n",
" entropyWeight=ENTROPY_WHEIGHT,\n",
" saveDir=SAVE_DIR,\n",
" loadModelDir=LOAD_DIR,\n",
" )\n",
" step = 0\n",
" done = False\n",
" stopTrainCounter -= 1\n",
" epHis.append(ep)\n",
"\n",
" # reset total reward\n",
" epTotalReward = 0\n",
"\n",
" # Recorder list\n",
" epStepHis = []\n",
" epRewardHis = []\n",
" epActorLossHis = []\n",
" epCriticLossHis = []\n",
"\n",
" # save weight immediately?\n",
" saveNow = 0\n",
"\n",
" while not done:\n",
" step += 1\n",
" if (\n",
" step % ACTION_INTERVAL == 0\n",
" ): # take action every ACTION_INTERVAL steps\n",
" epStepHis.append(step)\n",
" (\n",
" disAct1,\n",
" disAct2,\n",
" disAct3,\n",
" conAct,\n",
" predictResult,\n",
" ) = agent.chooseAction(s)\n",
" kW, kS, kA, kD, mouseShoot, mouseMove = actToKey(\n",
" disAct1, disAct2, disAct3, conAct\n",
" )\n",
"\n",
" nextState, thisReward, done, _, saveNow = env.step(\n",
" discreteActions=np.array([[kW, kS, kA, kD, mouseShoot]]),\n",
" continuousActions=np.array([[mouseMove]]),\n",
" )\n",
"\n",
" epTotalReward += thisReward\n",
" epBuffer.saveBuffers(\n",
" s, [disAct1, disAct2, disAct3, conAct], thisReward\n",
" )\n",
" else:\n",
" disActs = np.array([[0, 0, 0, 0, 0]])\n",
" conActs = np.array([[0]])\n",
"\n",
" nextState, thisReward, done, _, saveNow = env.step(\n",
" discreteActions=disActs, continuousActions=conActs\n",
" )\n",
" epTotalReward += thisReward\n",
" nextState = nextState.reshape([STATE_SIZE])\n",
" s = nextState\n",
"\n",
" if done:\n",
" print(\"EP OVER!\")\n",
" if saveNow != 0:\n",
" print(\"SAVENOW!\")\n",
" saveNow = 0\n",
" agent.saveWeights()\n",
" # update PPO after Batch step or GameOver\n",
" if (step + 1) % BATCH == 0 or done:\n",
" bs = epBuffer.getStates()\n",
" ba = epBuffer.getActions()\n",
" br = epBuffer.getRewards()\n",
" epBuffer.clearBuffer()\n",
" if TRAIN:\n",
" epActorLoss, epCriticLoss = agent.trainCritcActor(\n",
" bs, ba, br, s, CRITIC_EPOCH, ACTOR_EPOCH\n",
" )\n",
" epActorLossHis.append(epActorLoss)\n",
" epCriticLossHis.append(epCriticLoss)\n",
" # update History Recorder\n",
" totalActorLossHis.append(np.mean(epActorLossHis))\n",
" totalCriticLossHis.append(np.mean(epCriticLossHis))\n",
" totalRewardHis.append(epTotalReward)\n",
"\n",
" if epTotalReward > maxTotalReward and epTotalReward != 0:\n",
" maxTotalReward = epTotalReward\n",
" agent.saveWeights(epTotalReward)\n",
" print(\"New Record! Save NN\", epTotalReward)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"aaa = 0\n",
"aaa = 1\n",
"aaa = 2\n",
"aaa = 3\n",
"aaa = 4\n",
"aaa = 5\n",
"aaa = 6\n",
"aaa = 7\n",
"aaa = 8\n",
"aaa = 9\n"
]
}
],
"source": [
"aaa = 0\n",
"while aaa<10:\n",
" print(\"aaa = \",aaa)\n",
" aaa+=1"
]
}
],
"metadata": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
},
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -1,25 +0,0 @@
import aimBotEnv
import PPO
ENV_PATH = './Build/Aimbot-PPO'
WORKER_ID = 100
MAX_EP = 1000
EP_LENGTH = 400
GAMMA = 0.99 # discount future reward (UP?)
EPSILON = 0.2 # clip Ratio range[1-EPSILON,1+EPSILON]
ACTOR_LR = 1e-5 # LR
CRITIC_LR = 2e-5 # LR
BATCH = 32 # learning step
ACTOR_EPOCH = 10 # epoch
CRITIC_EPOCH = 10 # epoch
ENTROPY_WHEIGHT = 0.01 # sigma's entropy in Actor loss
ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps
TRAIN = True
env = aimBotEnv.makeEnv(envPath = ENV_PATH,workerID = WORKER_ID)
STATE_SIZE = env.STATE_SIZE
CONTINUOUS_SIZE = env.CONTINUOUS_SIZE
DISCRETE_SIZE = env.DISCRETE_SIZE
CTN_ACTION_RANGE = 2

View File

@ -29,7 +29,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -73,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -118,7 +118,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -182,7 +182,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -205,7 +205,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -240,16 +240,16 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
"array([[0.]])"
]
},
"execution_count": 20,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@ -259,28 +259,119 @@
"\n",
"a = np.array([10, 20, 30, 0])\n",
"\n",
"np.any(a == 0)\n"
"np.asarray([[0.]])\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
"1.5"
]
},
"execution_count": 1,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"asd = \"adsf\"\n",
"len(asd)"
"import numpy as np\n",
"\n",
"asd = [1,2,3,np.array([0.5]),np.array([0.5])]\n",
"\n",
"asd[3:]\n",
"len(asd)\n",
"\n",
"np.mean([1,2])"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.0\n",
"0.0\n"
]
}
],
"source": [
"import time\n",
"import pyautogui as pag\n",
"\n",
"from pynput.mouse import Button, Controller\n",
"\n",
"w = pag.size().width\n",
"h = pag.size().height\n",
"mouse = Controller()\n",
"\n",
"nowt = time.time()\n",
"\n",
"middletime = time.time() - nowt\n",
"print(middletime)\n",
"# print(nowPos-(w/2))\n",
"\n",
"print(time.time() - middletime - nowt)\n",
"while True:\n",
" x,_ = mouse.position\n",
" #print(mouse.press)\n",
" #print(mouse.position)\n",
" \n",
" mouse.position = (w / 2, h / 2)\n",
" time.sleep(1/60)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"import pyautogui as pag\n",
"\n",
"import mouse\n",
"\n",
"w = pag.size().width\n",
"h = pag.size().height\n",
"\n",
"nowt = time.time()\n",
"\n",
"middletime = time.time() - nowt\n",
"print(middletime)\n",
"# print(nowPos-(w/2))\n",
"\n",
"print(time.time() - middletime - nowt)\n",
"while True:\n",
" x = mouse.get_position()\n",
" print(x)\n",
" #print(mouse.position)\n",
" \n",
" mouse.move(w / 2, h / 2)\n",
" time.sleep(1/60)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import keyboard\n",
"\n",
"while True:\n",
" if keyboard.is_pressed(\"w\"):\n",
" print(\"w\")\n",
" elif keyboard.is_pressed(\"s\"):\n",
" print(\"s\")"
]
}
],