diff --git a/Assets/Script/GameScript/MLAgentsCustomController.cs b/Assets/Script/GameScript/MLAgentsCustomController.cs index 2755fa3..4d1444d 100644 --- a/Assets/Script/GameScript/MLAgentsCustomController.cs +++ b/Assets/Script/GameScript/MLAgentsCustomController.cs @@ -37,7 +37,7 @@ public class MLAgentsCustomController : Agent private float[] rayDisResult; private float[] targetStates; private float remainTime; - private float inAreaState; + private float inFireBaseState; private int endTypeInt; @@ -92,22 +92,24 @@ public override void CollectObservations(VectorSensor sensor) myObserve[1] = transform.localPosition.y / raySensors.viewDistance; myObserve[2] = transform.localPosition.z / raySensors.viewDistance; myObserve[3] = transform.eulerAngles.y / 360f;**/ + float angleInRadians = transform.eulerAngles.y * Mathf.Deg2Rad; myObserve[0] = transform.localPosition.x; myObserve[1] = transform.localPosition.y; myObserve[2] = transform.localPosition.z; - myObserve[3] = transform.eulerAngles.y / 36f; + myObserve[3] = MathF.Sin(angleInRadians); + myObserve[4] = MathF.Cos(angleInRadians); rayTagResult = raySensors.rayTagResult;// 探测用RayTag类型结果 float[](raySensorNum,1) rayTagResultOnehot = raySensors.rayTagResultOneHot; // 探测用RayTagonehot结果 List[](raySensorNum*Tags,1) rayDisResult = raySensors.rayDisResult; // 探测用RayDis距离结果 float[](raySensorNum,1) targetStates = targetController.targetState; // (6) targettype, target x,y,z, firebasesAreaDiameter remainTime = targetController.leftTime; - inAreaState = targetController.GetInAreaState(); + inFireBaseState = targetController.GetInAreaState(); agentController.UpdateGunState(); //float[] focusEnemyObserve = RaySensors.focusEnemyInfo;// 最近的Enemy情报 float[](3,1) MinEnemyIndex,x,z //sensor.AddObservation(allEnemyNum); // 敌人数量 int - sensor.AddObservation(targetStates);// (6) targettype, target x,y,z, firebasesAreaDiameter - sensor.AddObservation(inAreaState); // (1) + sensor.AddObservation(targetStates);// (5) targettype, target 
x,y,z, firebasesAreaDiameter + sensor.AddObservation(inFireBaseState); // (1) sensor.AddObservation(remainTime); // (1) sensor.AddObservation(agentController.gunReadyToggle); // (1) save gun is ready? sensor.AddObservation(myObserve); // (4)自机位置xyz+朝向 float[](4,1) @@ -120,7 +122,7 @@ public override void CollectObservations(VectorSensor sensor) sensor.AddObservation(rayTagResult); } sensor.AddObservation(rayDisResult); // 探测用RayDis距离结果 float[](raySensorNum,1) - envUIController.UpdateStateText(targetStates, inAreaState, remainTime, agentController.gunReadyToggle, myObserve, rayTagResultOnehot, rayDisResult); + envUIController.UpdateStateText(targetStates, inFireBaseState, remainTime, agentController.gunReadyToggle, myObserve, rayTagResultOnehot, rayDisResult); /*foreach(float aaa in rayDisResult) { Debug.Log(aaa); diff --git a/Assets/Script/GameScript/TargetController.cs b/Assets/Script/GameScript/TargetController.cs index df10932..c448274 100644 --- a/Assets/Script/GameScript/TargetController.cs +++ b/Assets/Script/GameScript/TargetController.cs @@ -38,7 +38,7 @@ public class TargetController : MonoBehaviour [System.NonSerialized] public Targets targetType; [System.NonSerialized] public int gotoLevelNum; [System.NonSerialized] public int attackLevelNum; - public float[] targetState = new float[6]; + public float[] targetState = new float[5]; public enum EndType { Win, Lose, Running, Num }; @@ -348,7 +348,6 @@ private void UpdateTargetStates(Vector3? 
targetPosition = null) targetState[2] = this.targetPosition.y; targetState[3] = this.targetPosition.z; targetState[4] = sceneBlockCon.nowBlock.firebasesAreaDiameter; - targetState[5] = sceneBlockCon.nowBlock.belongRatio; } } diff --git a/README.md b/README.md index ca5a5ba..06541a9 100644 --- a/README.md +++ b/README.md @@ -42,20 +42,20 @@ Python侧则使用[mlagents-envs 0.30.0](https://pypi.org/project/mlagents-envs/ | 5 | InFireBaseState | 1 | 0,1 | 描述Agent是否处于FireBase中,0=False,1=True | | 6 | RemainTime | 1 | 0~30 | 描述本回合剩余时间,取值范围为在0到30之间的连续值 | ### AgentState(自机状态) -`AgentState`包含了Agent的武器可攻击状态,Agent的x,y,z坐标和Agent的朝向角度。 -| Num | Observation | Size | State Space | Description | -|-----|-------------|------|-------------|----------------------------------------------------------------------------------| -| 7 | GunState | 1 | 0,1 | 描述Agent武器是否处于可使用状态。0=False,1=True | -| 8 | Agent坐标 | 3 | 0~47 | 描述Agent的连续空间坐标,取值范围为0到47之间的连续值 | -| 9 | Agent朝向 | 1 | 0~10 | 描述Agent的正面朝向,取值范围为0到10,映射到0~360度的旋转(每增加1代表增加36度) | +`AgentState`包含了Agent的武器可攻击状态,Agent的x,y,z坐标和Agent的朝向角度。为了避免直接使用Agent GameObject朝向角度时,0到360度变化时值的大幅度跳变,同时为了实现更好的周期性表示,这里使用了Agent GameObject旋转角度的正弦和余弦值来表示Agent的朝向。 +| Num | Observation | Size | State Space | Description | +|------|-------------|------|-------------|---------------------------------------------------------------------| +| 7 | GunState | 1 | 0,1 | 描述Agent武器是否处于可使用状态。0=False,1=True | +| 8 | Agent坐标 | 3 | 0~47 | 描述Agent的连续空间坐标,取值范围为0到47之间的连续值 | +| 9~10 | Agent朝向 | 2 | -1~1 | 通过计算Agent GameObject旋转角度的正弦和余弦以描述Agent的正面朝向。 | ### RaycastState(射线探测状态) `RaycastState`记录了视野内发射的射线探测到的Object的Tag及其距离、其中Tag可以通过Label Encoding和OneHot Encoding两种方式来记录,默认使用Label 
Encoding,可以在Object`CommonParameterContainer`中进行调整。`RaycastState`由[`RaySensors`](https://koha9.asuscomm.com/yurugit/Koha9/Aimbot-ParallelEnv/src/branch/main/Assets/Script/GameScript/RaySensors.cs)类管理。它实现了射线的非均匀分布,默认视野中间15%的区域射线分布更密集,两侧则较为稀疏,默认中间密集部分共射出5条,两侧稀疏部分各7条。每个射线可探测的对象最远距离为100个单位,超过探测距离则返回0。其中视野中间区域的覆盖范围,区域内RayCast数量以及可探测距离均可以在每个`Agent`GameObject的Inspector中进行调整。 射线分布方式 | Num | Observation | Size | State Space | Description | |------------------------------------------------------------------|-----------------|--------------------|---------------|----------------------------------------------------------------------------------| -| 10~Raynum+10 | TagType(Label) | Number of Raycasts | 0, 1, 2 | 描述Raycast所探测到物体的Tag, 0=Wall, 1=Enemy, 2=Player, -1=Nothing | -| 10~Raynum * 3+10 | TagType(Onehot) | Raynum * 3 | 0, 1 | 描述Raycast所探测到物体的Tag, 使用Onehot编码记录 | -| Raynum+11~2* Raynum+11(Label),
3* Raynum+11~4* Raynum+11(OneHot) | Distance | Number of Raycasts | 0~MaxDistance | 描述Raycast所探测到物体的距离,取值范围在0~MaxDistance,其中MaxDistance默认为100 | +| 11~Raynum+11 | TagType(Label) | Number of Raycasts | 0, 1, 2 | 描述Raycast所探测到物体的Tag, 0=Wall, 1=Enemy, 2=Player, -1=Nothing | +| 11~Raynum * 3+11 | TagType(Onehot) | Raynum * 3 | 0, 1 | 描述Raycast所探测到物体的Tag, 使用Onehot编码记录 | +| Raynum+12~2* Raynum+12(Label),
3* Raynum+12~4* Raynum+12(OneHot) | Distance | Number of Raycasts | 0~MaxDistance | 描述Raycast所探测到物体的距离,取值范围在0~MaxDistance,其中MaxDistance默认为100 | *注:n为每个Agent的RayCast数量,MaxDistance为每个Agent的RayCast最大探测距离*