first readme commit

This commit is contained in:
Koha9 2023-12-29 23:51:05 +09:00
commit f3c5f539ec
10 changed files with 30836 additions and 102 deletions

View File

@ -1 +1 @@
{"count":1,"self":113.55664639999999,"total":114.9330107,"children":{"InitializeActuators":{"count":2,"self":0.00099969999999999985,"total":0.00099969999999999985,"children":null},"InitializeSensors":{"count":2,"self":0.0015095999999999998,"total":0.0015095999999999998,"children":null},"AgentSendState":{"count":5526,"self":0.0352491,"total":0.2273044,"children":{"CollectObservations":{"count":5526,"self":0.1629312,"total":0.1629312,"children":null},"WriteActionMask":{"count":5526,"self":0.0095122,"total":0.0095122,"children":null},"RequestDecision":{"count":5526,"self":0.019611899999999998,"total":0.019611899999999998,"children":null}}},"DecideAction":{"count":5526,"self":0.0385606,"total":0.0385606,"children":null},"AgentAct":{"count":5526,"self":1.1079914,"total":1.1079914,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1700043195","unity_version":"2021.3.14f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2021.3.14f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-ParallelEnv -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-Koha9 -hubSessionId 54250353-7623-46bc-bba3-639785619018 -accessToken dmg7obCqK6FT1OSbiLk7TkfQFUczEL1FqgnKIHh02iQ00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.1","scene_name":"Play","end_time_seconds":"1700043310"}} {"count":1,"self":26.837244799999997,"total":27.174796399999998,"children":{"InitializeActuators":{"count":2,"self":0.0020071999999999998,"total":0.0020071999999999998,"children":null},"InitializeSensors":{"count":2,"self":0.001,"total":0.001,"children":null},"AgentSendState":{"count":1115,"self":0.008007499999999999,"total":0.0535482,"children":{"CollectObservations":{"count":1115,"self":0.0410334,"total":0.0410334,"children":null},"WriteActionMask":{"count":1115,"self":0.0025057,"total":0.0025057,"children":null},"RequestDecision":{"count":1115,"self":0.0020016,"total":0.0020016,"children":null}}},"DecideAction":{"count":1115,"self":0.0070135,"total":0.0070135,"children":null},"AgentAct":{"count":1115,"self":0.2734714,"total":0.2734714,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1702212209","unity_version":"2021.3.14f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2021.3.14f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-ParallelEnv -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-4ll_RrA1WPOxOo53NVlUn -hubSessionId 51847070-2fd3-4dd9-bd34-395d4d996780 -accessToken az5jhjWoFTvfMlF175YCJCcakxJHc-9-nobp1oGkD0w00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.1","scene_name":"Play","end_time_seconds":"1702212236"}}

View File

@ -1 +1 @@
{"count":1,"self":10.5552432,"total":10.6509302,"children":{"InitializeActuators":{"count":2,"self":0.0010058,"total":0.0010058,"children":null},"InitializeSensors":{"count":2,"self":0.0010006,"total":0.0010006,"children":null},"AgentSendState":{"count":298,"self":0.0020150999999999997,"total":0.0205401,"children":{"CollectObservations":{"count":298,"self":0.0165191,"total":0.0165191,"children":null},"WriteActionMask":{"count":298,"self":0,"total":0,"children":null},"RequestDecision":{"count":298,"self":0.0020058999999999997,"total":0.0020058999999999997,"children":null}}},"DecideAction":{"count":298,"self":0.004502,"total":0.004502,"children":null},"AgentAct":{"count":298,"self":0.0676306,"total":0.0676306,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1700195050","unity_version":"2021.3.14f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2021.3.14f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-ParallelEnv -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-FDy4mXKexQ6X1xL3HWyzB -hubSessionId 8d021f9c-c18a-42ae-b0f7-4f4093f87cc2 -accessToken K_UtjVRjVp9Nhbf1MZoN5JxRN3F_Qnk7hi8pO7re9pw00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.1","scene_name":"Train","end_time_seconds":"1700195061"}} {"count":1,"self":57.934591999999995,"total":63.885060599999996,"children":{"InitializeActuators":{"count":12,"self":0.001501,"total":0.001501,"children":null},"InitializeSensors":{"count":12,"self":0.0015037,"total":0.0015037,"children":null},"AgentSendState":{"count":2938,"self":0.0366666,"total":0.3843504,"children":{"CollectObservations":{"count":17628,"self":0.31354499999999996,"total":0.31354499999999996,"children":null},"WriteActionMask":{"count":17628,"self":0.0070104,"total":0.0070104,"children":null},"RequestDecision":{"count":17628,"self":0.0271284,"total":0.0271284,"children":null}}},"DecideAction":{"count":2938,"self":2.5410512,"total":2.5410512,"children":null},"AgentAct":{"count":2938,"self":3.0220654,"total":3.0220654,"children":null}},"gauges":{"AKMAgent.CumulativeReward":{"count":6,"max":-1099.64661,"min":-3249.88,"runningAverage":-2600.68726,"value":-2739.38037,"weightedAverage":-2673.02979}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1703171035","unity_version":"2021.3.14f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2021.3.14f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-ParallelEnv -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-DCdlO2Cen1HBnuMk-PngQ -hubSessionId 0c43c1fc-cbb5-493c-917c-268bb9f763ac -accessToken 8py-opTGbwVFWF-QmaeXuVLWUZD3ZUdDRnQkQgsPfJU00ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.0.1","scene_name":"Train","end_time_seconds":"1703171098"}}

File diff suppressed because it is too large.

View File

@ -99,9 +99,9 @@ public override void CollectObservations(VectorSensor sensor)
 myObserve[1] = transform.localPosition.y;
 myObserve[2] = transform.localPosition.z;
 myObserve[3] = transform.eulerAngles.y / 36f;
-rayTagResult = raySensors.rayTagResult; // RayTag detection result, float[](raySensorNum,1)
+rayTagResult = raySensors.rayTagResult; // RayTag type detection result, float[](raySensorNum,1)
 rayTagResultOnehot = raySensors.rayTagResultOneHot.ToArray(); // RayTag one-hot detection result, List<int>[](raySensorNum*Tags,1)
-rayDisResult = raySensors.rayDisResult; // RayDis detection result, float[](raySensorNum,1)
+rayDisResult = raySensors.rayDisResult; // RayDis distance detection result, float[](raySensorNum,1)
 targetStates = targetController.targetState; // (6) targettype, target x,y,z, firebasesAreaDiameter
 remainTime = targetController.leftTime;
 inAreaState = targetController.GetInAreaState();
@ -122,7 +122,7 @@ public override void CollectObservations(VectorSensor sensor)
 {
 sensor.AddObservation(rayTagResult);
 }
-sensor.AddObservation(rayDisResult); // RayDis detection result, float[](raySensorNum,1)
+sensor.AddObservation(rayDisResult); // RayDis distance detection result, float[](raySensorNum,1)
 envUIController.UpdateStateText(targetStates, inAreaState, remainTime, agentController.gunReadyToggle, myObserve, rayTagResultOnehot, rayDisResult);
 /*foreach(float aaa in rayDisResult)
 {

133
README.md
View File

@ -1 +1,134 @@
# Aimbot-ParallelEnv
This is a multi-agent, FPS-style training environment built on Unity ML-Agents: it spawns specified targets at the corresponding difficulty and feeds Rewards back based on the Agent's Actions, so that reinforcement-learning Agents can be trained in it.
## Description
The project is built on [ML-Agents 2.0.1](https://github.com/Unity-Technologies/ml-agents/tree/develop) and developed with Unity 2021.3.14f1.
On the Python side, [mlagents-envs 0.30.0](https://pypi.org/project/mlagents-envs/) is used to communicate with the environment.
## Quick Start
The [Aimbot-PPO](https://koha9.asuscomm.com/yurugit/Koha9/Aimbot-PPO) project trains a PPO algorithm with this environment; its [AimbotEnv.py](https://koha9.asuscomm.com/yurugit/Koha9/Aimbot-PPO/src/branch/OffP-FullMNN-V2/Aimbot-PPO-Python/Pytorch/AimbotEnv.py) shows how the environment is used.
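Besides going through the Aimbot-PPO wrapper, the environment can also be driven directly with the mlagents-envs low-level API. The sketch below only illustrates the handshake and a random-action loop; the build path is an assumption, and the action sizes are read from the behavior spec rather than hard-coded:
```python
import numpy as np
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import ActionTuple

# Path to a built Aimbot-ParallelEnv player; adjust to your own build location (assumption).
env = UnityEnvironment(file_name="./Build/Aimbot-ParallelEnv.exe", seed=1)
env.reset()

behavior_name = list(env.behavior_specs)[0]
spec = env.behavior_specs[behavior_name]
print("observation shapes:", [obs.shape for obs in spec.observation_specs])

for _ in range(100):
    decision_steps, _terminal_steps = env.get_steps(behavior_name)
    n_agents = len(decision_steps)
    if n_agents > 0:
        # Random actions, sized from the spec: 3 discrete branches
        # (vertical move, horizontal move, attack) and 1 continuous value (view rotation).
        discrete = np.column_stack(
            [np.random.randint(0, b, n_agents) for b in spec.action_spec.discrete_branches]
        ).astype(np.int32)
        continuous = np.random.uniform(
            -1, 1, size=(n_agents, spec.action_spec.continuous_size)
        ).astype(np.float32)
        env.set_actions(behavior_name, ActionTuple(continuous=continuous, discrete=discrete))
    env.step()

env.close()
```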
## Game Overview
This is an FPS environment modeled on the basic mode of the original Doom. The Agent is given different targets and must decide, based on the target's type and position and on its own state, whether to fire, in which direction to shoot, and in which direction to move. The environment provides two modes and three target types: train mode and test mode, and `FreeTarget`, `GotoTarget`, and `AttackTarget`. `FreeTarget` requires the Agent to knock down every enemy in the area with its weapon, while `GotoTarget` and `AttackTarget` spawn a target block in the area that the Agent must respond to in the appropriate way to complete the task. `GotoTarget` and `AttackTarget` are further divided into five difficulty levels according to the structure of the spawned target block.
### Modes
Both train mode and test mode communicate with the Python side, which generates Actions from the Observations collected in this environment and sends them back. When the environment starts, the user has to select a mode in-game; because ML-Agents enforces a time limit, the selection must be made within 45 s, otherwise the environment exits automatically.
#### Train Mode
Before entering train mode, the user chooses on the Start screen between the single-target options `Train-Free`, `Train-Goto`, and `Train-Attack`, and `Train-Mix`, which trains all of them. After the choice is made the environment enters train mode; note that the choice cannot be changed later. For the modes whose target blocks come in multiple difficulties (`Train-Goto` and `Train-Attack`), the Start screen also shows a panel for adjusting the spawn probability of each difficulty; by default the probabilities are uniform. To keep the probabilities summing to 1, adjusting one difficulty's probability automatically rescales the others, and clicking the lock button on the right locks or unlocks a difficulty's probability. The panel stays visible while train mode is running, and any change made during training is applied the next time a target block is spawned (a sketch of this renormalization follows the screenshot below).
![TargetLevelProbabilityPanel](./ReadmeImages/LevelProbabilityPanel.jpg)
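The renormalization described above can be sketched as follows (the function name and data layout are illustrative assumptions, not the panel's actual implementation):
```python
def adjust_probability(probs, locked, index, new_value):
    """Set probs[index] to new_value and rescale the other unlocked entries
    so that the whole distribution still sums to 1."""
    probs = list(probs)
    free = [i for i in range(len(probs)) if i != index and not locked[i]]
    locked_sum = sum(probs[i] for i in range(len(probs)) if i != index and locked[i])
    # The edited value cannot exceed whatever is left after the locked entries.
    new_value = min(max(new_value, 0.0), 1.0 - locked_sum)
    remaining = 1.0 - locked_sum - new_value
    old_free_sum = sum(probs[i] for i in free)
    for i in free:
        # Rescale proportionally; split evenly if the free entries were all zero.
        probs[i] = remaining * (probs[i] / old_free_sum) if old_free_sum > 0 else remaining / len(free)
    probs[index] = new_value
    return probs

# Example: 5 difficulty levels, uniform by default, level 2 locked at 0.2,
# then the user raises level 0 to 0.5.
print(adjust_probability([0.2] * 5, [False, False, True, False, False], 0, 0.5))
# -> [0.5, 0.1, 0.2, 0.1, 0.1]
```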
#### Test Mode
In test mode the user issues commands to the Agent manually: the menu in the upper-right corner can spawn target blocks or enemies, switch the target mode, and so on. Compared with train mode, this mode adds one extra Target, `StayTarget`, which is assigned to the Agent whenever no target has been specified or the game has been cleared. Clicking the upper-right menu switches the mouse into a mode that spawns an Enemy or a Target block of the chosen difficulty; once the mouse is over the game area a preview of the object appears, and a right click spawns the corresponding Enemy or block at that position. When a Target block is spawned, the matching target is automatically assigned to the Agent; to assign `FreeTarget`, click the `FreeMode` button. Clicking `ClearGame` removes all enemies and blocks and assigns `StayTarget` to the Agent, and clicking `StayMode` forces the Agent's target to `StayTarget`.
### Targets
#### FreeTarget
When assigned `FreeTarget`, the Agent must knock down every enemy in the area with its weapon. At the start of an episode a number of enemies (6 by default) is spawned at random positions in the area and the Agent is spawned at a random position; the Agent's facing is inherited from the previous episode (in the first episode it defaults to an angle of 0). The Agent wins once it has knocked down all enemies, and loses if it has not done so within 30 s.
#### GotoTarget
When assigned `GotoTarget`, the Agent's goal is to move to the designated position of the target block. At the start of an episode the Agent's position is always inherited from the previous episode (in the first episode it spawns at the lower-left corner of the map by default). The Goto target block is spawned at a random location in the area as a preset block of the difficulty drawn from the difficulty probabilities; each difficulty has several saved preset blocks, which are stored in the [Prefab folder](https://koha9.asuscomm.com/yurugit/Koha9/Aimbot-ParallelEnv/src/branch/main/Assets/Prefeb) via [`SceneBlocksSet`](https://koha9.asuscomm.com/yurugit/Koha9/Aimbot-ParallelEnv/src/branch/main/Assets/Script/TargetContainer/SceneBlocksSet.cs). Every target block has an overall size of 10*10, and a block is spawned so that it avoids the Agent's current position. The designated target area is a circular region named `FireBase`; it never extends beyond the target block, but its position and size vary between preset blocks. The Agent wins once it reaches the center position of the target block, and loses if it has not done so within 30 s.
#### AttackTarget
When assigned `AttackTarget`, the Agent's goal is to suppress the target area with fire, with knocking down every enemy inside the block as the primary objective. The spawn strategy for the Agent and the target block and the structure of the `FireBase` are the same as in `GotoTarget`, except that in `AttackTarget` enemies are guaranteed to spawn inside the `FireBase` area. The Agent wins once it has knocked down all enemies inside the block, and loses if it has not done so within 30 s.
# Environment
## Observation Space
In this reinforcement-learning setup, observations are the key to understanding and interacting with the environment. The observation consists of three main parts: `TargetState`, `AgentState`, and `RaycastState`. These observations are collected through ML-Agents' `VectorSensor` class and sent to the Python side, providing the information needed for decision making. The observation source code is the overridden `CollectObservations` function in [`MLAgentsCustomController.cs`](https://koha9.asuscomm.com/yurugit/Koha9/Aimbot-ParallelEnv/src/branch/main/Assets/Script/GameScript/MLAgentsCustomController.cs).
### TargetState
`TargetState` carries the target instruction given to the Agent and related information: the Target type, the Target's x, y, z coordinates, the diameter of the TargetArea, whether the Agent is inside the TargetArea, and the time remaining in the current episode (7 values in total, as listed below).
| Num | Observation | Size | State Space | Description |
|-----|------------------|------|-------------|-------------------------------------------------------------------------------------------------------------|
| 0 | Target type | 1 | 0,1,2,3,4 | Type of the assigned Target: 0=FreeTarget, 1=GotoTarget, 2=AttackTarget, 3=DefenceTarget (unused), 4=StayTarget |
| 1~3 | Target position | 3 | 0~47 | Continuous coordinates of the Target's FireBase, each in the range 0 to 47 |
| 4 | FireBaseDiameter | 1 | 1~10 | Diameter of the FireBase, a continuous value between 1 and 10 |
| 5 | InFireBaseState | 1 | 0,1 | Whether the Agent is inside the FireBase: 0=False, 1=True |
| 6 | RemainTime | 1 | 0~30 | Time remaining in the current episode, a continuous value between 0 and 30 |
### AgentState
`AgentState` contains whether the Agent's weapon is ready to fire, the Agent's x, y, z coordinates, and the Agent's facing angle.
| Num | Observation | Size | State Space | Description |
|-----|-------------|------|-------------|----------------------------------------------------------------------------------|
| 7 | GunState | 1 | 0,1 | Whether the Agent's weapon is ready to use: 0=False, 1=True |
| 8 | Agent position | 3 | 0~47 | Continuous coordinates of the Agent, each in the range 0 to 47 |
| 9 | Agent facing | 1 | 0~10 | The Agent's facing direction, a value from 0 to 10 mapped onto 0~360 degrees of rotation; each increment of 1 corresponds to 36 degrees |
### RaycastState
`RaycastState` records what the rays cast within the field of view detect and how far away it is; it is managed by the [`RaySensors`](https://koha9.asuscomm.com/yurugit/Koha9/Aimbot-ParallelEnv/src/branch/main/Assets/Script/GameScript/RaySensors.cs) class. The rays are distributed non-uniformly: by default the middle 15% of the field of view is covered more densely with 5 rays, while the two sides are sparser with 7 rays each, and every ray can detect objects up to 100 units away (0 is returned beyond that distance). The coverage of the central region, the number of RayCasts, and the detectable distance can all be adjusted in the Inspector of each `Agent` GameObject.
<img src="./ReadmeImages/RayCastLayout.png" alt = "射线分布方式" width="500" height = "auto">
| Num | Observation | Size | State Space | Description |
|-----------------------|-------------|--------------------|---------------|----------------------------------------------------------------------------------|
| 10~n+10 | TagType | n | 0, 1, 2 | Tag of the object detected by each Raycast: 0=Nothing, 1=Wall, 2=Enemy |
| n+11~2*n+11 | Distance | n | 0~MaxDistance | Distance to the object detected by each Raycast, in the range 0~MaxDistance |
*Note: n is the number of RayCasts per Agent, and MaxDistance is the maximum RayCast detection distance of each Agent.*
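Taken together, a flat observation vector can be unpacked on the Python side roughly as below. The field order follows the tables above; the helper name and the assumption that all fields arrive as one concatenated vector observation are illustrative rather than taken from the project code:
```python
import numpy as np

def unpack_observation(obs: np.ndarray, n_rays: int) -> dict:
    """Split a flat Aimbot-ParallelEnv observation vector into named fields (assumed layout)."""
    i = 0
    target_type   = obs[i]; i += 1           # 0=Free, 1=Goto, 2=Attack, 3=Defence (unused), 4=Stay
    target_pos    = obs[i:i + 3]; i += 3     # FireBase x, y, z in [0, 47]
    firebase_diam = obs[i]; i += 1           # FireBase diameter in [1, 10]
    in_firebase   = obs[i]; i += 1           # 1 if the Agent stands inside the FireBase
    remain_time   = obs[i]; i += 1           # seconds left in the episode, [0, 30]
    gun_ready     = obs[i]; i += 1           # 1 if the weapon can fire
    agent_pos     = obs[i:i + 3]; i += 3     # Agent x, y, z in [0, 47]
    agent_facing  = obs[i] * 36.0; i += 1    # stored as 0~10, i.e. 36 degrees per unit
    ray_tags      = obs[i:i + n_rays]; i += n_rays   # 0=Nothing, 1=Wall, 2=Enemy
    ray_distances = obs[i:i + n_rays]; i += n_rays   # 0 ~ MaxDistance
    return {
        "target_type": int(target_type), "target_pos": target_pos,
        "firebase_diameter": float(firebase_diam), "in_firebase": bool(in_firebase),
        "remain_time": float(remain_time), "gun_ready": bool(gun_ready),
        "agent_pos": agent_pos, "agent_facing_deg": float(agent_facing),
        "ray_tags": ray_tags, "ray_distances": ray_distances,
    }
```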
## Action Space
To mimic an FPS player operating the keyboard and mouse at the same time, the Action Space of this environment is split into two parts, `Discrete Action` and `Continuous Action`. `Discrete Action` models the player's discrete keyboard inputs, and `Continuous Action` models the player's continuous mouse input. The mapping of both Action Spaces can be seen in the overridden `Heuristic` function in [`MLAgentsCustomController.cs`](https://koha9.asuscomm.com/yurugit/Koha9/Aimbot-ParallelEnv/src/branch/main/Assets/Script/GameScript/MLAgentsCustomController.cs); the concrete implementation lives in the `MoveAgent` and `CameraControl` functions of [`AgentController.cs`](https://koha9.asuscomm.com/yurugit/Koha9/Aimbot-ParallelEnv/src/branch/main/Assets/Script/GameScript/Character/AgentController.cs).
### Discrete Action
With `Discrete Action` the Agent issues discrete movement and attack commands: movement consists of a vertical-axis move and a horizontal-axis move, and the attack action operates the weapon.
| Num | Action | Action Space | Description |
|-----|-----------------|--------------|---------------------------------------------------------------------------|
| 0 | Vertical Move | 0,1,-1 | Moves the Agent along the vertical axis of its facing direction: 0=stop, 1=move forward, -1=move backward |
| 1 | Horizontal Move | 0,1,-1 | Moves the Agent along the horizontal axis of its facing direction: 0=stop, 1=move right, -1=move left |
| 2 | Attack | 0,1 | Controls the Agent's attack: 0=do not attack, 1=attack |
### Continuous Action
With `Continuous Action` the Agent controls its view. Since this environment imitates an early-Doom-style FPS, the view can only be rotated horizontally.
| Num | Action | Action Space | Description |
|-----|-------------------|--------------|---------------------------------------------------------------------------------|
| 0 | Vertical Rotation | -Inf~Inf | Rotates the Agent's view horizontally (around the vertical axis): positive values rotate the view to the right, negative values to the left. |
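As a sketch of how these actions are packed on the Python side with mlagents-envs (ML-Agents transmits discrete actions as non-negative branch indices, so the exact index-to-direction mapping is decided on the Unity side and the values below are only placeholders):
```python
import numpy as np
from mlagents_envs.base_env import ActionTuple

# One agent. The three discrete branches are [Vertical Move, Horizontal Move, Attack];
# the single continuous value is the horizontal view rotation.
discrete = np.array([[1, 0, 1]], dtype=np.int32)    # placeholder branch indices
continuous = np.array([[0.3]], dtype=np.float32)    # positive = rotate the view to the right

action = ActionTuple(continuous=continuous, discrete=discrete)
# env.set_actions(behavior_name, action); env.step()
```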
## Reward
To make the Agent focus on completing each of the given targets, the Reward design differs between them. In train mode the Reward consists of a shared `Common Reward` plus a Reward specific to each target.
### Common Reward
To guide the Agent toward actions that help complete the target and away from meaningless ones, the `Common Reward` defines the following Rewards and Penalties. Because ML-Agents cannot pass the episode result directly to Python, and to keep the other Rewards from being too small or too large to tell victory from defeat, an extremely large positive value and an extremely large negative value are added to the end-of-episode Reward to signal a win or a loss.
| Reward/Penalty | Value/Symbol | Description |
|----------------|---------------|-----------------------------------------------------|
| NonTargetHit | 3 | Reward for hitting or knocking down an enemy that is not a Target |
| TargetHit | 25 | Reward for hitting or knocking down an enemy that is a Target |
| MovePenalty | -0.5 | Penalty given while the Agent moves around the arena |
| SpinPenalty | $P_s$ | Penalty given when the Agent rotates its view |
| WinReward | 999 | Reward given when the episode ends with the Agent having achieved its target |
| LoseReward | -999 | Reward given when the episode ends with the Agent having failed |
To prevent the Agent from spinning meaninglessly or spinning too fast during training, the following function gives the Agent a `SpinPenalty`. While the rotation accumulated over the last 40 Actions stays below a threshold, only a small Penalty is given; once it exceeds the threshold, the Agent is clearly rotating continuously in one direction, so a larger Penalty is given. Here <i>P<sub>s<sub>t</sub></sub></i> is the rotation Penalty at time `t`, and <i>mouseX<sub>t</sub></i> is the rotation value of the Agent's `Continuous Action` at time `t`.
$$
P_{s_t} =
\begin{cases}
-\left|mouseX_t\right| \cdot 0.06 & , & \left| \sum_{i=t-40}^{t} mouseX_i \cdot 0.08 \right| < 10 \\
-\left| \sum_{i=t-40}^{t} mouseX_i \cdot 0.08 \right| & , & \left| \sum_{i=t-40}^{t} mouseX_i \cdot 0.08 \right| \geq 10
\end{cases}
$$
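A minimal numerical sketch of this penalty, assuming the mouseX values are kept in a rolling window of the last 40 steps (the constants come from the formula above; the windowing helper itself is an assumption):
```python
from collections import deque

class SpinPenalty:
    """Rolling-window spin penalty, following the piecewise formula above."""
    def __init__(self, window: int = 40, threshold: float = 10.0):
        self.history = deque(maxlen=window)
        self.threshold = threshold

    def __call__(self, mouse_x: float) -> float:
        self.history.append(mouse_x)
        accumulated = abs(sum(self.history) * 0.08)
        if accumulated < self.threshold:
            return -abs(mouse_x) * 0.06     # small penalty for ordinary turning
        return -accumulated                 # large penalty for sustained one-way spinning

penalty = SpinPenalty()
for step_mouse_x in [5.0] * 50:             # keep turning right for 50 steps
    p = penalty(step_mouse_x)
print(p)  # -16.0 once the 40-step window saturates
```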
### FreeTarget
In `FreeTarget` the main objective is to eliminate all enemies, so the ability to track enemies is crucial. Two reward mechanisms are used: when an enemy is detected at the center of the field of view, the Agent receives a Reward of 2; otherwise a Reward smaller than `2` is given based on the distance $D$ between the detected enemies and the view center. Only enemies detected by Raycast are considered, and the closer the distance gets to 0.5 (the Enemy radius), the closer the Reward gets to `2`. In the figure, for example, only the distances to `Enemy1` and `Enemy3` are computed, and the shortest distance $D_1$ is used for the Reward; the undetected `Enemy2` and `Enemy4` are excluded. In this mode every enemy counts as a target, so the Agent receives the `TargetHit` Reward from the `Common Reward` whenever an enemy is hit or knocked down.
<img src="./ReadmeImages/FreeTargetFacingreward.png" alt = "FacingReward" width="500" height = "auto">
| Reward/Penalty | Value/Symbol | Description |
|----------------|----------------------|----------------------------------------|
| FacingReward | $R_f$ | Reward based on the distance between the view center and the nearest detected enemy |
$$
R_{f} =
\begin{cases}
2 & , & \min(D) \leq 0.5 \\
\frac{1}{\sqrt{\min(D)/2}} & , & \min(D) > 0.5
\end{cases}
$$
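A sketch of this facing reward, assuming `distances` holds the view-center offsets of the enemies currently detected by the Raycasts (the names and the zero fallback when nothing is detected are assumptions):
```python
import math

def facing_reward(distances, enemy_radius: float = 0.5) -> float:
    """FreeTarget facing reward from the distances between detected enemies
    and the center of the field of view (see the formula above)."""
    if not distances:                 # no enemy detected by any ray
        return 0.0                    # assumption: no facing reward in that case
    d = min(distances)                # only the closest detected enemy counts
    if d <= enemy_radius:
        return 2.0
    return 1.0 / math.sqrt(d / 2.0)

print(facing_reward([3.2, 1.1, 7.5]))   # uses the closest detected enemy (D = 1.1)
```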
### GotoTarget
In `GotoTarget` the main objective is to move toward the designated `FireBase`. A per-Step distance reward $R_d$ is computed from the difference between the player-to-target distances $D_t$ and $D_{t-1}$ at times `t` and `t-1`: the Agent receives a positive Reward while approaching the target and a Penalty below 0 while moving away from it. While the player is inside the `FireBase` it receives a higher fixed reward instead, and the `DistanceReward` no longer applies. In this mode no enemy counts as an attack target, but hitting or defeating an enemy still yields the `NonTargetHit` Reward from the `Common Reward`.
| Reward/Penalty | Value/Symbol | Description |
|----------------|--------------|-----------------------------------------|
| DistanceReward | $R_d$ | Reward from the change in distance to the target while the Agent is outside it |
| InAreaReward | 5 | Continuous reward while the Agent is inside the target area |
$$
R_{d} = (D_{t-1} - D_{t}) \cdot 20
$$
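A small sketch of the resulting per-step reward under these rules (the function and argument names are assumptions):
```python
def goto_step_reward(prev_distance: float, distance: float, in_firebase: bool) -> float:
    """GotoTarget per-step reward: fixed InAreaReward inside the FireBase,
    otherwise the distance-difference reward R_d = (D_{t-1} - D_t) * 20."""
    if in_firebase:
        return 5.0
    return (prev_distance - distance) * 20.0

print(goto_step_reward(12.0, 11.7, False))  # moved 0.3 units closer -> +6.0
```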
### AttackTarget
In `AttackTarget` the main objective is to eliminate the enemies at the target location, and the Agent is encouraged to fire on that location. As in `FreeTarget`, bringing the line of sight onto the target location is therefore just as important. When the Agent faces the target location and attacks, this counts as suppressive fire and earns a modest `SuppressiveReward`; facing the target location without attacking continuously earns an even lower `FacingAreaReward`. Enemies inside the `FireBase` count as targets, so the Agent receives the `TargetHit` Reward from the `Common Reward` when they are hit or defeated; enemies outside the `FireBase` do not count as targets, and hitting or defeating them only yields the `NonTargetHit` Reward from the `Common Reward`.
| Reward/Penalty | Value/Symbol | Description |
|-------------------|--------------|-----------------------------------------|
| SuppressiveReward | 5 | Reward for suppressive fire on the `FireBase` |
| FacingAreaReward | 2 | Continuous reward while the Agent faces the target |
## Side Channel
To make the environment easier to debug and to allow non-real-time communication between Unity and Python, this environment uses the Side Channel mechanism provided by ML-Agents to pass extra information to the Python side; the implementation follows Hugging Face's [Custom Side Channels](https://github.com/huggingface/ml-agents-patch/blob/develop/docs/Custom-SideChannels.md). On the Unity side the Side Channel is implemented in [`AimbotSideChannel.cs`](https://koha9.asuscomm.com/yurugit/Koha9/Aimbot-ParallelEnv/src/branch/main/Assets/Script/GameScript/SideChannel/AimbotSideChannel.cs) and [`AimBotSideChannelController.cs`](https://koha9.asuscomm.com/yurugit/Koha9/Aimbot-ParallelEnv/src/branch/main/Assets/Script/GameScript/SideChannel/AimBotSideChannelController.cs). Each log message is split into fields by `|`: the first field is the log type and the remaining fields are custom log content. The Unity SideChannel sends every `LogType.Warning` and `LogType.Error` log to the Python side. `LogType.Warning` carries the win/lose result at the end of an episode as well as commands sent from Unity to Python: result messages look like `Warning|Result|Win` and `Warning|Result|Lose`, while the command message, whose content is `Warning|Command`, asks Python to save the model after the next training run ends.
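On the Python side, a matching channel can be registered with mlagents-envs roughly as follows. This is a minimal sketch: the channel UUID is a placeholder that must match the GUID defined in `AimbotSideChannel.cs`, and the class and method names here are illustrative:
```python
import uuid
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.side_channel import SideChannel, IncomingMessage, OutgoingMessage

class AimbotSideChannel(SideChannel):
    def __init__(self, channel_id: uuid.UUID):
        super().__init__(channel_id)
        self.results = []   # collected "Result" messages (Win / Lose)

    def on_message_received(self, msg: IncomingMessage) -> None:
        # Messages are "|"-separated, e.g. "Warning|Result|Win" or "Warning|Command".
        fields = msg.read_string().split("|")
        if len(fields) >= 3 and fields[1] == "Result":
            self.results.append(fields[2])
        elif len(fields) >= 2 and fields[1] == "Command":
            print("Unity asked us to save the model after the next training run.")

    def send_string(self, text: str) -> None:
        # Optionally send a "|"-separated command back to Unity.
        out = OutgoingMessage()
        out.write_string(text)
        super().queue_message_to_send(out)

# Placeholder UUID: replace with the GUID defined in AimbotSideChannel.cs.
CHANNEL_ID = uuid.UUID("621f0a70-4f87-11ea-a6bf-784f4387d1f7")
side_channel = AimbotSideChannel(CHANNEL_ID)
env = UnityEnvironment(file_name=None, side_channels=[side_channel])  # None = connect to the Editor
```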

BIN
ReadmeImages/FreeTargetFacingreward.png (Stored with Git LFS) Normal file

Binary file not shown.

BIN
ReadmeImages/LevelProbabilityPanel.jpg (Stored with Git LFS) Normal file

Binary file not shown.

BIN
ReadmeImages/RayCastLayout.png (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -9,13 +9,13 @@ EditorUserSettings:
value: 55090405535008090b5608764626074415151c79747c74602b7c1861e4b96c6c value: 55090405535008090b5608764626074415151c79747c74602b7c1861e4b96c6c
flags: 0 flags: 0
RecentlyUsedSceneGuid-1: RecentlyUsedSceneGuid-1:
value: 5453565f53020f085f5d0e72157a0844454f4c2b757d7265757a4f64b7b4313c value: 5703025650035e5d090f087148715d4443161e2c7e2c20357b281b31b0e43060
flags: 0 flags: 0
RecentlyUsedSceneGuid-2: RecentlyUsedSceneGuid-2:
value: 06570c0704040b0e5a575520137b5e44154f192e292d22367c2c4866b7b3376f value: 5453565f53020f085f5d0e72157a0844454f4c2b757d7265757a4f64b7b4313c
flags: 0 flags: 0
RecentlyUsedSceneGuid-3: RecentlyUsedSceneGuid-3:
value: 5703025650035e5d090f087148715d4443161e2c7e2c20357b281b31b0e43060 value: 06570c0704040b0e5a575520137b5e44154f192e292d22367c2c4866b7b3376f
flags: 0 flags: 0
RecentlyUsedScenePath-0: RecentlyUsedScenePath-0:
value: 22424703114646680e0b0227036c6c111b07142f1f2b233e2867083debf42d value: 22424703114646680e0b0227036c6c111b07142f1f2b233e2867083debf42d

View File

@ -14,12 +14,12 @@ MonoBehaviour:
m_EditorClassIdentifier: m_EditorClassIdentifier:
m_PixelRect: m_PixelRect:
serializedVersion: 2 serializedVersion: 2
x: 240 x: -230
y: 178 y: 320
width: 1485 width: 1920
height: 835 height: 1037
m_ShowMode: 4 m_ShowMode: 4
m_Title: Game m_Title: Inspector
m_RootView: {fileID: 2} m_RootView: {fileID: 2}
m_MinSize: {x: 875, y: 300} m_MinSize: {x: 875, y: 300}
m_MaxSize: {x: 10000, y: 10000} m_MaxSize: {x: 10000, y: 10000}
@ -44,8 +44,8 @@ MonoBehaviour:
serializedVersion: 2 serializedVersion: 2
x: 0 x: 0
y: 0 y: 0
width: 1485 width: 1920
height: 835 height: 1037
m_MinSize: {x: 875, y: 300} m_MinSize: {x: 875, y: 300}
m_MaxSize: {x: 10000, y: 10000} m_MaxSize: {x: 10000, y: 10000}
m_UseTopView: 1 m_UseTopView: 1
@ -69,7 +69,7 @@ MonoBehaviour:
serializedVersion: 2 serializedVersion: 2
x: 0 x: 0
y: 0 y: 0
width: 1485 width: 1920
height: 30 height: 30
m_MinSize: {x: 0, y: 0} m_MinSize: {x: 0, y: 0}
m_MaxSize: {x: 0, y: 0} m_MaxSize: {x: 0, y: 0}
@ -90,8 +90,8 @@ MonoBehaviour:
m_Position: m_Position:
serializedVersion: 2 serializedVersion: 2
x: 0 x: 0
y: 815 y: 1017
width: 1485 width: 1920
height: 20 height: 20
m_MinSize: {x: 0, y: 0} m_MinSize: {x: 0, y: 0}
m_MaxSize: {x: 0, y: 0} m_MaxSize: {x: 0, y: 0}
@ -115,12 +115,12 @@ MonoBehaviour:
serializedVersion: 2 serializedVersion: 2
x: 0 x: 0
y: 30 y: 30
width: 1485 width: 1920
height: 785 height: 987
m_MinSize: {x: 300, y: 200} m_MinSize: {x: 300, y: 200}
m_MaxSize: {x: 24288, y: 16192} m_MaxSize: {x: 24288, y: 16192}
vertical: 0 vertical: 0
controlID: 25 controlID: 102
--- !u!114 &6 --- !u!114 &6
MonoBehaviour: MonoBehaviour:
m_ObjectHideFlags: 52 m_ObjectHideFlags: 52
@ -140,12 +140,12 @@ MonoBehaviour:
serializedVersion: 2 serializedVersion: 2
x: 0 x: 0
y: 0 y: 0
width: 835 width: 1080
height: 785 height: 987
m_MinSize: {x: 100, y: 200} m_MinSize: {x: 100, y: 200}
m_MaxSize: {x: 8096, y: 16192} m_MaxSize: {x: 8096, y: 16192}
vertical: 1 vertical: 1
controlID: 26 controlID: 103
--- !u!114 &7 --- !u!114 &7
MonoBehaviour: MonoBehaviour:
m_ObjectHideFlags: 52 m_ObjectHideFlags: 52
@ -163,8 +163,8 @@ MonoBehaviour:
serializedVersion: 2 serializedVersion: 2
x: 0 x: 0
y: 0 y: 0
width: 835 width: 1080
height: 562 height: 707
m_MinSize: {x: 201, y: 221} m_MinSize: {x: 201, y: 221}
m_MaxSize: {x: 4001, y: 4021} m_MaxSize: {x: 4001, y: 4021}
m_ActualView: {fileID: 13} m_ActualView: {fileID: 13}
@ -189,9 +189,9 @@ MonoBehaviour:
m_Position: m_Position:
serializedVersion: 2 serializedVersion: 2
x: 0 x: 0
y: 562 y: 707
width: 835 width: 1080
height: 223 height: 280
m_MinSize: {x: 231, y: 271} m_MinSize: {x: 231, y: 271}
m_MaxSize: {x: 10001, y: 10021} m_MaxSize: {x: 10001, y: 10021}
m_ActualView: {fileID: 15} m_ActualView: {fileID: 15}
@ -217,14 +217,14 @@ MonoBehaviour:
- {fileID: 11} - {fileID: 11}
m_Position: m_Position:
serializedVersion: 2 serializedVersion: 2
x: 835 x: 1080
y: 0 y: 0
width: 366 width: 473
height: 785 height: 987
m_MinSize: {x: 100, y: 200} m_MinSize: {x: 100, y: 200}
m_MaxSize: {x: 8096, y: 16192} m_MaxSize: {x: 8096, y: 16192}
vertical: 1 vertical: 1
controlID: 105 controlID: 16
--- !u!114 &10 --- !u!114 &10
MonoBehaviour: MonoBehaviour:
m_ObjectHideFlags: 52 m_ObjectHideFlags: 52
@ -242,8 +242,8 @@ MonoBehaviour:
serializedVersion: 2 serializedVersion: 2
x: 0 x: 0
y: 0 y: 0
width: 366 width: 473
height: 442 height: 556
m_MinSize: {x: 202, y: 221} m_MinSize: {x: 202, y: 221}
m_MaxSize: {x: 4002, y: 4021} m_MaxSize: {x: 4002, y: 4021}
m_ActualView: {fileID: 17} m_ActualView: {fileID: 17}
@ -267,9 +267,9 @@ MonoBehaviour:
m_Position: m_Position:
serializedVersion: 2 serializedVersion: 2
x: 0 x: 0
y: 442 y: 556
width: 366 width: 473
height: 343 height: 431
m_MinSize: {x: 102, y: 121} m_MinSize: {x: 102, y: 121}
m_MaxSize: {x: 4002, y: 4021} m_MaxSize: {x: 4002, y: 4021}
m_ActualView: {fileID: 18} m_ActualView: {fileID: 18}
@ -293,10 +293,10 @@ MonoBehaviour:
m_Children: [] m_Children: []
m_Position: m_Position:
serializedVersion: 2 serializedVersion: 2
x: 1201 x: 1553
y: 0 y: 0
width: 284 width: 367
height: 785 height: 987
m_MinSize: {x: 276, y: 71} m_MinSize: {x: 276, y: 71}
m_MaxSize: {x: 4001, y: 4021} m_MaxSize: {x: 4001, y: 4021}
m_ActualView: {fileID: 20} m_ActualView: {fileID: 20}
@ -324,10 +324,10 @@ MonoBehaviour:
m_Tooltip: m_Tooltip:
m_Pos: m_Pos:
serializedVersion: 2 serializedVersion: 2
x: 240 x: -230
y: 208 y: 350
width: 834 width: 1079
height: 541 height: 686
m_ViewDataDictionary: {fileID: 0} m_ViewDataDictionary: {fileID: 0}
m_OverlayCanvas: m_OverlayCanvas:
m_LastAppliedPresetName: Default m_LastAppliedPresetName: Default
@ -557,14 +557,14 @@ MonoBehaviour:
m_OverrideSceneCullingMask: 6917529027641081856 m_OverrideSceneCullingMask: 6917529027641081856
m_SceneIsLit: 1 m_SceneIsLit: 1
m_SceneLighting: 1 m_SceneLighting: 1
m_2DMode: 1 m_2DMode: 0
m_isRotationLocked: 0 m_isRotationLocked: 0
m_PlayAudio: 0 m_PlayAudio: 0
m_AudioPlay: 0 m_AudioPlay: 0
m_Position: m_Position:
m_Target: {x: 523.5, y: 332.7, z: -2.5} m_Target: {x: -14.506892, y: 1.7234917, z: -131.04033}
speed: 2 speed: 2
m_Value: {x: 523.5, y: 332.7, z: -2.5} m_Value: {x: -14.506892, y: 1.7234917, z: -131.04033}
m_RenderMode: 0 m_RenderMode: 0
m_CameraMode: m_CameraMode:
drawMode: 0 drawMode: 0
@ -611,17 +611,17 @@ MonoBehaviour:
m_GridAxis: 1 m_GridAxis: 1
m_gridOpacity: 0.529 m_gridOpacity: 0.529
m_Rotation: m_Rotation:
m_Target: {x: 0, y: 0, z: 0, w: 1} m_Target: {x: -0.71379465, y: 0.056885682, z: -0.058373097, w: -0.6955967}
speed: 2 speed: 2
m_Value: {x: 0, y: 0, z: 0, w: 1} m_Value: {x: -0.71379465, y: 0.056885682, z: -0.058373097, w: -0.6955967}
m_Size: m_Size:
m_Target: 620.32074 m_Target: 18.774706
speed: 2 speed: 2
m_Value: 620.32074 m_Value: 18.774706
m_Ortho: m_Ortho:
m_Target: 1 m_Target: 0
speed: 2 speed: 2
m_Value: 1 m_Value: 0
m_CameraSettings: m_CameraSettings:
m_Speed: 1.0005 m_Speed: 1.0005
m_SpeedNormalized: 0.5 m_SpeedNormalized: 0.5
@ -662,9 +662,9 @@ MonoBehaviour:
m_Tooltip: m_Tooltip:
m_Pos: m_Pos:
serializedVersion: 2 serializedVersion: 2
x: 64 x: 1920
y: 73 y: 73
width: 1046 width: 1079
height: 686 height: 686
m_ViewDataDictionary: {fileID: 0} m_ViewDataDictionary: {fileID: 0}
m_OverlayCanvas: m_OverlayCanvas:
@ -676,7 +676,7 @@ MonoBehaviour:
m_ShowGizmos: 0 m_ShowGizmos: 0
m_TargetDisplay: 0 m_TargetDisplay: 0
m_ClearColor: {r: 0, g: 0, b: 0, a: 0} m_ClearColor: {r: 0, g: 0, b: 0, a: 0}
m_TargetSize: {x: 1046, y: 665} m_TargetSize: {x: 1079, y: 665}
m_TextureFilterMode: 0 m_TextureFilterMode: 0
m_TextureHideFlags: 61 m_TextureHideFlags: 61
m_RenderIMGUI: 1 m_RenderIMGUI: 1
@ -691,8 +691,8 @@ MonoBehaviour:
m_VRangeLocked: 0 m_VRangeLocked: 0
hZoomLockedByDefault: 0 hZoomLockedByDefault: 0
vZoomLockedByDefault: 0 vZoomLockedByDefault: 0
m_HBaseRangeMin: -523 m_HBaseRangeMin: -539.5
m_HBaseRangeMax: 523 m_HBaseRangeMax: 539.5
m_VBaseRangeMin: -332.5 m_VBaseRangeMin: -332.5
m_VBaseRangeMax: 332.5 m_VBaseRangeMax: 332.5
m_HAllowExceedBaseRangeMin: 1 m_HAllowExceedBaseRangeMin: 1
@ -712,23 +712,23 @@ MonoBehaviour:
serializedVersion: 2 serializedVersion: 2
x: 0 x: 0
y: 21 y: 21
width: 1046 width: 1079
height: 665 height: 665
m_Scale: {x: 1, y: 1} m_Scale: {x: 1, y: 1}
m_Translation: {x: 523, y: 332.5} m_Translation: {x: 539.5, y: 332.5}
m_MarginLeft: 0 m_MarginLeft: 0
m_MarginRight: 0 m_MarginRight: 0
m_MarginTop: 0 m_MarginTop: 0
m_MarginBottom: 0 m_MarginBottom: 0
m_LastShownAreaInsideMargins: m_LastShownAreaInsideMargins:
serializedVersion: 2 serializedVersion: 2
x: -523 x: -539.5
y: -332.5 y: -332.5
width: 1046 width: 1079
height: 665 height: 665
m_MinimalGUI: 1 m_MinimalGUI: 1
m_defaultScale: 1 m_defaultScale: 1
m_LastWindowPixelSize: {x: 1046, y: 686} m_LastWindowPixelSize: {x: 1079, y: 686}
m_ClearInEditMode: 1 m_ClearInEditMode: 1
m_NoCameraWarning: 1 m_NoCameraWarning: 1
m_LowResolutionForAspectRatios: 01000000000000000000 m_LowResolutionForAspectRatios: 01000000000000000000
@ -755,9 +755,9 @@ MonoBehaviour:
m_Pos: m_Pos:
serializedVersion: 2 serializedVersion: 2
x: 0 x: 0
y: 581 y: 726
width: 834 width: 1079
height: 202 height: 259
m_ViewDataDictionary: {fileID: 0} m_ViewDataDictionary: {fileID: 0}
m_OverlayCanvas: m_OverlayCanvas:
m_LastAppliedPresetName: Default m_LastAppliedPresetName: Default
@ -775,22 +775,22 @@ MonoBehaviour:
m_SkipHidden: 0 m_SkipHidden: 0
m_SearchArea: 1 m_SearchArea: 1
m_Folders: m_Folders:
- Assets/Prefeb/ProbabilityPanel - Assets/Script/GameScript/SideChannel
m_Globs: [] m_Globs: []
m_OriginalText: m_OriginalText:
m_ViewMode: 1 m_ViewMode: 1
m_StartGridSize: 16 m_StartGridSize: 16
m_LastFolders: m_LastFolders:
- Assets/Prefeb/ProbabilityPanel - Assets/Script/GameScript/SideChannel
m_LastFoldersGridSize: 16 m_LastFoldersGridSize: 16
m_LastProjectPath: C:\Users\UCUNI\OneDrive\Unity\ML-Agents\Aimbot-ParallelEnv m_LastProjectPath: C:\Users\UCUNI\OneDrive\Unity\ML-Agents\Aimbot-ParallelEnv
m_LockTracker: m_LockTracker:
m_IsLocked: 0 m_IsLocked: 0
m_FolderTreeState: m_FolderTreeState:
scrollPos: {x: 0, y: 257} scrollPos: {x: 0, y: 234}
m_SelectedIDs: c88a0000 m_SelectedIDs: 84940000
m_LastClickedID: 35528 m_LastClickedID: 38020
m_ExpandedIDs: 000000001e830000648300006c83000000ca9a3bffffff7f m_ExpandedIDs: 00000000409400007c9400008694000000ca9a3b
m_RenameOverlay: m_RenameOverlay:
m_UserAcceptedRename: 0 m_UserAcceptedRename: 0
m_Name: m_Name:
@ -818,7 +818,7 @@ MonoBehaviour:
scrollPos: {x: 0, y: 0} scrollPos: {x: 0, y: 0}
m_SelectedIDs: m_SelectedIDs:
m_LastClickedID: 0 m_LastClickedID: 0
m_ExpandedIDs: 000000001e830000 m_ExpandedIDs: 0000000040940000
m_RenameOverlay: m_RenameOverlay:
m_UserAcceptedRename: 0 m_UserAcceptedRename: 0
m_Name: m_Name:
@ -845,7 +845,7 @@ MonoBehaviour:
m_ListAreaState: m_ListAreaState:
m_SelectedInstanceIDs: m_SelectedInstanceIDs:
m_LastClickedInstanceID: 0 m_LastClickedInstanceID: 0
m_HadKeyboardFocusLastEvent: 1 m_HadKeyboardFocusLastEvent: 0
m_ExpandedInstanceIDs: c62300008a5c000000000000 m_ExpandedInstanceIDs: c62300008a5c000000000000
m_RenameOverlay: m_RenameOverlay:
m_UserAcceptedRename: 0 m_UserAcceptedRename: 0
@ -894,10 +894,10 @@ MonoBehaviour:
m_Tooltip: m_Tooltip:
m_Pos: m_Pos:
serializedVersion: 2 serializedVersion: 2
x: 65 x: 64
y: 667 y: 780
width: 1041 width: 1043
height: 372 height: 259
m_ViewDataDictionary: {fileID: 0} m_ViewDataDictionary: {fileID: 0}
m_OverlayCanvas: m_OverlayCanvas:
m_LastAppliedPresetName: Default m_LastAppliedPresetName: Default
@ -924,8 +924,8 @@ MonoBehaviour:
serializedVersion: 2 serializedVersion: 2
x: 1 x: 1
y: 19 y: 19
width: 364 width: 471
height: 421 height: 535
m_ViewDataDictionary: {fileID: 0} m_ViewDataDictionary: {fileID: 0}
m_OverlayCanvas: m_OverlayCanvas:
m_LastAppliedPresetName: Default m_LastAppliedPresetName: Default
@ -933,9 +933,9 @@ MonoBehaviour:
m_SceneHierarchy: m_SceneHierarchy:
m_TreeViewState: m_TreeViewState:
scrollPos: {x: 0, y: 0} scrollPos: {x: 0, y: 0}
m_SelectedIDs: 5a8b0000 m_SelectedIDs:
m_LastClickedID: 35674 m_LastClickedID: 0
m_ExpandedIDs: eaf2ffff9cf7ffff32fbfffff4ffffff8c6700001a86000074890000768b0000 m_ExpandedIDs: 32fbffff
m_RenameOverlay: m_RenameOverlay:
m_UserAcceptedRename: 0 m_UserAcceptedRename: 0
m_Name: m_Name:
@ -980,9 +980,9 @@ MonoBehaviour:
m_Pos: m_Pos:
serializedVersion: 2 serializedVersion: 2
x: 1 x: 1
y: 461 y: 575
width: 364 width: 471
height: 322 height: 410
m_ViewDataDictionary: {fileID: 0} m_ViewDataDictionary: {fileID: 0}
m_OverlayCanvas: m_OverlayCanvas:
m_LastAppliedPresetName: Default m_LastAppliedPresetName: Default
@ -999,7 +999,7 @@ MonoBehaviour:
m_Script: {fileID: 11500000, guid: 6c262c1329a02fa49b5cb4c297106f3f, type: 3} m_Script: {fileID: 11500000, guid: 6c262c1329a02fa49b5cb4c297106f3f, type: 3}
m_Name: m_Name:
m_EditorClassIdentifier: m_EditorClassIdentifier:
m_MinSize: {x: 136, y: 30} m_MinSize: {x: 129, y: 28}
m_MaxSize: {x: 4000, y: 4000} m_MaxSize: {x: 4000, y: 4000}
m_TitleContent: m_TitleContent:
m_Text: ProBuilder m_Text: ProBuilder
@ -1035,10 +1035,10 @@ MonoBehaviour:
m_Tooltip: m_Tooltip:
m_Pos: m_Pos:
serializedVersion: 2 serializedVersion: 2
x: 1202 x: 1554
y: 19 y: 19
width: 283 width: 366
height: 764 height: 966
m_ViewDataDictionary: {fileID: 0} m_ViewDataDictionary: {fileID: 0}
m_OverlayCanvas: m_OverlayCanvas:
m_LastAppliedPresetName: Default m_LastAppliedPresetName: Default