Delete the nearest-enemy detection feature. Use a different-density sensor.

No longer detect closest-enemy info. Add a different-density sensor so the agent gets more state information at the center of its view.
Adjust the Start Scene UI manager. Add in-game visible raycasts and the information each raycast detects.
Start using mypy, black, and flake8 to format Python.
Koha9 2022-09-14 02:33:03 +09:00
{"count":1,"self":33.6679968,"total":34.5046305,"children":{"InitializeActuators":{"count":2,"self":0.0010002,"total":0.0010002,"children":null},"InitializeSensors":{"count":2,"self":0.0010004,"total":0.0010004,"children":null},"AgentSendState":{"count":1489,"self":0.011503399999999999,"total":0.2010688,"children":{"CollectObservations":{"count":1489,"self":0.1780647,"total":0.1780647,"children":null},"WriteActionMask":{"count":1488,"self":0.0019993999999999997,"total":0.0019993999999999997,"children":null},"RequestDecision":{"count":1488,"self":0.009501299999999999,"total":0.009501299999999999,"children":null}}},"DecideAction":{"count":1488,"self":0.0117408,"total":0.0117408,"children":null},"AgentAct":{"count":1488,"self":0.6208231,"total":0.6208231,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1663089804","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-UCUNI -hubSessionId 4cf980b0-326c-11ed-87c2-a7333acffe7c -accessToken j61gZPw8-vc4ZH7TJMvrSAAPQLV9SK6U72z_dek2xhw00ef","communication_protocol_version":"1.5.0","":"2.0.0","scene_name":"InGame","end_time_seconds":"1663089838"}}
{"count":1,"self":33.6679968,"total":34.5046305,"children":{"InitializeActuators":{"count":2,"self":0.0010002,"total":0.0010002,"children":null},"InitializeSensors":{"count":2,"self":0.0010004,"total":0.0010004,"children":null},"AgentSendState":{"count":1489,"self":0.011503399999999999,"total":0.2010688,"children":{"CollectObservations":{"count":1489,"self":0.1780647,"total":0.1780647,"children":null},"WriteActionMask":{"count":1488,"self":0.0019993999999999997,"total":0.0019993999999999997,"children":null},"RequestDecision":{"count":1488,"self":0.009501299999999999,"total":0.009501299999999999,"children":null}}},"DecideAction":{"count":1488,"self":0.0117408,"total":0.0117408,"children":null},"AgentAct":{"count":1488,"self":0.6208231,"total":0.6208231,"children":null}},"gauges":{},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1663089804","unity_version":"2020.3.19f1","command_line_arguments":"C:\\Program Files\\Unity\\Hub\\Editor\\2020.3.19f1\\Editor\\Unity.exe -projectpath C:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-MultiScene -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-UCUNI -hubSessionId 4cf980b0-326c-11ed-87c2-a7333acffe7c -accessToken j61gZPw8-vc4ZH7TJMvrSAAPQLV9SK6U72z_dek2xhw00ef","communication_protocol_version":"1.5.0","":"2.0.0","scene_name":"InGame","end_time_seconds":"1663089838"}}

View File

View File

@ -299,7 +299,7 @@ MonoBehaviour:
- m_Target: {fileID: 1544423168}
- m_Target: {fileID: 0}
m_TargetAssemblyTypeName: StartSceneTimeLimChanger, Assembly-CSharp
m_MethodName: onValueTimeChanged
m_Mode: 1
@ -854,7 +854,6 @@ GameObject:
serializedVersion: 6
- component: {fileID: 289162992}
- component: {fileID: 289162993}
m_Layer: 5
m_Name: EnemyNum
m_TagString: Untagged
@ -876,28 +875,13 @@ RectTransform:
- {fileID: 409680776}
- {fileID: 1746261329}
m_Father: {fileID: 1375409014}
m_RootOrder: 6
m_RootOrder: 7
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 1}
m_AnchorMax: {x: 0, y: 1}
m_AnchoredPosition: {x: 214, y: -290}
m_SizeDelta: {x: 100, y: 100}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &289162993
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 289162991}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 64d0bcf55e4db0c488996ba1051c279f, type: 3}
DataTransfer: {fileID: 1483319556}
EnemyNumInput: {fileID: 409680777}
EnemyNumText: {fileID: 1746261330}
--- !u!1 &293920964
m_ObjectHideFlags: 0
@ -910,7 +894,6 @@ GameObject:
- component: {fileID: 293920968}
- component: {fileID: 293920967}
- component: {fileID: 293920966}
- component: {fileID: 293920969}
m_Layer: 5
m_Name: LoadDirTextBox-Date
m_TagString: Untagged
@ -932,7 +915,7 @@ RectTransform:
- {fileID: 884034940}
- {fileID: 746911212}
m_Father: {fileID: 1539152182}
m_RootOrder: 2
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0.5}
m_AnchorMax: {x: 0.5, y: 0.5}
@ -996,9 +979,9 @@ MonoBehaviour:
- m_Target: {fileID: 293920969}
m_TargetAssemblyTypeName: LoadDirDateTextChange, Assembly-CSharp
m_MethodName: OnValueChanged
- m_Target: {fileID: 2024406052}
m_TargetAssemblyTypeName: LoadDirChanger, Assembly-CSharp
m_MethodName: OnDateValueChanged
m_Mode: 1
m_ObjectArgument: {fileID: 0}
@ -1054,19 +1037,6 @@ CanvasRenderer:
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 293920964}
m_CullTransparentMesh: 1
--- !u!114 &293920969
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 293920964}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: cce4f6a22ca8eba4b94c1bfc6ac08072, type: 3}
DataTransfer: {fileID: 1483319556}
--- !u!1 &350269940
m_ObjectHideFlags: 0
@ -1245,7 +1215,7 @@ MonoBehaviour:
- m_Target: {fileID: 289162993}
- m_Target: {fileID: 0}
m_TargetAssemblyTypeName: StartSceneEnemyNumChanger, Assembly-CSharp
m_MethodName: onValueChanged
m_Mode: 1
@ -1936,7 +1906,6 @@ GameObject:
- component: {fileID: 651349052}
- component: {fileID: 651349054}
- component: {fileID: 651349053}
m_Layer: 5
m_Name: LoadDirToggle
m_TagString: Untagged
@ -1964,22 +1933,6 @@ RectTransform:
m_AnchoredPosition: {x: 10, y: 0}
m_SizeDelta: {x: 20, y: 20}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &651349053
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 651349051}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: e18f417f93a39c74481660da3236c133, type: 3}
LoadDirDateTextBox: {fileID: 293920966}
LoadDirTimeTextBox: {fileID: 653235284}
Toggle: {fileID: 651349054}
DataTransfer: {fileID: 1483319556}
--- !u!114 &651349054
m_ObjectHideFlags: 0
@ -2027,9 +1980,9 @@ MonoBehaviour:
- m_Target: {fileID: 651349053}
m_TargetAssemblyTypeName: LoadDirToggle, Assembly-CSharp
m_MethodName: OnToggleChanged
- m_Target: {fileID: 2024406052}
m_TargetAssemblyTypeName: LoadDirChanger, Assembly-CSharp
m_MethodName: OnDirToggleChanged
m_Mode: 1
m_ObjectArgument: {fileID: 0}
@ -2052,7 +2005,6 @@ GameObject:
- component: {fileID: 653235286}
- component: {fileID: 653235285}
- component: {fileID: 653235284}
- component: {fileID: 653235287}
m_Layer: 5
m_Name: LoadDirTextBox-Time
m_TagString: Untagged
@ -2074,7 +2026,7 @@ RectTransform:
- {fileID: 112144286}
- {fileID: 1086771388}
m_Father: {fileID: 1539152182}
m_RootOrder: 1
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0.5}
m_AnchorMax: {x: 0.5, y: 0.5}
@ -2138,9 +2090,9 @@ MonoBehaviour:
- m_Target: {fileID: 653235287}
m_TargetAssemblyTypeName: LoadDirTimeTextChange, Assembly-CSharp
m_MethodName: OnValueChanged
- m_Target: {fileID: 2024406052}
m_TargetAssemblyTypeName: LoadDirChanger, Assembly-CSharp
m_MethodName: OnTimeValueChanged
m_Mode: 1
m_ObjectArgument: {fileID: 0}
@ -2196,19 +2148,6 @@ CanvasRenderer:
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 653235282}
m_CullTransparentMesh: 1
--- !u!114 &653235287
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 653235282}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 8396a1a162e012447a0e4f7626e70dc7, type: 3}
DataTransfer: {fileID: 1483319556}
--- !u!1 &658127036
m_ObjectHideFlags: 0
@ -3041,7 +2980,6 @@ GameObject:
- component: {fileID: 883289519}
- component: {fileID: 883289518}
- component: {fileID: 883289517}
- component: {fileID: 883289520}
m_Layer: 5
m_Name: Start
m_TagString: Untagged
@ -3062,7 +3000,7 @@ RectTransform:
- {fileID: 42682888}
m_Father: {fileID: 1375409014}
m_RootOrder: 2
m_RootOrder: 3
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0}
m_AnchorMax: {x: 0.5, y: 0}
@ -3113,7 +3051,7 @@ MonoBehaviour:
- m_Target: {fileID: 883289520}
- m_Target: {fileID: 2024406053}
m_TargetAssemblyTypeName: SceneChange, Assembly-CSharp
m_MethodName: onStartClick
m_Mode: 1
@ -3163,20 +3101,6 @@ CanvasRenderer:
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 883289515}
m_CullTransparentMesh: 1
--- !u!114 &883289520
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 883289515}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 73ae3df5f5faba1428ab2529c043b7ae, type: 3}
DataTransfer: {fileID: 1483319556}
errorText: {fileID: 1951625460}
--- !u!1 &884034939
m_ObjectHideFlags: 0
@ -4400,6 +4324,7 @@ RectTransform:
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 0, y: 0, z: 0}
- {fileID: 2024406050}
- {fileID: 1982556591}
- {fileID: 1951625459}
- {fileID: 883289516}
@ -4714,11 +4639,11 @@ RectTransform:
m_LocalScale: {x: 1, y: 1, z: 1}
- {fileID: 132436945}
- {fileID: 653235283}
- {fileID: 293920965}
- {fileID: 653235283}
- {fileID: 651349052}
m_Father: {fileID: 1375409014}
m_RootOrder: 3
m_RootOrder: 4
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 1}
m_AnchorMax: {x: 0, y: 1}
@ -4734,7 +4659,6 @@ GameObject:
serializedVersion: 6
- component: {fileID: 1544423167}
- component: {fileID: 1544423168}
m_Layer: 5
m_Name: TimeLimit
m_TagString: Untagged
@ -4756,28 +4680,13 @@ RectTransform:
- {fileID: 83479714}
- {fileID: 1165772551}
m_Father: {fileID: 1375409014}
m_RootOrder: 7
m_RootOrder: 8
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 1}
m_AnchorMax: {x: 0, y: 1}
m_AnchoredPosition: {x: 214, y: -320}
m_SizeDelta: {x: 100, y: 100}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &1544423168
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1544423166}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 7857e2b2e5caf6b4686c4a7d87fa998e, type: 3}
DataTransfer: {fileID: 1483319556}
TimeLimText: {fileID: 1165772552}
TimelimInput: {fileID: 83479715}
--- !u!1 &1546066799
m_ObjectHideFlags: 0
@ -4821,7 +4730,7 @@ RectTransform:
- {fileID: 658127037}
- {fileID: 1792678409}
m_Father: {fileID: 1375409014}
m_RootOrder: 4
m_RootOrder: 5
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 1}
m_AnchorMax: {x: 0, y: 1}
@ -5019,9 +4928,9 @@ MonoBehaviour:
- m_Target: {fileID: 2046915485}
m_TargetAssemblyTypeName: DecisionPeriodChanger, Assembly-CSharp
m_MethodName: onToggleChanged
- m_Target: {fileID: 2024406051}
m_TargetAssemblyTypeName: EnvArgsChanger, Assembly-CSharp
m_MethodName: onABDToggleChanged
m_Mode: 1
m_ObjectArgument: {fileID: 0}
@ -5845,7 +5754,7 @@ RectTransform:
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 1375409014}
m_RootOrder: 1
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0.5}
m_AnchorMax: {x: 0.5, y: 0.5}
@ -6082,7 +5991,7 @@ RectTransform:
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 1375409014}
m_RootOrder: 0
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0.5}
m_AnchorMax: {x: 0.5, y: 0.5}
@ -6206,6 +6115,94 @@ CanvasRenderer:
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 2010289379}
m_CullTransparentMesh: 1
--- !u!1 &2024406049
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
- component: {fileID: 2024406050}
- component: {fileID: 2024406053}
- component: {fileID: 2024406052}
- component: {fileID: 2024406051}
m_Layer: 5
m_Name: UIManager
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!224 &2024406050
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 2024406049}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 1375409014}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0.5, y: 0.5}
m_AnchorMax: {x: 0.5, y: 0.5}
m_AnchoredPosition: {x: 0, y: 0}
m_SizeDelta: {x: 100, y: 100}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &2024406051
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 2024406049}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: ccfe892ab0f980343b6df0fafaec38e2, type: 3}
DataTransfer: {fileID: 1483319556}
EnemyNumText: {fileID: 1746261330}
EnemyNumInput: {fileID: 409680777}
TimeLimText: {fileID: 1165772552}
TimelimInput: {fileID: 83479715}
DecisionPeriodSlide: {fileID: 2025428252}
DecisionPeriodDataText: {fileID: 901241037}
TakeActionsBetweenDecisionsToggle: {fileID: 1633180793}
--- !u!114 &2024406052
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 2024406049}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 664e98da80fb4d842b0c3aff09fd4dc9, type: 3}
LoadDirDateTextBox: {fileID: 293920966}
LoadDirTimeTextBox: {fileID: 653235284}
Toggle: {fileID: 651349054}
DataTransfer: {fileID: 1483319556}
--- !u!114 &2024406053
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 2024406049}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 73ae3df5f5faba1428ab2529c043b7ae, type: 3}
DataTransfer: {fileID: 1483319556}
errorText: {fileID: 1951625460}
--- !u!1 &2025428250
m_ObjectHideFlags: 0
@ -6296,9 +6293,9 @@ MonoBehaviour:
- m_Target: {fileID: 2046915485}
m_TargetAssemblyTypeName: DecisionPeriodChanger, Assembly-CSharp
m_MethodName: onSlideValueChanged
- m_Target: {fileID: 2024406051}
m_TargetAssemblyTypeName: EnvArgsChanger, Assembly-CSharp
m_MethodName: onDPSlideValueChanged
m_Mode: 1
m_ObjectArgument: {fileID: 0}
@ -6317,7 +6314,6 @@ GameObject:
serializedVersion: 6
- component: {fileID: 2046915484}
- component: {fileID: 2046915485}
m_Layer: 5
m_Name: DecisionPeriod
m_TagString: Untagged
@ -6341,29 +6337,13 @@ RectTransform:
- {fileID: 473186479}
- {fileID: 901241036}
m_Father: {fileID: 1375409014}
m_RootOrder: 5
m_RootOrder: 6
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 1}
m_AnchorMax: {x: 0, y: 1}
m_AnchoredPosition: {x: 214, y: -348.5}
m_SizeDelta: {x: 100, y: 100}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &2046915485
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 2046915483}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 0d097ecbef2d7b843b772144dd301b2b, type: 3}
DataTransfer: {fileID: 1483319556}
DecisionPeriodSlide: {fileID: 2025428252}
DecisionPeriodDataText: {fileID: 901241037}
TakeActionsBetweenDecisionsToggle: {fileID: 1633180793}
--- !u!1 &2070899079
m_ObjectHideFlags: 0

View File

@ -65,7 +65,6 @@ public class AgentWithGun : Agent
public float mouseYSensitivity = 200;
public float yRotation = 0.1f;//定义一个浮点类型的量记录围绕X轴旋转的角度
private float startTime = 0;
private int shoot = 0;
private float lastShootTime = 0.0f;
@ -95,47 +94,80 @@ public class AgentWithGun : Agent
void Start()
DataTransfer = GameObject.Find("StartSeneDataTransfer").GetComponent<StartSeneData>();
UICon = transform.GetComponent<UIController>();
HistoryRec = transform.GetComponent<HistoryRecorder>();
rayScript = GetComponent<RaySensors>();
// get DataTranfer
DataTransfer = GameObject.Find("StartSeneDataTransfer").GetComponent<StartSeneData>();
// Enemy Num
enemyNum = DataTransfer.EnemyNum;
// Enemy Num
enemyNum = DataTransfer.EnemyNum;
// Time Limit
timeLimit = DataTransfer.Timelim;
// Time Limit
timeLimit = DataTransfer.Timelim;
// get load directory.
LoadDirDate = DataTransfer.LoadDirDate;
LoadDirTime = DataTransfer.LoadDirTime;
LoadDirDateF = float.Parse(LoadDirDate);
loadDirTimeF = float.Parse(LoadDirTime);
// get load directory.
LoadDirDate = DataTransfer.LoadDirDate;
LoadDirTime = DataTransfer.LoadDirTime;
LoadDirDateF = float.Parse(LoadDirDate);
loadDirTimeF = float.Parse(LoadDirTime);
// get Default reward.
nonRewardDefault = DataTransfer.nonReward;
shootRewardDefault = DataTransfer.shootReward;
shootWithoutReadyRewardDefault = DataTransfer.shootWithoutReadyReward;
hitRewardDefault = DataTransfer.hitReward;
killRewardDefault = DataTransfer.killReward;
winRewardDefault = DataTransfer.winReward;
loseRewardDefault = DataTransfer.loseReward;
// get Default reward.
nonRewardDefault = DataTransfer.nonReward;
shootRewardDefault = DataTransfer.shootReward;
shootWithoutReadyRewardDefault = DataTransfer.shootWithoutReadyReward;
hitRewardDefault = DataTransfer.hitReward;
killRewardDefault = DataTransfer.killReward;
winRewardDefault = DataTransfer.winReward;
loseRewardDefault = DataTransfer.loseReward;
// give default Reward to Reward value will be used.
nonReward = nonRewardDefault;
shootReward = shootRewardDefault;
shootWithoutReadyReward = shootWithoutReadyRewardDefault;
hitReward = hitRewardDefault;
winReward = winRewardDefault;
loseReward = loseRewardDefault;
killReward = killRewardDefault;
// change Decision Period & Take Actions Between Decisions
transform.GetComponent<DecisionRequester>().DecisionPeriod = DataTransfer.DecisionPeriod;
transform.GetComponent<DecisionRequester>().TakeActionsBetweenDecisions = DataTransfer.ActionsBetweenDecisions;
// change Decision Period & Take Actions Between Decisions
transform.GetComponent<DecisionRequester>().DecisionPeriod = DataTransfer.DecisionPeriod;
transform.GetComponent<DecisionRequester>().TakeActionsBetweenDecisions = DataTransfer.ActionsBetweenDecisions;
catch (NullReferenceException)
// Enemy Num
enemyNum = 3;
//initialize remainTime
remainTime = (int)(timeLimit - Time.time + startTime);
// Time Limit
timeLimit = 30;
// get load directory.
LoadDirDate = "0";
LoadDirTime = "0";
LoadDirDateF = float.Parse(LoadDirDate);
loadDirTimeF = float.Parse(LoadDirTime);
// get Default reward.
nonRewardDefault = -0.05f;
shootRewardDefault = -0.06f;
shootWithoutReadyRewardDefault = -0.06f;
hitRewardDefault = 5.0f;
killRewardDefault = 10.0f;
winRewardDefault = 20.0f;
loseRewardDefault = -10.0f;
// change Decision Period & Take Actions Between Decisions
transform.GetComponent<DecisionRequester>().DecisionPeriod = 1;
transform.GetComponent<DecisionRequester>().TakeActionsBetweenDecisions = true;
UICon = transform.GetComponent<UIController>();
HistoryRec = transform.GetComponent<HistoryRecorder>();
rayScript = GetComponent<RaySensors>();
// give default Reward to Reward value will be used.
nonReward = nonRewardDefault;
shootReward = shootRewardDefault;
shootWithoutReadyReward = shootWithoutReadyRewardDefault;
hitReward = hitRewardDefault;
winReward = winRewardDefault;
loseReward = loseRewardDefault;
killReward = killRewardDefault;
//initialize remainTime
remainTime = (int)(timeLimit - Time.time + startTime);
/* ----------此Update用于debugBuild前删除或注释掉----------*/
@ -300,7 +332,7 @@ public class AgentWithGun : Agent
// GotKill 获得击杀时用于呼出
// GotKill 获得击杀时用于呼出
public void GotKill()
enemyKillCount += 1;
@ -493,19 +525,17 @@ public class AgentWithGun : Agent
//List<float> enemyLDisList = RaySensors.enemyLDisList;// All Enemy Lside Distances
//List<float> enemyRDisList = RaySensors.enemyRDisList;// All Enemy Rside Distances
int allEnemyNum = RaySensors.allEnemyNum;
float[] myObserve = { thisAgent.position.x, thisAgent.position.y, thisAgent.position.z, thisAgent.rotation.w };
float[] rayTagResult = RaySensors.rayTagResult;// 探测用RayTag结果 float[](raySensorNum,1)
float[] rayDisResult = RaySensors.rayDisResult; // 探测用RayDis结果 float[](raySensorNum,1)
float[] focusEnemyObserve = RaySensors.focusEnemyInfo;// 最近的Enemy情报 float[](3,1) MinEnemyIndex,x,z
int raySensorNum = rayScript.rayNum;// raySensor数量 int
float[] rayTagResult = rayScript.rayTagResult;// 探测用RayTag结果 float[](raySensorNum,1)
float[] rayDisResult = rayScript.rayDisResult; // 探测用RayDis结果 float[](raySensorNum,1)
//float[] focusEnemyObserve = RaySensors.focusEnemyInfo;// 最近的Enemy情报 float[](3,1) MinEnemyIndex,x,z
//sensor.AddObservation(allEnemyNum); // 敌人数量 int
sensor.AddObservation(myObserve); // 自机位置xyz+朝向 float[](4,1)
sensor.AddObservation(rayTagResult); // 探测用RayTag结果 float[](raySensorNum,1)
sensor.AddObservation(rayDisResult); // 探测用RayDis结果 float[](raySensorNum,1)
sensor.AddObservation(focusEnemyObserve); // 最近的Enemy情报 float[](3,1) MinEnemyIndex,x,z
//sensor.AddObservation(focusEnemyObserve); // 最近的Enemy情报 float[](3,1) MinEnemyIndex,x,z
//sensor.AddObservation(raySensorNum); // raySensor数量 int
sensor.AddObservation(LoadDirDateF); // 用于loadModel的第一级dir
sensor.AddObservation(loadDirTimeF); // 用于loadModel的第二级dir

View File

@ -6,6 +6,7 @@ public class CameraChange : MonoBehaviour
public Camera FPSCamera;
public Camera TPSCamera;
public GameObject AgentOBJ;
public void switchCamera()
@ -26,10 +27,12 @@ public class CameraChange : MonoBehaviour
TPSCamera.enabled = true;
FPSCamera.enabled = false;
AgentOBJ.GetComponent<RaySensors>().showInGameRay = true;
public void ShowFPSView()
FPSCamera.enabled = true;
TPSCamera.enabled = false;
AgentOBJ.GetComponent<RaySensors>().showInGameRay = false;

View File

View File

@ -15,6 +15,10 @@ public class EnemyHPBar : MonoBehaviour
EnemyOBJ = transform.parent.gameObject;
BGOBJ = transform.GetChild(0).gameObject;
gaugeImgOBJ = BGOBJ.transform.GetChild(0).gameObject;
Vector3 v = Camera.main.transform.position - transform.position;
v.x = v.z = 0.0f;
transform.LookAt(Camera.main.transform.position - v);
void Update()

View File

@ -1,4 +1,5 @@
using System.Collections;
using System;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
@ -7,30 +8,59 @@ using UnityEngine;
public class RaySensors : MonoBehaviour
public Camera agentCam;
public GameObject myself;
public int rayNum = 6;
public string EnemyTagName;
public string WallTagName;
public float viewDistance = 100; // how long the ray can detect
public float Damage = 50; // damage to enemy
public float attentionRange = 1f; //注意力范围1为最大
public float MaxDistance = 9999999999f;
public float EnemyWidthRedundancy = 0.01f; //为了确保Ray可以击中Enemy用于缩小EnemyWidth的长度
//public List<string> tagNames = new List<string>();
public static int allEnemyNum = 0;//All Enemy Num
public static float[] focusEnemyInfo = new float[3];
public static float[] rayTagResult; // Array to save Tag Result
public static float[] rayDisResult; // Array to save Distance Result
public static List<float> enemyLDisList = new List<float>();// All Enemy Lside Distances
public static List<float> enemyRDisList = new List<float>();// All Enemy Rside Distances
public Material lineMeterial;
public GameObject rayInfoPrefab;
public GameObject agentCanvas;
[SerializeField, Range(0, 500)] public float viewDistance = 100; // how long the ray can detect
//[SerializeField, Range(0, 1)] public float totalRange = 1f; // Total view range Max = 1
[SerializeField, Range(0, 1)] public float focusRange = 0.15f; // center focus range
public int halfOuterRayNum = 3; // >=2
public int focusRayNum = 5; // >= 1 and must be odd num!
public bool showInGameRay = true;
public bool showDebugRay = true;
public bool showInGameRayInfo = true;
public float lineWidth = 0.05f;
public bool showEnemySensor = true;
public bool showEyeSensor = true;
public float[] rayTagResult;
public float[] rayDisResult;
[System.NonSerialized] public int totalRayNum;
GameObject[] linesOBJ;
GameObject[] rayInfoOBJ;
LineRenderer[] lineRenderers;
rayInfoUI[] rayInfoUIs;
private void Start()
totalRayNum = halfOuterRayNum * 2 + focusRayNum;
rayTagResult = new float[totalRayNum];
rayDisResult = new float[totalRayNum];
linesOBJ = new GameObject[totalRayNum];
lineRenderers = new LineRenderer[totalRayNum];
rayInfoOBJ = new GameObject[totalRayNum];
rayInfoUIs = new rayInfoUI[totalRayNum];
for(int i = 0; i < totalRayNum; i++)
linesOBJ[i] = new GameObject();
linesOBJ[i].name = "rayCastLine-" + Convert.ToString(i);
linesOBJ[i].transform.parent = agentCam.transform;
lineRenderers[i] = linesOBJ[i].GetComponent<LineRenderer>();
lineRenderers[i].material = lineMeterial;
rayInfoOBJ[i] = (GameObject)Instantiate(rayInfoPrefab);
rayInfoOBJ[i].name = "rayInfo-" + Convert.ToString(i);
rayInfoUIs[i] = rayInfoOBJ[i].GetComponent<rayInfoUI>();
static int tagToInt(string tag)
@ -45,277 +75,107 @@ public class RaySensors : MonoBehaviour
// 敌人Ray探测处理
// 返回
(int, List<float>, List<float>, float[]) enemySensorRay(GameObject myself, Camera agentCam, float attentionRange)
private void singleRaycastUpdate(Ray ray,LineRenderer thisLineRenderer,rayInfoUI thisRayInfoUI, out float rayTagResult, out float rayDisResult)
List<float> thisLDistanceList = new List<float>();
List<float> thisRDistanceList = new List<float>();
GameObject[] EnemyGameObjs;
EnemyGameObjs = GameObject.FindGameObjectsWithTag("Enemy");
int EnemyIndex = 0;
float MinEnemyDis = MaxDistance+1.0f;
int MinEnemyIndex = 0;
float[] MinEnemyInfo = new float[3];
foreach (GameObject EnemyObj in EnemyGameObjs)
// get Raycast hit infomation and return Tag and distance
RaycastHit thisHit;
Color rayColor = Color.cyan;
float lineLength = viewDistance;
string rayInfoText = "";
Vector3 rayInfoPosition;
if (Physics.Raycast(ray, out thisHit, viewDistance)) // 若在viewDistance范围内有碰撞
Vector3 thisEnemyPosition = EnemyObj.transform.position;
Vector3 thisEnemyScale = EnemyObj.transform.localScale;
Vector3 MyselfPosition = myself.transform.position;
float thisEnemyWidth = (float)(thisEnemyScale.x / 2) - EnemyWidthRedundancy;
float thisEnemyDistance = Vector3.Distance(MyselfPosition, thisEnemyPosition);
if (thisEnemyPosition == MyselfPosition)
rayInfoText = thisHit.collider.tag;
rayTagResult = tagToInt(thisHit.collider.tag);
rayDisResult = thisHit.distance;
lineLength = rayDisResult;
rayInfoText += "\n" + Convert.ToString(rayDisResult);
switch (rayTagResult)
//Debug.Log("OH It's me");
EnemyIndex += 1;
Vector3 Vertical = new Vector3(0, 100, 0);//垂直向上的向量
Vector3 EnemytoMe = MyselfPosition - thisEnemyPosition;//Enemy和自机连线指向自机的向量
Vector3 LHorizontal = Vector3.Cross(Vertical, EnemytoMe);// 垂直于EnemytoMe与Vertical向量所组成的面的且指向<-左侧的小向量<- Enemy
Vector3 RHorizontal = Vector3.Cross(EnemytoMe, Vertical);// 垂直于EnemytoMe与Vertical向量所组成的面的且指向->右侧的小向量Enemy ->
float standaedization = (float)thisEnemyWidth / Vector3.Distance(thisEnemyPosition, RHorizontal);//计算需要缩小的比例
RHorizontal *= standaedization;//应用缩小比例,标准化完成
LHorizontal *= standaedization;//应用缩小比例,标准化完成
Vector3 LMetoEnemy = LHorizontal - EnemytoMe;//自机与左侧边界连线Me<- Enemy
Vector3 RMetoEnemy = RHorizontal - EnemytoMe;//自机与右侧边界连线Enemy ->Me
Vector3 L0toEnemy = LHorizontal + thisEnemyPosition;// Enemy左侧绝对坐标
Vector3 R0toEnemy = RHorizontal + thisEnemyPosition;// Enemy右侧绝对坐标
float LMetoEnemyDist = Vector3.Distance(MyselfPosition, L0toEnemy);
float RMetoEnemyDist = Vector3.Distance(MyselfPosition, R0toEnemy);
Vector3 LEnemyInView = agentCam.WorldToViewportPoint(L0toEnemy);//Enemy左侧于视角中位置
Vector3 REnemyInView = agentCam.WorldToViewportPoint(R0toEnemy);//Enemy右侧于视角中位置
//Debug.DrawRay(thisEnemyPosition, EnemytoMe, Color.white);//Enemy和自机连线指向自机的向量
//Debug.DrawRay(thisEnemyPosition, Vertical, Color.white);//垂直向上的向量
//Debug.DrawRay(thisEnemyPosition, LHorizontal,;// 垂直于Vc与Vertical向量所组成的面的且指向<-左侧的小向量<- Enemy
//Debug.DrawRay(thisEnemyPosition, RHorizontal,;// 垂直于Vc与Vertical向量所组成的面的且指向->右侧的小向量Enemy ->
//Debug.DrawRay(MyselfPosition, LMetoEnemy,;//自机与左侧边界连线<- Enemy
//Debug.DrawRay(MyselfPosition, RMetoEnemy,;//自机与右侧边界连线Enemy ->
//Debug.Log("EnemyObj" + EnemyIndex + "Position:" + thisEnemyPosition);
if (LEnemyInView.x >= (thisEnemyWidth - attentionRange / 2) && LEnemyInView.x <= (thisEnemyWidth + attentionRange / 2) && LEnemyInView.z > 0)
Ray LRay = new Ray(MyselfPosition, LMetoEnemy);
RaycastHit LHit;
if (showEnemySensor)
Debug.DrawRay(LRay.origin, LRay.direction * LMetoEnemyDist, Color.white);//自机与左侧边界连线<- Enemy
//Ray Hit Something
if (Physics.Raycast(MyselfPosition, LMetoEnemy, out LHit, LMetoEnemyDist))
//Ray Hit Enemy
if (LHit.collider.tag == EnemyTagName && System.Math.Abs(LHit.distance - thisEnemyDistance) <= thisEnemyWidth)
if (showEnemySensor)
Debug.DrawRay(LRay.origin, LRay.direction * LHit.distance,;//自机与所击中物体的连线
//Debug.Log("Hit Tag = " + LHit.collider.tag);
if (showEnemySensor)
Debug.DrawRay(LRay.origin, LRay.direction * LHit.distance, Color.cyan);//自机与所击中物体的连线
//Debug.LogWarning("Hit Tag = " + LHit.collider.tag);
Debug.LogError("LRAY HIT NOTHING, Check Code!");
if (REnemyInView.x >= (thisEnemyWidth - attentionRange / 2) && REnemyInView.x <= (thisEnemyWidth + attentionRange / 2) && REnemyInView.z > 0)
Ray RRay = new Ray(MyselfPosition, RMetoEnemy);
RaycastHit RHit;
if (showEnemySensor)
Debug.DrawRay(RRay.origin, RRay.direction * RMetoEnemyDist, Color.white);//自机与左侧边界连线<- Enemy
//Ray Hit Something
if (Physics.Raycast(MyselfPosition, RMetoEnemy, out RHit, RMetoEnemyDist))
//Ray Hit Enemy
if (RHit.collider.tag == EnemyTagName && System.Math.Abs(RHit.distance - thisEnemyDistance) <= thisEnemyWidth)
if (showEnemySensor)
Debug.DrawRay(RRay.origin, RRay.direction * RHit.distance,;//自机与所击中物体的连线
//Debug.Log("Hit Tag = " + LHit.collider.tag);
if (showEnemySensor)
Debug.DrawRay(RRay.origin, RRay.direction * RHit.distance, Color.cyan);//自机与所击中物体的连线
//Debug.LogWarning("Hit Tag = " + LHit.collider.tag);
Debug.LogError("RRAY HIT NOTHING, Check Code!");
//Debug.LogWarning("EnemyIndex" + EnemyIndex);
//Debug.Log("thisLDistanceList" + thisLDistanceList.Count);
//Debug.Log("thisRDistanceList" + thisRDistanceList.Count);
if (System.Math.Min(thisLDistanceList[EnemyIndex], thisRDistanceList[EnemyIndex]) < MinEnemyDis)
//Debug.Log("EnemyIndex" + EnemyIndex);
MinEnemyDis = System.Math.Min(thisLDistanceList[EnemyIndex], thisRDistanceList[EnemyIndex]);
MinEnemyIndex = EnemyIndex;
case 1:// Wall
rayColor = Color.white;
case 2: // Enemy
rayColor =;
case -1: // Hit Nothing
rayColor = Color.gray;
default: // default,got wrong
rayColor = Color.cyan;
// 获取最近敌人的准确位置信息
MinEnemyInfo[0] = (float)MinEnemyIndex;
if(MinEnemyInfo[0] <= 0)
else // 若在viewDistance范围无碰撞
MinEnemyInfo[1] = MaxDistance;
MinEnemyInfo[2] = MaxDistance;
rayTagResult = -1f;
rayDisResult = -1f;
rayInfoPosition = ray.origin + (ray.direction * lineLength);
if (showInGameRay)
drawLine(ray, lineLength, thisLineRenderer, rayColor);
MinEnemyInfo[1] = EnemyGameObjs[MinEnemyIndex].transform.position.x;
MinEnemyInfo[2] = EnemyGameObjs[MinEnemyIndex].transform.position.z;
turnOffLine(thisLineRenderer, rayColor);
return (EnemyIndex, thisLDistanceList, thisRDistanceList,MinEnemyInfo);
// drawRay in game
if (showInGameRayInfo) thisRayInfoUI.updateInfo(rayInfoText, rayInfoPosition, rayColor);
// Show log
if (showDebugRay) Debug.DrawRay(ray.origin, ray.direction * viewDistance, rayColor); // drawRay in debug
// Debug.Log(ray.origin + ray.direction);
// Debug.Log(rayTagResult);
// Debug.Log(tagToInt(thisHit.collider.tag));
// 全局Ray探测处理
(float[], float[]) eyeSensorRay(int rayNum, Camera agentCam, float viewDistance)
private void drawLine(Ray ray,float lineLength, LineRenderer thisLineRenderer, Color lineColor)
//初始化result Array
float[] thisRayTagResult = new float[rayNum];
float[] thisRayDisResult = new float[rayNum];
for (int a = 0; a <= rayNum - 1; a = a + 1)
Vector3 point = new Vector3(a * agentCam.pixelWidth / (rayNum - 1), agentCam.pixelHeight / 2, 0);//发射位置
Ray ray = agentCam.ScreenPointToRay(point);
RaycastHit hit;
if (showEyeSensor)
Debug.DrawRay(ray.origin, ray.direction * viewDistance,;
if (Physics.Raycast(ray, out hit, viewDistance))
thisRayTagResult[a] = tagToInt(hit.collider.tag);
thisRayDisResult[a] = hit.distance;
if (showEyeSensor)
Debug.DrawRay(ray.origin, ray.direction * hit.distance, Color.yellow);
thisRayTagResult[a] = -1f;
thisRayDisResult[a] = -1f;
return (thisRayTagResult, thisRayDisResult);
thisLineRenderer.startColor = lineColor;
thisLineRenderer.endColor = lineColor;
thisLineRenderer.startWidth = lineWidth;
thisLineRenderer.endWidth = lineWidth;
thisLineRenderer.SetPosition(0, ray.origin);
thisLineRenderer.SetPosition(1, ray.origin + (ray.direction * lineLength));
void Start()
private void turnOffLine(LineRenderer thisLineRenderer, Color lineColor)
rayTagResult = new float[rayNum];
rayDisResult = new float[rayNum];
thisLineRenderer.startColor = lineColor;
thisLineRenderer.endColor = lineColor;
thisLineRenderer.startWidth = 0f;
thisLineRenderer.endWidth = 0f;
thisLineRenderer.SetPosition(0, new Vector3(0, 0, 0));
thisLineRenderer.SetPosition(1, new Vector3(0, 0, 0));
void Update()
public void updateRayInfo()
(rayTagResult, rayDisResult) = eyeSensorRay(rayNum, agentCam, viewDistance);
(allEnemyNum, enemyLDisList, enemyRDisList, focusEnemyInfo) = enemySensorRay(myself, agentCam, attentionRange);
Debug.LogWarning("rayNum :" + rayNum);
for (int i =0;i < rayNum; i++)
float focusLEdge = agentCam.pixelWidth * (1 - focusRange) / 2;
float focusREdge = agentCam.pixelWidth * (1 + focusRange) / 2;
float thisCamPixelHeight = agentCam.pixelHeight;
for (int i = 0; i < halfOuterRayNum; i++) // create left outside rays; 0 ~ focusLeftEdge
Debug.Log("rayTagResult" + rayTagResult[i] + "rayDisResult"+ rayDisResult[i]);
Vector3 point = new Vector3(i * focusLEdge / (halfOuterRayNum - 1), thisCamPixelHeight / 2, 0);
Ray thisRay = agentCam.ScreenPointToRay(point);
singleRaycastUpdate(thisRay,lineRenderers[i], rayInfoUIs[i] , out rayTagResult[i], out rayDisResult[i]);
Debug.LogWarning("EnemyNum Include Me:" + allEnemyNum);
for(int i = 0; i < allEnemyNum; i++)
for (int i = 0; i < halfOuterRayNum; i++) // create right outside rays; focusRightEdge ~ MaxPixelHeight
Debug.Log("enemyLDisList" + enemyLDisList[i] + "enemyRDisList" + enemyRDisList[i]);
Vector3 point = new Vector3(focusREdge + (i * focusLEdge / (halfOuterRayNum - 1)), thisCamPixelHeight / 2, 0);
Ray thisRay = agentCam.ScreenPointToRay(point);
singleRaycastUpdate(thisRay, lineRenderers[halfOuterRayNum + i], rayInfoUIs[halfOuterRayNum + i], out rayTagResult[halfOuterRayNum + i], out rayDisResult[halfOuterRayNum + i]);
for (int i = 0; i < focusRayNum; i++) // create center focus rays; focusLeftEdge ~ focusLeftEdge
Vector3 point = new Vector3(focusLEdge + ((i + 1) * (focusREdge - focusLEdge) / (focusRayNum + 1)), thisCamPixelHeight / 2, 0);
Ray thisRay = agentCam.ScreenPointToRay(point);
singleRaycastUpdate(thisRay, lineRenderers[halfOuterRayNum * 2 + i], rayInfoUIs[halfOuterRayNum * 2 + i], out rayTagResult[halfOuterRayNum*2 + i], out rayDisResult[halfOuterRayNum*2 + i]);

View File

@ -0,0 +1,25 @@
using UnityEngine;
using UnityEngine.UI;
using TMPro;
public class rayInfoUI : MonoBehaviour
TextMeshProUGUI infoText;
// Start is called before the first frame update
void Start()
infoText = transform.GetChild(0).gameObject.GetComponent<TextMeshProUGUI>();
public void updateInfo(string info,Vector3 infoPosition, Color infoColor)
infoText.text = info;
infoText.color = infoColor;
transform.position = infoPosition;
Vector3 v = Camera.main.transform.position - infoPosition;
v.x = v.z = 0.0f;
transform.LookAt(Camera.main.transform.position - v);
transform.Rotate(0, 180, 0);

View File

@ -1,5 +1,5 @@
fileFormatVersion: 2
guid: cce4f6a22ca8eba4b94c1bfc6ac08072
guid: 812dfed7ee1d09c4fa7c3ed8372f54ae
externalObjects: {}
serializedVersion: 2

View File

View File

@ -0,0 +1,62 @@
using System;
using UnityEngine;
using UnityEngine.UI;
public class EnvArgsChanger : MonoBehaviour
public GameObject DataTransfer;
public Text EnemyNumText;
public InputField EnemyNumInput;
public Text TimeLimText;
public InputField TimelimInput;
[Header("Decision Period")]
public Slider DecisionPeriodSlide;
public Text DecisionPeriodDataText;
public Toggle TakeActionsBetweenDecisionsToggle;
public void onEnemynumValueChanged()
// enemy total num value control
if (EnemyNumInput.GetComponent<InputField>().text == "" || EnemyNumInput.GetComponent<InputField>().text.Contains("-"))
EnemyNumText.color = Color.gray;
EnemyNumText.color = Color.yellow;
DataTransfer.GetComponent<StartSeneData>().EnemyNum = Math.Abs(int.Parse(EnemyNumInput.GetComponent<InputField>().text));
public void onTimeValueChanged()
// time limit value control
if (TimelimInput.GetComponent<InputField>().text == "" || TimelimInput.GetComponent<InputField>().text.Contains("-"))
TimeLimText.color = Color.gray;
TimeLimText.color = Color.yellow;
DataTransfer.GetComponent<StartSeneData>().Timelim = Math.Abs(int.Parse(TimelimInput.GetComponent<InputField>().text));
public void onDPSlideValueChanged()
// DecisionPeriod(DP) value Control
DataTransfer.GetComponent<StartSeneData>().DecisionPeriod = (int)(DecisionPeriodSlide.GetComponent<Slider>().value);
DecisionPeriodDataText.text = DataTransfer.GetComponent<StartSeneData>().DecisionPeriod.ToString();
public void onABDToggleChanged()
// Actions Between Decisions(ABD) Toggle Control
DataTransfer.GetComponent<StartSeneData>().ActionsBetweenDecisions = TakeActionsBetweenDecisionsToggle.isOn;

View File

@ -1,5 +1,5 @@
fileFormatVersion: 2
guid: 7ab8f48d4eac07f44b093f714ede051c
guid: ccfe892ab0f980343b6df0fafaec38e2
externalObjects: {}
serializedVersion: 2

View File

@ -3,19 +3,15 @@ using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;
public class LoadDirToggle : MonoBehaviour
public class LoadDirChanger : MonoBehaviour
public InputField LoadDirDateTextBox;
public InputField LoadDirTimeTextBox;
public Toggle Toggle;
public GameObject DataTransfer;
// Start is called before the first frame update
void Start()
public void OnToggleChanged()
public void OnDirToggleChanged()
// if loadDirToggle is on then turn off the input text boxs.
LoadDirDateTextBox.interactable = Toggle.isOn;
@ -27,4 +23,15 @@ public class LoadDirToggle : MonoBehaviour
DataTransfer.GetComponent<StartSeneData>().LoadDirTime = "0";
public void OnDateValueChanged()
string input = LoadDirDateTextBox.GetComponent<InputField>().text;
DataTransfer.GetComponent<StartSeneData>().LoadDirDate = input;
public void OnTimeValueChanged()
string input = LoadDirTimeTextBox.GetComponent<InputField>().text;
DataTransfer.GetComponent<StartSeneData>().LoadDirTime = input;

View File

@ -1,5 +1,5 @@
fileFormatVersion: 2
guid: 8396a1a162e012447a0e4f7626e70dc7
guid: 664e98da80fb4d842b0c3aff09fd4dc9
externalObjects: {}
serializedVersion: 2

View File

View File

@ -1,5 +1,3 @@
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;
using UnityEngine.SceneManagement;

View File

View File

View File

@ -4,9 +4,16 @@ from mlagents_envs.environment import UnityEnvironment
import numpy as np
class makeEnv(object):
def __init__(self,envPath,workerID,basePort):
self.env = UnityEnvironment(file_name=envPath,seed = 1,side_channels=[],worker_id = workerID,base_port=basePort)
def __init__(self, envPath, workerID, basePort):
self.env = UnityEnvironment(
# get enviroment specs
@ -15,73 +22,97 @@ class makeEnv(object):
self.BEHA_SPECS = self.env.behavior_specs
self.BEHA_NAME = list(self.BEHA_SPECS)[0]
self.SPEC = self.BEHA_SPECS[self.BEHA_NAME]
self.OBSERVATION_SPECS = self.SPEC.observation_specs[0] # observation spec
self.OBSERVATION_SPECS = self.SPEC.observation_specs[
] # observation spec
self.ACTION_SPEC = self.SPEC.action_spec # action specs
self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size# 連続的な動作のSize
self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size# 離散的な動作のSize
self.STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE# 環境観測データ数
self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size #  連続的な動作のSize
self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size #  離散的な動作のSize
self.STATE_SIZE = (
) # 環境観測データ数
print("√√√√√Enviroment Initialized Success√√√√√")
def step(self,discreteActions = None,continuousActions = None,behaviorName = None,trackedAgent = None):
def step(
# take action to enviroment
# return mextState,reward,done
# check if arg is include None or IS None
isDisNone = discreteActions.any() == None
if discreteActions.all() == None:
isDisNone = discreteActions.any() is None
if discreteActions.all() is None:
print("step() Error!:discreteActions include None")
isDisNone = True
isConNone = continuousActions.any() == None
if continuousActions.all() == None:
isConNone = continuousActions.any() is None
if continuousActions.all() is None:
print("step() Error!:continuousActions include None")
isConNone = True
if isDisNone:
# if discreteActions is enpty just give nothing[[0]] to Enviroment
discreteActions = np.array([[0]],
if isConNone:
# if continuousActions is enpty just give nothing[[0]] to Enviroment
continuousActions = np.array([[0]], dtype=np.float)
if behaviorName == None:
if behaviorName is None:
behaviorName = self.BEHA_NAME
if trackedAgent == None:
if trackedAgent is None:
trackedAgent = self.TRACKED_AGENT
#create actionTuple
thisActionTuple = ActionTuple(continuous=continuousActions,discrete=discreteActions)
# create actionTuple
thisActionTuple = ActionTuple(
continuous=continuousActions, discrete=discreteActions
# take action to env
behavior_name=behaviorName, action=thisActionTuple
# get nextState & reward & done after this action
nextState,reward,done,loadDir, saveNow = self.getSteps(behaviorName,trackedAgent)
return nextState,reward,done,loadDir, saveNow
nextState, reward, done, loadDir, saveNow = self.getSteps(
behaviorName, trackedAgent
return nextState, reward, done, loadDir, saveNow
def getSteps(self,behaviorName = None,trackedAgent = None):
def getSteps(self, behaviorName=None, trackedAgent=None):
# get nextState & reward & done
if behaviorName == None:
if behaviorName is None:
behaviorName = self.BEHA_NAME
decisionSteps,terminalSteps = self.env.get_steps(behaviorName)
decisionSteps, terminalSteps = self.env.get_steps(behaviorName)
if self.TRACKED_AGENT == -1 and len(decisionSteps) >= 1:
self.TRACKED_AGENT = decisionSteps.agent_id[0]
if trackedAgent == None:
if trackedAgent is None:
trackedAgent = self.TRACKED_AGENT
if trackedAgent in decisionSteps: # ゲーム終了していない場合、環境状態がdecision_stepsに保存される
if (
trackedAgent in decisionSteps
): # ゲーム終了していない場合、環境状態がdecision_stepsに保存される
nextState = decisionSteps[trackedAgent].obs[0]
nextState = np.reshape(nextState,[1,self.STATE_SIZE+self.LOAD_DIR_SIZE_IN_STATE])
nextState = np.reshape(
nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
saveNow = nextState[0][-1]
loadDir = nextState[0][-3:-1]
nextState = nextState[0][:-3]
reward = decisionSteps[trackedAgent].reward
done = False
if trackedAgent in terminalSteps: # ゲーム終了した場合、環境状態がterminal_stepsに保存される
if (
trackedAgent in terminalSteps
): # ゲーム終了した場合、環境状態がterminal_stepsに保存される
nextState = terminalSteps[trackedAgent].obs[0]
nextState = np.reshape(nextState,[1,self.STATE_SIZE+self.LOAD_DIR_SIZE_IN_STATE])
nextState = np.reshape(
nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
saveNow = nextState[0][-1]
loadDir = nextState[0][-3:-1]
nextState = nextState[0][:-3]
@ -91,8 +122,8 @@ class makeEnv(object):
def reset(self):
nextState,reward,done,loadDir,saveNow = self.getSteps()
return nextState,reward,done,loadDir,saveNow
nextState, reward, done, loadDir, saveNow = self.getSteps()
return nextState, reward, done, loadDir, saveNow
def render(self):

View File

@ -24,7 +24,9 @@
"source": []
"source": [
"cell_type": "code",

View File

@ -13,7 +13,7 @@
"import tensorflow as tf\n",
"import time\n",
"import datetime"
"import datetime\n"
@ -33,28 +33,29 @@
"metadata": {},
"outputs": [],
"source": [
"ENV_PATH = './Build-MultiScene-WithLoad/Aimbot-PPO'\n",
"# Env\n",
"ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"MAX_EP = 1000\n",
"EP_LENGTH = 100000\n",
"GAMMA = 0.99 # discount future reward (UP?)\n",
"EPSILON = 0.2 # clip Ratio range[1-EPSILON,1+EPSILON]\n",
"ACTOR_LR = 1e-5 # LR\n",
"CRITIC_LR = 2e-5 # LR\n",
"BATCH = 512 # learning step\n",
"ACTOR_EPOCH = 15 # epoch\n",
"CRITIC_EPOCH = 15 # epoch\n",
"ENTROPY_WHEIGHT = 0.01 # sigma's entropy in Actor loss\n",
"ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps\n",
"GAMMA = 0.99 # discount future reward (UP?)\n",
"EPSILON = 0.2 # clip Ratio range[1-EPSILON,1+EPSILON]\n",
"ACTOR_LR = 1e-5 # LR\n",
"CRITIC_LR = 2e-5 # LR\n",
"BATCH = 256 # learning step\n",
"ACTOR_EPOCH = 15 # epoch\n",
"CRITIC_EPOCH = 15 # epoch\n",
"ENTROPY_WHEIGHT = 0.001 # sigma's entropy in Actor loss\n",
"ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps\n",
"TRAIN = True\n",
"SAVE_DIR = \"PPO-Model/\"\"%m%d%H%M\")+\"/\"\n",
"SAVE_DIR = \"PPO-Model/\" +\"%m%d%H%M\") + \"/\"\n",
"LOAD_DIR = None\n",
@ -68,28 +69,27 @@
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"Load Model:\n",
"No loadDir specified,Create a New Model\n",
"source": [
"# initialize enviroment & buffer class\n",
"env = aimBotEnv.makeEnv(envPath = ENV_PATH,\n",
" workerID = WORKER_ID,\n",
" basePort = BASE_PORT)\n",
"env = aimBotEnv.makeEnv(\n",
" envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT\n",
"epBuffer = buffer.buffer()\n",
"_,_,_,loadDir,_ = env.getSteps()\n",
"_, _, _, loadDir, _ = env.getSteps()\n",
"# check load model or not\n",
"if(np.any(loadDir == 0)):\n",
"if np.any(loadDir == 0):\n",
" # create a new model\n",
" print(\"No loadDir specified,Create a New Model\")\n",
" LOAD_DIR = None\n",
@ -97,23 +97,23 @@
" # load model\n",
" loadDirDateSTR = str(int(loadDir[0]))\n",
" loadDirTimeSTR = str(int(loadDir[1]))\n",
" if len(loadDirDateSTR)!=8:\n",
" if len(loadDirDateSTR) != 8:\n",
" # fill lost 0 while converse float to string\n",
" for _ in range(8 - len(loadDirDateSTR)):\n",
" loadDirDateSTR = \"0\" + loadDirDateSTR\n",
" if len(loadDirTimeSTR)!=6:\n",
" if len(loadDirTimeSTR) != 6:\n",
" # fill lost 0 while converse float to string\n",
" for _ in range(6 - len(loadDirTimeSTR)):\n",
" loadDirTimeSTR = \"0\" + loadDirTimeSTR\n",
" LOAD_DIR = \"PPO-Model/\"+loadDirDateSTR+\"/\"+loadDirTimeSTR\n",
" LOAD_DIR = \"PPO-Model/\" + loadDirDateSTR + \"/\" + loadDirTimeSTR\n",
" print(\"Load Model:\")\n",
" print(LOAD_DIR)\n",
"print(\"STATE_SIZE\", STATE_SIZE)\n",
"disActShape = [3,3,2]"
"disActShape = [3, 3, 2]\n"
@ -160,11 +160,7 @@
"output_type": "stream",
"text": [
"EP 0 START\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"++++++++++++Model Loaded++++++++++++\n",
"√√√√√Buffer Initialized Success√√√√√\n"
@ -181,603 +177,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"A_Loss: 128583691.2 C_Loss: 812.6740254720052\n",
"EP OVER!\n",
"A_Loss: 18745178.266666666 C_Loss: 154.92835998535156\n",
"Model's Weights Saved\n",
"New Record! Save NN -38.23000041767955\n",
"EP 1 START\n",
"A_Loss: 563364718.9333333 C_Loss: 813.9496988932292\n",
"EP OVER!\n",
"A_Loss: 2345777356.8 C_Loss: 1002.7922281901042\n",
"Model's Weights Saved\n",
"New Record! Save NN 6.009999185800552\n",
"EP 2 START\n",
"A_Loss: 135096590.93333334 C_Loss: 289.0239980061849\n",
"EP OVER!\n",
"A_Loss: -1.0771097699801127 C_Loss: 94.56021474202474\n",
"EP 3 START\n",
"A_Loss: 188551280.0 C_Loss: 178.76077473958333\n",
"EP OVER!\n",
"A_Loss: -0.7733836094538371 C_Loss: 24.582142766316732\n",
"EP 4 START\n",
"A_Loss: 1542771293.8666666 C_Loss: 2540.582096354167\n",
"EP OVER!\n",
"A_Loss: 17443229.466666665 C_Loss: 403.4093978881836\n",
"EP 5 START\n",
"A_Loss: 0.9811522722244262 C_Loss: 173.3322311401367\n",
"EP OVER!\n",
"A_Loss: 0.062289920759697755 C_Loss: 83.96865743001302\n",
"EP 6 START\n",
"A_Loss: 207701843.2 C_Loss: 508.06519571940106\n",
"EP OVER!\n",
"A_Loss: 198849240.53333333 C_Loss: 318.39104512532555\n",
"EP 7 START\n",
"A_Loss: 227911159.46666667 C_Loss: 280.90660603841144\n",
"EP OVER!\n",
"A_Loss: 138269843.73333332 C_Loss: 530.1331217447917\n",
"EP 8 START\n",
"A_Loss: 944055261.8666667 C_Loss: 24907129397945.176\n",
"EP OVER!\n",
"A_Loss: -4141405.066666667 C_Loss: 49846409529480.53\n",
"Model's Weights Saved\n",
"New Record! Save NN 32.569999642670155\n",
"EP 9 START\n",
"A_Loss: 13671169501.866667 C_Loss: 44895753103.339066\n",
"EP OVER!\n",
"A_Loss: 43214365218.13333 C_Loss: 37662.319010416664\n",
"EP 10 START\n",
"A_Loss: 29955732002.133335 C_Loss: 68502.11927083334\n",
"EP OVER!\n",
"A_Loss: 20331074764.8 C_Loss: 38761.32864583333\n",
"EP 11 START\n",
"A_Loss: 17149350843.733334 C_Loss: 41642.053385416664\n",
"EP OVER!\n",
"A_Loss: 16021737676.8 C_Loss: 24600.219270833335\n",
"EP 12 START\n",
"A_Loss: 20431896029.866665 C_Loss: 82939.88125\n",
"EP OVER!\n",
"A_Loss: 12221524718.933332 C_Loss: 41908.44921875\n",
"EP 13 START\n",
"A_Loss: 23195075515.733334 C_Loss: 72094.946875\n",
"EP OVER!\n",
"A_Loss: 30219999095.466667 C_Loss: 25428.986588541666\n",
"EP 14 START\n",
"A_Loss: 27674789614.933334 C_Loss: 77286.32708333334\n",
"EP OVER!\n",
"A_Loss: 14385042363.733334 C_Loss: 44674.395833333336\n",
"EP 15 START\n",
"A_Loss: 6491561813.333333 C_Loss: 57468.32369791667\n",
"EP OVER!\n",
"A_Loss: 14895148373.333334 C_Loss: 27674.523567708333\n",
"EP 16 START\n",
"A_Loss: 23592904157.866665 C_Loss: 54008.02786458333\n",
"EP OVER!\n",
"A_Loss: 28394169002.666668 C_Loss: 35594.001302083336\n",
"EP 17 START\n",
"A_Loss: 31493442082.133335 C_Loss: 43341.80390625\n",
"EP OVER!\n",
"A_Loss: 15182564420.266666 C_Loss: 13120.031575520834\n",
"EP 18 START\n",
"A_Loss: 15298737561.6 C_Loss: 31930.973177083335\n",
"EP OVER!\n",
"A_Loss: 18384228215.466667 C_Loss: 20152.706770833334\n",
"EP 19 START\n",
"A_Loss: 19412337732.266666 C_Loss: 13991118556664.594\n",
"EP OVER!\n",
"A_Loss: 10120899857.066668 C_Loss: 39128662677.066666\n",
"EP 20 START\n",
"A_Loss: 19142413516.8 C_Loss: 129969.633203125\n",
"EP OVER!\n",
"A_Loss: 5016769467.733334 C_Loss: 9036.797916666666\n",
"EP 21 START\n",
"A_Loss: 18806764066.133335 C_Loss: 14859.599609375\n",
"EP OVER!\n",
"A_Loss: 8090647210.666667 C_Loss: 6834.12041015625\n",
"EP 22 START\n",
"A_Loss: 18050034210.133335 C_Loss: 15550.104427083334\n",
"EP OVER!\n",
"A_Loss: 84.65721384684245 C_Loss: 9692.593880208333\n",
"EP 23 START\n",
"A_Loss: 11287560055.466667 C_Loss: 10906.8736328125\n",
"EP OVER!\n",
"A_Loss: 9644667630.933332 C_Loss: 8087.964127604167\n",
"EP 24 START\n",
"A_Loss: 3744549939.2 C_Loss: 5294.3908203125\n",
"EP OVER!\n",
"A_Loss: 263749590.4 C_Loss: 1567.651505533854\n",
"EP 25 START\n",
"A_Loss: 5730180334.933333 C_Loss: 3254.1029459635415\n",
"EP OVER!\n",
"A_Loss: 4258283246.9333334 C_Loss: 1893.0966227213542\n",
"EP 26 START\n",
"A_Loss: 2880076526.9333334 C_Loss: 1959.542569986979\n",
"EP OVER!\n",
"A_Loss: 3309443618.133333 C_Loss: 816.9943277994792\n",
"EP 27 START\n",
"A_Loss: 1501129864.5333333 C_Loss: 750.1781555175781\n",
"EP OVER!\n",
"A_Loss: 1310018705.0666666 C_Loss: 259.69133911132815\n",
"EP 28 START\n",
"A_Loss: 587718690.1333333 C_Loss: 433.7637430826823\n",
"EP OVER!\n",
"A_Loss: 532166058.6666667 C_Loss: 279.7773742675781\n",
"EP 29 START\n",
"A_Loss: 70524670.93333334 C_Loss: 65.66910146077474\n",
"EP OVER!\n",
"A_Loss: 73527637.06666666 C_Loss: 89.10262959798177\n",
"EP 30 START\n",
"A_Loss: 144336381.86666667 C_Loss: 11965969993490.188\n",
"EP OVER!\n",
"A_Loss: 1655646916881.0667 C_Loss: 10321726704401.066\n",
"EP 31 START\n",
"A_Loss: 41522109.46666667 C_Loss: 2869060630042.067\n",
"EP OVER!\n",
"A_Loss: -64.74283854166667 C_Loss: 16231.0900390625\n",
"EP 32 START\n",
"A_Loss: 3817272.5 C_Loss: 25434.281510416666\n",
"EP OVER!\n",
"A_Loss: 279540269.8666667 C_Loss: 10897.152734375\n",
"EP 33 START\n",
"A_Loss: -88.46073099772136 C_Loss: 19774.261588541667\n",
"EP OVER!\n",
"A_Loss: 400722821.04817367 C_Loss: 7724.169270833333\n",
"EP 34 START\n",
"A_Loss: 1930250.1738263448 C_Loss: 12444.766927083334\n",
"EP OVER!\n",
"A_Loss: 37003849.2 C_Loss: 8640.598307291666\n",
"EP 35 START\n",
"A_Loss: 55674022.4 C_Loss: 14830.487825520833\n",
"EP OVER!\n",
"A_Loss: 78330882.4 C_Loss: 7793.051595052083\n",
"EP 36 START\n",
"A_Loss: 10209805.2 C_Loss: 11949599399.99375\n",
"EP OVER!\n",
"A_Loss: 48724937985574.92 C_Loss: 17860793484356.266\n",
"EP 37 START\n",
"A_Loss: 4330507.568489583 C_Loss: 223605282927.2875\n",
"EP OVER!\n",
"A_Loss: 7667.229915364584 C_Loss: 57925.12994791667\n",
"EP 38 START\n",
"A_Loss: 184.12769877115886 C_Loss: 7122.831803385417\n",
"EP OVER!\n",
"A_Loss: 15510.455403645834 C_Loss: 52148.67734375\n",
"EP 39 START\n",
"A_Loss: 5272.286588541667 C_Loss: 286373.10833333334\n",
"EP OVER!\n",
"A_Loss: 2.6445246855417888 C_Loss: 4984.467862955729\n",
"EP 40 START\n",
"A_Loss: 5228.9494140625 C_Loss: 366411.77291666664\n",
"EP OVER!\n",
"A_Loss: -288.28272705078126 C_Loss: 12667.800716145834\n",
"EP 41 START\n",
"A_Loss: 19474.684244791668 C_Loss: 45928038184215.41\n",
"EP OVER!\n",
"A_Loss: 2104.5810709635416 C_Loss: 276810.7479166667\n",
"EP 42 START\n",
"A_Loss: 61.346117909749346 C_Loss: 24438.237760416665\n",
"EP OVER!\n",
"A_Loss: 14208.790559895833 C_Loss: 220833.20625\n",
"EP 43 START\n",
"A_Loss: 5357.714680989583 C_Loss: 332224.36041666666\n",
"EP OVER!\n",
"A_Loss: 85.4434425354004 C_Loss: 6307.7271484375\n",
"EP 44 START\n",
"A_Loss: 122.99782867431641 C_Loss: 19919.213411458335\n",
"EP OVER!\n",
"A_Loss: 15087.8431640625 C_Loss: 161420.19166666668\n",
"EP 45 START\n",
"A_Loss: 1674.8425455729166 C_Loss: 309222.24166666664\n",
"EP OVER!\n",
"A_Loss: 1518.2337646484375 C_Loss: 349629.71041666664\n",
"EP 46 START\n",
"A_Loss: 2379.195491536458 C_Loss: 364705.35833333334\n",
"EP OVER!\n",
"A_Loss: 209796462.2 C_Loss: 13010572456.520052\n",
"EP 47 START\n",
"A_Loss: -2574.44921875 C_Loss: 39260709.6\n",
"EP OVER!\n",
"A_Loss: -255952.503125 C_Loss: 1636249529955.0833\n",
"EP 48 START\n",
"A_Loss: 607266.8805664063 C_Loss: 40067520562.25208\n",
"EP OVER!\n",
"A_Loss: 2080.8306803385417 C_Loss: 175271.12291666667\n",
"EP 49 START\n",
"A_Loss: 592.0791585286458 C_Loss: 303988.81666666665\n",
"EP OVER!\n",
"A_Loss: 6402.670670572917 C_Loss: 52915.89296875\n",
"EP 50 START\n",
"A_Loss: 1395209.9906412761 C_Loss: 47937.68541666667\n",
"EP OVER!\n",
"A_Loss: 5355160.362174479 C_Loss: 46913.726822916666\n",
"EP 51 START\n",
"A_Loss: 131.98586934407552 C_Loss: 131763.49583333332\n",
"EP OVER!\n",
"A_Loss: 545.0480163574218 C_Loss: 177076.325\n",
"EP 52 START\n",
"A_Loss: -295055.39791666664 C_Loss: 21676655815489.375\n",
"EP OVER!\n",
"A_Loss: 96046800076.8 C_Loss: 2559427411968.0\n",
"EP 53 START\n",
"A_Loss: 23993694.0 C_Loss: 400391016517.3333\n",
"EP OVER!\n",
"A_Loss: 93352.0390625 C_Loss: 619253805875.2\n",
"EP 54 START\n",
"A_Loss: 2732956.3833333333 C_Loss: 46527696896.0\n",
"EP OVER!\n",
"A_Loss: 39510.85885416667 C_Loss: 28781911654.4\n",
"EP 55 START\n",
"A_Loss: -29427.872395833332 C_Loss: 5438102272.0\n",
"EP OVER!\n",
"A_Loss: 90085.98177083333 C_Loss: 3624150245.866667\n",
"EP 56 START\n",
"A_Loss: -498753.30625 C_Loss: 823590289405.8667\n",
"EP OVER!\n",
"A_Loss: 10569771.733333332 C_Loss: 2970159109461.3335\n",
"EP 57 START\n",
"A_Loss: -50644.94348958333 C_Loss: 646384501282.1333\n",
"EP OVER!\n",
"A_Loss: 23941.277994791668 C_Loss: 53731624072.53333\n",
"EP 58 START\n",
"A_Loss: -199527.115625 C_Loss: 2830466387626.6665\n",
"EP OVER!\n",
"A_Loss: 5080.106127929687 C_Loss: 1123213416857.6\n",
"EP 59 START\n",
"A_Loss: 854754.9208333333 C_Loss: 89255951291.73334\n",
"EP OVER!\n",
"A_Loss: -6642.098014322916 C_Loss: 2435903428.266667\n",
"EP 60 START\n",
"A_Loss: 21668.172135416666 C_Loss: 4344311575859.2\n",
"EP OVER!\n",
"A_Loss: -2874.193896484375 C_Loss: 1691038327.4666667\n",
"EP 61 START\n",
"A_Loss: 46610.647135416664 C_Loss: 355761074.1333333\n",
"EP OVER!\n",
"A_Loss: 174212.034375 C_Loss: 49708919.233333334\n",
"EP 62 START\n",
"A_Loss: 135704.40625 C_Loss: 47006909.06666667\n",
"EP OVER!\n",
"A_Loss: 10387.477018229167 C_Loss: 51938460.8\n",
"EP 63 START\n",
"A_Loss: 594.3608805338541 C_Loss: 16747537224969.105\n",
"EP OVER!\n",
"A_Loss: 109.09736455281576 C_Loss: 4015293.466666667\n",
"EP 64 START\n",
"A_Loss: 2528.45546875 C_Loss: 5244184.566666666\n",
"EP OVER!\n",
"A_Loss: 228.57803039550782 C_Loss: 4565261.5\n",
"EP 65 START\n",
"A_Loss: 576.867138671875 C_Loss: 690111.6791666667\n",
"EP OVER!\n",
"A_Loss: 2555.884700520833 C_Loss: 1223311.4583333333\n",
"EP 66 START\n",
"A_Loss: 1392.1302897135417 C_Loss: 4376503.633333334\n",
"EP OVER!\n",
"A_Loss: 3898.088427734375 C_Loss: 2490652.9833333334\n",
"EP 67 START\n",
"A_Loss: 456.2262491861979 C_Loss: 1098666.3\n",
"EP OVER!\n",
"A_Loss: -223.93345540364584 C_Loss: 2048068.5666666667\n",
"EP 68 START\n",
"A_Loss: -439.01691080729165 C_Loss: 2945374.433333333\n",
"EP OVER!\n",
"A_Loss: -24.373832575480144 C_Loss: 343411.81875\n",
"EP 69 START\n",
"A_Loss: 5111.536979166666 C_Loss: 10348606653210.434\n",
"EP OVER!\n",
"A_Loss: -38.84524319966634 C_Loss: 1990322.725\n",
"EP 70 START\n",
"A_Loss: 951.8831461588542 C_Loss: 223227.01875\n",
"EP OVER!\n",
"A_Loss: 172.96959330240887 C_Loss: 1922374.9416666667\n",
"EP 71 START\n",
"A_Loss: 39.82610600789388 C_Loss: 533955.7208333333\n",
"EP OVER!\n",
"A_Loss: 620.001796468099 C_Loss: 282419.71979166666\n",
"EP 72 START\n",
"A_Loss: 10333.698046875 C_Loss: 2511066.466666667\n",
"EP OVER!\n",
"A_Loss: -83.75911153157553 C_Loss: 1593542.8833333333\n",
"EP 73 START\n",
"A_Loss: 1590.978955078125 C_Loss: 931809.2333333333\n",
"EP OVER!\n",
"A_Loss: 189.47867533365886 C_Loss: 1312441.1333333333\n",
"EP 74 START\n",
"A_Loss: -4686.18564453125 C_Loss: 8461484.133333333\n",
"EP OVER!\n",
"A_Loss: 152230.85208333333 C_Loss: 26483003486139.734\n",
"EP 75 START\n",
"A_Loss: 543.5623474121094 C_Loss: 676008774.1916667\n",
"EP OVER!\n",
"A_Loss: 72.8592913309733 C_Loss: 646183.675\n",
"EP 76 START\n",
"A_Loss: -111.36540323893229 C_Loss: 1268551.3833333333\n",
"EP OVER!\n",
"A_Loss: -86.98175252278646 C_Loss: 1884845.4166666667\n",
"EP 77 START\n",
"A_Loss: 9.363872464497884 C_Loss: 574981.4\n",
"EP OVER!\n",
"A_Loss: 10.75021572113037 C_Loss: 1757512.1051269532\n",
"EP 78 START\n",
"A_Loss: -91.82536315917969 C_Loss: 1116194.3125\n",
"EP OVER!\n",
"A_Loss: -2419.4617513020835 C_Loss: 9647680415.9\n",
"EP 79 START\n",
"A_Loss: 12534.184114583333 C_Loss: 125233972.73333333\n",
"EP OVER!\n",
"A_Loss: -12864.7326171875 C_Loss: 32251742043101.867\n",
"EP 80 START\n",
"A_Loss: 69521.21354166667 C_Loss: 239759950.93333334\n",
"EP OVER!\n",
"A_Loss: 1941.87587890625 C_Loss: 3182419.7\n",
"EP 81 START\n",
"A_Loss: 48.873761876424155 C_Loss: 2138957.5\n",
"EP OVER!\n",
"A_Loss: -172.59212137858074 C_Loss: 1479709.7\n",
"EP 82 START\n",
"A_Loss: -217.69344889322917 C_Loss: 1724748.0333333334\n",
"EP OVER!\n",
"A_Loss: 8.864885139465333 C_Loss: 2801.1948404947916\n",
"EP 83 START\n",
"A_Loss: -23.036806042989095 C_Loss: 487250.31875\n",
"EP OVER!\n",
"A_Loss: -85.1435302734375 C_Loss: 459072.14375\n",
"EP 84 START\n",
"A_Loss: -159.0248555501302 C_Loss: 704506.4\n",
"EP OVER!\n",
"A_Loss: 391660.59375 C_Loss: 185407429.55\n",
"EP 85 START\n",
"A_Loss: 11304412.4 C_Loss: 436438987025.06665\n",
"EP OVER!\n",
"A_Loss: -508119.4666666667 C_Loss: 27526751540565.332\n",
"EP 86 START\n",
"A_Loss: 8.469103686014812 C_Loss: 35030720904.04556\n",
"EP OVER!\n",
"A_Loss: 79.07336120605468 C_Loss: 2628.575537109375\n",
"EP 87 START\n",
"A_Loss: 87.94131622314453 C_Loss: 3291.388395182292\n",
"EP OVER!\n",
"A_Loss: 63.07492167154948 C_Loss: 3989.5349609375\n",
"EP 88 START\n",
"A_Loss: 9.08399314880371 C_Loss: 3549.7614908854166\n",
"EP OVER!\n",
"A_Loss: 47.93385569254557 C_Loss: 1957.0295735677084\n",
"EP 89 START\n",
"A_Loss: 76.6830576578776 C_Loss: 4872.250081380208\n",
"EP OVER!\n",
"A_Loss: 77.35536549886068 C_Loss: 1778.1133951822917\n",
"EP 90 START\n",
"A_Loss: 26.445547739664715 C_Loss: 1201.9427978515625\n",
"EP OVER!\n",
"A_Loss: 37.380027770996094 C_Loss: 2120.48583984375\n",
"EP 91 START\n",
"A_Loss: 62.73122533162435 C_Loss: 1910.77255859375\n",
"EP OVER!\n",
"A_Loss: 7.070840994517009 C_Loss: 2663.728580729167\n",
"Model's Weights Saved\n",
"New Record! Save NN 36.159999914467335\n",
"EP 92 START\n",
"A_Loss: 83.22457122802734 C_Loss: 4142.598046875\n",
"EP OVER!\n",
"A_Loss: -2965967.5 C_Loss: 89986176198316.5\n",
"EP 93 START\n",
"A_Loss: 5039913.333333333 C_Loss: 11098771021277.867\n",
"EP OVER!\n",
"A_Loss: 15480.3697265625 C_Loss: 119004974489.6\n",
"EP 94 START\n",
"A_Loss: -16268.488020833332 C_Loss: 2171354205.866667\n",
"EP OVER!\n",
"A_Loss: 25105.733463541666 C_Loss: 180378952.53333333\n",
"EP 95 START\n",
"A_Loss: -356.90814819335935 C_Loss: 4823839.716666667\n",
"EP OVER!\n",
"A_Loss: -103.6774673461914 C_Loss: 1091913.1416666666\n",
"EP 96 START\n",
"A_Loss: 132.41912180582682 C_Loss: 3535627.6166666667\n",
"EP OVER!\n",
"A_Loss: -250.67224629720053 C_Loss: 2047510.1\n",
"EP 97 START\n",
"A_Loss: 191.60374145507814 C_Loss: 2080957.1833333333\n",
"EP OVER!\n",
"A_Loss: 317.184033203125 C_Loss: 1221589.425\n",
"EP 98 START\n",
"A_Loss: -39.01324615478516 C_Loss: 998958.2208333333\n",
"EP OVER!\n",
"A_Loss: -190.1520253499349 C_Loss: 1742518.1\n",
"EP 99 START\n",
"A_Loss: 311.61248372395835 C_Loss: 620184.1416666667\n",
"EP OVER!\n",
"A_Loss: -264.8520751953125 C_Loss: 1779512.7666666666\n",
"EP 100 START\n",
"A_Loss: -94.42798614501953 C_Loss: 1547244.1833333333\n",
"EP OVER!\n",
"A_Loss: -153766.15104166666 C_Loss: 33501732032887.137\n",
"EP 101 START\n",
"A_Loss: 60208.17265625 C_Loss: 27465943313.066666\n",
"EP OVER!\n",
"A_Loss: 17862.422265625 C_Loss: 988074240.0\n",
"EP 102 START\n",
"A_Loss: 28635.012890625 C_Loss: 144180219.2\n",
"EP OVER!\n",
"A_Loss: 265.3567850748698 C_Loss: 2369780.45\n",
"EP 103 START\n",
"A_Loss: -17.382435480753582 C_Loss: 726623.9833333333\n",
"EP OVER!\n",
"A_Loss: 16.159270922342937 C_Loss: 124937.20364583333\n",
"EP 104 START\n",
"A_Loss: 180.78487548828124 C_Loss: 2608104.4\n",
"EP OVER!\n",
"A_Loss: -472.0964823404948 C_Loss: 3132594.6666666665\n",
"EP 105 START\n",
"A_Loss: -245.632177734375 C_Loss: 1839736.6333333333\n",
"EP OVER!\n",
"A_Loss: -84.94388631184896 C_Loss: 788294.0833333334\n",
"EP 106 START\n",
"A_Loss: -344.7350769042969 C_Loss: 2457982.183333333\n",
"EP OVER!\n",
"A_Loss: 247.77904154459637 C_Loss: 191985.88020833334\n",
"EP 107 START\n",
"A_Loss: -93.01107330322266 C_Loss: 1014136.5958333333\n",
"EP OVER!\n",
"A_Loss: -301.968603515625 C_Loss: 2097648.5416666665\n",
"EP 108 START\n",
"A_Loss: -355.8355692545573 C_Loss: 2356025.3333333335\n",
"EP OVER!\n",
"A_Loss: -176.879106648763 C_Loss: 905950.4666666667\n",
"EP 109 START\n",
"A_Loss: 2279212.8916666666 C_Loss: 5355236439927.467\n",
"EP OVER!\n",
"A_Loss: 230154.93229166666 C_Loss: 21897054401331.2\n",
"EP 110 START\n",
"A_Loss: 204.3412821451823 C_Loss: 11179356820.975\n",
"EP OVER!\n",
"A_Loss: -44.459004465738936 C_Loss: 606090.0875\n",
"EP 111 START\n",
"A_Loss: -55.804770151774086 C_Loss: 453019.00625\n",
"EP OVER!\n",
"A_Loss: 437.7322265625 C_Loss: 531331.7\n",
"EP 112 START\n",
"A_Loss: -176.73267822265626 C_Loss: 1254341.2416666667\n",
"EP OVER!\n",
"A_Loss: -9.164166609446207 C_Loss: 116005.52552083334\n",
"EP 113 START\n",
"A_Loss: 57.09782918294271 C_Loss: 590456.1916666667\n",
"EP OVER!\n",
"A_Loss: -28.08950449625651 C_Loss: 368954.27291666664\n",
"EP 114 START\n",
"A_Loss: -141.4985616048177 C_Loss: 545265.2583333333\n",
"EP OVER!\n",
"A_Loss: 196.88966267903646 C_Loss: 262787.0489583333\n",
"EP 115 START\n",
"A_Loss: 9875956.133333333 C_Loss: 3146464274264.442\n",
"EP OVER!\n",
"A_Loss: 74947.0171875 C_Loss: 1476110921523.2\n",
"EP 116 START\n",
"A_Loss: -147298.26770833333 C_Loss: 7414750106920.533\n",
"EP OVER!\n",
"A_Loss: -61223.323958333334 C_Loss: 166082947754.66666\n",
"EP 117 START\n",
"A_Loss: 155204.75208333333 C_Loss: 54631106969.6\n",
"EP OVER!\n",
"A_Loss: -5608.896809895833 C_Loss: 239498007.5\n",
"EP 118 START\n",
"A_Loss: 64438.151041666664 C_Loss: 1624584972.8\n",
"EP OVER!\n",
"A_Loss: 2922.034130859375 C_Loss: 6422857095816.533\n",
"EP 119 START\n",
"A_Loss: 4014.168212890625 C_Loss: 72697799.46666667\n",
"EP OVER!\n",
"A_Loss: 1831.806486002604 C_Loss: 35837436.0\n",
"EP 120 START\n",
"A_Loss: 10667.054622395834 C_Loss: 26391209.166666668\n",
"EP OVER!\n",
"A_Loss: 127434.12447916667 C_Loss: 182761787114.13333\n",
"EP 121 START\n",
"A_Loss: 143913.98645833333 C_Loss: 1208985656729.6\n",
"EP OVER!\n",
"A_Loss: 16578.108658854166 C_Loss: 1837287090.1333334\n",
"EP 122 START\n",
"A_Loss: -117467.396875 C_Loss: 1478863227690.6667\n",
"EP OVER!\n",
"A_Loss: -70148.46614583333 C_Loss: 5382309429111.467\n",
"EP 123 START\n",
"A_Loss: -49.01672922770182 C_Loss: 3541015449.633333\n",
"EP OVER!\n",
"A_Loss: -40.59830932617187 C_Loss: 352259.06875\n",
"EP 124 START\n",
"A_Loss: -95.25742848714192 C_Loss: 858177.2\n",
"EP OVER!\n",
"A_Loss: -52.94597473144531 C_Loss: 293412.5625\n",
"EP 125 START\n",
"A_Loss: -225.69961649576823 C_Loss: 748738.2375\n",
"EP OVER!\n",
"A_Loss: -4.78879960378011 C_Loss: 123586.36875\n",
"EP 126 START\n",
"A_Loss: -201.62482401529948 C_Loss: 720683.4833333333\n",
"EP OVER!\n",
"A_Loss: -21.21190528869629 C_Loss: 35503.817057291664\n",
"EP 127 START\n",
"A_Loss: 152.8080861409505 C_Loss: 41394907675409.0\n",
"EP OVER!\n",
"A_Loss: 3.5420272588729858 C_Loss: 95929.18802083333\n",
"EP 128 START\n",
"A_Loss: -2.3434098323186237 C_Loss: 43473.105729166666\n",
"EP OVER!\n",
"A_Loss: -31.295360438028972 C_Loss: 90783.04791666666\n",
"EP 129 START\n",
"A_Loss: 361140.66041666665 C_Loss: 72744.59609375\n",
"EP OVER!\n",
"A_Loss: 2418.9001627604166 C_Loss: 3928226928.5666666\n",
"EP 130 START\n",
"A_Loss: 11.188031895955403 C_Loss: 146395.30833333332\n",
"EP OVER!\n",
"A_Loss: -288.8839884440104 C_Loss: 726967.2367838542\n",
"EP 131 START\n",
"A_Loss: 228.3966074625651 C_Loss: 1645105.6708333334\n",
"EP OVER!\n",
"A_Loss: 56.98198420206706 C_Loss: 80022.94166666667\n",
"EP 132 START\n",
"A_Loss: 32.61619491577149 C_Loss: 13436.4310546875\n",
"EP OVER!\n",
"A_Loss: -3.019519786039988 C_Loss: 26069.423307291665\n",
"EP 133 START\n",
"A_Loss: 5.228196366628011 C_Loss: 25157.172005208333\n",
"EP OVER!\n",
"A_Loss: 13.022575950622558 C_Loss: 3747.759977213542\n",
"EP 134 START\n",
"A_Loss: -80517.8953125 C_Loss: 15650603660651.291\n",
"EP OVER!\n",
"A_Loss: 16014.4521484375 C_Loss: 9164626090.666666\n",
"EP 135 START\n",
"A_Loss: 117133.68125 C_Loss: 87002011844.26666\n",
"EP OVER!\n",
"A_Loss: 15981.832682291666 C_Loss: 47913645943.46667\n",
"EP 136 START\n",
"A_Loss: 19723.941666666666 C_Loss: 610387874.1333333\n",
"EP OVER!\n",
"A_Loss: -6.982747983932495 C_Loss: 42435512.56666667\n",
"EP 137 START\n",
"A_Loss: -177.51118876139324 C_Loss: 1209231.775\n",
"EP OVER!\n",
"A_Loss: -18.883594957987466 C_Loss: 646092.9708333333\n",
"EP 138 START\n",
"A_Loss: 152.13518269856772 C_Loss: 282594.4010416667\n",
"EP OVER!\n",
"A_Loss: 18.2394229888916 C_Loss: 124778.96979166666\n",
"EP 139 START\n",
"A_Loss: 165.62351684570314 C_Loss: 132396.28125\n",
"EP OVER!\n",
"A_Loss: -245.43899637858073 C_Loss: 928183.8291666667\n",
"EP 140 START\n",
"A_Loss: -104456.69375 C_Loss: 1376307509015.4375\n",
"EP OVER!\n",
"A_Loss: 9644.121158854166 C_Loss: 66630244983.46667\n",
"EP 141 START\n",
"A_Loss: -92.3751708984375 C_Loss: 142594040.71354166\n",
"EP OVER!\n",
"A_Loss: 29.573409907023112 C_Loss: 46747.54921875\n",
"EP 142 START\n",
"A_Loss: 1008713.7708333334 C_Loss: 20930468997730.31\n",
"EP OVER!\n",
"A_Loss: -80090.73802083333 C_Loss: 1022462403106.1333\n",
"EP 143 START\n",
"A_Loss: -23560.799739583334 C_Loss: 32153040110.933334\n",
"EP OVER!\n",
"A_Loss: 3245.091129557292 C_Loss: 950403610.1333333\n",
"EP 144 START\n",
"A_Loss: 184.1011952718099 C_Loss: 1113755.1979166667\n",
"EP OVER!\n",
"A_Loss: 963.6497111002604 C_Loss: 1592140.8416666666\n",
"EP 145 START\n",
"A_Loss: 315.7770711263021 C_Loss: 494445.6979166667\n",
"EP OVER!\n",
"A_Loss: 440.2256103515625 C_Loss: 490657.3229166667\n",
"EP 146 START\n"
"A_Loss: 0.4477495511372884 C_Loss: 3.155759557088216\n",
"A_Loss: 0.14549287557601928 C_Loss: 0.5123071213563283\n",
"A_Loss: 0.055241942902406055 C_Loss: 0.13002794484297434\n",
"A_Loss: 0.057325509190559384 C_Loss: 0.11068039039770762\n",
"A_Loss: 0.04376962607105573 C_Loss: 0.03923700377345085\n"
"source": [
"bestScore = 200.\n",
"bestScore = 200.0\n",
"stopTrainCounter = 0\n",
"totalRewardHis = []\n",
@ -787,60 +196,79 @@
"maxTotalReward = -99999999999\n",
"for ep in range(MAX_EP):\n",
" print(\"EP \",ep,\" START\")\n",
" print(\"EP \", ep, \" START\")\n",
" # first time run game\n",
" s,_,_,_,_ = env.reset()\n",
" if (ep == 0):\n",
" s, _, _, _, _ = env.reset()\n",
" if ep == 0:\n",
" epBuffer = buffer.buffer()\n",
" s = s.reshape([STATE_SIZE])\n",
" agent = PPO.PPO(stateSize=STATE_SIZE,\n",
" disActShape=disActShape,\n",
" conActSize=1,\n",
" conActRange=CTN_ACTION_RANGE,\n",
" criticLR=CRITIC_LR,\n",
" actorLR=ACTOR_LR,\n",
" gamma=GAMMA,\n",
" epsilon=EPSILON,\n",
" entropyWeight=ENTROPY_WHEIGHT,\n",
" saveDir=SAVE_DIR,\n",
" loadModelDir=LOAD_DIR)\n",
" agent = PPO.PPO(\n",
" stateSize=STATE_SIZE,\n",
" disActShape=disActShape,\n",
" conActSize=1,\n",
" conActRange=CTN_ACTION_RANGE,\n",
" criticLR=CRITIC_LR,\n",
" actorLR=ACTOR_LR,\n",
" gamma=GAMMA,\n",
" epsilon=EPSILON,\n",
" entropyWeight=ENTROPY_WHEIGHT,\n",
" saveDir=SAVE_DIR,\n",
" loadModelDir=LOAD_DIR,\n",
" )\n",
" step = 0\n",
" done = False\n",
" stopTrainCounter -= 1\n",
" epHis.append(ep)\n",
" \n",
" # reset total reward\n",
" epTotalReward = 0\n",
" \n",
" # Recorder list\n",
" epStepHis = []\n",
" epRewardHis = []\n",
" epActorLossHis = []\n",
" epCriticLossHis = []\n",
" \n",
" # save weight immediately?\n",
" saveNow = 0;\n",
" saveNow = 0\n",
" while not done:\n",
" step += 1\n",
" if step % ACTION_INTERVAL == 0: # take action every ACTION_INTERVAL steps\n",
" if (\n",
" step % ACTION_INTERVAL == 0\n",
" ): # take action every ACTION_INTERVAL steps\n",
" epStepHis.append(step)\n",
" disAct1,disAct2,disAct3,conAct,predictResult = agent.chooseAction(s)\n",
" kW, kS, kA, kD, mouseShoot, mouseMove = actToKey(disAct1,disAct2,disAct3,conAct)\n",
" \n",
" nextState,thisReward,done,_,saveNow = env.step(discreteActions=np.array([[kW, kS, kA, kD, mouseShoot]]),continuousActions=np.array([[mouseMove]]))\n",
" (\n",
" disAct1,\n",
" disAct2,\n",
" disAct3,\n",
" conAct,\n",
" predictResult,\n",
" ) = agent.chooseAction(s)\n",
" kW, kS, kA, kD, mouseShoot, mouseMove = actToKey(\n",
" disAct1, disAct2, disAct3, conAct\n",
" )\n",
" nextState, thisReward, done, _, saveNow = env.step(\n",
" discreteActions=np.array([[kW, kS, kA, kD, mouseShoot]]),\n",
" continuousActions=np.array([[mouseMove]]),\n",
" )\n",
" epTotalReward += thisReward\n",
" epBuffer.saveBuffers(s,[disAct1,disAct2,disAct3,conAct],thisReward)\n",
" epBuffer.saveBuffers(\n",
" s, [disAct1, disAct2, disAct3, conAct], thisReward\n",
" )\n",
" else:\n",
" disActs = np.array([[0,0,0,0,0]])\n",
" disActs = np.array([[0, 0, 0, 0, 0]])\n",
" conActs = np.array([[0]])\n",
" nextState,thisReward,done,_,saveNow = env.step(discreteActions=disActs,continuousActions=conActs)\n",
" nextState, thisReward, done, _, saveNow = env.step(\n",
" discreteActions=disActs, continuousActions=conActs\n",
" )\n",
" epTotalReward += thisReward\n",
" nextState = nextState.reshape([STATE_SIZE])\n",
" s = nextState\n",
" \n",
" if done:\n",
" print(\"EP OVER!\")\n",
" if saveNow != 0:\n",
@ -848,25 +276,26 @@
" saveNow = 0\n",
" agent.saveWeights()\n",
" # update PPO after Batch step or GameOver\n",
" if (step+1)%BATCH == 0 or done:\n",
" if (step + 1) % BATCH == 0 or done:\n",
" bs = epBuffer.getStates()\n",
" ba = epBuffer.getActions()\n",
" br = epBuffer.getRewards()\n",
" epBuffer.clearBuffer()\n",
" if TRAIN:\n",
" epActorLoss,epCriticLoss = agent.trainCritcActor(bs,ba,br,s,CRITIC_EPOCH,ACTOR_EPOCH)\n",
" epActorLoss, epCriticLoss = agent.trainCritcActor(\n",
" bs, ba, br, s, CRITIC_EPOCH, ACTOR_EPOCH\n",
" )\n",
" epActorLossHis.append(epActorLoss)\n",
" epCriticLossHis.append(epCriticLoss)\n",
" # update History Recorder\n",
" totalActorLossHis.append(np.mean(epActorLossHis))\n",
" totalCriticLossHis.append(np.mean(epCriticLossHis))\n",
" totalRewardHis.append(epTotalReward)\n",
" \n",
" if (epTotalReward > maxTotalReward and epTotalReward != 0):\n",
" if epTotalReward > maxTotalReward and epTotalReward != 0:\n",
" maxTotalReward = epTotalReward\n",
" agent.saveWeights(epTotalReward)\n",
" print(\"New Record! Save NN\",epTotalReward)\n",
" "
" print(\"New Record! Save NN\", epTotalReward)\n"

View File

@ -1,7 +1,17 @@
import numpy as np
class rewardAI(object):
def __init__(self,nonReward, shootReward, shootWithoutReadyReward, hitReward, winReward, loseReward, killReward):
def __init__(
self.nonReward = nonReward
self.shootReward = shootReward
self.shootWithoutReadyReward = shootWithoutReadyReward
@ -9,8 +19,8 @@ class rewardAI(object):
self.winReward = winReward
self.loseReward = loseReward
self.killReward = killReward
def getRewards(self,remainTime):
def getRewards(self, remainTime):
nonR = self.getnonReward()
shootR = self.getshootReward()
shootWithoutReadyR = self.getshootWithoutReadyReward()
@ -18,27 +28,30 @@ class rewardAI(object):
winR = self.getwinReward(remainTime)
loseR = self.getloseReward()
killR = self.getkillReward(remainTime)
rewards = np.array([[nonR,
killR]], dtype=np.float)
rewards = np.array(
[[nonR, shootR, shootWithoutReadyR, hitR, winR, loseR, killR]],
return rewards
def getnonReward (self):
def getnonReward(self):
return self.nonReward
def getshootReward(self):
return self.shootReward
def getshootWithoutReadyReward(self):
return self.shootWithoutReadyReward
def gethitReward(self):
return self.hitReward
def getwinReward(self,time):
return (self.winReward + time)
def getwinReward(self, time):
return self.winReward + time
def getloseReward(self):
return self.loseReward
def getkillReward(self,time):
return (self.killReward + time)
def getkillReward(self, time):
return self.killReward + time

View File

@ -19,12 +19,12 @@
"source": [
"import numpy as np\n",
"state = np.array([[1,2,3],[1,2,3]])\n",
"state = np.array([[1, 2, 3], [1, 2, 3]])\n",
"aaa = np.array([[123]])\n",
"state[:, -1]\n",
"np.append([[1, 2, 3]], [[7, 8, 9, 10]], axis=1)"
"np.append([[1, 2, 3]], [[7, 8, 9, 10]], axis=1)\n"
@ -36,22 +36,22 @@
"import tensorflow as tf\n",
"import numpy as np\n",
"aa = tf.constant([[1,2,3],[1,2,3],[1,2,3],[1,2,3]])\n",
"bb = tf.constant([1,2,3,4])\n",
"aa = tf.constant([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]])\n",
"bb = tf.constant([1, 2, 3, 4])\n",
"print(tf.expand_dims(bb, axis=1))\n",
"cc = tf.math.multiply(aa,tf.expand_dims(bb,axis = 1))\n",
"cc = tf.math.multiply(aa, tf.expand_dims(bb, axis=1))\n",
"print(aa[:, 2:3])\n",
"aa = tf.constant([1.0,2.0,3.0,np.nan])\n",
"aa = tf.constant([1.0, 2.0, 3.0, np.nan])\n",
"if np.any(tf.math.is_nan(aa)):\n",
" print('true')"
" print(\"true\")\n"
@ -62,11 +62,13 @@
"source": [
"import tensorflow as tf\n",
"prob = tf.constant([0.3,0.3,0.])\n",
"prob = tf.constant([0.3, 0.3, 0.0])\n",
"entropy = tf.reduce_mean(tf.math.negative(tf.math.multiply(prob,tf.math.log(prob))))\n",
"entropy = tf.reduce_mean(\n",
" tf.math.negative(tf.math.multiply(prob, tf.math.log(prob)))\n",
@ -87,25 +89,31 @@
"source": [
"from functools import singledispatchmethod\n",
"class person:\n",
" @singledispatchmethod\n",
" def age(self,arg):\n",
" def age(self, arg):\n",
" print(\"error\")\n",
" @age.register(int)\n",
" def _(self,arg:int):\n",
" print(\"int\",arg)\n",
" def _(self, arg: int):\n",
" print(\"int\", arg)\n",
" @age.register(str)\n",
" def _(self,arg:str):\n",
" print(\"str\",arg)\n",
" def _(self, arg: str):\n",
" print(\"str\", arg)\n",
" @age.register(bool)\n",
" def _(self,arg:bool):\n",
" print(\"bool\",arg)\n",
" def _(self, arg: bool):\n",
" print(\"bool\", arg)\n",
"p = person()\n",
"p.age(23) # int\n",
"p.age('twenty three') # str\n",
"p.age(\"twenty three\") # str\n",
"p.age(True) # bool\n",
"p.age(['23']) # list\n"
"p.age([\"23\"]) # list\n"
@ -117,6 +125,7 @@
"import tensorflow as tf\n",
"import tensorflow.keras.layers as layers\n",
"def conv_bn_relu(inputs, chs, reps):\n",
" x = inputs\n",
" for i in range(reps):\n",
@ -125,6 +134,7 @@
" x = layers.ReLU()(x)\n",
" return x\n",
"def create_model():\n",
" inputs = layers.Input((32, 32, 3))\n",
" x = conv_bn_relu(inputs, 64, 3)\n",
@ -136,27 +146,38 @@
" x = layers.Dense(10, activation=\"softmax\")(x)\n",
" return tf.keras.models.Model(inputs, x)\n",
"def perprocess(img, label):\n",
" img = tf.cast(img, tf.float32) / 255.0\n",
" label = tf.cast(label, tf.float32)\n",
" return img, label\n",
"def train():\n",
" (X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()\n",
" trainset =, y_train)\n",
" ).map(perprocess).shuffle(4096).batch(128).repeat().prefetch(50)\n",
" (X_train, y_train), (\n",
" X_test,\n",
" y_test,\n",
" ) = tf.keras.datasets.cifar10.load_data()\n",
" trainset = (\n",
", y_train))\n",
" .map(perprocess)\n",
" .shuffle(4096)\n",
" .batch(128)\n",
" .repeat()\n",
" .prefetch(50)\n",
" )\n",
" model = create_model()\n",
" model.compile(\"adam\", \"sparse_categorical_crossentropy\", [\"acc\"])\n",
", steps_per_epoch=50000//128, epochs=1) \n",
" # 'Adam/conv2d/kernel/m:0' shape=(3, 3, 3, 64) \n",
" print(model.optimizer.weights[1][0, 0, 0,:10])\n",
", steps_per_epoch=50000 // 128, epochs=1)\n",
" # 'Adam/conv2d/kernel/m:0' shape=(3, 3, 3, 64)\n",
" print(model.optimizer.weights[1][0, 0, 0, :10])\n",
" # <tf.Variable 'conv2d/kernel:0' shape=(3, 3, 3, 64)\n",
" print(model.weights[0][0, 0, 0,:10])\n",
" print(model.weights[0][0, 0, 0, :10])\n",
" model.save_weights(\"model_tf.ckpt\", save_format=\"tf\") # デフォルト\n",
" model.save_weights(\"model_h5.h5\", save_format=\"h5\") "
" model.save_weights(\"model_h5.h5\", save_format=\"h5\")\n"
@ -179,7 +200,7 @@
"source": [
@ -210,9 +231,11 @@
" model.load_weights(\"model_tf.ckpt\")\n",
" # これでようやくオプティマイザーの値も同一になる\n",
" print(model.weights[0][0, 0, 0,:10])\n",
" print(model.optimizer.weights[1][0, 0, 0,:10])\n",
" print(model.weights[0][0, 0, 0, :10])\n",
" print(model.optimizer.weights[1][0, 0, 0, :10])\n",
@ -234,14 +257,14 @@
"source": [
"import numpy as np\n",
"a = np.array([10,20,30,0])\n",
"a = np.array([10, 20, 30, 0])\n",
"np.any(a == 0)"
"np.any(a == 0)\n"
"cell_type": "code",
"execution_count": 29,
"execution_count": 1,
"metadata": {},
"outputs": [
@ -250,7 +273,7 @@
"execution_count": 29,
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"