Aimbot-PPO/Aimbot-PPO-Python/Pytorch/GAILRecorder.py
Koha9 4b8ffeac6d GAIL V0.1 save point
GAIL V0.1 save point
TODO:
1. Debug human action recording (GAILMem)
2. Debug GAIL
2022-12-04 08:42:10 +09:00

98 lines
2.6 KiB
Python

import time
import numpy as np
from AimbotEnv import Aimbot
from GAILMem import GAILMem
import keyboard
import mouse
import math
# Environment launch settings; forwarded to the Aimbot constructor in the
# __main__ section as envPath / workerID / basePort.
ENV_PATH = "../Build/HUMAN-ParallelEnv-Target-OffPolicy-SingleStack-SideChannel-EndReward-Easy/Aimbot-ParallelEnv"
WORKER_ID = 1
BASE_PORT = 200
# Recording parameters.
# Divisor applied to the cursor's horizontal offset from screen center before
# smoothing (passed to HumanActions as mouseDiscount).
MOUSEDISCOUNT = 20.0
# Upper bound on the number of episodes the recording loop runs.
MAX_EP = 10000000
# NOTE(review): the two constants below are not referenced in the visible
# code; presumably state-stacking settings (stack size / interval) -- confirm.
STACKSTATESIZE = 3
STACKINTERCE = 29
class HumanActions:
    """Poll the keyboard and mouse and encode the input as one action row.

    Each call to ``getHumanActions`` reads the current key states and the
    horizontal cursor offset from the screen center, then re-centers the
    cursor so the next call measures a fresh offset.
    """

    # Two-key combinations registered as hotkeys in ``__init__``.
    _HOTKEY_COMBOS = ("w+a", "w+d", "s+a", "s+d")

    # (hotkey, ws code, ad code) checked in this priority order; only the
    # first pressed combination is applied.
    _DIAGONALS = (
        ("w+d", 1, 1),
        ("w+a", 1, 2),
        ("s+d", 2, 1),
        ("s+a", 2, 2),
    )

    def __init__(self, mouseDiscount: float = 10, screenW: int = 1920, screenH: int = 1080) -> None:
        """Register the two-key hotkeys and store the screen/mouse settings.

        Args:
            mouseDiscount: Divisor applied to the cursor's horizontal offset
                (in pixels) before smoothing.
            screenW: Screen width in pixels; half of it is the x reference.
            screenH: Screen height in pixels; used when re-centering.
        """

        def multiPressed():
            # Deliberate no-op callback: the hotkeys are only registered,
            # they trigger no action of their own.
            pass

        for combo in self._HOTKEY_COMBOS:
            keyboard.add_hotkey(combo, multiPressed)

        self.screenW = screenW
        self.screenH = screenH
        self.MOUSEDISCOUNT = mouseDiscount
        # Scale of the smoothing curve and its output bound; see
        # ``smoothMouseMovement``.
        self.mouseSmooth = 5
        self.mouseMax = 10

    def getHumanActions(self) -> np.ndarray:
        """Sample the current input and return it as a ``(1, 4)`` array.

        Returns:
            ``np.asarray([[ws, ad, click, xMovement]])`` where
            ``ws`` is 1 if "w" is pressed, else 2 if "s" is pressed, else 0;
            ``ad`` is 1 if "d" is pressed, else 2 if "a" is pressed, else 0;
            ``click`` is 1 while the "0" key is pressed, else 0;
            ``xMovement`` is the smoothed horizontal cursor offset.
        """
        cursorX, _ = mouse.get_position()
        rawOffset = (cursorX - self.screenW / 2) / self.MOUSEDISCOUNT
        xMovement = self.smoothMouseMovement(rawOffset)

        ws = 0
        ad = 0
        click = 0

        if keyboard.is_pressed("w"):
            ws = 1
        elif keyboard.is_pressed("s"):
            ws = 2

        if keyboard.is_pressed("d"):
            ad = 1
        elif keyboard.is_pressed("a"):
            ad = 2

        # Diagonal override. NOTE(review): this looks implied by the
        # single-key checks above when both keys are held; kept so the same
        # key-state queries are issued in the same order as before.
        for combo, comboWs, comboAd in self._DIAGONALS:
            if keyboard.is_pressed(combo):
                ws = comboWs
                ad = comboAd
                break

        if keyboard.is_pressed("0"):
            click = 1

        actions = np.asarray([[ws, ad, click, xMovement]])
        # Re-center the cursor so the next poll measures the offset from the
        # screen center again.
        mouse.move(self.screenW / 2, self.screenH / 2)
        return actions

    def smoothMouseMovement(self, x: float) -> float:
        """Squash ``x`` into the range ``[-mouseMax, mouseMax]``.

        Mathematically identical to the logistic form
        ``(1 / (1 + exp(-x / mouseSmooth)) - 1/2) * mouseMax * 2``, which
        equals ``mouseMax * tanh(x / (2 * mouseSmooth))``. The tanh form is
        used because it saturates for large negative ``x`` instead of
        raising ``OverflowError`` from ``math.exp``.

        Args:
            x: Raw (already discounted) horizontal offset.

        Returns:
            The smoothed offset, bounded by ``±mouseMax``.
        """
        return self.mouseMax * math.tanh(x / (2 * self.mouseSmooth))
# Script entry point: record human-controlled play as demonstration data.
if __name__ == "__main__":
    # Start the environment build at ENV_PATH with no side channels.
    env = Aimbot(
        envPath=ENV_PATH,
        workerID=WORKER_ID,
        basePort=BASE_PORT,
        side_channels=[],
    )
    # Demonstration buffer.
    # NOTE(review): the meaning of the argument 4 is not visible here;
    # presumably it matches the 4-value action row built by
    # HumanActions.getHumanActions -- confirm against GAILMem.
    demoMem = GAILMem(4)
    demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)
    for ep in range(MAX_EP):
        print("EP Start")
        done = False
        # Step the environment with polled human input until it reports done.
        while not done:
            actions = demoAct.getHumanActions()
            nextState, r, done = env.step(actions=actions)
            # Only the state and the action are recorded; the remaining
            # fields are passed as None.
            # NOTE(review): the stored state is the one returned AFTER the
            # action was applied -- confirm this pairing is intended.
            demoMem.saveMems(state=nextState, actorProb=None, action=actions, reward=None, done=None)
            # NOTE(review): `state` and `r` are assigned but never read in
            # the visible code.
            state = nextState
        # Disabled line: nowMemNum = demoMem.memNum
        # NOTE(review): the two values below are not read anywhere in the
        # visible code; presumably groundwork for periodic saving -- confirm.
        saveSteps = 500
        lastMemCheckPoint = 0