将 torch.Tensor(...) 构造调用改为 torch.tensor(..., dtype=torch.float32)
修复 torch.Tensor 与 torch.tensor 混用的问题,规范 tensor 的使用:创建张量时调用 torch.tensor 并显式指定 dtype=torch.float32,类型注解统一使用 torch.Tensor。
This commit is contained in:
parent
52ccce88bc
commit
10a1663230
@ -112,7 +112,7 @@ if __name__ == "__main__":
|
|||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
# predict actions
|
# predict actions
|
||||||
action, dis_logprob, _, con_logprob, _, value = agent.get_actions_value(
|
action, dis_logprob, _, con_logprob, _, value = agent.get_actions_value(
|
||||||
torch.Tensor(state).to(device)
|
torch.tensor(state,dtype=torch.float32).to(device)
|
||||||
)
|
)
|
||||||
value = value.flatten()
|
value = value.flatten()
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ class PPOMem:
|
|||||||
thisRewardBF = (np.asarray(thisRewardBF) + (remainTime * self.result_broadcast_ratio)).tolist()
|
thisRewardBF = (np.asarray(thisRewardBF) + (remainTime * self.result_broadcast_ratio)).tolist()
|
||||||
else:
|
else:
|
||||||
print("!!!!!DIDNT GET RESULT REWARD!!!!!!", rewardBF[-1])
|
print("!!!!!DIDNT GET RESULT REWARD!!!!!!", rewardBF[-1])
|
||||||
return torch.Tensor(thisRewardBF).to(self.device)
|
return torch.tensor(thisRewardBF,dtype=torch.float32).to(self.device)
|
||||||
|
|
||||||
def save_memories(
|
def save_memories(
|
||||||
self,
|
self,
|
||||||
@ -101,10 +101,10 @@ class PPOMem:
|
|||||||
thisRewardsTensor = self.broad_cast_end_reward(self.rewards_bf[i], remainTime)
|
thisRewardsTensor = self.broad_cast_end_reward(self.rewards_bf[i], remainTime)
|
||||||
adv, rt = agent.gae(
|
adv, rt = agent.gae(
|
||||||
rewards=thisRewardsTensor,
|
rewards=thisRewardsTensor,
|
||||||
dones=torch.Tensor(self.dones_bf[i]).to(self.device),
|
dones=torch.tensor(self.dones_bf[i],dtype=torch.float32).to(self.device),
|
||||||
values=torch.tensor(self.values_bf[i]).to(self.device),
|
values=torch.tensor(self.values_bf[i]).to(self.device),
|
||||||
next_obs=torch.tensor(next_state[i]).to(self.device).unsqueeze(0),
|
next_obs=torch.tensor(next_state[i]).to(self.device).unsqueeze(0),
|
||||||
next_done=torch.Tensor([next_done[i]]).to(self.device),
|
next_done=torch.tensor([next_done[i]],dtype=torch.float32).to(self.device),
|
||||||
)
|
)
|
||||||
# send memories to training datasets
|
# send memories to training datasets
|
||||||
self.obs[roundTargetType] = torch.cat((self.obs[roundTargetType], torch.tensor(np.array(self.ob_bf[i])).to(self.device)), 0)
|
self.obs[roundTargetType] = torch.cat((self.obs[roundTargetType], torch.tensor(np.array(self.ob_bf[i])).to(self.device)), 0)
|
||||||
@ -119,7 +119,7 @@ class PPOMem:
|
|||||||
# clear buffers
|
# clear buffers
|
||||||
self.clear_buffers(i)
|
self.clear_buffers(i)
|
||||||
print(f"train dataset {Targets(roundTargetType).name} added:{self.obs[roundTargetType].size()[0]}/{self.data_set_size}")
|
print(f"train dataset {Targets(roundTargetType).name} added:{self.obs[roundTargetType].size()[0]}/{self.data_set_size}")
|
||||||
|
|
||||||
def clear_buffers(self,ind:int):
|
def clear_buffers(self,ind:int):
|
||||||
# clear buffers
|
# clear buffers
|
||||||
self.ob_bf[ind] = []
|
self.ob_bf[ind] = []
|
||||||
@ -129,7 +129,7 @@ class PPOMem:
|
|||||||
self.rewards_bf[ind] = []
|
self.rewards_bf[ind] = []
|
||||||
self.dones_bf[ind] = []
|
self.dones_bf[ind] = []
|
||||||
self.values_bf[ind] = []
|
self.values_bf[ind] = []
|
||||||
|
|
||||||
def clear_training_datasets(self,ind:int):
|
def clear_training_datasets(self,ind:int):
|
||||||
# clear training datasets
|
# clear training datasets
|
||||||
self.obs[ind] = torch.tensor([]).to(self.device)
|
self.obs[ind] = torch.tensor([]).to(self.device)
|
||||||
|
@ -34,7 +34,7 @@ BROADCASTREWARD = False
|
|||||||
ANNEAL_LEARNING_RATE = True
|
ANNEAL_LEARNING_RATE = True
|
||||||
CLIP_VLOSS = True
|
CLIP_VLOSS = True
|
||||||
NORM_ADV = False
|
NORM_ADV = False
|
||||||
TRAIN = False
|
TRAIN = True
|
||||||
SAVE_MODEL = False
|
SAVE_MODEL = False
|
||||||
WANDB_TACK = False
|
WANDB_TACK = False
|
||||||
LOAD_DIR = None
|
LOAD_DIR = None
|
||||||
|
@ -275,8 +275,8 @@ class PPOAgent(nn.Module):
|
|||||||
self,
|
self,
|
||||||
rewards: torch.Tensor,
|
rewards: torch.Tensor,
|
||||||
dones: torch.Tensor,
|
dones: torch.Tensor,
|
||||||
values: torch.tensor,
|
values: torch.Tensor,
|
||||||
next_obs: torch.tensor,
|
next_obs: torch.Tensor,
|
||||||
next_done: torch.Tensor,
|
next_done: torch.Tensor,
|
||||||
) -> tuple:
|
) -> tuple:
|
||||||
# GAE
|
# GAE
|
||||||
|
Loading…
Reference in New Issue
Block a user