1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
import heapq
import random
import time
from queue import PriorityQueue

import numpy as np
class Simulator:
    """Grid world with a random non-negative cost on every cell.

    The agent moves on an ``n`` x ``m`` grid; the goal is the cell
    ``(n - 1, m - 1)``.  Entering a cell gives a reward equal to minus that
    cell's cost.  An episode ends when the goal is reached or when more than
    ``n * m`` steps have been taken.

    Attributes set by ``__init__``:
        n, m: grid dimensions.
        stateNums: number of cells (``n * m``).
        map: ``(n, m)`` array of cell costs, ``abs`` of standard normal draws.
        dis: ``(n, m)`` array; cheapest total cost from each cell to the goal.
        actionMap: list of ``{"dx", "dy"}`` displacements, indexed by action.
        actionNums: number of actions.
        alpha: ``step`` replaces the requested action with a uniformly random
            one whenever ``random.random() > alpha`` (never for ``alpha = 1``).

    ``x``, ``y`` and ``turns`` are created by ``start``, which must therefore
    be called before ``step``.
    """

    # Distance stored in ``dis`` for cells Dijkstra has not settled yet.
    _UNREACHED = 1e9

    def __init__(self, n: int, m: int) -> None:
        """Create an ``n`` x ``m`` grid with random costs and precompute ``dis``."""
        self.n, self.m = n, m
        self.stateNums = n * m
        self.map = np.abs(np.random.randn(n, m))
        self.dis = np.full((n, m), self._UNREACHED)
        self.actionMap = [
            {"dx": 1, "dy": 0},
            {"dx": -1, "dy": 0},
            {"dx": 0, "dy": 1},
            {"dx": 0, "dy": -1},
        ]
        self.alpha = 1
        self._dijkstra()
        self.actionNums = len(self.actionMap)

    def _dijkstra(self) -> None:
        """Fill ``self.dis`` with the cheapest cost-to-goal of every cell.

        The search runs backwards from the goal.  Stepping from a neighbour
        into cell ``(x, y)`` costs ``map[x, y]``, so a neighbour's candidate
        distance is ``dis[x, y] + map[x, y]``.  ``dis`` is reset first, which
        makes the method safe to call again after ``map`` has been modified.
        """
        self.dis.fill(self._UNREACHED)
        # A separate mask avoids using a float distance value as the
        # "already visited" marker.
        settled = np.zeros((self.n, self.m), dtype=bool)
        heap = [(0.0, (self.n - 1, self.m - 1))]
        while heap:
            d, (x, y) = heapq.heappop(heap)
            if settled[x, y]:
                # Stale heap entry: the cell was settled with a smaller cost.
                continue
            settled[x, y] = True
            self.dis[x, y] = d
            through_here = d + self.map[x, y]
            for move in self.actionMap:
                tx, ty = x + move["dx"], y + move["dy"]
                if self._check(tx, ty) and not settled[tx, ty]:
                    heapq.heappush(heap, (through_here, (tx, ty)))

    def _check(self, x: int, y: int) -> bool:
        """Return True when ``(x, y)`` lies inside the grid."""
        return 0 <= x < self.n and 0 <= y < self.m

    def _state(self) -> np.ndarray:
        """Return the agent position as a one-hot array of shape ``(1, n, m)``."""
        state = np.zeros((self.n, self.m))
        state[self.x, self.y] = 1
        return state.reshape(-1, self.n, self.m)

    def _move(self, x: int, y: int, move) -> tuple:
        """Apply ``move`` to ``(x, y)``; stay in place if it would leave the grid."""
        tx, ty = x + move["dx"], y + move["dy"]
        if self._check(tx, ty):
            return tx, ty
        return x, y

    def step(self, action: int) -> tuple:
        """Advance the episode by one action.

        Args:
            action: index into ``actionMap``.

        Returns:
            ``(state, x, y, reward, terminated)`` where ``state`` is the
            one-hot position, ``reward`` is minus the cost of the cell the
            agent is now on, and ``terminated`` tells whether the episode is
            over.
        """
        if random.random() > self.alpha:
            action = random.randint(0, self.actionNums - 1)

        self.x, self.y = self._move(self.x, self.y, self.actionMap[action])
        state = self._state()
        reward = -self.map[self.x, self.y]
        terminated = self.x == self.n - 1 and self.y == self.m - 1

        self.turns += 1
        if self.turns > self.n * self.m:
            # Step budget exhausted: charge the remaining cheapest
            # cost-to-goal and end the episode.
            reward += -self.dis[self.x, self.y]
            terminated = True

        return state, self.x, self.y, reward, terminated

    def start(self) -> tuple:
        """Begin a new episode at a random cell and reset the step counter.

        The row is drawn from ``[0, n - 2]`` and the column from
        ``[0, m - 2]``.  The upper bounds are clamped to 0 so that grids with
        a single row or column do not make ``random.randint`` raise
        ``ValueError``; on a 1 x 1 grid the episode starts on the goal.

        Returns:
            ``(state, x, y)`` with the one-hot position and its coordinates.
        """
        self.x = random.randint(0, max(self.n - 2, 0))
        self.y = random.randint(0, max(self.m - 2, 0))
        self.turns = 0
        return self._state(), self.x, self.y

    def _calc(self, x: int, y: int, a: int):
        """Return the flat index of the (cell, action) pair ``((x, y), a)``."""
        return (x * self.m + y) * self.actionNums + a

    def state(self) -> tuple:
        """Build the (cell, action) successor matrix and reward vector.

        Both are indexed with ``_calc``.  For every non-goal cell and action,
        ``r`` holds minus the cost of the cell that action leads to, and the
        matching row of ``E`` has a 1 in the column of every action available
        at that successor cell.  Rows belonging to the goal cell stay zero.
        Moves are taken as given by ``_move``; ``alpha`` is not consulted.

        Returns:
            ``(E, r)`` with shapes ``(S * A, S * A)`` and ``(S * A, 1)``,
            where ``S = stateNums`` and ``A = actionNums``.
        """
        size = self.stateNums * self.actionNums
        E = np.zeros((size, size))
        r = np.zeros((size, 1))
        goal = (self.n - 1, self.m - 1)
        for x in range(self.n):
            for y in range(self.m):
                if (x, y) == goal:
                    continue
                for a in range(self.actionNums):
                    tx, ty = self._move(x, y, self.actionMap[a])
                    row = self._calc(x, y, a)
                    r[row] -= self.map[tx, ty]
                    for next_a in range(self.actionNums):
                        E[row, self._calc(tx, ty, next_a)] += 1
        return E, r
|