\(\nabla_{\theta}E_{\tau}r_t=\nabla_{\theta}{\sum_{\tau}p_{\theta}(\tau)r_t}= \\ \sum_{\tau}\nabla_{\theta}p_{\theta}(\tau)r_t=\sum_{\tau}{p_{\theta}(\tau)\nabla_{\theta}\log\{p_{\theta}(\tau)\}r_t}=\\ \sum_{\tau}p_{\theta}(\tau)\{\sum_{i=0}^{T-1}\nabla_{\theta}\log\{\pi_{\theta}(a_i|s_i)\} \}r_t\)
\(\sum_{\tau}{p_{\theta}(\tau)\nabla_{\theta}\log\{\pi_{\theta}(a_{t+k}|s_{t+k})\}r_t}=\\ \sum_{s_0,a_0,\dots,s_{T-1},a_{T-1}}p(s_0)\cdot \prod_{i=0}^{T-1}\pi_{\theta}(a_i|s_i)\cdot p(s_{i+1}|s_i,a_i)\nabla_{\theta}\log\{\pi_{\theta}(a_{t+k}|s_{t+k})\}r_t=\\ \sum_{s_0,a_0,\dots,s_{t+k},a_{t+k}}\sum_{s_{t+k+1},a_{t+k+1},\dots,s_{T-1},a_{T-1}}p(s_0)\cdot \prod_{i=0}^{T-1}\pi_{\theta}(a_i|s_i)\cdot p(s_{i+1}|s_i,a_i)\cdot \\ \nabla_{\theta}\log\{\pi_{\theta}(a_{t+k}|s_{t+k})\}r_t=\\ \sum_{s_0,a_0,\dots,s_{t+k},a_{t+k}}p(s_0)\cdot \prod_{i=0}^{t+k-1}\pi_{\theta}(a_i|s_i)\cdot p(s_{i+1}|s_i,a_i)\cdot \pi_{\theta}(a_{t+k}|s_{t+k})\nabla_{\theta}\log\{\pi_{\theta}(a_{t+k}|s_{t+k})\}r_t \cdot \\ \sum_{s_{t+k+1},a_{t+k+1},\dots,s_{T-1},a_{T-1}} p(s_{t+k+1}|s_{t+k},a_{t+k}) \cdot\prod_{i=t+k+1}^{T-1}\pi_{\theta}(a_i|s_i)\cdot p(s_{i+1}|s_i,a_i)\)
\(\sum_{s_0,a_0,\dots,s_{t+k},a_{t+k}}p(s_0) \prod_{i=0}^{t+k-1}\pi_{\theta}(a_i|s_i)\cdot p(s_{i+1}|s_i,a_i)\cdot \pi_{\theta}(a_{t+k}|s_{t+k})\nabla_{\theta}\log\{\pi_{\theta}(a_{t+k}|s_{t+k})\}r_t=\\ \sum_{s_0,a_0,\dots,s_{t+k}}r_t \cdot p(s_0) \prod_{i=0}^{t+k-1}\pi_{\theta}(a_i|s_i)\cdot p(s_{i+1}|s_i,a_i)\cdot \sum_{a_{t+k}}\pi_{\theta}(a_{t+k}|s_{t+k})\nabla_{\theta}\log\{\pi_{\theta}(a_{t+k}|s_{t+k})\}\)
\(\sum_{a_{t+k}}\pi_{\theta}(a_{t+k}|s_{t+k})\nabla_{\theta}\log\{\pi_{\theta}(a_{t+k}|s_{t+k})\}=\\ \sum_{a_{t+k}}\pi_{\theta}(a_{t+k}|s_{t+k})\frac{\nabla_{\theta}\pi_{\theta}(a_{t+k}|s_{t+k})}{\pi_{\theta}(a_{t+k}|s_{t+k})}=\sum_{a_{t+k}}\nabla_{\theta}\pi_{\theta}(a_{t+k}|s_{t+k})=\\ \nabla_{\theta}\sum_{a_{t+k}}\pi_{\theta}(a_{t+k}|s_{t+k})=\nabla_{\theta}1=0\)
\(\sum_{\tau}{p_{\theta}(\tau)\nabla_{\theta}\log\{\pi_{\theta}(a_{t+k}|s_{t+k})\}r_t}=0\Longrightarrow \\ \sum_{\tau}p_{\theta}(\tau)\{\sum_{i=0}^{T-1}\nabla_{\theta}\log\{\pi_{\theta}(a_i|s_i)\} \}r_t=\sum_{\tau}p_{\theta}(\tau)\{\sum_{i=0}^{t}\nabla_{\theta}\log\{\pi_{\theta}(a_i|s_i)\} \}r_t \Longrightarrow \\ \nabla_{\theta}E_{\tau}r_t=\sum_{\tau}p_{\theta}(\tau)\{\sum_{i=0}^{t}\nabla_{\theta}\log\{\pi_{\theta}(a_i|s_i)\} \}r_t\)