Pendulum

mit6.824 2026-04-01 14:54

喜欢将平衡比作钟摆的这个形容

links

https://www.loot-drop.io/

https://github.com/ssoccean/PPT_Pilot

reinforcement learning需要考虑的核心问题之一

掌握好explore和exploit的取舍平衡非常重要

idea-实践循环

核心就两件事

用工程进度文件而不是上下文压缩长时工作更好

规划、执行、评估分开更好能解决模型自信问题

lc1220

/**
 * LeetCode 1220 - Count Vowels Permutation.
 *
 * Counts the strings of length n over {a, e, i, o, u} that obey the
 * follow-rules listed below, modulo 1e9 + 7. The count is built one
 * character at a time: only the number of valid strings ending in each
 * vowel is needed to extend the length by one.
 */
class Solution {
public:
    // state[v] = number of valid strings of the current length that end in
    // vowel v, already reduced modulo `mod`.
    std::vector<long long> state;

    // Vowel indices:   a e i o u
    //                  0 1 2 3 4
    // Allowed pairs "xy" (x may be followed by y):
    //                  01 10 12 20 21 23 24 32 34 40
    long long mod = 1000000007LL;

    /**
     * Advances the per-vowel counts by one character.
     *
     * @param s counts for length L, indexed by the last vowel (size 5).
     * @return  counts for length L + 1, each reduced modulo `mod`.
     */
    std::vector<long long> run(const std::vector<long long>& s) {
        std::vector<long long> next(5);
        // next[y] sums s[x] over every allowed pair "xy".
        next[0] = (s[1] + s[2] + s[4]) % mod;  // a after e, i, u
        next[1] = (s[0] + s[2]) % mod;         // e after a, i
        next[2] = (s[1] + s[3]) % mod;         // i after e, o
        next[3] = s[2] % mod;                  // o after i
        next[4] = (s[2] + s[3]) % mod;         // u after i, o
        return next;
    }

    /**
     * @param n length of the strings to count.
     * @return  number of valid strings of length n, modulo 1e9 + 7.
     *          For n <= 1 no transition is applied and the result is 5.
     */
    int countVowelPermutation(int n) {
        // Length 1: exactly one string ends in each vowel.
        state.assign(5, 1);
        for (int len = 1; len < n; ++len) {
            state = run(state);
        }

        long long sum = 0;
        for (long long count : state) {
            sum = (sum + count) % mod;
        }
        // sum < mod, so it fits in int.
        return static_cast<int>(sum);
    }
};

lc1269

归来仍是第一反应写最朴素的dp，释怀地笑了

// Modulus required by the problem statement.
const int MO = 1000000007;

/**
 * LeetCode 1269 - Number of Ways to Stay in the Same Place After Some Steps.
 *
 * A pointer starts at index 0 of an array of length arrLen. Each step it
 * moves one cell left, one cell right, or stays, never leaving the array.
 * numWays counts the step sequences that end at index 0, modulo MO.
 */
class Solution {
public:
    /**
     * @param steps  number of steps to take (expected >= 0).
     * @param arrLen length of the array (expected >= 1).
     * @return       number of ways to be at index 0 after exactly `steps`
     *               steps, modulo MO.
     */
    int numWays(int steps, int arrLen) {
        // A cell further than steps / 2 from the origin cannot be left in
        // time to get back to index 0, so it never contributes.
        const int maxPos = std::min(steps / 2, arrLen - 1);

        // prev[j] = ways to stand on cell j after the steps processed so far.
        // Only the previous row is read, so two rows replace the full table.
        std::vector<long long> prev(maxPos + 1, 0);
        std::vector<long long> cur(maxPos + 1, 0);
        prev[0] = 1;

        for (int i = 1; i <= steps; ++i) {
            for (int j = 0; j <= maxPos; ++j) {
                long long ways = prev[j];          // stay
                if (j - 1 >= 0) {
                    ways += prev[j - 1];           // arrived from the left
                }
                if (j + 1 <= maxPos) {
                    ways += prev[j + 1];           // arrived from the right
                }
                cur[j] = ways % MO;
            }
            prev.swap(cur);
        }

        // The answer is read only after all steps have been processed.
        return static_cast<int>(prev[0] % MO);
    }
};