喜欢将平衡比作钟摆的这个形容
links
https://github.com/ssoccean/PPT_Pilot
reinforcement learning 需要考虑的核心问题之一
掌握好explore和exploit的接受平衡非常重要
idea-实践 循环
核心就两件事
用工程进度文件而不是上下文压缩长时工作更好
规划、执行、评估分开更好能解决模型自信问题
lc1220

/// LeetCode 1220 — Count Vowels Permutation.
///
/// Counts strings of length n over {a, e, i, o, u} that obey the allowed
/// "vowel -> next vowel" transitions listed below, modulo 1e9 + 7.
/// The count is computed with a 5-state DP: state[v] is the number of valid
/// strings of the current length that END in vowel v.
class Solution {
public:
    /// state[v] = number of valid strings of the current length ending in v.
    std::vector<long long> state;

    // Vowel indices:
    //   a e i o u
    //   0 1 2 3 4
    // Allowed transitions, written as "from""to":
    //   01 10 12 20 21 23 24 32 34 40
    long long mod = 1000000007LL;

    /// Advances the DP by one character.
    ///
    /// @param s counts per ending vowel for length k (must hold 5 entries).
    /// @return counts per ending vowel for length k + 1, each reduced mod `mod`.
    std::vector<long long> run(const std::vector<long long>& s) {
        std::vector<long long> next(5);
        // Each entry sums the predecessors that may be followed by that vowel,
        // i.e. the "from" side of every pair above whose "to" is this index.
        next[0] = (s[1] + s[2] + s[4]) % mod;  // e->a, i->a, u->a
        next[1] = (s[0] + s[2]) % mod;         // a->e, i->e
        next[2] = (s[1] + s[3]) % mod;         // e->i, o->i
        next[3] = s[2] % mod;                  // i->o
        next[4] = (s[2] + s[3]) % mod;         // i->u, o->u
        return next;
    }

    /// @param n length of the strings to count.
    /// @return number of valid strings of length n, modulo 1e9 + 7.
    ///         For n <= 1 the transition loop does not run and the result is 5.
    int countVowelPermutation(int n) {
        // Length 1: every single vowel is a valid string.
        state = std::vector<long long>(5, 1);
        for (int i = 1; i < n; i++) {
            state = run(state);
        }

        long long sum = 0;
        for (int i = 0; i < 5; i++) {
            sum += state[i];
            sum %= mod;
        }
        return static_cast<int>(sum);
    }
};
lc1269
归来仍是第一反应写最朴素的dp 释怀的笑了
/// Modulus required by the problem statement.
const int MO = 1000000007;

/// LeetCode 1269 — Number of Ways to Stay in the Same Place After Some Steps.
///
/// A pointer starts at index 0 of an array of length arrLen. On each step it
/// moves one cell left, one cell right, or stays, and may never leave the array.
class Solution {
public:
    /// @param steps  number of steps to take.
    /// @param arrLen length of the array.
    /// @return number of step sequences that end at index 0, modulo MO.
    ///         Returns 0 when steps is negative or arrLen is not positive.
    int numWays(int steps, int arrLen) {
        if (steps < 0 || arrLen <= 0) {
            return 0;
        }

        // A walk that ends at index 0 can never be farther than steps / 2
        // from the origin (it needs as many steps to come back), so columns
        // beyond that bound cannot contribute to the answer.
        const int maxPos = std::min(steps / 2, arrLen - 1);

        // prev[j] = ways to be at index j after the steps processed so far.
        // Only the previous row is ever read, so two rows are enough.
        std::vector<long long> prev(maxPos + 1, 0);
        std::vector<long long> cur(maxPos + 1, 0);
        prev[0] = 1;

        for (int i = 1; i <= steps; ++i) {
            for (int j = 0; j <= maxPos; ++j) {
                long long ways = prev[j];                 // stay
                if (j - 1 >= 0) {
                    ways = (ways + prev[j - 1]) % MO;     // arrived from the left
                }
                if (j + 1 <= maxPos) {
                    ways = (ways + prev[j + 1]) % MO;     // arrived from the right
                }
                cur[j] = ways;
            }
            prev.swap(cur);
        }
        return static_cast<int>(prev[0] % MO);
    }
};