传统网站因为要注重 SEO 和速度,一般采用 php/java + 模板的方式开发(不会采用 SPA 单页模式)。特别是经过多次改版调整后,网页源码中往往残留很多无关的注释内容,占用了不少带宽。
经过本 Lua 代码自动处理后,多余的空白以及单行、多行注释会在输出到客户端之前被清理掉,而源码文件中的注释不受影响,因此该技术还是有一定的适用场景。
经我测试,压缩后的体积只有压缩前的 60%。
lua
-- Accumulates the processed output fragments: split_script() appends to it
-- and the body-filter code at the bottom of the file joins it with
-- table.concat once the last chunk has arrived.
local outarr = {};
--- Minify one fragment of markup or script text.
-- When the first occurrence of "script" in the fragment begins at byte 2
-- (as in a fragment that starts with "<script"), the script rule set is
-- applied; otherwise the HTML rule set is applied. Every rule is run through
-- ngx.re.gsub; a rule whose substitution fails (nil result) is skipped.
-- @param htm string|nil fragment to minify
-- @return string|nil the minified fragment, or nil when `htm` is nil
function strCompress(htm)
  if htm == nil then return end

  -- Each rule is { PCRE pattern, replacement, ngx.re options, description }.
  local pt = {
    -- [1] CSS/JS block comments: /* ... */
    {[[\/\*(.|\r|\n)*?\*\/]],"","i", "CSS/JS多行:/*多行注释*/"},
    -- [2] HTML comments: <!-- ... -->
    {[[<![-]{2}(.|\r|\n)*?[-]{2}>]],'',"i", "HTML注释:<!--注释-->"},
    -- [3] runs of newlines / tabs become a single space
    {[[[\n\r\t]+]]," ","i","多个换行缩进换成1个空格"},
    -- [4] whitespace in front of <ul>/<li>/<div> opening or closing tags
    {[[\s*(<[\/]*(ul|li|div)>)]],"$1","i","去ul,li,div前后空格"},
    -- [5] empty style/alt/title/class attributes
    {[[((style|alt|title|class)=[\"\']\s*[\"\'])]],"","i","去空alt class"},
    -- [6] two or more whitespace characters become one space
    {[[\s{2,}]],' ',"i","连续2个空格换成1个"},
    -- [7] whole-line // comments (optionally preceded by whitespace)
    {[[(^\s*\/\/.*$)]],'',"im","前空格的双斜杠整行注释"},
    -- [8] trailing // comments after code
    {[[(?<![\n\'\"\:])(\/\/[^\"\'\n]*)(?=[\n])]],'',"im","代码后双斜杠注释"},
    -- [9] leading whitespace at the start of a line
    {[[^\s{1,}]],'',"im","去行开始的空格"},
    -- [10] whitespace after } { ( , : ;
    {[[(\}|\{|\(|,|:|;)(\s|\n)*]],'$1',"is","大括号,左括号,逗号,分号后面的空白"},
    -- [11] whitespace (possibly spanning lines) in front of a closing brace
    {[[(\s|\n)*(\}.*$)]],'$2',"im","跨行右大括号 左边的空白"},
    -- [12] whitespace between ")" and "{"
    {[[\)\s*\{]],'){',"im","同行 右括号_左大括号中间空白"},
    -- [13] insert the missing ";" when "}" is directly followed by "if"
    {[[\}if]],'};if',"im","fix 行末不加分号后面接if"}
  }

  -- The order of the selected rules matters; reordering hurts compatibility.
  local Pats = {pt[1],pt[2],pt[3],pt[4],pt[5],pt[6]}
  local from = ngx.re.find(htm, "script", "oj")
  if from == 2 then
    Pats = {pt[1],pt[7],pt[8],pt[9],pt[10],pt[11],pt[12],pt[13]}
  end

  -- ipairs (not pairs): the rules have to run in exactly the listed order,
  -- and pairs() gives no guarantee about traversal order.
  for _, v in ipairs(Pats) do
    local newstr = ngx.re.gsub(htm, v[1], v[2], v[3])
    if newstr then htm = newstr end
  end
  return htm
end
--- Build a generic-for iterator that walks `a` one <script>...</script>
-- element at a time.
-- On every step the text in front of the next script element and the script
-- element itself are each passed through strCompress and appended to outarr
-- (the leading text is skipped when it is empty or a single space). When no
-- further script element is found, the remaining text is compressed,
-- appended, and the iteration ends.
-- @param a string|nil the complete text to process
-- @return function iterator function
-- @return any invariant state (`a`; unused by the iterator itself)
-- @return number initial control value (0)
function split_script(a)
  local i = 0
  -- Unprocessed tail of the input. Kept local to this call so that nothing
  -- leaks into (or is picked up from) another call or another request.
  local leave = a

  return function(_, max)
    if leave == nil then return nil end

    local from, to = ngx.re.find(leave, "<script.*?/script>", "sjo")
    i = i + 1

    if not from then
      -- No more script elements: the rest is handled as one fragment.
      table.insert(outarr, strCompress(leave))
      leave = nil
      return nil
    end

    local delimiter = strCompress(string.sub(leave, from, to))
    local cutstr = strCompress(string.sub(leave, 0, from - 1))
    if #cutstr > 0 and cutstr ~= ' ' then
      table.insert(outarr, cutstr)
    end
    table.insert(outarr, delimiter)
    leave = string.sub(leave, to + 1)

    if #leave == 0 then
      leave = nil
      return nil
    end
    if max > 128 then
      -- Step limit reached (`max` is the previous step number). Emit the
      -- unprocessed remainder as-is instead of dropping it, so the page is
      -- never truncated; it is left uncompressed because it may still mix
      -- markup and script.
      table.insert(outarr, leave)
      leave = nil
      return nil
    end
    return i, leave
  end, a, 0
end
--- Drive the iterator returned by split_script(str) until it is exhausted.
-- The values produced at each step are not used; the call exists for the
-- work the iterator does while it is being stepped.
-- @param str value handed to split_script
function doTraverse(str)
  local step, state, control = split_script(str)
  while true do
    control = step(state, control)
    if control == nil then
      break
    end
  end
end
-- Body-filter entry point: only text/html responses are compressed.
-- Chunks are accumulated in ngx.ctx.buf and withheld (an empty string is
-- emitted) until the last chunk arrives; the whole body is then processed
-- and sent in one piece.
-- NOTE(review): the output length differs from the upstream body, so per the
-- lua-nginx-module documentation the Content-Length response header has to
-- be cleared in a header filter; this file does not do that itself.
local content_type = ngx.header["Content-Type"]
local is_html = false
-- A response without a (string) Content-Type header is passed through
-- untouched instead of making ngx.re.find raise on a non-string subject.
if type(content_type) == "string" then
  is_html = ngx.re.find(content_type, "text/html", "sjo") ~= nil
end

if is_html then
  -- ngx.arg[1] is the current body chunk, ngx.arg[2] the end-of-body flag.
  local chunk, eof = ngx.arg[1], ngx.arg[2]
  local info = ngx.ctx.buf
  chunk = chunk or ""
  if info then
    ngx.ctx.buf = info .. chunk -- append to what has been buffered so far
  else
    ngx.ctx.buf = chunk
  end

  if eof then
    -- Read the response status explicitly; a bare `status` would be an
    -- undefined global (always nil), which made this branch unreachable.
    -- NOTE(review): HTTP 413 is "Payload Too Large" (408 is the timeout
    -- status); the message below is kept exactly as the author wrote it —
    -- confirm which status/message was intended.
    local status = ngx.status
    if status == 413 or status == "413" then
      ngx.arg[1] = "<h1>Nginx request body timeout</h1>"
    else
      doTraverse(ngx.ctx.buf)
      ngx.arg[1] = table.concat(outarr, "")
    end
  else
    -- Hold the output back until the complete body is available.
    ngx.arg[1] = ""
  end
end
使用方式
把以上内容保存到 proxy-data-minify.lua 文件中,然后在宝塔面板的 Nginx 站点配置文件中加入:
conf
body_filter_by_lua_file /www/server/lua/proxy-data-minify.lua;
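或者统一在 /www/server/nginx/conf/phpinfo.conf 末尾加入上面一行,这样会处理所有由 PHP 输出的网页源码。注意:压缩会改变响应体的长度,因此还需要在 header 过滤阶段清除 Content-Length 响应头,例如加入 header_filter_by_lua_block { ngx.header.content_length = nil }。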