简介
现在web应用无处不在,nginx作为流行的反向代理服务器,自然需要对HTTP协议进行解析,其中主要用到了状态机的机制分别对请求行、标头、状态行进行字段解析。
环境搭建
HTTP服务
- 编译
go build -o httpserver main.go
- 运行
./httpserver
- 请求
curl http://127.0.0.1:8888/api/hello
main.go
package main
import (
"io"
"log"
"net/http"
)
// main registers one greeting endpoint on the default mux and serves HTTP on
// port 8888 until the server fails.
func main() {
	// Requests for /api/hello are answered with a fixed greeting.
	http.HandleFunc("/api/hello", func(rw http.ResponseWriter, _ *http.Request) {
		io.WriteString(rw, "Hello, world!\n")
	})

	// A nil handler selects the default mux; the returned error is logged
	// and the process exits.
	err := http.ListenAndServe(":8888", nil)
	log.Fatal(err)
}
NGINX服务
安装
进入官网 https://nginx.org/en/download.html 选择相应的版本点击下载。
此处以最新稳定版1.24.0为例
- 下载
wget https://nginx.org/download/nginx-1.24.0.tar.gz
- 解压
tar -xzvf nginx-1.24.0.tar.gz
- 配置
cd nginx-1.24.0/; ./configure --prefix=/home/xiaofeng/workspace/C/nginx/install/
- 编译
make
- 安装
make install
配置
- 文件位置
install/conf/nginx.conf
- 设置用户
user root;
- 设置端口
listen 88;
- 反向代理,将api开头的请求反代到监听8888端口的HTTP服务程序
javascript
location ~ ^/api/ {
proxy_pass http://127.0.0.1:8888;
}
nginx.conf
user root;
worker_processes 1;
#error_log logs/error.log;
#error_log logs/error.log notice;
#error_log logs/error.log info;
#pid logs/nginx.pid;
events {
worker_connections 1024;
}
http {
include mime.types;
default_type application/octet-stream;
#log_format main '$remote_addr - $remote_user [$time_local] "$request" '
# '$status $body_bytes_sent "$http_referer" '
# '"$http_user_agent" "$http_x_forwarded_for"';
#access_log logs/access.log main;
sendfile on;
#tcp_nopush on;
#keepalive_timeout 0;
keepalive_timeout 65;
#gzip on;
server {
listen 88;
server_name localhost;
#charset koi8-r;
#access_log logs/host.access.log main;
location / {
root html;
index index.html index.htm;
}
#error_page 404 /404.html;
# redirect server error pages to the static page /50x.html
#
error_page 500 502 503 504 /50x.html;
location = /50x.html {
root html;
}
# api server
location ~ ^/api/ {
proxy_pass http://127.0.0.1:8888;
}
# proxy the PHP scripts to Apache listening on 127.0.0.1:80
#
#location ~ \.php$ {
# proxy_pass http://127.0.0.1;
#}
# pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
#
#location ~ \.php$ {
# root html;
# fastcgi_pass 127.0.0.1:9000;
# fastcgi_index index.php;
# fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name;
# include fastcgi_params;
#}
# deny access to .htaccess files, if Apache's document root
# concurs with nginx's one
#
#location ~ /\.ht {
# deny all;
#}
}
# another virtual host using mix of IP-, name-, and port-based configuration
#
#server {
# listen 8000;
# listen somename:8080;
# server_name somename alias another.alias;
# location / {
# root html;
# index index.html index.htm;
# }
#}
# HTTPS server
#
#server {
# listen 443 ssl;
# server_name localhost;
# ssl_certificate cert.pem;
# ssl_certificate_key cert.key;
# ssl_session_cache shared:SSL:1m;
# ssl_session_timeout 5m;
# ssl_ciphers HIGH:!aNULL:!MD5;
# ssl_prefer_server_ciphers on;
# location / {
# root html;
# index index.html index.htm;
# }
#}
}
启动
- 直接运行nginx命令,使用默认配置文件
- nginx -h 可以查看命令参数
- nginx -v 可以查看版本信息
- nginx -c filename 可以指定配置文件路径
- nginx -s reload 可以重新加载配置
shell
[xiaofeng@localhost nginx]$ sudo ./install/sbin/nginx
[xiaofeng@localhost nginx]$ ps aux|grep nginx
root 1288003 0.0 0.0 36576 412 ? Ss 17:42 0:00 nginx: master process ./install/sbin/nginx
root 1288004 0.0 0.0 66624 3948 ? S 17:42 0:00 nginx: worker process
xiaofeng 1288023 0.0 0.0 9212 1196 pts/11 S+ 17:42 0:00 grep --color=auto nginx
[xiaofeng@localhost nginx]$ sudo netstat -antup|grep nginx
tcp 0 0 0.0.0.0:88 0.0.0.0:* LISTEN 1288003/nginx: mast
测试
shell
[xiaofeng@localhost workspace]$ curl -v http://127.0.0.1:88/api/hello
* Trying 127.0.0.1...
* TCP_NODELAY set
* Connected to 127.0.0.1 (127.0.0.1) port 88 (#0)
> GET /api/hello HTTP/1.1
> Host: 127.0.0.1:88
> User-Agent: curl/7.61.1
> Accept: */*
>
< HTTP/1.1 200 OK
< Server: nginx/1.24.0
< Date: Mon, 08 Apr 2024 10:56:41 GMT
< Content-Type: text/plain; charset=utf-8
< Content-Length: 14
< Connection: keep-alive
<
Hello, world!
* Connection #0 to host 127.0.0.1 left intact
HTTP解析
HTTP requests, and responses, share similar structure and are composed of:
- A start-line describing the requests to be implemented, or its status of whether successful or a failure. This is always a single line.(第一行是请求行或者状态行)
- An optional set of HTTP headers specifying the request, or describing the body included in the message.(HTTP表头行,用于说明请求或描述body消息体)
- A blank line indicating all meta-information for the request has been sent.(空行)
- An optional body containing data associated with the request (like content of an HTML form), or the document associated with a response. The presence of the body and its size is specified by the start-line and HTTP headers.(body部分)
示例如下:
请求行解析
- usual数组配合 `usual[ch >> 5] & (1U << (ch & 0x1f))` 用于判断是否为常规字符:如 `?` 对应位为0,则条件表达式为假;`>` 对应位为1,则条件表达式为真,其他同理
- 定义好请求行中可能的状态
- 每次往后偏移一个字符
- 根据当前字符信息更新当前状态,并保存解析结果
- 空格作为每部分的分隔符
- 解析第一部分方法
- 解析第二部分URI(http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]])
- 解析第三部分版本
- 回车符 `\r` 和换行符 `\n` 作为结束标识
c
/*
 * Bitmap with one bit per byte value (8 x 32 = 256 bits), tested as
 * usual[ch >> 5] & (1U << (ch & 0x1f)).  When the bit is set, the URI
 * states of the request-line parser accept the byte without inspecting it
 * any further; when it is cleared, the byte goes through the per-state
 * switch instead.
 *
 * Cleared bits: every byte in 0x00-0x1f, ' ', '#', '%', '+', '.', '/',
 * '?', 0x7f, and additionally '\' when NGX_WIN32 is set.  Every byte
 * >= 0x80 has its bit set.
 *
 * In the per-word comments below the leftmost character is bit 31 and the
 * rightmost is bit 0.
 */
static uint32_t usual[] = {
0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
/* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
0x7fff37d6, /* 0111 1111 1111 1111 0011 0111 1101 0110 */
/* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
#if (NGX_WIN32)
0xefffffff, /* 1110 1111 1111 1111 1111 1111 1111 1111 */
#else
0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
#endif
/* ~}| {zyx wvut srqp onml kjih gfed cba` */
0x7fffffff, /* 0111 1111 1111 1111 1111 1111 1111 1111 */
0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
};
/*
 * Incrementally parse an HTTP request line from the bytes in
 * [b->pos, b->last).
 *
 * The parser is a byte-at-a-time state machine.  The state is loaded from
 * r->state on entry and stored back before NGX_AGAIN is returned, so the
 * function can be called again once more input is available in b.
 *
 * While scanning, it records pointers into the buffer (request_start,
 * method_end, schema_start/schema_end, host_start/host_end,
 * uri_start/uri_end, args_start, uri_ext, http_protocol.data, request_end)
 * and sets r->method, r->http_major, r->http_minor and the flags
 * complex_uri, quoted_uri, plus_in_uri and empty_path_in_uri.
 *
 * Returns:
 *   NGX_OK     the line is complete; b->pos is one past the terminating LF,
 *              r->http_version is set and r->state is reset to sw_start.
 *   NGX_AGAIN  the input ran out mid-line; b->pos and r->state are saved.
 *   NGX_HTTP_PARSE_INVALID_METHOD, NGX_HTTP_PARSE_INVALID_REQUEST,
 *   NGX_HTTP_PARSE_INVALID_VERSION
 *              malformed input; these return from inside the loop, before
 *              b->pos or r->state are written back.
 *   NGX_HTTP_PARSE_INVALID_09_METHOD
 *              the line parsed, but http_version is 9 and the method is
 *              not GET.
 */
ngx_int_t
ngx_http_parse_request_line(ngx_http_request_t *r, ngx_buf_t *b)
{
u_char c, ch, *p, *m;
/*
 * The numeric values are stored in r->state between calls, so the order
 * of the enumerators matters.
 */
enum {
sw_start = 0,
sw_method,
sw_spaces_before_uri,
sw_schema,
sw_schema_slash,
sw_schema_slash_slash,
sw_host_start,
sw_host,
sw_host_end,
sw_host_ip_literal,
sw_port,
sw_after_slash_in_uri,
sw_check_uri,
sw_uri,
sw_http_09,
sw_http_H,
sw_http_HT,
sw_http_HTT,
sw_http_HTTP,
sw_first_major_digit,
sw_major_digit,
sw_first_minor_digit,
sw_minor_digit,
sw_spaces_after_digit,
sw_almost_done
} state;
state = r->state;
for (p = b->pos; p < b->last; p++) {
ch = *p;
switch (state) {
/* HTTP methods: GET, HEAD, POST */
case sw_start:
/*
 * request_start is rewritten for every byte seen in this state, so
 * leading CR/LF bytes are skipped and it ends up on the first method
 * byte.
 */
r->request_start = p;
if (ch == CR || ch == LF) {
break;
}
/* method bytes are limited to 'A'-'Z', '_' and '-' */
if ((ch < 'A' || ch > 'Z') && ch != '_' && ch != '-') {
return NGX_HTTP_PARSE_INVALID_METHOD;
}
state = sw_method;
break;
case sw_method:
if (ch == ' ') {
/*
 * The space ends the method.  p - m is the method length and selects
 * which known names to compare against (the ngx_str*cmp helpers are
 * defined elsewhere).  If nothing matches, r->method is not assigned
 * here.
 */
r->method_end = p - 1;
m = r->request_start;
switch (p - m) {
case 3:
if (ngx_str3_cmp(m, 'G', 'E', 'T', ' ')) {
r->method = NGX_HTTP_GET;
break;
}
if (ngx_str3_cmp(m, 'P', 'U', 'T', ' ')) {
r->method = NGX_HTTP_PUT;
break;
}
break;
case 4:
/* the second byte splits the four-letter methods into two groups */
if (m[1] == 'O') {
if (ngx_str3Ocmp(m, 'P', 'O', 'S', 'T')) {
r->method = NGX_HTTP_POST;
break;
}
if (ngx_str3Ocmp(m, 'C', 'O', 'P', 'Y')) {
r->method = NGX_HTTP_COPY;
break;
}
if (ngx_str3Ocmp(m, 'M', 'O', 'V', 'E')) {
r->method = NGX_HTTP_MOVE;
break;
}
if (ngx_str3Ocmp(m, 'L', 'O', 'C', 'K')) {
r->method = NGX_HTTP_LOCK;
break;
}
} else {
if (ngx_str4cmp(m, 'H', 'E', 'A', 'D')) {
r->method = NGX_HTTP_HEAD;
break;
}
}
break;
case 5:
if (ngx_str5cmp(m, 'M', 'K', 'C', 'O', 'L')) {
r->method = NGX_HTTP_MKCOL;
break;
}
if (ngx_str5cmp(m, 'P', 'A', 'T', 'C', 'H')) {
r->method = NGX_HTTP_PATCH;
break;
}
if (ngx_str5cmp(m, 'T', 'R', 'A', 'C', 'E')) {
r->method = NGX_HTTP_TRACE;
break;
}
break;
case 6:
if (ngx_str6cmp(m, 'D', 'E', 'L', 'E', 'T', 'E')) {
r->method = NGX_HTTP_DELETE;
break;
}
if (ngx_str6cmp(m, 'U', 'N', 'L', 'O', 'C', 'K')) {
r->method = NGX_HTTP_UNLOCK;
break;
}
break;
case 7:
if (ngx_str7_cmp(m, 'O', 'P', 'T', 'I', 'O', 'N', 'S', ' '))
{
r->method = NGX_HTTP_OPTIONS;
}
if (ngx_str7_cmp(m, 'C', 'O', 'N', 'N', 'E', 'C', 'T', ' '))
{
r->method = NGX_HTTP_CONNECT;
}
break;
case 8:
if (ngx_str8cmp(m, 'P', 'R', 'O', 'P', 'F', 'I', 'N', 'D'))
{
r->method = NGX_HTTP_PROPFIND;
}
break;
case 9:
if (ngx_str9cmp(m,
'P', 'R', 'O', 'P', 'P', 'A', 'T', 'C', 'H'))
{
r->method = NGX_HTTP_PROPPATCH;
}
break;
}
state = sw_spaces_before_uri;
break;
}
if ((ch < 'A' || ch > 'Z') && ch != '_' && ch != '-') {
return NGX_HTTP_PARSE_INVALID_METHOD;
}
break;
/* space* before URI */
case sw_spaces_before_uri:
/* '/' starts a path directly; a letter starts a scheme */
if (ch == '/') {
r->uri_start = p;
state = sw_after_slash_in_uri;
break;
}
/* OR-ing 0x20 folds 'A'-'Z' onto 'a'-'z' so one range check covers both */
c = (u_char) (ch | 0x20);
if (c >= 'a' && c <= 'z') {
r->schema_start = p;
state = sw_schema;
break;
}
switch (ch) {
case ' ':
break;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
/* scheme: letters, digits, '+', '-', '.' up to the ':' */
case sw_schema:
c = (u_char) (ch | 0x20);
if (c >= 'a' && c <= 'z') {
break;
}
if ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.')
{
break;
}
switch (ch) {
case ':':
/* schema_end points at the ':' itself */
r->schema_end = p;
state = sw_schema_slash;
break;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
/* the two states below require the "//" that follows "scheme:" */
case sw_schema_slash:
switch (ch) {
case '/':
state = sw_schema_slash_slash;
break;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
case sw_schema_slash_slash:
switch (ch) {
case '/':
state = sw_host_start;
break;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
case sw_host_start:
r->host_start = p;
/* '[' opens a bracketed IP literal; anything else is handled as a host byte */
if (ch == '[') {
state = sw_host_ip_literal;
break;
}
state = sw_host;
/* fall through */
case sw_host:
/* host bytes: letters, digits, '.' and '-' */
c = (u_char) (ch | 0x20);
if (c >= 'a' && c <= 'z') {
break;
}
if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') {
break;
}
/* fall through */
case sw_host_end:
/* the current byte is the first one after the host */
r->host_end = p;
switch (ch) {
case ':':
state = sw_port;
break;
case '/':
r->uri_start = p;
state = sw_after_slash_in_uri;
break;
case '?':
/* query string directly after the host: there is no path */
r->uri_start = p;
r->args_start = p + 1;
r->empty_path_in_uri = 1;
state = sw_uri;
break;
case ' ':
/*
* use single "/" from request line to preserve pointers,
* if request line will be copied to large client buffer
*/
/* schema_end is the ':', so this selects the first '/' of "://" */
r->uri_start = r->schema_end + 1;
r->uri_end = r->schema_end + 2;
state = sw_http_09;
break;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
/* inside "[...]": digits, letters, ':', and the characters listed below */
case sw_host_ip_literal:
if (ch >= '0' && ch <= '9') {
break;
}
c = (u_char) (ch | 0x20);
if (c >= 'a' && c <= 'z') {
break;
}
switch (ch) {
case ':':
break;
case ']':
state = sw_host_end;
break;
case '-':
case '.':
case '_':
case '~':
/* unreserved */
break;
case '!':
case '$':
case '&':
case '\'':
case '(':
case ')':
case '*':
case '+':
case ',':
case ';':
case '=':
/* sub-delims */
break;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
/* port digits are skipped; the terminators mirror those of sw_host_end */
case sw_port:
if (ch >= '0' && ch <= '9') {
break;
}
switch (ch) {
case '/':
r->uri_start = p;
state = sw_after_slash_in_uri;
break;
case '?':
r->uri_start = p;
r->args_start = p + 1;
r->empty_path_in_uri = 1;
state = sw_uri;
break;
case ' ':
/*
* use single "/" from request line to preserve pointers,
* if request line will be copied to large client buffer
*/
r->uri_start = r->schema_end + 1;
r->uri_end = r->schema_end + 2;
state = sw_http_09;
break;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
/* check "/.", "//", "%", and "\" (Win32) in URI */
case sw_after_slash_in_uri:
/* bytes with their bit set in usual[] need no further checks */
if (usual[ch >> 5] & (1U << (ch & 0x1f))) {
state = sw_check_uri;
break;
}
switch (ch) {
case ' ':
r->uri_end = p;
state = sw_http_09;
break;
/*
 * A CR or LF here ends the line with no version token; http_minor is
 * set to 9 so that http_version can come out as 9 at "done" (this
 * relies on http_major being 0 at that point, which is not visible
 * here).
 */
case CR:
r->uri_end = p;
r->http_minor = 9;
state = sw_almost_done;
break;
case LF:
r->uri_end = p;
r->http_minor = 9;
goto done;
/* '.', '/' (and '\' on Win32) right after a slash flag the URI as complex */
case '.':
r->complex_uri = 1;
state = sw_uri;
break;
case '%':
r->quoted_uri = 1;
state = sw_uri;
break;
case '/':
r->complex_uri = 1;
state = sw_uri;
break;
#if (NGX_WIN32)
case '\\':
r->complex_uri = 1;
state = sw_uri;
break;
#endif
case '?':
r->args_start = p + 1;
state = sw_uri;
break;
case '#':
r->complex_uri = 1;
state = sw_uri;
break;
case '+':
r->plus_in_uri = 1;
break;
default:
/* control bytes and DEL are rejected */
if (ch < 0x20 || ch == 0x7f) {
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
state = sw_check_uri;
break;
}
break;
/* check "/", "%" and "\" (Win32) in URI */
case sw_check_uri:
if (usual[ch >> 5] & (1U << (ch & 0x1f))) {
break;
}
switch (ch) {
case '/':
#if (NGX_WIN32)
/* uri_ext == p means the previous byte was '.', i.e. the segment ends in a dot */
if (r->uri_ext == p) {
r->complex_uri = 1;
state = sw_uri;
break;
}
#endif
/* a new path segment starts: forget any '.' seen so far */
r->uri_ext = NULL;
state = sw_after_slash_in_uri;
break;
case '.':
/* remember the byte after the most recent '.' in this segment */
r->uri_ext = p + 1;
break;
case ' ':
r->uri_end = p;
state = sw_http_09;
break;
case CR:
r->uri_end = p;
r->http_minor = 9;
state = sw_almost_done;
break;
case LF:
r->uri_end = p;
r->http_minor = 9;
goto done;
#if (NGX_WIN32)
case '\\':
r->complex_uri = 1;
state = sw_after_slash_in_uri;
break;
#endif
case '%':
r->quoted_uri = 1;
state = sw_uri;
break;
case '?':
r->args_start = p + 1;
state = sw_uri;
break;
case '#':
r->complex_uri = 1;
state = sw_uri;
break;
case '+':
r->plus_in_uri = 1;
break;
default:
if (ch < 0x20 || ch == 0x7f) {
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
}
break;
/* URI */
case sw_uri:
/* no more structural checks: only the end of the URI and '#' are looked for */
if (usual[ch >> 5] & (1U << (ch & 0x1f))) {
break;
}
switch (ch) {
case ' ':
r->uri_end = p;
state = sw_http_09;
break;
case CR:
r->uri_end = p;
r->http_minor = 9;
state = sw_almost_done;
break;
case LF:
r->uri_end = p;
r->http_minor = 9;
goto done;
case '#':
r->complex_uri = 1;
break;
default:
if (ch < 0x20 || ch == 0x7f) {
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
}
break;
/* space+ after URI */
case sw_http_09:
switch (ch) {
case ' ':
break;
case CR:
r->http_minor = 9;
state = sw_almost_done;
break;
case LF:
r->http_minor = 9;
goto done;
case 'H':
/* start of the "HTTP/x.y" token */
r->http_protocol.data = p;
state = sw_http_H;
break;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
/* the next four states match the literal "TTP/" one byte at a time */
case sw_http_H:
switch (ch) {
case 'T':
state = sw_http_HT;
break;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
case sw_http_HT:
switch (ch) {
case 'T':
state = sw_http_HTT;
break;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
case sw_http_HTT:
switch (ch) {
case 'P':
state = sw_http_HTTP;
break;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
case sw_http_HTTP:
switch (ch) {
case '/':
state = sw_first_major_digit;
break;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
/* first digit of major HTTP version */
case sw_first_major_digit:
if (ch < '1' || ch > '9') {
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
r->http_major = ch - '0';
/* any major version above 1 is reported as an invalid version */
if (r->http_major > 1) {
return NGX_HTTP_PARSE_INVALID_VERSION;
}
state = sw_major_digit;
break;
/* major HTTP version or dot */
case sw_major_digit:
if (ch == '.') {
state = sw_first_minor_digit;
break;
}
if (ch < '0' || ch > '9') {
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
r->http_major = r->http_major * 10 + (ch - '0');
if (r->http_major > 1) {
return NGX_HTTP_PARSE_INVALID_VERSION;
}
break;
/* first digit of minor HTTP version */
case sw_first_minor_digit:
if (ch < '0' || ch > '9') {
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
r->http_minor = ch - '0';
state = sw_minor_digit;
break;
/* minor HTTP version or end of request line */
case sw_minor_digit:
if (ch == CR) {
state = sw_almost_done;
break;
}
if (ch == LF) {
goto done;
}
if (ch == ' ') {
state = sw_spaces_after_digit;
break;
}
if (ch < '0' || ch > '9') {
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
/* checked before appending a digit, so at most three digits are accepted */
if (r->http_minor > 99) {
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
r->http_minor = r->http_minor * 10 + (ch - '0');
break;
/* trailing spaces after the version are tolerated */
case sw_spaces_after_digit:
switch (ch) {
case ' ':
break;
case CR:
state = sw_almost_done;
break;
case LF:
goto done;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
break;
/* end of request line */
case sw_almost_done:
/* request_end is the byte preceding the current one; only LF may follow the CR */
r->request_end = p - 1;
switch (ch) {
case LF:
goto done;
default:
return NGX_HTTP_PARSE_INVALID_REQUEST;
}
}
}
/* input exhausted before the end of the line: save progress for the next call */
b->pos = p;
r->state = state;
return NGX_AGAIN;
done:
/* p is on the terminating LF; consume it */
b->pos = p + 1;
/*
 * When the line ended with a bare LF, sw_almost_done never ran
 * (presumably request_end is NULL-initialised by the caller - not
 * visible here), so the LF position is used instead.
 */
if (r->request_end == NULL) {
r->request_end = p;
}
r->http_version = r->http_major * 1000 + r->http_minor;
r->state = sw_start;
/* a request line without a version token is accepted only for GET */
if (r->http_version == 9 && r->method != NGX_HTTP_GET) {
return NGX_HTTP_PARSE_INVALID_09_METHOD;
}
return NGX_OK;
}
标头解析
- lowcase数组用于将字符转为小写,字母、数字和连字符 `-` 以外的字符对应值为0
- 解析第一部分标头名称
- 冒号 `:` 作为分隔符
- 解析第二部分标头值
- 标头值前后的空格直接跳过即可
- 回车符 `\r` 和换行符 `\n` 作为结束标识
c
/*
 * Incrementally parse one header line ("name: value") from the bytes in
 * [b->pos, b->last).
 *
 * The state, the running name hash and the write index into
 * r->lowcase_header are loaded from r->state, r->header_hash and
 * r->lowcase_index, and stored back on NGX_AGAIN, so the call can be
 * repeated once more input is available.
 *
 * While scanning, it records header_name_start/header_name_end and
 * header_start/header_end (the value range), writes a lower-cased copy of
 * the name into r->lowcase_header, and sets r->invalid_header when the name
 * contains a byte that is not accepted.
 *
 * allow_underscores: when non-zero, '_' in the name is hashed and copied
 * like any accepted byte; when zero, '_' only sets r->invalid_header.
 *
 * Returns:
 *   NGX_OK                         one header line was parsed.
 *   NGX_HTTP_PARSE_HEADER_DONE     an empty line was seen.
 *   NGX_AGAIN                      the input ran out mid-line.
 *   NGX_HTTP_PARSE_INVALID_HEADER  malformed input; returned from inside
 *                                  the loop, before b->pos or r->state are
 *                                  written back.
 */
ngx_int_t
ngx_http_parse_header_line(ngx_http_request_t *r, ngx_buf_t *b,
ngx_uint_t allow_underscores)
{
u_char c, ch, *p;
ngx_uint_t hash, i;
/* the numeric values are stored in r->state between calls */
enum {
sw_start = 0,
sw_name,
sw_space_before_value,
sw_value,
sw_space_after_value,
sw_ignore_line,
sw_almost_done,
sw_header_almost_done
} state;
/*
 * Indexed by byte value: letters map to their lower-case form, digits
 * and '-' map to themselves, every other byte maps to 0.
 */
/* the last '\0' is not needed because string is zero terminated */
static u_char lowcase[] =
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789\0\0\0\0\0\0"
"\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
"\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
state = r->state;
hash = r->header_hash;
i = r->lowcase_index;
for (p = b->pos; p < b->last; p++) {
ch = *p;
switch (state) {
/* first char */
case sw_start:
r->header_name_start = p;
r->invalid_header = 0;
switch (ch) {
/* a CR or LF as the very first byte is the empty line ending the headers */
case CR:
r->header_end = p;
state = sw_header_almost_done;
break;
case LF:
r->header_end = p;
goto header_done;
default:
state = sw_name;
c = lowcase[ch];
/* accepted name byte: start the hash and the lower-cased copy */
if (c) {
hash = ngx_hash(0, c);
r->lowcase_header[0] = c;
i = 1;
break;
}
if (ch == '_') {
if (allow_underscores) {
hash = ngx_hash(0, ch);
r->lowcase_header[0] = ch;
i = 1;
} else {
hash = 0;
i = 0;
r->invalid_header = 1;
}
break;
}
/* control bytes, space, DEL or ':' cannot start a header name */
if (ch <= 0x20 || ch == 0x7f || ch == ':') {
r->header_end = p;
return NGX_HTTP_PARSE_INVALID_HEADER;
}
/* any other byte: keep parsing the line but flag the header as invalid */
hash = 0;
i = 0;
r->invalid_header = 1;
break;
}
break;
/* header name */
case sw_name:
c = lowcase[ch];
if (c) {
hash = ngx_hash(hash, c);
r->lowcase_header[i++] = c;
/*
 * The mask wraps the index instead of letting it grow without bound
 * (presumably NGX_HTTP_LC_HEADER_LEN is a power of two matching the
 * size of lowcase_header - it is defined elsewhere).
 */
i &= (NGX_HTTP_LC_HEADER_LEN - 1);
break;
}
if (ch == '_') {
if (allow_underscores) {
hash = ngx_hash(hash, ch);
r->lowcase_header[i++] = ch;
i &= (NGX_HTTP_LC_HEADER_LEN - 1);
} else {
r->invalid_header = 1;
}
break;
}
if (ch == ':') {
r->header_name_end = p;
state = sw_space_before_value;
break;
}
/* the line ended without a ':': the name ends here and the value range is empty */
if (ch == CR) {
r->header_name_end = p;
r->header_start = p;
r->header_end = p;
state = sw_almost_done;
break;
}
if (ch == LF) {
r->header_name_end = p;
r->header_start = p;
r->header_end = p;
goto done;
}
/* IIS may send the duplicate "HTTP/1.1 ..." lines */
if (ch == '/'
&& r->upstream
&& p - r->header_name_start == 4
&& ngx_strncmp(r->header_name_start, "HTTP", 4) == 0)
{
state = sw_ignore_line;
break;
}
if (ch <= 0x20 || ch == 0x7f) {
r->header_end = p;
return NGX_HTTP_PARSE_INVALID_HEADER;
}
r->invalid_header = 1;
break;
/* space* before header value */
case sw_space_before_value:
switch (ch) {
case ' ':
break;
case CR:
r->header_start = p;
r->header_end = p;
state = sw_almost_done;
break;
case LF:
r->header_start = p;
r->header_end = p;
goto done;
case '\0':
r->header_end = p;
return NGX_HTTP_PARSE_INVALID_HEADER;
default:
/* first non-space byte: the value starts here */
r->header_start = p;
state = sw_value;
break;
}
break;
/* header value */
case sw_value:
switch (ch) {
case ' ':
/*
 * Tentative end of the value: if a non-space byte follows,
 * sw_space_after_value returns to sw_value and header_end is set
 * again later, so only trailing spaces are excluded.
 */
r->header_end = p;
state = sw_space_after_value;
break;
case CR:
r->header_end = p;
state = sw_almost_done;
break;
case LF:
r->header_end = p;
goto done;
case '\0':
r->header_end = p;
return NGX_HTTP_PARSE_INVALID_HEADER;
}
break;
/* space* before end of header line */
case sw_space_after_value:
switch (ch) {
case ' ':
break;
case CR:
state = sw_almost_done;
break;
case LF:
goto done;
case '\0':
r->header_end = p;
return NGX_HTTP_PARSE_INVALID_HEADER;
default:
state = sw_value;
break;
}
break;
/* ignore header line */
case sw_ignore_line:
/* skip everything up to and including the LF, then start a fresh line */
switch (ch) {
case LF:
state = sw_start;
break;
default:
break;
}
break;
/* end of header line */
case sw_almost_done:
switch (ch) {
case LF:
goto done;
/* repeated CR bytes before the LF are accepted */
case CR:
break;
default:
return NGX_HTTP_PARSE_INVALID_HEADER;
}
break;
/* end of header */
case sw_header_almost_done:
switch (ch) {
case LF:
goto header_done;
default:
return NGX_HTTP_PARSE_INVALID_HEADER;
}
}
}
/* input exhausted mid-line: save the state, hash and index for the next call */
b->pos = p;
r->state = state;
r->header_hash = hash;
r->lowcase_index = i;
return NGX_AGAIN;
done:
/* one header line is complete; p is on its LF */
b->pos = p + 1;
r->state = sw_start;
r->header_hash = hash;
r->lowcase_index = i;
return NGX_OK;
header_done:
/* empty line: the hash and index are not stored on this path */
b->pos = p + 1;
r->state = sw_start;
return NGX_HTTP_PARSE_HEADER_DONE;
}
状态行解析
- 解析第一部分版本信息
- 解析第二部分状态码:`status->code = status->code * 10 + (ch - '0');` 可将字符串200解析为整数200
- 解析第三部分描述信息
c
/*
 * Incrementally parse an HTTP status line of the form
 * "HTTP/<major>.<minor> <3-digit code>[<text>]" from the bytes in
 * [b->pos, b->last).
 *
 * The state is loaded from r->state and stored back when the input runs
 * out, so the call can be repeated once more bytes are available.
 *
 * On the way it fills r->http_major, r->http_minor, status->code,
 * status->count, status->start (first of the three code digits) and
 * status->end; on success it also sets status->http_version.
 * status->code and status->count are only accumulated here, never reset
 * (presumably the caller zeroes *status beforehand - not visible here).
 *
 * Returns:
 *   NGX_OK     the line is complete; b->pos is one past the terminating LF
 *              and r->state is reset to sw_start.
 *   NGX_AGAIN  the input ran out mid-line; b->pos and r->state are saved.
 *   NGX_ERROR  malformed input; b->pos and r->state are left untouched.
 */
ngx_int_t
ngx_http_parse_status_line(ngx_http_request_t *r, ngx_buf_t *b,
    ngx_http_status_t *status)
{
    u_char  *cur;
    u_char   octet;

    /* the numeric values are stored in r->state between calls */
    enum {
        sw_start = 0,
        sw_H,
        sw_HT,
        sw_HTT,
        sw_HTTP,
        sw_first_major_digit,
        sw_major_digit,
        sw_first_minor_digit,
        sw_minor_digit,
        sw_status,
        sw_space_after_status,
        sw_status_text,
        sw_almost_done
    } state;

    state = r->state;
    cur = b->pos;

    while (cur < b->last) {
        octet = *cur;

        switch (state) {

        /* the literal "HTTP/" is matched one byte per state */
        case sw_start:
            if (octet != 'H') {
                return NGX_ERROR;
            }
            state = sw_H;
            break;

        case sw_H:
            if (octet != 'T') {
                return NGX_ERROR;
            }
            state = sw_HT;
            break;

        case sw_HT:
            if (octet != 'T') {
                return NGX_ERROR;
            }
            state = sw_HTT;
            break;

        case sw_HTT:
            if (octet != 'P') {
                return NGX_ERROR;
            }
            state = sw_HTTP;
            break;

        case sw_HTTP:
            if (octet != '/') {
                return NGX_ERROR;
            }
            state = sw_first_major_digit;
            break;

        /* the major version must start with a non-zero digit */
        case sw_first_major_digit:
            if (!(octet >= '1' && octet <= '9')) {
                return NGX_ERROR;
            }
            r->http_major = octet - '0';
            state = sw_major_digit;
            break;

        /* further major digits, or the dot that ends them */
        case sw_major_digit:
            if (octet == '.') {
                state = sw_first_minor_digit;

            } else if (octet < '0' || octet > '9' || r->http_major > 99) {
                /* not a digit, or a digit that would exceed three digits */
                return NGX_ERROR;

            } else {
                r->http_major = r->http_major * 10 + (octet - '0');
            }
            break;

        /* at least one minor digit is required */
        case sw_first_minor_digit:
            if (!(octet >= '0' && octet <= '9')) {
                return NGX_ERROR;
            }
            r->http_minor = octet - '0';
            state = sw_minor_digit;
            break;

        /* further minor digits, or the space that ends the version */
        case sw_minor_digit:
            if (octet == ' ') {
                state = sw_status;

            } else if (octet < '0' || octet > '9' || r->http_minor > 99) {
                return NGX_ERROR;

            } else {
                r->http_minor = r->http_minor * 10 + (octet - '0');
            }
            break;

        /* status code: spaces are skipped, then exactly three digits */
        case sw_status:
            if (octet == ' ') {
                break;
            }

            if (!(octet >= '0' && octet <= '9')) {
                return NGX_ERROR;
            }

            status->code = status->code * 10 + (octet - '0');
            status->count++;

            if (status->count == 3) {
                /* cur is on the third digit, so the code began two back */
                status->start = cur - 2;
                state = sw_space_after_status;
            }
            break;

        /* the byte right after the code: separator or end of line */
        case sw_space_after_status:
            if (octet == ' ' || octet == '.') {
                /* '.': IIS may send 403.1, 403.2, etc */
                state = sw_status_text;

            } else if (octet == CR) {
                state = sw_almost_done;

            } else if (octet == LF) {
                goto done;

            } else {
                return NGX_ERROR;
            }
            break;

        /* free-form text up to the end of the line */
        case sw_status_text:
            if (octet == CR) {
                state = sw_almost_done;

            } else if (octet == LF) {
                goto done;
            }
            break;

        /* a CR was seen: record the end, then only LF is acceptable */
        case sw_almost_done:
            status->end = cur - 1;
            if (octet == LF) {
                goto done;
            }
            return NGX_ERROR;
        }

        cur++;
    }

    /* input exhausted mid-line: remember where and in which state */
    b->pos = cur;
    r->state = state;

    return NGX_AGAIN;

done:

    /* cur is on the terminating LF; consume it */
    b->pos = cur + 1;

    /* still unset when the line ended with a bare LF */
    if (status->end == NULL) {
        status->end = cur;
    }

    status->http_version = r->http_major * 1000 + r->http_minor;
    r->state = sw_start;

    return NGX_OK;
}
总结
- 可以通过gdb断点解析函数进行解析流程跟踪
- 支持应用层HTTP业务流量的网络安全设备研发可参考HTTP解析代码
- 一些详细的特性需要参考对应的RFC进行确认
- 核心思路在于利用状态机循环偏移分段解析