介绍
HTTP 模块负责解析客户端请求、生成响应,并通过 writev 零拷贝高效回写。它是 webserver 项目中比较重要的部分,基于 Reactor + 线程池 + ET(边缘触发)。
这个模块包括三个类,每个类及其作用如下表:
| 类 | 作用 |
|----------------|--------------------------|
| httprequest | 解析 HTTP 请求(请求行、请求头、请求体) |
| httpresponse | 生成 HTTP 响应(状态行、响应头、响应体) |
| httpconn | 封装一个 HTTP 连接,管理 IO 与业务调度 |
整体架构如下图所示:
┌────────┐ read(fd) ┌──────────┐ Parse ┌────────────┐
│ Client │ ─────────> │ read_buf │ ───────> │ httprequest│
└────────┘ └──────────┘ └─────┬──────┘
▲ │ path/method
│ ▼
│ ┌────────────┐
│ │httpresponse│
│ │ MakeResp │
│ └─────┬──────┘
│ │ 写状态行+头到 write_buf
│ │ mmap 文件到 mmFile
│ ▼
└── writev(fd, [write_buf, mmFile], 2) ──┘
调用链也很清晰:
httpconn::read() ──> 读入 read_buf
httpconn::process() ──> request.Parse() → response.MakeResponse() → 设置 iov
httpconn::write() ──> writev(fd, iov, iov_cnt) 写出
完整代码
httprequest.h
cpp
#ifndef HTTP_REQUEST_H
#define HTTP_REQUEST_H
#include <unordered_map>
#include <unordered_set>
#include <string>
#include <regex> // 正则表达式
#include <errno.h>
#include <mysql/mysql.h> //mysql
#include "../buffer/buffer.h"
#include "../log/log.h"
#include "../pool/sqlconnpool.h"
// Parser for a single HTTP request. Parse() walks a small state machine over
// the bytes in a buffer and fills in the method, path, version, headers and
// (for url-encoded POSTs) the form fields.
class httprequest {
public:
// States of the parser, in the order they are normally visited.
enum PARSE_STATE {
REQUEST_LINE,
HEADERS,
BODY,
FINISH,
};
// A freshly constructed request is already reset via init().
httprequest() { init(); }
~httprequest() = default;
// Clear every parsed field and go back to the REQUEST_LINE state.
void init();
// Parse the readable bytes of `buff`. Returns false when the buffer is empty
// or the request line is malformed, true otherwise.
bool Parse(buffer& buff);
// Accessors for the parsed request-line fields (by-value copies, plus one
// mutable reference to the path).
std::string Path() const;
std::string& Path();
std::string Method() const;
std::string Version() const;
// Look up a decoded POST form field; returns "" when the key is absent.
std::string GetPost(const std::string& key) const;
std::string GetPost(const char* key) const;
// True only when the request carries "Connection: keep-alive" and the
// version is "1.1".
bool IsKeepAlive() const;
private:
bool ParseRequestLine(const std::string& line); // parse the request line
void ParseHeader(const std::string& line); // parse one header line
void ParseBody(const std::string& line); // parse the request body
void ParsePath(); // normalise the request path
void ParsePost(); // handle a POST form submission
void ParseFromUrlencoded(); // decode an application/x-www-form-urlencoded body
static bool UserVerify(const std::string& name, const std::string& pwd, bool isLogin); // check (login) or create (register) a user in MySQL
PARSE_STATE state_;
std::string method_, path_, version_, body_;
std::unordered_map<std::string, std::string> header_;
std::unordered_map<std::string, std::string> post_;
// Extension-less paths that ParsePath() completes with ".html".
static const std::unordered_set<std::string> DEFAULT_HTML;
// Pages whose POST triggers user verification (0 = register, 1 = login).
static const std::unordered_map<std::string, int> DEFAULT_HTML_TAG;
static int ConverHex(char ch); // convert one hexadecimal digit to its decimal value
};
#endif
httprequest.cpp
cpp
#include "httprequest.h"
using namespace std;
// Paths that ParsePath() turns into "<path>.html".
const unordered_set<string> httprequest::DEFAULT_HTML{
"/index", "/register", "/login",
"/welcome", "/video", "/picture", };
// Pages whose POST body is checked by ParsePost(): tag 0 registers a user,
// tag 1 logs a user in.
const unordered_map<string, int> httprequest::DEFAULT_HTML_TAG {
{"/register.html", 0}, {"/login.html", 1}, };
// Reset the parser so the object can be reused for the next request:
// back to the REQUEST_LINE state with every parsed field emptied.
void httprequest::init() {
    state_ = REQUEST_LINE;

    method_.clear();
    path_.clear();
    version_.clear();
    body_.clear();

    header_.clear();
    post_.clear();
}
// A request is treated as keep-alive only when it explicitly sends
// "Connection: keep-alive" and was parsed as HTTP version "1.1".
bool httprequest::IsKeepAlive() const {
    const auto connection = header_.find("Connection");
    if (connection == header_.end()) {
        return false;
    }
    return connection->second == "keep-alive" && version_ == "1.1";
}
// Drive the parsing state machine over the readable bytes of `buff`.
//
// The data is split on CRLF; each line is handed to the handler of the
// current state, which advances state_ when its section is complete.
// Returns false when the buffer is empty or the request line is malformed,
// true otherwise (including when the data ran out before FINISH).
bool httprequest::Parse(buffer& buff) {
    const char CRLF[] = "\r\n";  // line terminator
    if(buff.readable_Size() <= 0) {  // nothing to parse
        return false;
    }
    while(buff.readable_Size() && state_ != FINISH) {
        // Find the end of the next line inside [read position, write position).
        // When no CRLF is left, lineEnd equals the write position and `line`
        // holds everything that remains (e.g. a body without trailing CRLF).
        const char* lineEnd = search(buff.readPos_ptrConst(), buff.writePos_ptrConst(), CRLF, CRLF + 2);
        std::string line(buff.readPos_ptrConst(), lineEnd);
        switch(state_)
        {
        case REQUEST_LINE:
            if(!ParseRequestLine(line)) {
                return false;
            }
            ParsePath();  // normalise the path just extracted
            break;
        case HEADERS:
            ParseHeader(line);
            // The current line's CRLF has not been consumed yet, so at most
            // 2 readable bytes means nothing follows the headers: no body.
            if(buff.readable_Size() <= 2) {
                state_ = FINISH;
            }
            break;
        case BODY:
            ParseBody(line);
            break;
        default:
            break;
        }
        if(lineEnd == buff.writePos_ptrConst()) {
            // No CRLF terminated this chunk. If it completed the request
            // (typically the body), consume it; otherwise the bytes would stay
            // in the buffer and be parsed as the start of the next request on
            // a keep-alive connection.
            if(state_ == FINISH) {
                buff.Retrieve(buff.readable_Size());
            }
            break;
        }
        buff.Retrieve(lineEnd - buff.readPos_ptr() + 2);  // consume the line and its CRLF
    }
    LOG_DEBUG("[%s], [%s], [%s]", method_.c_str(), path_.c_str(), version_.c_str());
    return true;
}
// Normalise the request path: "/" becomes "/index.html", and any path listed
// in DEFAULT_HTML gets ".html" appended. Other paths are left unchanged.
void httprequest::ParsePath() {
    if (path_ == "/") {
        path_ = "/index.html";
        return;
    }
    if (DEFAULT_HTML.count(path_) != 0) {
        path_ += ".html";
    }
}
// Split "METHOD PATH HTTP/VERSION" into method_, path_ and version_.
// On success the state advances to HEADERS and true is returned; on a
// malformed line an error is logged and false is returned.
bool httprequest::ParseRequestLine(const std::string& line) {
    // Compiling a std::regex is expensive, so build it once instead of on
    // every request. Groups: 1 = method, 2 = path, 3 = version.
    static const std::regex kRequestLine("^([^ ]*) ([^ ]*) HTTP/([^ ]*)$");

    std::smatch subMatch;
    if (!std::regex_match(line, subMatch, kRequestLine)) {
        LOG_ERROR("RequestLine Error");
        return false;
    }
    method_ = subMatch[1];
    path_ = subMatch[2];
    version_ = subMatch[3];
    state_ = HEADERS;  // next: header lines
    return true;
}
// Parse one "Key: Value" header line into header_. A line that does not
// match (such as the empty line that ends the header section) switches the
// state to BODY.
void httprequest::ParseHeader(const std::string& line) {
    // Built once: compiling a std::regex per header line is needlessly slow.
    static const std::regex kHeaderLine("^([^:]*): ?(.*)$");

    std::smatch subMatch;
    if (std::regex_match(line, subMatch, kHeaderLine)) {
        header_[subMatch[1]] = subMatch[2];
    } else {
        state_ = BODY;  // headers are over
    }
}
// Store the request body, run POST form handling on it and mark the
// request as completely parsed.
void httprequest::ParseBody(const std::string& line) {
    body_ = line;
    ParsePost();
    state_ = FINISH;  // nothing follows the body
    // size() is a size_t; convert it so it matches the %d conversion.
    LOG_DEBUG("Body:%s, len:%d", line.c_str(), static_cast<int>(line.size()));
}
// Convert one hexadecimal digit to its numeric value (0..15).
// '0'..'9' -> 0..9, 'A'..'F' and 'a'..'f' -> 10..15.
// Any other character is returned unchanged, as before.
int httprequest::ConverHex(char ch) {
    // Decimal digits used to fall through and return their ASCII code
    // (e.g. '1' -> 49), which corrupted every %XX escape containing a digit.
    if(ch >= '0' && ch <= '9') return ch - '0';
    if(ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
    if(ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
    return ch;
}
// Handle a url-encoded POST: decode the form fields and, when the target is
// the register or login page, verify the user and redirect path_ to the
// welcome page on success or the error page on failure.
void httprequest::ParsePost() {
    if (method_ != "POST" || header_["Content-Type"] != "application/x-www-form-urlencoded") {
        return;
    }
    ParseFromUrlencoded();

    const auto tagEntry = DEFAULT_HTML_TAG.find(path_);
    if (tagEntry == DEFAULT_HTML_TAG.end()) {
        return;  // not the register/login page
    }
    const int tag = tagEntry->second;
    LOG_DEBUG("Tag:%d", tag);
    if (tag != 0 && tag != 1) {
        return;
    }

    const bool isLogin = (tag == 1);  // 1 = login, 0 = register
    if (UserVerify(post_["username"], post_["password"], isLogin)) {
        path_ = "/welcome.html";
    } else {
        path_ = "/error.html";
    }
}
// Decode an application/x-www-form-urlencoded body ("k1=v1&k2=v2") into post_.
//
// The body is split on '&' and each pair on its first '='. Key and value are
// then decoded independently: '+' becomes a space and "%XX" becomes the byte
// with hexadecimal value XX. body_ itself is left untouched.
//
// Compared with the previous in-place decoder this
//  - never reads past the end of the body on a truncated escape ("%", "%4"),
//  - does not treat a decoded "%26" / "%3D" / "%2B" as a delimiter,
//  - decodes digits in escapes correctly,
//  - stores the last pair even when its value is empty or its key repeats.
void httprequest::ParseFromUrlencoded() {
    if (body_.empty()) { return; }

    // Value of one hex digit, or -1 when `c` is not a hex digit.
    const auto hexValue = [](char c) -> int {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        return -1;
    };

    // Percent-decode one key or value. Malformed escapes are kept literally.
    const auto decode = [&hexValue](const std::string& raw) -> std::string {
        std::string out;
        out.reserve(raw.size());
        for (std::size_t i = 0; i < raw.size(); ++i) {
            const char c = raw[i];
            if (c == '+') {
                out.push_back(' ');
                continue;
            }
            if (c == '%' && i + 2 < raw.size()) {
                const int hi = hexValue(raw[i + 1]);
                const int lo = hexValue(raw[i + 2]);
                if (hi >= 0 && lo >= 0) {
                    out.push_back(static_cast<char>(hi * 16 + lo));
                    i += 2;  // skip the two hex digits
                    continue;
                }
            }
            out.push_back(c);
        }
        return out;
    };

    std::size_t start = 0;
    while (start <= body_.size()) {
        std::size_t amp = body_.find('&', start);
        if (amp == std::string::npos) { amp = body_.size(); }
        const std::string pair = body_.substr(start, amp - start);
        start = amp + 1;
        if (pair.empty()) { continue; }

        const std::size_t eq = pair.find('=');
        const std::string key = decode(pair.substr(0, eq));
        if (key.empty()) { continue; }
        const std::string value =
            (eq == std::string::npos) ? std::string() : decode(pair.substr(eq + 1));

        post_[key] = value;
        LOG_DEBUG("%s = %s", key.c_str(), value.c_str());
    }
}
bool httprequest::UserVerify(const string &name, const string &pwd, bool isLogin) {
if(name == "" || pwd == "") { return false; }
LOG_INFO("Verify name:%s pwd:%s", name.c_str(), pwd.c_str());
MYSQL* sql;
sqlconnRAII(&sql, sqlconnpool::Instance());
assert(sql);
bool flag = false;
unsigned int j = 0;
char order[256] = { 0 };
MYSQL_FIELD *fields = nullptr;
MYSQL_RES *res = nullptr;
if(!isLogin) { flag = true; }
/* 查询用户及密码 */
snprintf(order, 256, "SELECT username, password FROM user WHERE username='%s' LIMIT 1", name.c_str());
LOG_DEBUG("%s", order);
if(mysql_query(sql, order)) {
mysql_free_result(res);
return false;
}
res = mysql_store_result(sql);
j = mysql_num_fields(res);
fields = mysql_fetch_fields(res);
while(MYSQL_ROW row = mysql_fetch_row(res)) {
LOG_DEBUG("MYSQL ROW: %s %s", row[0], row[1]);
string password(row[1]);
/* 注册行为 且 用户名未被使用*/
if(isLogin) {
if(pwd == password) { flag = true; }
else {
flag = false;
LOG_INFO("pwd error!");
}
}
else {
flag = false;
LOG_INFO("user used!");
}
}
mysql_free_result(res);
/* 注册行为 且 用户名未被使用*/
if(!isLogin && flag == true) {
LOG_DEBUG("regirster!");
bzero(order, 256);
snprintf(order, 256,"INSERT INTO user(username, password) VALUES('%s','%s')", name.c_str(), pwd.c_str());
LOG_DEBUG( "%s", order);
if(mysql_query(sql, order)) {
LOG_DEBUG( "Insert error!");
flag = false;
}
flag = true;
}
// SqlConnPool::Instance()->FreeConn(sql);
LOG_DEBUG( "UserVerify success!!");
return flag;
}
// Copy of the (normalised) request path.
std::string httprequest::Path() const{
return path_;
}
// Mutable reference to the request path, for callers that rewrite it.
std::string& httprequest::Path(){
return path_;
}
// Copy of the request method as it appeared in the request line.
std::string httprequest::Method() const {
return method_;
}
// Copy of the HTTP version string captured after "HTTP/".
std::string httprequest::Version() const {
return version_;
}
// Value of the POST form field `key`, or "" when the field is absent.
// `key` must not be empty.
std::string httprequest::GetPost(const std::string& key) const {
    assert(key != "");
    const auto field = post_.find(key);
    return field != post_.end() ? field->second : "";
}
// Same lookup for a C string key; `key` must not be null.
std::string httprequest::GetPost(const char* key) const {
    assert(key != nullptr);
    const auto field = post_.find(key);
    return field != post_.end() ? field->second : "";
}
httpresponse.h
cpp
#ifndef HTTP_RESPONSE
#define HTTP_RESPONSE
#include <string>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unordered_map>
#include <fcntl.h>
#include "../log/log.h"
#include "../buffer/buffer.h"
// Builds one HTTP response: status line and headers are appended to a
// buffer, while the body is the requested file mapped into memory.
class httpresponse
{
public:
httpresponse();
// Releases the file mapping, if any.
~httpresponse();
// Set up the response for a new request (status code, document root,
// request path, keep-alive flag) and reset the mapping bookkeeping.
void Init(int code_ = -1, std::string dir_ = "", std::string path_ = "", bool isKeepAlive_ = 0);
// Decide the final status code and write status line, headers and
// Content-length to `buff`; the file body is mapped rather than copied.
void MakeResponse(buffer &buff);
// Start of the mapped file, or nullptr when nothing is mapped.
char *File() { return mmFile; };
// Size recorded by the last stat() of the file; it is not reset when the
// mapping fails, so check File() as well.
size_t FileLen() { return mmFileStat.st_size; };
int Code() { return code; };
// munmap the file if it is mapped; safe to call repeatedly.
void unmapFile();
// MIME type derived from the suffix of `path` ("text/plain" by default).
std::string GetFileType();
private:
// Switch `path` to the error page registered for `code`, if there is one.
void ErrorHtml();
void AddStateLine(buffer &buff);
void AddHearder(buffer &buff);
void AddContent(buffer &buff);
// Fallback body generated in memory when the file cannot be opened/mapped.
void ErrorContent(buffer &buff);
int code;
std::string dir;
std::string path;
bool isKeepAlive;
char *mmFile;
struct stat mmFileStat;
static const std::unordered_map<int, std::string> CODE_PATH;
static const std::unordered_map<int, std::string> CODE_STATUS;
static const std::unordered_map<std::string, std::string> SUFFIX_TYPE;
};
#endif
httpresponse.cpp
cpp
#include "httpresponse.h"
#include <iostream>
// Status code -> error page served instead of the requested file.
const std::unordered_map<int, std::string> httpresponse::CODE_PATH = {
    {400, "/400.html"}, {401, "/401.html"}, {404, "/404.html"}};

// Status code -> reason phrase used in the status line.
const std::unordered_map<int, std::string> httpresponse::CODE_STATUS = {
    {200, "OK"}, {400, "Bad Request"}, {403, "Forbidden"}, {404, "Not Found"}};

// File suffix -> MIME type sent in the Content-type header.
// Fixed: "application/nsword" was a typo for "application/msword", and the
// css/javascript entries carried a stray trailing space.
const std::unordered_map<std::string, std::string> httpresponse::SUFFIX_TYPE = {
    {".html", "text/html"},
    {".xml", "text/xml"},
    {".xhtml", "application/xhtml+xml"},
    {".txt", "text/plain"},
    {".rtf", "application/rtf"},
    {".pdf", "application/pdf"},
    {".word", "application/msword"},
    {".png", "image/png"},
    {".gif", "image/gif"},
    {".jpg", "image/jpeg"},
    {".jpeg", "image/jpeg"},
    {".au", "audio/basic"},
    {".mpeg", "video/mpeg"},
    {".mpg", "video/mpeg"},
    {".avi", "video/x-msvideo"},
    {".gz", "application/x-gzip"},
    {".tar", "application/x-tar"},
    {".css", "text/css"},
    {".js", "text/javascript"}};
httpresponse::httpresponse()
{
code = -1;
dir = "";
path = "";
isKeepAlive = false;
mmFile = nullptr;
mmFileStat = {0};
}
// The destructor only has to release the file mapping.
httpresponse::~httpresponse()
{
    unmapFile();
}

// Unmap the file if one is mapped. Clearing the pointer afterwards makes
// repeated calls harmless.
void httpresponse::unmapFile()
{
    if (mmFile == nullptr)
    {
        return;
    }
    munmap(mmFile, mmFileStat.st_size);
    mmFile = nullptr;
}
void httpresponse::Init(int code_, std::string dir_, std::string path_, bool isKeepAlive_)
{
code = code_;
dir = dir_;
path = path_;
isKeepAlive = isKeepAlive_;
mmFile = nullptr;
mmFileStat = {0};
}
// Pick the final status code from the state of the target file, switch to an
// error page when one is registered for that code, then emit the status
// line, the headers and the content.
void httpresponse::MakeResponse(buffer &buff)
{
    const bool unusable =
        stat((dir + path).data(), &mmFileStat) == -1 || S_ISDIR(mmFileStat.st_mode);

    if (unusable)
    {
        code = 404;  // missing, or a directory
    }
    else if (!(mmFileStat.st_mode & S_IROTH))
    {
        code = 403;  // not readable by "others"
    }
    else if (code == -1)
    {
        code = 200;  // no code chosen by the caller
    }

    ErrorHtml();
    AddStateLine(buff);
    AddHearder(buff);
    AddContent(buff);
}
// When an error page is registered for the current status code, serve it
// instead of the requested file and refresh the stat information for it.
void httpresponse::ErrorHtml()
{
    const auto page = CODE_PATH.find(code);
    if (page == CODE_PATH.end())
    {
        return;
    }
    path = page->second;
    stat((dir + path).data(), &mmFileStat);
}
// Append the status line "HTTP/1.1 <code> <reason>\r\n". A status code
// without a known reason phrase is replaced by 400.
void httpresponse::AddStateLine(buffer &buff)
{
    auto reason = CODE_STATUS.find(code);
    if (reason == CODE_STATUS.end())
    {
        code = 400;
        reason = CODE_STATUS.find(code);
    }
    const std::string status = reason->second;
    buff.Append("HTTP/1.1 " + std::to_string(code) + " " + status + "\r\n");
}
// Append the Connection header (plus Keep-Alive parameters for persistent
// connections) and the Content-type header.
void httpresponse::AddHearder(buffer &buff)
{
    buff.Append("Connection: ");
    if (!isKeepAlive)
    {
        buff.Append("close\r\n");
    }
    else
    {
        buff.Append("keep-alive\r\n");
        buff.Append("Keep-Alive: max=6, timeout=120\r\n");
    }

    const std::string contentType = GetFileType();
    buff.Append("Content-type: " + contentType + "\r\n");
}
// MIME type for the current path, chosen by the text from its last '.'
// onwards. Falls back to "text/plain" when there is no suffix or the suffix
// is not in SUFFIX_TYPE.
std::string httpresponse::GetFileType()
{
    const std::string::size_type dot = path.find_last_of('.');
    if (dot != std::string::npos)
    {
        const auto known = SUFFIX_TYPE.find(path.substr(dot));
        if (known != SUFFIX_TYPE.end())
        {
            return known->second;
        }
    }
    return "text/plain";
}
// Map the response file into memory and append its Content-length header
// (followed by the blank line that ends the header section). The file bytes
// themselves are not copied into `buff`; they are exposed through File().
// When the file cannot be opened or mapped, an in-memory error body is
// written instead.
void httpresponse::AddContent(buffer &buff)
{
    int fd = open((dir + path).data(), O_RDONLY);
    if(fd == -1)
    {
        ErrorContent(buff);
        return;
    }

    // mmap() rejects a zero length, so an empty file used to be answered with
    // the error body. Send it as an empty body instead.
    if(mmFileStat.st_size == 0)
    {
        close(fd);
        buff.Append("Content-length: 0\r\n\r\n");
        return;
    }

    char *mmRef = (char*)mmap(0, mmFileStat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    if(mmRef == MAP_FAILED)
    {
        close(fd);
        ErrorContent(buff);
        return;
    }
    LOG_DEBUG("file path is %s", (dir + path).data());
    mmFile = mmRef;
    close(fd);  // the mapping stays valid after the descriptor is closed
    buff.Append("Content-length: " + std::to_string(mmFileStat.st_size) + "\r\n\r\n");
}
// Write a small generated HTML error page: its Content-length header, the
// blank line ending the headers, then the page itself.
void httpresponse::ErrorContent(buffer &buff)
{
    const auto reason = CODE_STATUS.find(code);
    const std::string status =
        (reason != CODE_STATUS.end()) ? reason->second : std::string("Bad Request");

    std::string body = "<html><title>Error</title>";
    body.append("<body bgcolor=\"ffffff\">");
    body.append(std::to_string(code) + " : " + status + "\n");
    body.append("<p>File NotFound!</p>");
    body.append("<hr><em>TinyWebServer</em></body></html>");

    buff.Append("Content-length: " + std::to_string(body.size()) + "\r\n\r\n");
    buff.Append(body);
}
httpconn.h
cpp
#ifndef HTTP_CONN
#define HTTP_CONN
#include <stdlib.h>
#include <sys/uio.h>
#include <arpa/inet.h>
#include "../buffer/buffer.h"
#include "../log/log.h"
#include "../httprequest/httprequest.h"
#include "httpresponse.h"
// One client connection: the socket, the peer address, the read/write
// buffers, and the request/response objects that turn input into output.
class httpconn
{
    friend class HttpConnTest;
public:
    httpconn();
    ~httpconn();

    // Bind this object to an accepted socket and its peer address.
    void init(int fd_, sockaddr_in &addr_);
    // Read from the socket into read_buf; *Error receives the error code
    // reported by the buffer's ReadFd.
    ssize_t read(int *Error);
    // Send the pending response with writev; *Error receives errno on failure.
    ssize_t write(int *Error);
    // Parse the buffered request and prepare the response; false when there is
    // nothing to parse.
    bool process();
    // Bytes still waiting to be sent (same value as ToWriteBytes()).
    int NeedWriteByte();

    int get_fd() { return fd; }
    // sin_port is stored in network byte order; convert it so callers (and
    // the log lines) see the real port number.
    int get_port() { return ntohs(addr.sin_port); }
    // Dotted-quad peer address; inet_ntoa returns a pointer to a static
    // buffer, so copy the string if it must outlive the next call.
    const char *get_ip() { return inet_ntoa(addr.sin_addr); }
    sockaddr_in get_addr() { return addr; }

    // Bytes still waiting to be sent across both iovec entries.
    int ToWriteBytes() {
        return iov[0].iov_len + iov[1].iov_len;
    }
    bool IsKeepAlive() const {
        return request.IsKeepAlive();
    }

    static bool isET;                  // edge-triggered mode: loop I/O until it would block
    static std::atomic<int> user_cnt;  // number of open connections
    const static char *src_dir;        // document root

    // Unmap the response file and close the socket (only once).
    void Close();
private:
    int fd;
    struct sockaddr_in addr;
    bool isClose;
    buffer read_buf, write_buf;
    int iov_cnt;           // number of iov entries in use (1 or 2)
    struct iovec iov[2];   // [0] = headers in write_buf, [1] = mapped file
    httpresponse response;
    httprequest request;
};
#endif
httpconn.cpp
cpp
#include "httpconn.h"
// Definitions of the class-wide settings/counters declared in httpconn.
bool httpconn::isET = true; // edge-triggered I/O loops are enabled by default
std::atomic<int> httpconn::user_cnt; // open-connection counter (static storage, starts at zero)
const char *httpconn::src_dir = ""; // document root; empty until configured elsewhere
// Create an unconnected object: no socket, marked closed, nothing to send.
httpconn::httpconn()
{
    isClose = true;
    fd = -1;
    addr = {0};
    // ToWriteBytes()/NeedWriteByte()/write() read both iovec entries, so they
    // must hold defined values even before the first process() call.
    iov_cnt = 0;
    iov[0].iov_base = nullptr;
    iov[0].iov_len = 0;
    iov[1].iov_base = nullptr;
    iov[1].iov_len = 0;
}
// Destroying a connection closes it.
httpconn::~httpconn()
{
    Close();
}

// Release the mapped response file and, the first time only, close the
// socket and decrement the connection counter.
void httpconn::Close()
{
    response.unmapFile();
    if (isClose)
    {
        return;  // already closed: do not touch the counter again
    }
    isClose = true;
    user_cnt--;
    close(fd);
    LOG_INFO("Client[%d](%s:%d) quit, UserCount:%d", fd, get_ip(), get_port(), (int)user_cnt);
}
// Attach this object to a newly accepted connection: mark it open, count it,
// remember socket and peer address, and start with empty buffers.
void httpconn::init(int fd_, sockaddr_in &addr_)
{
    assert(fd_ != -1);

    isClose = false;
    ++user_cnt;

    fd = fd_;
    addr = addr_;

    write_buf.Reset_buffer();
    read_buf.Reset_buffer();

    LOG_INFO("Client[%d](%s:%d) in, UserCount:%d", fd, get_ip(), get_port(), (int)user_cnt);
}
ssize_t httpconn::read(int *Error)
{
ssize_t len = -1;
do
{
len = read_buf.ReadFd(fd, Error);
if (len <= 0)
{
break;
}
} while (isET);
return len;
}
// Send the pending response with writev.
//
// iov[0] covers the headers held in write_buf, iov[1] the mapped file. After
// every successful writev both entries are advanced by the number of bytes
// sent. In edge-triggered mode the loop continues until everything is sent
// or writev fails (e.g. EAGAIN); otherwise a single writev is issued.
//
// Returns the result of the last writev; on a result <= 0, *Error is set to
// errno.
ssize_t httpconn::write(int *Error) {
    ssize_t len = -1;
    do {
        len = writev(fd, iov, iov_cnt);
        if (len <= 0) {
            *Error = errno;
            break;
        }
        const size_t sent = static_cast<size_t>(len);  // len > 0 here
        if (sent > iov[0].iov_len) {
            // Headers are fully sent; the remainder came from the file.
            const size_t fromFile = sent - iov[0].iov_len;
            // iov_base is a void*: cast before doing pointer arithmetic.
            iov[1].iov_base = static_cast<char*>(iov[1].iov_base) + fromFile;
            iov[1].iov_len -= fromFile;
            if (iov[0].iov_len) {
                iov[0].iov_len = 0;
                write_buf.Reset_buffer();
            }
        } else {
            // Only (part of) the headers were sent.
            iov[0].iov_base = static_cast<char*>(iov[0].iov_base) + sent;
            iov[0].iov_len -= sent;
            write_buf.Retrieve(sent);
        }
        // Check for completion after the bookkeeping. Checking before it made
        // the loop issue one more zero-length writev and overwrite *Error with
        // a stale errno once everything had been sent.
        if (iov[0].iov_len + iov[1].iov_len == 0) {
            break;
        }
    } while (isET);
    return len;
}
// Turn the bytes in read_buf into a ready-to-send response.
//
// Parses the request (200 on success, 400 otherwise), lets the response
// object write status line and headers into write_buf, then points iov[0] at
// those headers and iov[1] at the mapped file, if any.
// Returns false when read_buf holds nothing to parse, true otherwise.
bool httpconn::process()
{
    request.init();
    if(read_buf.readable_Size() <= 0)
    {
        return false;
    }

    if(request.Parse(read_buf))
    {
        LOG_DEBUG("%s", request.Path().c_str());
        response.Init(200, src_dir, request.Path(), request.IsKeepAlive());
    }
    else
    {
        response.Init(400, src_dir, request.Path(), false);
    }
    response.MakeResponse(write_buf);

    // iov[0]: status line + headers.
    // NOTE(review): BeginPtr() is assumed to coincide with the read position
    // of write_buf here - confirm against buffer.h.
    iov[0].iov_base = write_buf.BeginPtr();
    iov[0].iov_len = write_buf.readable_Size();
    iov_cnt = 1;

    // iov[1]: mapped file body. Clear it first so that a response without a
    // file does not leave stale or uninitialised values for ToWriteBytes(),
    // NeedWriteByte() and write() to read.
    iov[1].iov_base = nullptr;
    iov[1].iov_len = 0;
    if(response.FileLen() > 0 && response.File()) {
        iov[1].iov_base = response.File();
        iov[1].iov_len = response.FileLen();
        iov_cnt = 2;
    }

    // The sizes are size_t; convert them to match the %d conversions.
    LOG_DEBUG("filesize:%d, %d to %d", static_cast<int>(response.FileLen()), iov_cnt,
              static_cast<int>(iov[0].iov_len + iov[1].iov_len));
    return true;
}
// Number of bytes still waiting to be sent: the remaining lengths of both
// iovec entries added together.
int httpconn::NeedWriteByte()
{
    const auto pending = iov[0].iov_len + iov[1].iov_len;
    return pending;
}
// httpconn::IsKeepAlive() is already defined inline in the class body as a
// const member function. The out-of-class, non-const definition that used to
// be here matched no declaration and could not compile, so it was removed.
httprequest
这个类使用到了有限状态机,为什么用状态机呢?
有限状态机是什么:
有限状态机(FSM)详解 - 知乎
HTTP 协议是**文本协议、基于行的**。一次 HTTP 请求长这样:
GET /index.html HTTP/1.1\r\n
Host: 127.0.0.1:9006\r\n
Connection: keep-alive\r\n
\r\n
(可选 BODY)
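数据可能分多次 `read()` 到达(TCP 是流式协议),所以需要用有限状态机来记录"现在解析到哪一行/哪一段了",避免因为半包/粘包而出错。需要注意:当前的 `httpconn::process()` 每次都会先调用 `request.init()` 把状态重置为 `REQUEST_LINE`,所以要真正支持跨多次 `read()` 续解析,还需要在请求没有解析完时保留状态机的状态。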
http协议具体可看这篇文章:
一篇文章搞懂http协议(超详细) - 知乎
状态定义
cpp
enum PARSE_STATE {
REQUEST_LINE, // parsing the request line
HEADERS, // parsing the request headers
BODY, // parsing the request body
FINISH, // parsing is complete
};
状态流转:
REQUEST_LINE ──(解析成功)──> HEADERS ──(空行)──> BODY ──> FINISH
Parse(buffer& buff)函数
这个是有限状态机的具体实现代码:
cpp
// Drive the state machine over the readable bytes of `buff`, one CRLF
// terminated line at a time. Returns false for an empty buffer or a
// malformed request line, true otherwise.
bool httprequest::Parse(buffer& buff) {
const char CRLF[] = "\r\n";
if(buff.readable_Size() <= 0) return false;
while(buff.readable_Size() && state_ != FINISH) {
// Look for \r\n in the unread data to cut out one line
const char* lineEnd = search(buff.readPos_ptrConst(),
buff.writePos_ptrConst(),
CRLF, CRLF + 2);
std::string line(buff.readPos_ptrConst(), lineEnd);
switch(state_) {
case REQUEST_LINE:
if(!ParseRequestLine(line)) return false;
ParsePath();
break;
case HEADERS:
ParseHeader(line);
// The line's own CRLF is still unread, so <= 2 bytes means no body follows
if(buff.readable_Size() <= 2) state_ = FINISH;
break;
case BODY:
ParseBody(line);
break;
default: break;
}
if(lineEnd == buff.writePos_ptrConst()) break; // no CRLF was found in the remaining data: stop here
buff.Retrieve(lineEnd - buff.readPos_ptr() + 2); // consume the line and its \r\n
}
return true;
}
核心思想就一句话:循环找 `\r\n`,把每行交给对应状态的解析函数处理,处理完更新状态。当剩余数据里已经找不到 `\r\n`(`lineEnd == writePos`)时,处理完这最后一段就退出循环,等下一次 read 拿到更多数据再继续。
三个解析函数
请求行解析函数:`ParseRequestLine`
HTTP 请求行格式是 `METHOD PATH HTTP/VERSION`,我用了**正则**来拆:
cpp
regex patten("^([^ ]*) ([^ ]*) HTTP/([^ ]*)$");
if(regex_match(line, subMatch, patten)) {
method_ = subMatch[1];
path_ = subMatch[2];
version_ = subMatch[3];
state_ = HEADERS;
return true;
}
正则的三个分组对应方法、路径、版本。简单粗暴但代价也不小(编译+匹配开销大),后续可以改成手写 `sscanf` 或 `find`。
请求头解析函数:`ParseHeader`
格式是 `Key: Value`,同样正则:
cpp
regex patten("^([^:]*): ?(.*)$");
if(regex_match(line, subMatch, patten)) {
header_[subMatch[1]] = subMatch[2];
}
else {
state_ = BODY; // 匹配不上说明是空行或 BODY
}
所有头部存到 `unordered_map<string,string> header_`,后面判断 `Connection: keep-alive` 之类的就方便了。
请求体解析函数:`ParseBody`
Body 处理完之后,会调用 `ParsePost()` 处理表单数据。
路径归一化:`ParsePath`
cpp
// Normalise the request path: "/" becomes "/index.html"; a path listed in
// DEFAULT_HTML gets ".html" appended; anything else is left as is.
void httprequest::ParsePath() {
if(path_ == "/") {
path_ = "/index.html";
} else {
for(auto &item: DEFAULT_HTML) {
if(item == path_) { path_ += ".html"; break; }
}
}
}
这套逻辑是给个人小项目用的,把 `/register` 自动补全成 `/register.html`,方便前端开发。
POST 表单解析:`ParseFromUrlencoded`
POST 表单的标准编码是 `application/x-www-form-urlencoded`,body 长这样:
username=Tom&password=123&nick=%E5%B0%8F%E6%98%8E
手写解析四种特殊字符:
| 字符 | 含义 | 处理 |
|---|---|---|
| `=` | key/value 分隔 | 切分当前 key |
| `&` | 键值对分隔 | 切分当前 value,存到 map |
| `+` | 空格 | 替换为 `' '` |
| `%XX` | URL 编码 | 调用 `ConverHex` 转成原字节 |
cpp
case '%':
num = ConverHex(body_[i+1]) * 16 + ConverHex(body_[i+2]);
body_[i] = num;
// 把后面两位左移覆盖掉
for(int j = i+1; j < (int)body_.size()-2; j++)
body_[j] = body_[j+2];
body_.resize(body_.size() - 2);
i--;
break;
`ConverHex` 负责把 `A-F / a-f / 0-9` 三类十六进制字符转换成 0~15 的数值(其中数字字符应返回 `ch - '0'`)。
用户校验:`UserVerify`
注册和登录最终都要查 MySQL,借助 `sqlconnpool`(连接池)取连接:
sqlconnRAII(&sql, sqlconnpool::Instance());
长连接判定
cpp
// True only when the request explicitly sends "Connection: keep-alive" and
// the parsed version is "1.1"; a request without a Connection header is
// treated as non-persistent.
bool httprequest::IsKeepAlive() const {
if(header_.count("Connection") == 1) {
return header_.find("Connection")->second == "keep-alive"
&& version_ == "1.1";
}
return false;
}
按协议,`HTTP/1.1` 默认就是长连接,只有**显式带 `Connection: close` 时才关闭**;不过这里的实现更保守:只有请求显式带 `Connection: keep-alive` 且版本为 `1.1` 时才按长连接处理,没有 `Connection` 头的请求一律当作短连接。
httpresponse 生成响应 + mmap 零拷贝
状态码与资源类型映射
响应模块维护了三张静态映射表:
cpp
CODE_PATH = { {400, "/400.html"}, {401, "/401.html"}, {404, "/404.html"} };
CODE_STATUS = { {200, "OK"}, {400, "Bad Request"}, {403, "Forbidden"}, {404, "Not Found"} };
SUFFIX_TYPE = { {".html", "text/html"}, {".png", "image/png"}, ... };
`CODE_PATH`:错误码 → 错误页面路径
`CODE_STATUS`:错误码 → 状态短语
`SUFFIX_TYPE`:文件后缀 → MIME 类型
核心函数:`MakeResponse`
cpp
void httpresponse::MakeResponse(buffer &buff) {
if(stat((dir + path).data(), &mmFileStat) == -1 || S_ISDIR(...)) code = 404;
else if((mmFileStat.st_mode & S_IROTH) == 0) code = 403;
else if(code == -1) code = 200;
ErrorHtml(); // 必要时把 path 改成 400.html / 404.html
AddStateLine(buff);
AddHearder(buff);
AddContent(buff);
}
五步走:
- **stat 检查**:根据权限、存在性决定状态码
- **错误页替换**:404 → `/404.html`
- **状态行**:`HTTP/1.1 200 OK\r\n`
- **响应头**:`Connection`、`Content-type`,`Content-length` 随后在 `AddContent` 里补上
- **响应体**:`mmap` 把文件映射进内存
`mmap` 零拷贝
这是性能优化的关键。`AddContent` 里:
cpp
int fd = open((dir + path).data(), O_RDONLY);
char *mmRef = (char*)mmap(0, mmFileStat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if(mmRef == MAP_FAILED) { close(fd); ErrorContent(buff); return; }
mmFile = mmRef;
close(fd);
buff.Append("Content-length: " + std::to_string(mmFileStat.st_size) + "\r\n\r\n");
把文件 mmap 到进程的虚拟内存后,**文件内容不进入用户缓冲区**,后面 `writev` 时直接把 mmap 指针交给内核,避免了 `read()` + `write()` 两次拷贝。`mmFile` 字段保存映射地址,析构时 `munmap` 释放。
`ErrorContent` —— open / mmap 失败的兜底
如果 open 或 mmap 失败(文件突然消失、被截断等),就生成一个简单的 HTML 错误页写到 `write_buf`:
cpp
body += "<html><title>Error</title>";
body += "<body bgcolor=\"ffffff\">";
body += std::to_string(code) + " : " + status + "\n";
body += "<p>File NotFound!</p>";
`httpconn` 连接 + IO 粘合层
作用
`httpconn` 把一个 TCP 连接的所有东西打包:
套接字 `fd`、对端地址
自定义双缓冲 `read_buf / write_buf`
`iov[]` 用于 `writev`
一个 `httprequest` + 一个 `httpresponse`
静态成员:在线连接数 `user_cnt`、资源根目录 `src_dir`、是否 ET 模式 `isET`
`read()` —— ET 模式必须读到 EAGAIN
cpp
// Read from the socket into read_buf. With isET set, keep reading until a
// read returns <= 0; otherwise read once. Returns the last read's result.
ssize_t httpconn::read(int *Error) {
ssize_t len = -1;
do {
len = read_buf.ReadFd(fd, Error);
if (len <= 0) break;
} while (isET);
return len;
}
ET(边缘触发)下,**一次 epoll 唤醒只通知一次**,必须循环 `read` 直到返回 `EAGAIN`(`len <= 0` 且 `Error=EAGAIN`),否则数据会一直留在内核缓冲区没人读。
`process()` 业务主入口
cpp
// Parse the buffered request, build the response and set up the iovec array
// for writev. Returns false when read_buf is empty, true otherwise.
bool httpconn::process() {
request.init();
if(read_buf.readable_Size() <= 0) return false;
else if(request.Parse(read_buf))
response.Init(200, src_dir, request.Path(), request.IsKeepAlive());
else
response.Init(400, src_dir, request.Path(), false);
response.MakeResponse(write_buf);
// iov[0] = write_buf (status line and headers)
iov[0].iov_base = write_buf.BeginPtr();
iov[0].iov_len = write_buf.readable_Size();
iov_cnt = 1;
// iov[1] = file content exposed through mmap
if(response.FileLen() > 0 && response.File()) {
iov[1].iov_base = response.File();
iov[1].iov_len = response.FileLen();
iov_cnt = 2;
}
return true;
}
注意**响应头写到 `write_buf`、响应体走 mmap**,所以 `iov0` 是 buffer、`iov1` 是 mmap 区。
`write()` —— `writev` 零拷贝发送
cpp
// Send the pending response with writev and advance both iovec entries by
// the number of bytes sent. With isET set the loop repeats until everything
// is sent or writev returns <= 0 (then *Error receives errno).
ssize_t httpconn::write(int *Error) {
ssize_t len = -1;
do {
len = writev(fd, iov, iov_cnt);
if (len <= 0) { *Error = errno; break; }
if (iov[0].iov_len + iov[1].iov_len == 0) break;
if (len > iov[0].iov_len) {
// iov[0] was sent completely; the rest of `len` belongs to iov[1]
iov[1].iov_base = (char*)iov[1].iov_base + (len - iov[0].iov_len);
iov[1].iov_len -= (len - iov[0].iov_len);
iov[0].iov_len = 0;
write_buf.Reset_buffer();
} else {
// only part (or exactly all) of iov[0] was sent
iov[0].iov_base = (char*)iov[0].iov_base + len;
iov[0].iov_len -= len;
write_buf.Retrieve(len);
}
} while (isET);
return len;
}
`writev` 是关键,一次系统调用同时发送 buffer + mmap 两段内存,避免多次 `write()` + 用户态拼接。
返回的 `len` 是实际发送字节数,要回推更新两个 `iov`:
`len > iov0.iov_len`:iov0 整段发完,iov1 减掉剩余
`len <= iov0.iov_len`:只发了一部分 iov0
ET 模式下还是得循环到 `EAGAIN`。
`Close()` —— 资源释放
cpp
// Release the mapped file and, only the first time, close the socket and
// decrement the connection counter.
void httpconn::Close() {
response.unmapFile(); // munmap
if (!isClose) {
isClose = true;
user_cnt--;
close(fd);
}
}
幂等关闭,避免重复 `--user_cnt` 引发计数错乱。
HTTP 模块完整工作流
把三个类串起来看一次完整流程:
1. 客户端 connect,触发 epoll
2. 接收新连接 → httpconn::init(fd, addr)
3. 客户端发来数据 → epoll 唤醒(IN)
4. httpconn::read() → 循环 read 完数据
5. httpconn::process()
   ├─ request.Parse(read_buf) # 状态机解析
   ├─ response.MakeResponse(write_buf) # 写头、mmap 文件
   └─ 设置 iov0=write_buf, iov1=mmap
6. httpconn::write() → writev(fd, iov) 发送
7. epoll 唤醒(OUT)→ 继续 write 直到 EAGAIN
8. 若长连接 → 跳回第 3 步;否则 → httpconn::Close()
总结
整个 HTTP 模块的核心其实就是三件事:
有限状态机:解决 TCP 粘包/半包带来的解析难题
正则:简洁地拆出 HTTP 文本字段
`mmap` + `writev`:把零拷贝做到极致,少一次用户态 → 内核态的数据搬运
`httpconn` 作为胶水层,把 `httprequest` 和 `httpresponse` 串起来,再加上 ET 模式下的循环 IO,构成了一个轻量但完整的高性能 HTTP 处理单元。