linux编程----网络(http协议)

1.相关概念

www: 万维网(web)

一个大规模、联机式的信息储存所

URL:统一资源定位符;

URL格式: <协议>://<主机>:<端口>/<路径>  (例如 http://news.sohu.com:80/index.html)

2.http协议

(1)定义

HTTP(HyperText Transfer Protocol):超文本传输协议;

一种面向事务的应用层协议;

端口号:80, 备用端口号:8080;

(2)HTTP通信过程

基于传输层的TCP协议;

(3)HTTP的报文格式

keep-alive (长连接):请求响应之后,连接保持一段时间;

close(短连接):请求响应之后,立即断开连接;

(4)常见请求报文的方法

(5)常见状态码

3.代码练习

搜狐网站爬虫

代码(C语言):
#include "head.h"

/* TCP port the client connects to (converted with htons() before use). */
#define SER_PORT  80
/*
 * IPv4 address of the server in dotted-decimal text form; it is parsed with
 * inet_addr() when the connection is set up.
 * NOTE(review): hard-coded address, presumably what news.sohu.com resolved to
 * when this was written -- confirm it is still valid before relying on it.
 */
#define SER_IP    "117.34.49.212"

/*
 * Open a TCP connection to SER_IP:SER_PORT.
 *
 * Returns the connected socket descriptor on success. On failure the reason
 * is reported with perror() and -1 is returned; no descriptor is left open.
 * The caller owns the returned descriptor and is responsible for close()ing it.
 */
int create_tcp_connect()
{
    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd < 0)
    {
        perror("socket error");
        return -1;
    }

    /* Zero the whole structure first so the sin_zero padding is not stack garbage. */
    struct sockaddr_in sockaddr = {0};
    sockaddr.sin_family = AF_INET;
    sockaddr.sin_port = htons(SER_PORT);
    sockaddr.sin_addr.s_addr = inet_addr(SER_IP);

    if (connect(sockfd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0)
    {
        /* Report first: close() could overwrite errno. */
        perror("connect error");
        /* Release the descriptor so a failed connect does not leak it. */
        close(sockfd);
        return -1;
    }
    return sockfd;
}

/*
 * Send a fixed HTTP/1.1 GET request for "/" (Host: news.sohu.com,
 * Connection: close) over the connected socket sockfd.
 *
 * send() on a stream socket may accept fewer bytes than requested, so the
 * call is repeated until the whole request has been handed to the kernel.
 *
 * Returns 0 once the full request has been sent, or -1 if send() fails
 * (the reason is reported with perror()).
 */
int send_http_request(int sockfd)
{
    /* String literals are read-only; keep the pointer const-qualified. */
    const char *buf = "GET / HTTP/1.1\r\n"
                      "Host: news.sohu.com\r\n"
                      "User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0\r\n"
                      "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8\r\n"
                      "Accept-Language: en-US,en;q=0.5\r\n"
                      "Connection: close\r\n"
                      "\r\n";
    size_t total = strlen(buf);
    size_t sent = 0;

    while (sent < total)
    {
        ssize_t ret = send(sockfd, buf + sent, total - sent, 0);
        if (ret < 0)
        {
            perror("send error");
            return -1;
        }
        /* Advance past whatever was accepted and retry with the remainder. */
        sent += (size_t)ret;
    }
    return 0;
}

/*
 * Read everything the peer sends on sockfd and copy it to standard output.
 *
 * The loop ends when recv() returns 0, i.e. the peer has closed the
 * connection; "server off" is printed at that point.
 *
 * Returns 0 after the peer closes the connection, or -1 if recv() or
 * write() fails (the reason is reported with perror()).
 */
int recv_http_response(int sockfd)
{
    char buf[1024];

    while (1)
    {
        ssize_t cont = recv(sockfd, buf, sizeof(buf), 0);
        if (cont < 0)
        {
            perror("recv error");
            return -1;
        }
        if (cont == 0)
        {
            printf("server off\n");
            break;
        }

        /* write() may be short (e.g. stdout is a pipe); push out every byte received. */
        ssize_t done = 0;
        while (done < cont)
        {
            ssize_t written = write(STDOUT_FILENO, buf + done, (size_t)(cont - done));
            if (written < 0)
            {
                perror("write error");
                return -1;
            }
            done += written;
        }
    }
    return 0;
}

/*
 * Program entry point: connect to the server, send the HTTP request and
 * print the response to standard output.
 *
 * Returns 0 when every step succeeds and 1 when connecting, sending or
 * receiving fails. Command-line arguments are not used.
 */
int main(int argc, char const *argv[])
{
    (void)argc;
    (void)argv;

    int sockfd = create_tcp_connect();
    if (sockfd < 0)
    {
        /* Nothing to send on or close; the helper already reported the error. */
        return 1;
    }

    int status = 0;
    /* Skip the receive step if the request could not be sent. */
    if (send_http_request(sockfd) < 0 || recv_http_response(sockfd) < 0)
    {
        status = 1;
    }

    close(sockfd);
    return status;
}