Linux 文本处理三剑客(grep, sed, awk)核心用法与实战

Linux 文本处理三剑客(grep, sed, awk)核心用法与实战

1. 文本处理三剑客概述

1.1 工具定位与适用场景

graph TB
    A[文本处理需求] --> B{处理类型}
    B -->|模式搜索| C[grep]
    B -->|流编辑| D[sed]
    B -->|报表生成| E[awk]
    C --> F[快速过滤]
    D --> G[批量替换]
    E --> H[数据提取]
    F --> I[结果输出]
    G --> I
    H --> I
    style A fill:#1e3a5f,color:#ffffff
    style B fill:#4a1e5f,color:#ffffff
    style C fill:#1e5f3a,color:#ffffff
    style D fill:#1e5f3a,color:#ffffff
    style E fill:#1e5f3a,color:#ffffff
    style F fill:#5f3a1e,color:#ffffff
    style G fill:#5f3a1e,color:#ffffff
    style H fill:#5f3a1e,color:#ffffff
    style I fill:#1e3a5f,color:#ffffff

1.2 创建测试数据文件

创建基础测试数据文件:create_test_data.sh

bash 复制代码
#!/bin/bash

# Create the sample data files (user records, access log, config file,
# multi-language text, sales CSV) used by every other tutorial script.
set -e

echo "=== 创建文本处理测试数据 ==="

# User records: id,name,age,title,city,salary — comma-separated, no header
cat > users.txt << 'EOF'
1,张三,25,工程师,北京,50000
2,李四,30,经理,上海,80000
3,王五,28,设计师,广州,60000
4,赵六,35,总监,深圳,120000
5,钱七,22,实习生,杭州,30000
6,孙八,40,顾问,成都,90000
7,周九,26,开发,西安,55000
8,吴十,33,产品,武汉,70000
9,郑十一,29,测试,南京,52000
10,王十二,31,运维,长沙,58000
EOF

# Web-server access log, combined log format (8 sample requests)
cat > access.log << 'EOF'
192.168.1.100 - - [10/Oct/2023:10:30:01 +0800] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0"
192.168.1.101 - - [10/Oct/2023:10:30:02 +0800] "POST /api/login HTTP/1.1" 401 567 "https://example.com" "Mozilla/5.0"
192.168.1.102 - - [10/Oct/2023:10:30:03 +0800] "GET /products.html HTTP/1.1" 200 7890 "https://example.com" "Chrome/91.0"
192.168.1.100 - - [10/Oct/2023:10:30:04 +0800] "GET /images/logo.png HTTP/1.1" 304 0 "https://example.com" "Mozilla/5.0"
192.168.1.103 - - [10/Oct/2023:10:30:05 +0800] "PUT /api/users/1 HTTP/1.1" 403 234 "https://example.com" "Firefox/89.0"
192.168.1.104 - - [10/Oct/2023:10:30:06 +0800] "GET /contact.html HTTP/1.1" 200 3456 "https://example.com" "Safari/14.0"
192.168.1.101 - - [10/Oct/2023:10:30:07 +0800] "DELETE /api/products/5 HTTP/1.1" 204 0 "https://example.com" "Mozilla/5.0"
192.168.1.105 - - [10/Oct/2023:10:30:08 +0800] "GET /about.html HTTP/1.1" 500 123 "https://example.com" "Chrome/92.0"
EOF

# key=value application config with '#' comment lines and blank sections
cat > config.txt << 'EOF'
# 数据库配置
database.host=localhost
database.port=3306
database.name=myapp
database.user=admin
database.password=secret123

# 应用配置
app.name=MyApplication
app.version=1.0.0
app.port=8080
app.debug=true

# 日志配置
log.level=INFO
log.file=/var/log/app.log
log.max_size=100MB

# 功能开关
feature.auth=true
feature.cache=false
feature.export=true
EOF

# Mixed Chinese/English/French sentences, one line each
cat > multilang.txt << 'EOF'
Hello world! 你好世界! Bonjour le monde!
This is a test. 这是一个测试。 C'est un test.
Programming is fun. 编程很有趣。 La programmation est amusante.
Linux is powerful. Linux很强大。 Linux est puissant.
Open source software. 开源软件。 Logiciel open source.
EOF

# Sales records as CSV with a header row
cat > sales.csv << 'EOF'
Date,Product,Category,Region,Sales,Quantity
2023-10-01,Laptop,Electronics,North,50000,10
2023-10-01,Phone,Electronics,South,30000,15
2023-10-02,Desk,Furniture,East,15000,5
2023-10-02,Chair,Furniture,West,8000,8
2023-10-03,Monitor,Electronics,North,20000,4
2023-10-03,Keyboard,Electronics,South,5000,10
2023-10-04,Table,Furniture,East,12000,3
2023-10-04,Books,Education,West,3000,30
EOF

echo "测试数据文件创建完成:"
echo "  users.txt      - 用户数据"
echo "  access.log     - 访问日志"
echo "  config.txt     - 配置文件"
echo "  multilang.txt  - 多语言文本"
echo "  sales.csv      - 销售数据"

2. grep - 文本搜索专家

2.1 grep 基础用法

创建 grep 基础教程:grep_basics.sh

bash 复制代码
#!/bin/bash

# grep basics tutorial: demonstrates the most common grep options against
# the sample files produced by create_test_data.sh (users.txt, access.log,
# config.txt, multilang.txt).

echo "=== grep 基础用法 ==="

# 1. Plain substring search
echo -e "\n1. 基础搜索:"
echo "搜索包含'工程师'的行:"
grep '工程师' users.txt

# 2. Case-insensitive search (-i)
echo -e "\n2. 忽略大小写搜索:"
echo "搜索包含'get'的行(忽略大小写):"
grep -i 'get' access.log

# 3. Show line numbers (-n)
echo -e "\n3. 显示行号:"
echo "搜索'北京'并显示行号:"
grep -n '北京' users.txt

# 4. Invert the match (-v)
echo -e "\n4. 反向搜索(不包含指定内容):"
echo "搜索不包含'GET'的行:"
grep -v 'GET' access.log

# 5. Count matching lines (-c)
echo -e "\n5. 统计匹配数量:"
echo "统计状态码为200的行数:"
# Surround the code with spaces so only the standalone status-code field
# matches — a bare '200' would also hit byte counts or other numbers that
# merely contain the digits 200.
grep -c ' 200 ' access.log

# 6. List only the names of matching files (-l)
echo -e "\n6. 显示匹配文件名:"
echo "在多个文件中搜索:"
grep -l 'admin' *.txt

# 7. Recursive search (-r)
echo -e "\n7. 递归目录搜索:"
echo "在当前目录递归搜索'localhost':"
grep -r 'localhost' . 2>/dev/null || echo "搜索完成"

# 8. Whole-word match (-w)
echo -e "\n8. 完整单词匹配:"
echo "搜索完整单词'test':"
grep -w 'test' multilang.txt

# 9. Show 2 lines of context around each match (-C)
echo -e "\n9. 显示上下文:"
echo "搜索'500'并显示前后2行:"
grep -C 2 '500' access.log

# 10. Print only the matching part of the line (-o)
echo -e "\n10. 只显示匹配部分:"
echo "只显示匹配的IP地址:"
grep -o '[0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+' access.log

# 11. Extended regular expressions (-E)
echo -e "\n11. 扩展正则表达式:"
echo "使用扩展正则搜索状态码:"
grep -E '(200|404|500)' access.log

# 12. Fixed-string search (-F): dots are literal, no regex interpretation
echo -e "\n12. 固定字符串搜索:"
echo "搜索固定字符串'1.0.0':"
grep -F '1.0.0' config.txt

2.2 grep 高级用法与正则表达式

创建 grep 高级教程:grep_advanced.sh

bash 复制代码
#!/bin/bash

# grep advanced usage: regular-expression features demonstrated against a
# throwaway regex_test.txt file that this script creates and removes.

echo "=== grep 高级用法 ==="

# Build the throwaway test file with emails, phones, dates, IPs, URLs, etc.
cat > regex_test.txt << 'EOF'
email1: john.doe@example.com
email2: jane_smith123@company.co.uk
phone1: +1-555-123-4567
phone2: (555) 987-6543
date1: 2023-10-15
date2: 10/15/2023
ip1: 192.168.1.1
ip2: 10.0.0.255
url1: https://www.example.com/path
url2: http://localhost:8080/api/v1/users
html: <div class="container">Content</div>
json: {"name": "John", "age": 30, "active": true}
credit_card: 4111-1111-1111-1111
ssn: 123-45-6789
EOF

# 1. Basic regular expressions
echo -e "\n1. 基础正则表达式:"

echo "匹配邮箱地址:"
grep -E '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' regex_test.txt

echo -e "\n匹配IP地址:"
grep -E '([0-9]{1,3}\.){3}[0-9]{1,3}' regex_test.txt

echo -e "\n匹配日期(YYYY-MM-DD):"
grep -E '[0-9]{4}-[0-9]{2}-[0-9]{2}' regex_test.txt

# 2. Character classes
echo -e "\n2. 字符类:"

echo "匹配数字:"
grep '[0-9]' regex_test.txt | head -3

echo -e "\n匹配小写字母:"
grep '[a-z]' regex_test.txt | head -3

echo -e "\n匹配单词字符:"
grep '[[:alnum:]]' regex_test.txt | head -3

# 3. Quantifiers
echo -e "\n3. 量词:"

echo "匹配3位数字:"
grep -E '[0-9]{3}' regex_test.txt

echo -e "\n匹配1个或多个数字:"
grep -E '[0-9]+' regex_test.txt | head -3

echo -e "\n匹配0个或多个字母:"
grep -E '[a-z]*' regex_test.txt | head -3

# 4. Grouping and back-references
# NOTE(review): \1 back-references and \b/\w inside -E are GNU grep
# extensions — confirm GNU grep if portability matters.
echo -e "\n4. 分组和引用:"

echo "匹配重复单词:"
echo "hello hello world test test example" | grep -E '(\b\w+\b) \1'

# 5. Anchors
echo -e "\n5. 锚点:"

echo "匹配以'email'开头的行:"
grep '^email' regex_test.txt

echo -e "\n匹配以'.com'结尾的行:"
grep '\.com$' regex_test.txt

echo -e "\n匹配完整单词'json':"
grep '\bjson\b' regex_test.txt

# 6. Alternation
echo -e "\n6. 交替匹配:"

echo "匹配'http'或'https':"
grep -E 'https?' regex_test.txt

echo -e "\n匹配多种日期格式:"
grep -E '([0-9]{4}-[0-9]{2}-[0-9]{2}|[0-9]{2}/[0-9]{2}/[0-9]{4})' regex_test.txt

# 7. Look-around assertions (need PCRE support, i.e. grep -P; guarded below)
echo -e "\n7. 环视断言(需要PCRE):"

echo "匹配后面跟着'@'的数字:"
grep -P '[0-9]+(?=@)' regex_test.txt 2>/dev/null || echo "PCRE不支持,使用其他方法"

# 8. Complex pattern matching
echo -e "\n8. 复杂模式匹配:"

echo "提取URL中的域名:"
grep -oE 'https?://[^/]+' regex_test.txt

echo -e "\n提取JSON字段值:"
grep -oE '"name": "[^"]+"' regex_test.txt

# 9. Combining grep invocations (-e accumulates patterns)
echo -e "\n9. grep 组合技巧:"

echo "搜索多个模式:"
grep -e 'email' -e 'phone' regex_test.txt

echo -e "\n排除多个模式:"
grep -v -e 'email' -e 'phone' regex_test.txt

echo -e "\n管道组合使用:"
grep 'example' regex_test.txt | grep -v 'email'

# 10. Performance tips: -F (fixed strings) avoids regex-engine overhead
echo -e "\n10. 性能优化技巧:"

echo "使用固定字符串加速:"
time grep -F 'example.com' regex_test.txt

echo -e "\n使用简单字符类:"
time grep '[0-9]' regex_test.txt

# Remove the throwaway file
rm -f regex_test.txt

echo -e "\n=== grep 高级用法演示完成 ==="

2.3 grep 实战案例

创建 grep 实战脚本:grep_practical.sh

bash 复制代码
#!/bin/bash

# grep practical recipes: log analysis, config extraction, data mining,
# code inspection and basic system-administration examples. Relies on the
# sample files from create_test_data.sh.

echo "=== grep 实战案例 ==="

# Case 1: log analysis
echo -e "\n案例1: 日志分析"

echo "1. 查找错误请求:"
grep -E '(404|500|403)' access.log

echo -e "\n2. 统计各状态码出现次数:"
# The dot in "1.1" is escaped so it only matches a literal dot.
grep -oE 'HTTP/1\.1" [0-9]{3}' access.log | awk '{print $2}' | sort | uniq -c | sort -rn

echo -e "\n3. 查找特定IP的访问记录:"
# Anchor at line start and escape the dots so e.g. 192.168.1.1007 or
# 192x168y1z100 cannot match.
grep -E '^192\.168\.1\.100 ' access.log

echo -e "\n4. 搜索特定时间段的日志:"
grep '10/Oct/2023:10:30:0[2-5]' access.log

# Case 2: config-file handling
echo -e "\n案例2: 配置文件处理"

echo "1. 提取所有配置项(排除注释):"
grep -v '^#' config.txt | grep '='

echo -e "\n2. 查找数据库相关配置:"
grep -i 'database' config.txt

echo -e "\n3. 提取配置值:"
grep 'app.port' config.txt | grep -oE '[0-9]+'

echo -e "\n4. 查找启用的功能:"
grep 'feature.' config.txt | grep 'true'

# Case 3: data extraction
echo -e "\n案例3: 数据提取"

echo "1. 提取所有用户名:"
grep -oE ',[^,]+,' users.txt | grep -oE '[^,]+' | grep -vE '^[0-9]+$' | head -5

echo -e "\n2. 查找高薪员工(薪资>70000):"
grep -E ',[0-9]{5,6}$' users.txt | awk -F, '$6 > 70000'

echo -e "\n3. 统计各城市员工数量:"
grep -oE '[^,]+,' users.txt | grep -oE '[^,]+' | grep -vE '^[0-9]+$' | sort | uniq -c

# Case 4: code inspection (simulated)
echo -e "\n案例4: 代码分析(模拟)"

# Generate a small Python file to grep through (removed at the end)
cat > sample_code.py << 'EOF'
#!/usr/bin/env python3
"""
示例代码文件
"""

import sys
import os
from typing import List

def calculate_sum(numbers: List[int]) -> int:
    """计算数字列表的总和"""
    total = 0
    for num in numbers:
        total += num
    return total

def read_file(filename: str) -> str:
    """读取文件内容"""
    try:
        with open(filename, 'r') as f:
            return f.read()
    except FileNotFoundError:
        print(f"错误: 文件 {filename} 不存在")
        return ""

class DataProcessor:
    """数据处理类"""
    
    def __init__(self, data: List[str]):
        self.data = data
    
    def process(self) -> List[str]:
        """处理数据"""
        result = []
        for item in self.data:
            # TODO: 实现处理逻辑
            processed = item.strip().upper()
            result.append(processed)
        return result

if __name__ == "__main__":
    # FIXME: 需要添加命令行参数解析
    numbers = [1, 2, 3, 4, 5]
    print(f"总和: {calculate_sum(numbers)}")
EOF

echo "1. 查找函数定义:"
grep -E '^def ' sample_code.py

echo -e "\n2. 查找类定义:"
grep -E '^class ' sample_code.py

echo -e "\n3. 查找TODO和FIXME注释:"
grep -E '(TODO|FIXME)' sample_code.py

echo -e "\n4. 查找导入语句:"
grep -E '^import|^from' sample_code.py

# Case 5: system administration
echo -e "\n案例5: 系统管理"

echo "1. 查找进程:"
ps aux | grep 'bash' | head -5

echo -e "\n2. 检查服务状态:"
systemctl list-units | grep 'running' | head -5

echo -e "\n3. 查找大文件:"
find /tmp -type f -size +1M 2>/dev/null | head -5

# Case 6: network analysis
echo -e "\n案例6: 网络分析"

echo "1. 分析网络连接:"
netstat -tulpn 2>/dev/null | grep 'LISTEN' | head -5

# Remove the generated sample file
rm -f sample_code.py

echo -e "\n=== grep 实战案例演示完成 ==="

3. sed - 流编辑器大师

3.1 sed 基础用法

创建 sed 基础教程:sed_basics.sh

bash 复制代码
#!/bin/bash

# sed basics tutorial: substitution, deletion, printing, insert/append/change
# and the other core commands, run against a throwaway sed_test.txt file.

echo "=== sed 基础用法 ==="

# Build the throwaway test file
cat > sed_test.txt << 'EOF'
Hello World
This is a test file.
Welcome to Linux sed tutorial.
Python programming is fun.
Java is also popular.
We are learning text processing.
EOF

# 1. Substitute the first occurrence on each line
echo -e "\n1. 基本替换:"
echo "将'is'替换为'IS':"
sed 's/is/IS/' sed_test.txt

# 2. Global substitution (g flag = all occurrences per line)
echo -e "\n2. 全局替换:"
echo "全局将'is'替换为'IS':"
sed 's/is/IS/g' sed_test.txt

# 3. Substitute on one addressed line only
echo -e "\n3. 指定行替换:"
echo "只在第2行替换:'is' -> 'IS':"
sed '2s/is/IS/g' sed_test.txt

# 4. Substitute within a line range
echo -e "\n4. 行范围替换:"
echo "在第2-4行替换:'is' -> 'IS':"
sed '2,4s/is/IS/g' sed_test.txt

# 5. Delete lines matching a pattern (d)
echo -e "\n5. 删除行:"
echo "删除包含'test'的行:"
sed '/test/d' sed_test.txt

# 6. Print one line (-n suppresses default output)
echo -e "\n6. 打印特定行:"
echo "打印第3行:"
sed -n '3p' sed_test.txt

# 7. Multiple commands via repeated -e
echo -e "\n7. 多命令执行:"
echo "替换并删除:"
sed -e 's/is/IS/g' -e '/Java/d' sed_test.txt

# 8. In-place editing; -i.bak writes a .bak backup first (GNU syntax)
echo -e "\n8. 原位编辑(创建备份):"
cp sed_test.txt sed_test_backup.txt
sed -i.bak 's/Linux/UNIX/g' sed_test_backup.txt
echo "原文件:"
cat sed_test.txt | head -1
echo "修改后:"
cat sed_test_backup.txt | head -1

# 9. Insert a line before an address (i)
echo -e "\n9. 插入行:"
echo "在第2行前插入新行:"
sed '2i\---插入的行---' sed_test.txt

# 10. Append a line after an address (a)
echo -e "\n10. 追加行:"
echo "在第2行后追加新行:"
sed '2a\---追加的行---' sed_test.txt

# 11. Change (replace) an entire line (c)
echo -e "\n11. 修改行:"
echo "修改第3行:"
sed '3c\---修改的行内容---' sed_test.txt

# 12. Write matching lines to another file (w)
echo -e "\n12. 写入文件:"
echo "将包含'Python'的行写入新文件:"
sed -n '/Python/w python_lines.txt' sed_test.txt
cat python_lines.txt

# 13. Read another file's contents after an address (r)
echo -e "\n13. 读取文件:"
echo "在第3行后读取其他文件内容:"
sed '3r sed_test_backup.txt' sed_test.txt | head -10

# 14. Transliterate characters one-for-one (y)
echo -e "\n14. 转换字符:"
echo "转换大小写:"
echo "hello world" | sed 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'

# Remove throwaway files, including the .bak created by -i.bak
rm -f sed_test.txt sed_test_backup.txt sed_test_backup.txt.bak python_lines.txt

echo -e "\n=== sed 基础用法演示完成 ==="

3.2 sed 高级用法

创建 sed 高级教程:sed_advanced.sh

bash 复制代码
#!/bin/bash

# sed advanced usage: hold space, branching, multi-line handling and sed
# scripts, run against a throwaway record-style test file that this script
# creates and removes.

echo "=== sed 高级用法 ==="

# Build the throwaway test file (three contact records separated by blanks)
cat > advanced_sed_test.txt << 'EOF'
Name: John Doe
Age: 30
Email: john@example.com
Phone: 123-456-7890
Address: 123 Main St, City, State 12345

Name: Jane Smith
Age: 25
Email: jane.smith@company.com
Phone: (555) 987-6543
Address: 456 Oak Ave, Another City, State 67890

Name: Bob Johnson
Age: 35
Email: bob_j@test.org
Phone: 111.222.3333
Address: 789 Pine Rd, Different City, State 54321
EOF

# 1. Pattern space vs hold space (h copies, x swaps)
echo -e "\n1. 模式空间和保持空间:"

echo "交换模式空间和保持空间:"
sed -n '1h; 2x; p' advanced_sed_test.txt

# 2. Negated addresses
echo -e "\n2. 分支和跳转:"

echo "跳过包含'Email'的行:"
sed -n '/Email/!p' advanced_sed_test.txt

# 3. Multi-line processing: accumulate the whole file, then join
echo -e "\n3. 多行处理:"

echo "将多行合并为一行:"
sed ':a; N; $!ba; s/\n/ /g' advanced_sed_test.txt

# 4. Advanced substitution
echo -e "\n4. 高级替换:"

echo "使用分组和反向引用:"
echo "123-456-7890" | sed -E 's/([0-9]{3})-([0-9]{3})-([0-9]{4})/(\1) \2-\3/'

echo -e "\n条件替换:"
sed '/Phone/ s/[0-9]/X/g' advanced_sed_test.txt

# 5. Numbering lines
echo -e "\n5. 循环和条件:"

echo "编号非空行:"
# '=' emits the line number on its own line before each non-empty line; the
# second sed joins each number with its text. (The previous '/^$/!{=;d}'
# version deleted the text and printed bare numbers only.)
sed '/./=' advanced_sed_test.txt | sed '/./N; s/\n/ /'

# 6. File clean-up idioms
echo -e "\n6. 文件处理技巧:"

echo "删除文件中的空白行:"
sed '/^$/d' advanced_sed_test.txt

echo -e "\n删除行尾空格:"
sed 's/[[:space:]]*$//' advanced_sed_test.txt

# 7. Complex pattern matching with -n + s///p
echo -e "\n7. 复杂模式匹配:"

echo "提取邮箱地址:"
sed -nE 's/.*Email: ([^ ]+).*/\1/p' advanced_sed_test.txt

echo -e "\n提取电话号码:"
sed -nE 's/.*Phone: ([0-9().-]+).*/\1/p' advanced_sed_test.txt

# 8. Range operations (numeric and pattern addresses)
echo -e "\n8. 范围操作:"

echo "处理特定记录(第1-6行):"
sed -n '1,6p' advanced_sed_test.txt

echo -e "\n从模式开始到文件结束:"
sed -n '/Jane Smith/,$p' advanced_sed_test.txt

# 9. Labels and conditional jumps (t branches while the s/// succeeded)
echo -e "\n9. 标签和跳转:"

echo "使用标签实现循环:"
# A ';' must terminate the label; ':loop s/a/X/' would make sed read the
# whole rest of the expression as the label name.
echo "aaa bbb ccc" | sed ':loop; s/a/X/; t loop'

# 10. Hold-space operations (h = copy to hold, g = fetch from hold)
echo -e "\n10. 保持空间操作:"

echo "复制模式空间到保持空间:"
sed -n '1h; 1p; 2g; 2p' advanced_sed_test.txt

# 11. A multi-command sed script executed with -f
echo -e "\n11. 流控制脚本:"

cat > complex_script.sed << 'EOF'
# 复杂sed脚本示例
/Name:/ {
    h  # 复制到保持空间
    d  # 删除模式空间
}
/Email:/ {
    G  # 追加保持空间到模式空间
    s/\n/ - /  # 替换换行符
    p  # 打印
}
EOF

echo "执行复杂sed脚本:"
sed -n -f complex_script.sed advanced_sed_test.txt

# 12. Practical examples
echo -e "\n12. 实际应用案例:"

echo "格式化电话号码:"
sed -E 's/Phone: ([0-9]{3})[-.]([0-9]{3})[-.]([0-9]{4})/Phone: (\1) \2-\3/g' advanced_sed_test.txt

echo -e "\n提取姓名和邮箱:"
sed -nE '/Name:/{s/Name: //;h}; /Email:/{s/Email: //;G;s/\n/ : /p}' advanced_sed_test.txt

# Remove throwaway files
rm -f advanced_sed_test.txt complex_script.sed

echo -e "\n=== sed 高级用法演示完成 ==="

3.3 sed 实战案例

创建 sed 实战脚本:sed_practical.sh

bash 复制代码
#!/bin/bash

# sed practical recipes: log scrubbing, config edits, CSV cleanup, code
# refactoring and batch file processing. Relies on the sample files from
# create_test_data.sh.

echo "=== sed 实战案例 ==="

# Case 1: log processing
echo -e "\n案例1: 日志处理"

echo "1. 提取特定时间段的日志:"
sed -n '/10\/Oct\/2023:10:30:0[2-5]/p' access.log

echo -e "\n2. 删除调试信息:"
sed '/DEBUG/d' access.log

echo -e "\n3. 替换敏感信息:"
# Dots escaped so '192x168y1z...'-style strings cannot match by accident.
sed 's/192\.168\.1\.[0-9]*/XXX.XXX.XXX.XXX/g' access.log

# Case 2: config-file edits
echo -e "\n案例2: 配置文件修改"

echo "1. 修改配置项:"
# Escape the dot in the *pattern*: 'app.port' must match literally.
sed 's/app\.port=8080/app.port=9090/' config.txt

echo -e "\n2. 注释掉特定配置:"
sed '/feature.cache/s/^/# /' config.txt

echo -e "\n3. 取消注释:"
sed '/# database.host/s/^# //' config.txt

# Case 3: data cleansing
echo -e "\n案例3: 数据清洗"

echo "1. 标准化CSV格式:"
sed 's/, /,/g' sales.csv | sed 's/^[[:space:]]*//' | sed 's/[[:space:]]*$//'

echo -e "\n2. 删除空行:"
sed '/^$/d' sales.csv

echo -e "\n3. 转换日期格式:"
sed 's|\([0-9]\{4\}\)-\([0-9]\{2\}\)-\([0-9]\{2\}\)|\2/\3/\1|g' sales.csv

# Case 4: code refactoring (on a generated throwaway file)
echo -e "\n案例4: 代码重构"

cat > refactor_code.py << 'EOF'
def old_function_name():
    print("This is old function")
    
def another_old_function():
    print("Another old function")
    
# 调用旧函数
old_function_name()
another_old_function()
EOF

echo "1. 重命名函数:"
sed 's/old_function_name/new_function_name/g' refactor_code.py

echo -e "\n2. 添加日志:"
sed '/def /a\    print("Function called")' refactor_code.py

# Case 5: text formatting
echo -e "\n案例5: 文本格式化"

echo "1. 添加行号:"
sed = users.txt | sed 'N;s/\n/ /'

echo -e "\n2. 每N行添加分隔符:"
# GNU 'first~step' address: every 3rd line starting at line 3.
sed '3~3a\---' users.txt

echo -e "\n3. 文本对齐:"
sed 's/^/    /' users.txt | head -3

# Case 6: batch in-place edits across several files
echo -e "\n案例6: 批量文件处理"

for i in {1..3}; do
    echo "File $i content" > "test_file_$i.txt"
    echo "version=1.0" >> "test_file_$i.txt"
done

echo "批量修改文件内容:"
for file in test_file_*.txt; do
    echo "处理文件: $file"
    sed -i 's/version=1.0/version=2.0/' "$file"
    cat "$file"
done

# Case 7: extraction and transformation
echo -e "\n案例7: 数据提取和转换"

echo "1. 提取薪资大于60000的员工:"
sed -n '/,[0-9]\{5,\}$/p' users.txt | awk -F, '$6 > 60000'

echo -e "\n2. 生成SQL插入语句:"
sed '1d; s/\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\)/INSERT INTO users VALUES(\1, \"\2\", \3, \"\4\", \"\5\", \6);/' users.txt

# Case 8: complex text transformation
echo -e "\n案例8: 复杂文本转换"

echo "转换多语言文本格式:"
sed -E 's/([^!])!([^!])/\1\n\2/g' multilang.txt

# Remove generated files
rm -f refactor_code.py test_file_*.txt

echo -e "\n=== sed 实战案例演示完成 ==="

4. awk - 文本处理编程语言

4.1 awk 基础用法

创建 awk 基础教程:awk_basics.sh

bash 复制代码
#!/bin/bash

# awk basics tutorial: printing, field separators, patterns, built-in
# variables, arrays and formatting, run against users.txt
# (fields: id,name,age,title,city,salary).

echo "=== awk 基础用法 ==="

# 1. Basic printing
echo -e "\n1. 基本打印:"
echo "打印整个文件:"
awk '{print}' users.txt

echo -e "\n打印第一列:"
awk '{print $1}' users.txt

echo -e "\n打印多列:"
awk '{print $1, $3}' users.txt

# 2. Field separators (-F)
echo -e "\n2. 字段分隔符:"

echo "使用逗号分隔符:"
awk -F, '{print $2, $6}' users.txt

echo -e "\n使用多个分隔符:"
echo "apple,banana;cherry" | awk -F'[,;]' '{print $1, $2, $3}'

# 3. Pattern matching (regex pattern and field comparison)
echo -e "\n3. 模式匹配:"

echo "匹配包含'北京'的行:"
awk '/北京/' users.txt

echo -e "\n匹配特定字段:"
awk -F, '$3 > 28' users.txt

# 4. Built-in variables: NR (record no.), NF (field count), FILENAME
echo -e "\n4. 内置变量:"

echo "行号:"
awk '{print NR, $0}' users.txt | head -3

echo -e "\n字段数量:"
awk -F, '{print NF, $0}' users.txt | head -3

echo -e "\n文件名:"
awk 'END{print FILENAME}' users.txt

# 5. BEGIN and END blocks run before/after the input
echo -e "\n5. BEGIN 和 END 块:"

echo "添加表头:"
awk -F, 'BEGIN {print "ID\t姓名\t年龄\t职位"} {print $1"\t"$2"\t"$3"\t"$4} END {print "=== 结束 ==="}' users.txt

# 6. Variables and arithmetic (uninitialized vars start at 0/"")
echo -e "\n6. 变量和计算:"

echo "计算平均年龄:"
awk -F, '{sum += $3; count++} END {print "平均年龄:", sum/count}' users.txt

echo -e "\n薪资统计:"
awk -F, '{sum += $6; if($6 > max) max = $6} END {print "总薪资:", sum, "最高薪资:", max}' users.txt

# 7. Conditionals
echo -e "\n7. 条件语句:"

echo "薪资分类:"
awk -F, '{
    if ($6 > 80000) 
        print $2, "高薪"
    else if ($6 > 50000) 
        print $2, "中薪"
    else 
        print $2, "低薪"
}' users.txt

# 8. Loops over fields
echo -e "\n8. 循环:"

echo "遍历字段:"
awk -F, '{
    printf "行 %d: ", NR
    for(i=1; i<=NF; i++) 
        printf "[%s] ", $i
    print ""
}' users.txt | head -3

# 9. Associative arrays
echo -e "\n9. 数组:"

echo "按城市统计人数:"
awk -F, '{
    city[$5]++
} END {
    for(c in city) 
        print c, city[c]
}' users.txt

# 10. String functions (toupper, length)
echo -e "\n10. 字符串函数:"

echo "字符串操作:"
awk -F, '{
    print "原始:", $2, "大写:", toupper($2), "长度:", length($2)
}' users.txt | head -3

# 11. Math functions (sqrt)
echo -e "\n11. 数学函数:"

echo "数学运算:"
awk -F, '{
    print $2, "薪资:", $6, "平方根:", sqrt($6)
}' users.txt | head -3

# 12. printf-style formatted output
echo -e "\n12. 输出格式控制:"

echo "格式化输出:"
awk -F, 'BEGIN {printf "%-10s %-8s %-10s\n", "姓名", "年龄", "薪资"} 
{printf "%-10s %-8d %-10.2f\n", $2, $3, $6}' users.txt

echo -e "\n=== awk 基础用法演示完成 ==="

4.2 awk 高级用法

创建 awk 高级教程:awk_advanced.sh

bash 复制代码
#!/bin/bash

# awk advanced usage tutorial.
# NOTE(review): several examples rely on GNU awk (gawk) extensions — true
# multidimensional arrays (stats[a][b]) and the 3-argument match(). They
# will fail under mawk/BSD awk; confirm that 'awk' is gawk.

echo "=== awk 高级用法 ==="

# Build a small structured application log to analyze (removed at the end)
cat > advanced_awk_test.txt << 'EOF'
2023-10-01 08:30:25 INFO [UserService] User login successful: user_id=123
2023-10-01 08:45:12 ERROR [PaymentService] Payment failed: amount=500.00, reason=insufficient_funds
2023-10-01 09:15:33 WARN [AuthService] Multiple failed login attempts: ip=192.168.1.100
2023-10-01 10:20:45 INFO [OrderService] New order created: order_id=456, amount=299.99
2023-10-01 11:05:17 ERROR [DatabaseService] Connection timeout: retry_count=3
2023-10-01 14:30:22 INFO [InventoryService] Stock updated: product_id=789, quantity=50
EOF

# 1. Complex field splitting: '[][]' makes '[' and ']' the separators
echo -e "\n1. 复杂字段分割:"

echo "多字符分隔符:"
awk -F'[][]' '{print "服务:", $2, "消息:", $3}' advanced_awk_test.txt

echo -e "\n正则表达式分隔符:"
awk -F'[=,]' '{for(i=1;i<=NF;i++) if($i~/[a-z]_id/) print $i}' advanced_awk_test.txt

# 2. Associative arrays keyed by log level ($3 is the level field)
echo -e "\n2. 关联数组:"

echo "统计日志级别:"
awk '{
    split($3, level, " ")
    levels[level[1]]++
} END {
    for(l in levels) 
        printf "%-6s: %d\n", l, levels[l]
}' advanced_awk_test.txt

# 3. Multidimensional arrays (gawk-only arrays-of-arrays syntax)
echo -e "\n3. 多维数组:"

echo "按服务和级别统计:"
awk '{
    split($0, parts, "[][]")
    service = parts[2]
    level = $3
    stats[service][level]++
} END {
    for(service in stats) {
        print "服务:", service
        for(level in stats[service]) {
            print "  ", level, ":", stats[service][level]
        }
    }
}' advanced_awk_test.txt

# 4. User-defined functions (RSTART/RLENGTH set by match())
echo -e "\n4. 自定义函数:"

echo "使用自定义函数:"
awk '
function extract_number(str) {
    match(str, /[0-9]+(\.[0-9]+)?/)
    return substr(str, RSTART, RLENGTH)
}
{
    for(i=1; i<=NF; i++) {
        if($i ~ /amount=/) {
            amount = extract_number($i)
            print "金额:", amount
        }
    }
}' advanced_awk_test.txt

# 5. Range selection via string comparison on the time field
echo -e "\n5. 模式范围:"

echo "处理特定时间范围:"
awk '$2 >= "09:00:00" && $2 <= "11:00:00"' advanced_awk_test.txt

# 6. Redirecting print to per-level output files from inside awk
echo -e "\n6. 输出重定向:"

echo "按级别输出到不同文件:"
awk '
$3 ~ /INFO/ {print > "info.log"}
$3 ~ /ERROR/ {print > "error.log"} 
$3 ~ /WARN/ {print > "warn.log"}
' advanced_awk_test.txt

echo "INFO日志:"
cat info.log
echo "ERROR日志:"
cat error.log

# 7. Piping awk output onward
echo -e "\n7. 管道输出:"

echo "排序输出:"
awk '{print $3, $5}' advanced_awk_test.txt | sort

# 8. Running system commands from awk via getline
echo -e "\n8. 系统命令集成:"

echo "在awk中执行系统命令:"
awk '{
    "date +%Y-%m-%d" | getline current_date
    close("date +%Y-%m-%d")
    if($1 == current_date) 
        print "今天日志:", $0
}' advanced_awk_test.txt

# 9. Extracting and aggregating numeric values
# NOTE(review): the 3-argument match(..., arr) below is a gawk extension.
echo -e "\n9. 复杂数据处理:"

echo "提取和计算数值:"
awk '{
    total = 0
    count = 0
    for(i=1; i<=NF; i++) {
        if($i ~ /amount=([0-9.]+)/) {
            match($i, /amount=([0-9.]+)/, arr)
            total += arr[1]
            count++
        }
    }
    if(count > 0) {
        print "总金额:", total, "平均金额:", total/count
    }
}' advanced_awk_test.txt

# 10. Report generation with printf columns
echo -e "\n10. 报表生成:"

echo "生成统计报表:"
awk '
BEGIN {
    printf "%-20s %-10s %-10s\n", "服务", "INFO", "ERROR"
    printf "%-20s %-10s %-10s\n", "---", "---", "---"
}
{
    split($0, parts, "[][]")
    service = parts[2]
    if($3 ~ /INFO/) info[service]++
    if($3 ~ /ERROR/) error[service]++
} 
END {
    for(service in info) {
        printf "%-20s %-10d %-10d\n", service, info[service], error[service]
    }
}' advanced_awk_test.txt

# 11. Data validation with a helper predicate function
echo -e "\n11. 数据验证:"

echo "验证数据格式:"
awk '
function is_valid_ip(ip) {
    return ip ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/
}
{
    for(i=1; i<=NF; i++) {
        if($i ~ /ip=/) {
            split($i, ip_parts, "=")
            ip = ip_parts[2]
            if(is_valid_ip(ip)) {
                print "有效IP:", ip
            } else {
                print "无效IP:", ip
            }
        }
    }
}' advanced_awk_test.txt

# Remove generated files
rm -f info.log error.log warn.log advanced_awk_test.txt

echo -e "\n=== awk 高级用法演示完成 ==="

4.3 awk 实战案例

创建 awk 实战脚本:awk_practical.sh

bash 复制代码
#!/bin/bash

# awk practical recipes: sales analysis, monitoring reports, log statistics,
# config parsing, JSON generation and salary reporting. Relies on the sample
# files from create_test_data.sh; some examples need gawk (see notes below).

echo "=== awk 实战案例 ==="

# Case 1: sales analysis (sales.csv: Date,Product,Category,Region,Sales,Quantity)
echo -e "\n案例1: 销售数据分析"

echo "1. 按产品分类统计:"
awk -F, '
NR > 1 {
    products[$2] += $5
    quantity[$2] += $6
} 
END {
    printf "%-15s %-12s %-10s %-12s\n", "产品", "总销售额", "总数量", "平均单价"
    printf "%-15s %-12s %-10s %-12s\n", "---", "---", "---", "---"
    for(p in products) {
        avg = products[p] / quantity[p]
        printf "%-15s %-12.2f %-10d %-12.2f\n", p, products[p], quantity[p], avg
    }
}' sales.csv

echo -e "\n2. 按地区统计:"
awk -F, '
NR > 1 {
    regions[$4] += $5
}
END {
    print "=== 地区销售统计 ==="
    for(r in regions) {
        printf "%-10s: %.2f\n", r, regions[r]
    }
}' sales.csv

# Case 2: system monitoring report on generated sample data
echo -e "\n案例2: 系统监控报告"

cat > system_stats.txt << 'EOF'
CPU 25% MEM 45% DISK 78% NET 120KB/s
CPU 30% MEM 48% DISK 79% NET 150KB/s
CPU 28% MEM 46% DISK 78% NET 130KB/s
CPU 35% MEM 50% DISK 80% NET 200KB/s
CPU 40% MEM 52% DISK 81% NET 180KB/s
EOF

echo "系统资源分析:"
# Fields like "25%" are coerced to their leading number in arithmetic.
awk '{
    cpu_sum += $2
    mem_sum += $4  
    disk_sum += $6
    net_sum += $8
    count++
} 
END {
    print "=== 系统资源统计 ==="
    printf "CPU平均使用率: %.1f%%\n", cpu_sum/count
    printf "内存平均使用率: %.1f%%\n", mem_sum/count
    printf "磁盘平均使用率: %.1f%%\n", disk_sum/count
    printf "网络平均速度: %.1fKB/s\n", net_sum/count
}' system_stats.txt

# Case 3: log analysis
# NOTE(review): the 3-argument match(..., arr) is a gawk extension.
echo -e "\n案例3: 日志分析"

echo "1. HTTP状态码统计:"
awk '{
    match($0, /HTTP\/1\.1" ([0-9]{3})/, arr)
    if(arr[1] != "") {
        status_codes[arr[1]]++
    }
} 
END {
    print "=== HTTP状态码统计 ==="
    for(code in status_codes) {
        printf "状态码 %s: %d次\n", code, status_codes[code]
    }
}' access.log

echo -e "\n2. IP访问频率:"
awk '{
    ip = $1
    ips[ip]++
} 
END {
    print "=== IP访问频率 ==="
    for(ip in ips) {
        printf "%-15s: %d次\n", ip, ips[ip]
    }
}' access.log | sort -k2 -nr

# Case 4: config parsing (skip '#' comment lines, keep key=value pairs)
echo -e "\n案例4: 配置解析"

echo "解析配置文件:"
awk -F= '
/^[^#]/ && NF == 2 {
    config[$1] = $2
} 
END {
    print "=== 配置信息 ==="
    for(key in config) {
        printf "%-20s: %s\n", key, config[key]
    }
}' config.txt

# Case 5: transform users.txt into a JSON array
echo -e "\n案例5: 数据转换"

echo "生成JSON格式:"
awk -F, '
BEGIN {
    print "["
}
NR > 1 {
    if(NR > 2) print ","
    printf "  {\n"
    printf "    \"id\": %s,\n", $1
    printf "    \"name\": \"%s\",\n", $2
    printf "    \"age\": %s,\n", $3
    printf "    \"position\": \"%s\",\n", $4
    printf "    \"city\": \"%s\",\n", $5
    printf "    \"salary\": %s\n", $6
    printf "  }"
}
END {
    print "\n]"
}' users.txt

# Case 6: multi-language text analysis
# NOTE(review): awk regexes do NOT support \u escapes — the bracket
# expression below matches the literal characters '\', 'u', digits, etc.,
# so the "Chinese character" count is not reliable. A correct version needs
# gawk in a UTF-8 locale with a literal CJK character range — verify.
echo -e "\n案例6: 文本处理"

echo "多语言文本分析:"
awk '{
    # 统计中文字符
    chinese_chars = gsub(/[\\u4e00-\\u9fff]/, "&")
    # 统计英文字符
    english_words = gsub(/[a-zA-Z]+/, "&")
    # 统计法文字符
    french_chars = gsub(/[éèêëàâæçîïôœùûüÿ]/, "&")
    
    printf "行 %d: 中文%d个 英文%d个 法文%d个\n", NR, chinese_chars, english_words, french_chars
}' multilang.txt

# Case 7: salary report with grading and aggregation
echo -e "\n案例7: 复杂计算"

echo "薪资分析报告:"
# min_salary starts uninitialized (0), hence the '|| min_salary == 0' guard.
awk -F, '
BEGIN {
    print "=== 薪资分析报告 ==="
    printf "%-10s %-8s %-10s %-12s\n", "姓名", "年龄", "薪资", "等级"
    printf "%-10s %-8s %-10s %-12s\n", "---", "---", "---", "---"
}
NR > 0 {
    salary = $6
    age = $3
    
    # 薪资等级
    if(salary >= 100000) grade = "A"
    else if(salary >= 70000) grade = "B" 
    else if(salary >= 50000) grade = "C"
    else grade = "D"
    
    # 统计
    total_salary += salary
    count++
    if(salary > max_salary) max_salary = salary
    if(salary < min_salary || min_salary == 0) min_salary = salary
    
    # 年龄组统计
    if(age < 25) age_group["<25"]++
    else if(age < 30) age_group["25-29"]++
    else if(age < 35) age_group["30-34"]++
    else age_group[">=35"]++
    
    printf "%-10s %-8d %-10d %-12s\n", $2, age, salary, grade
}
END {
    print "\n=== 统计摘要 ==="
    printf "员工总数: %d\n", count
    printf "平均薪资: %.2f\n", total_salary/count
    printf "最高薪资: %d\n", max_salary
    printf "最低薪资: %d\n", min_salary
    
    print "\n=== 年龄分布 ==="
    for(group in age_group) {
        printf "%s岁: %d人\n", group, age_group[group]
    }
}' users.txt

# Remove generated files
rm -f system_stats.txt

echo -e "\n=== awk 实战案例演示完成 ==="

5. 三剑客组合应用

5.1 工具组合工作流

创建组合应用脚本:text_processing_workflow.sh

bash 复制代码
#!/bin/bash

# Combined grep/sed/awk workflows: multi-stage pipelines over the sample
# data files. Several awk programs use gawk extensions (3-arg match(),
# arrays of arrays) — confirm gawk is available.

echo "=== 文本处理三剑客组合应用 ==="

# Workflow 1: log-analysis pipeline (grep filter -> awk extract -> sort/uniq)
echo -e "\n工作流1: 日志分析管道"

echo "分析错误请求并统计:"
grep -E '(404|500|403)' access.log | \
awk '{
    match($0, /([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+).*"([A-Z]+) ([^"]+).* ([0-9]{3})/, arr)
    print arr[1], arr[2], arr[3], arr[4]
}' | \
sort | \
uniq -c | \
sort -rn | \
head -10

# Workflow 2: data cleansing then aggregation
echo -e "\n工作流2: 数据清洗和转换"

echo "用户数据清洗和统计:"
sed 's/, /,/g' users.txt | \
awk -F, '
NR > 0 {
    gsub(/^[[:space:]]+|[[:space:]]+$/, "", $2)  # 清理姓名空格
    gsub(/^[[:space:]]+|[[:space:]]+$/, "", $4)  # 清理职位空格
    print $1","$2","$3","$4","$5","$6
}' | \
awk -F, '{
    # 按城市统计薪资
    city_salary[$5] += $6
    city_count[$5]++
} 
END {
    print "=== 各城市平均薪资 ==="
    for(city in city_salary) {
        avg = city_salary[city] / city_count[city]
        printf "%-8s: %.2f\n", city, avg
    }
}'

# Workflow 3: turn the config file into a grouped document.
# The "'$(date)'" below deliberately breaks out of the single-quoted awk
# program so the shell substitutes the current date into the awk source.
echo -e "\n工作流3: 配置文件处理"

echo "生成配置文档:"
grep -v '^#' config.txt | \
sed 's/^# //' | \
awk -F= '{
    if(NF == 2) {
        category = substr($1, 1, index($1, ".")-1)
        key = substr($1, index($1, ".")+1)
        config[category][key] = $2
    }
} 
END {
    print "# 配置文档"
    print "生成时间: '$(date)'"
    print ""
    
    for(category in config) {
        print "## " category
        print ""
        for(key in config[category]) {
            printf "%-20s: %s\n", key, config[category][key]
        }
        print ""
    }
}'

# Workflow 4: multi-language text analysis.
# NOTE(review): awk regexes do not support \u escapes — the /[\u4e00-...]/
# class below does not match CJK characters as intended; verify with gawk
# in a UTF-8 locale before relying on these counts.
echo -e "\n工作流4: 复杂文本转换"

echo "多语言文本分析报告:"
cat multilang.txt | \
sed 's/! /!\n/g' | \
awk '{
    # 分离不同语言
    split($0, parts, " ")
    chinese_count = 0
    english_count = 0
    french_count = 0
    
    for(i in parts) {
        if(parts[i] ~ /[\u4e00-\u9fff]/) chinese_count++
        else if(parts[i] ~ /^[A-Za-z]/) english_count++
        else if(parts[i] ~ /[éèêëàâæçîïôœùûüÿ]/) french_count++
    }
    
    print "行", NR, "- 中文:", chinese_count, "英文:", english_count, "法文:", french_count
}'

# Workflow 5: monitoring pipeline over randomly generated samples
echo -e "\n工作流5: 系统监控管道"

echo "模拟系统监控分析:"
for i in {1..10}; do
    echo "CPU $((20 + RANDOM % 30))% MEM $((40 + RANDOM % 20))% DISK $((70 + RANDOM % 15))%"
done > monitor.log

cat monitor.log | \
awk '{
    gsub(/%/, "", $2)
    gsub(/%/, "", $4) 
    gsub(/%/, "", $6)
    
    cpu_sum += $2
    mem_sum += $4
    disk_sum += $6
    count++
    
    # 告警检测
    if($2 > 80) print "警告: CPU使用率过高 - "$2"%"
    if($4 > 85) print "警告: 内存使用率过高 - "$4"%"
    if($6 > 90) print "警告: 磁盘使用率过高 - "$6"%"
} 
END {
    print "=== 平均使用率 ==="
    printf "CPU: %.1f%%\n", cpu_sum/count
    printf "内存: %.1f%%\n", mem_sum/count
    printf "磁盘: %.1f%%\n", disk_sum/count
}'

# Workflow 6: full sales report (same shell-quote trick for the date)
echo -e "\n工作流6: 销售数据报表"

awk -F, '
BEGIN {
    print "=== 销售数据分析报告 ==="
    print "生成时间: '$(date)'"
    print ""
}
NR == 1 {
    # 跳过标题行
    next
}
{
    # 数据统计
    date_sales[$1] += $5
    product_sales[$2] += $5
    category_sales[$3] += $5
    region_sales[$4] += $5
    total_sales += $5
    total_quantity += $6
    record_count++
}
END {
    # 日期分析
    print "## 按日期统计"
    for(date in date_sales) {
        printf "%-12s: %8.2f\n", date, date_sales[date]
    }
    print ""
    
    # 产品分析
    print "## 按产品统计"
    for(product in product_sales) {
        printf "%-15s: %8.2f\n", product, product_sales[product]
    }
    print ""
    
    # 分类分析
    print "## 按分类统计"
    for(category in category_sales) {
        printf "%-15s: %8.2f\n", category, category_sales[category]
    }
    print ""
    
    # 地区分析  
    print "## 按地区统计"
    for(region in region_sales) {
        printf "%-10s: %8.2f\n", region, region_sales[region]
    }
    print ""
    
    # 汇总信息
    print "## 汇总信息"
    printf "总销售额: %.2f\n", total_sales
    printf "总数量: %d\n", total_quantity
    printf "平均单价: %.2f\n", total_sales/total_quantity
    printf "记录数量: %d\n", record_count
}' sales.csv

# Remove generated files
rm -f monitor.log

echo -e "\n=== 组合应用演示完成 ==="

5.2 性能优化与最佳实践

创建优化指南:performance_tips.sh

bash 复制代码
#!/bin/bash

# Performance comparisons and best practices for grep/sed/awk, timed
# against a generated 10k-line file (removed at the end).

echo "=== 文本处理性能优化与最佳实践 ==="

# Generate a large test file for the timing comparisons
echo "创建大测试文件..."
for i in {1..10000}; do
    echo "Line $i: This is test data with some pattern $((i % 100)) and more content here." >> large_test.txt
done

echo -e "\n1. grep 性能优化:"

echo "使用固定字符串搜索:"
time grep -F 'pattern 50' large_test.txt > /dev/null

echo -e "\n使用简单正则:"
time grep 'pattern 50' large_test.txt > /dev/null

echo -e "\n使用扩展正则:"
time grep -E 'pattern (50|51)' large_test.txt > /dev/null

echo -e "\n2. sed 性能优化:"

echo "使用简单替换:"
time sed 's/pattern/PATTERN/g' large_test.txt > /dev/null

echo -e "\n使用复杂替换:"
time sed -E 's/pattern ([0-9]+)/PATTERN \1/g' large_test.txt > /dev/null

echo -e "\n3. awk 性能优化:"

echo "使用字段分割:"
time awk '{print $3}' large_test.txt > /dev/null

echo -e "\n使用正则分割:"
time awk -F'[: ]' '{print $3}' large_test.txt > /dev/null

echo -e "\n4. 管道优化技巧:"

echo "减少管道数量:"
time cat large_test.txt | grep 'pattern' | sed 's/pattern/PATTERN/' > /dev/null

echo -e "\n合并处理命令:"
# One awk pass replaces the whole grep|sed pipeline above
time awk '/pattern/ {gsub(/pattern/, "PATTERN"); print}' large_test.txt > /dev/null

echo -e "\n5. 内存使用优化:"

# Deliberately slow example: spawns one grep per input line
echo "流式处理大文件:"
time while read line; do
    echo "$line" | grep -q 'pattern 50' && echo "$line"
done < large_test.txt > /dev/null

echo -e "\n使用高效工具组合:"
time grep 'pattern 50' large_test.txt | head -100 > /dev/null

echo -e "\n6. 最佳实践示例:"

echo "错误处理:"
grep 'nonexistent' large_test.txt || echo "没有找到匹配内容"

echo -e "\n输出控制:"
awk 'NR % 1000 == 0 {print "进度:", NR}' large_test.txt

# NOTE(review): demo only — temp_file.txt is never actually created here,
# the trap just illustrates the cleanup idiom.
echo -e "\n资源清理:"
trap 'rm -f temp_file.txt' EXIT

echo -e "\n7. 调试技巧:"

echo "步骤调试:"
grep 'pattern 50' large_test.txt | \
sed 's/pattern/PATTERN/' | \
awk '{print "处理结果:", $0}' | \
head -3

echo -e "\n变量跟踪:"
# Progress messages go to stderr so stdout redirection stays clean
awk '{
    if(NR % 1000 == 0) {
        print "处理行数:", NR > "/dev/stderr"
    }
    print $0
}' large_test.txt > /dev/null

# Remove the generated file
rm -f large_test.txt

echo -e "\n=== 性能优化指南完成 ==="

6. 综合实战项目

6.1 完整的日志分析系统

创建日志分析系统:log_analysis_system.sh

bash 复制代码
#!/bin/bash

# Complete log-analysis demo: builds a sample access log, runs the
# traffic / performance / security / user-behavior analysis functions
# defined below, and generates a summary report.

echo "=== 日志分析系统 ==="

# Sample data: combined-log-style lines with an authenticated user name
# in field 3 and an extra trailing field holding the response time in
# seconds. The quoted 'EOF' keeps the content literal.
cat > extended_access.log << 'EOF'
192.168.1.100 - john [10/Oct/2023:10:30:01 +0800] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0" 0.123
192.168.1.101 - jane [10/Oct/2023:10:30:02 +0800] "POST /api/login HTTP/1.1" 401 567 "https://example.com" "Mozilla/5.0" 0.456
192.168.1.102 - bob [10/Oct/2023:10:30:03 +0800] "GET /products.html HTTP/1.1" 200 7890 "https://example.com" "Chrome/91.0" 0.234
192.168.1.100 - john [10/Oct/2023:10:30:04 +0800] "GET /images/logo.png HTTP/1.1" 304 0 "https://example.com" "Mozilla/5.0" 0.078
192.168.1.103 - alice [10/Oct/2023:10:30:05 +0800] "PUT /api/users/1 HTTP/1.1" 403 234 "https://example.com" "Firefox/89.0" 0.345
192.168.1.104 - charlie [10/Oct/2023:10:30:06 +0800] "GET /contact.html HTTP/1.1" 200 3456 "https://example.com" "Safari/14.0" 0.189
192.168.1.101 - jane [10/Oct/2023:10:30:07 +0800] "DELETE /api/products/5 HTTP/1.1" 204 0 "https://example.com" "Mozilla/5.0" 0.267
192.168.1.105 - david [10/Oct/2023:10:30:08 +0800] "GET /about.html HTTP/1.1" 500 123 "https://example.com" "Chrome/92.0" 0.412
192.168.1.106 - eve [10/Oct/2023:10:30:09 +0800] "GET /admin/dashboard HTTP/1.1" 200 4567 "https://example.com" "Mozilla/5.0" 0.156
192.168.1.107 - frank [10/Oct/2023:10:30:10 +0800] "POST /api/orders HTTP/1.1" 201 789 "https://example.com" "Chrome/93.0" 0.298
EOF
# 分析函数定义
analyze_traffic() {
    # Traffic view of extended_access.log: total requests, request-method
    # distribution, HTTP status distribution, and top user agents.
    echo "=== 流量分析 ==="

    echo -e "\n1. 总请求数:"
    awk 'END {print NR}' extended_access.log

    echo -e "\n2. 请求方法分布:"
    # Field 2 of the quote-split line is `METHOD path PROTO`; count its
    # first word per line, then sort by frequency.
    awk -F'"' '{split($2, req, " "); hits[req[1]]++}
               END {for (m in hits) print m, hits[m]}' extended_access.log | sort -k2 -nr

    echo -e "\n3. HTTP状态码分布:"
    awk '{print $9}' extended_access.log | sort | uniq -c | sort -rn

    echo -e "\n4. 用户代理分布:"
    cut -d'"' -f6 extended_access.log | sort | uniq -c | sort -rn | head -5
}

analyze_performance() {
    # Performance view: mean response time, three slowest requests, and
    # per-endpoint averages. Convention: the response time (seconds) is
    # the last whitespace field of each log line.
    echo -e "\n=== 性能分析 ==="

    echo -e "\n1. 平均响应时间:"
    awk '{total += $NF} END {printf "%.3f秒\n", total/NR}' extended_access.log

    echo -e "\n2. 最慢的请求:"
    # Prefix each line with its time, sort numerically descending, take
    # the top three, then blank out the temporary leading field.
    awk '{print $NF, $0}' extended_access.log | sort -rn | head -3 | awk '{$1=""; print}'

    echo -e "\n3. 按端点统计平均响应时间:"
    # With -F'"', field 2 is `METHOD path PROTO` and $NF is the text
    # after the closing quote (the response time).
    awk -F'"' '{
        split($2, req, " ")
        path = req[2]
        elapsed[path] += $NF
        hits[path]++
    } END {
        for (p in elapsed)
            printf "%-20s: %.3f秒 (%d次)\n", p, elapsed[p] / hits[p], hits[p]
    }' extended_access.log | sort -k2 -nr
}

analyze_security() {
    # Security view: failed logins, permission denials (403), server
    # errors (500), and any access to the admin area.
    echo -e "\n=== 安全分析 ==="

    echo -e "\n1. 失败登录尝试:"
    sed -n '/POST \/api\/login.*401/p' extended_access.log

    echo -e "\n2. 权限拒绝访问:"
    grep ' 403 ' extended_access.log

    echo -e "\n3. 服务器错误:"
    grep ' 500 ' extended_access.log

    echo -e "\n4. 管理员访问:"
    grep -F '/admin' extended_access.log
}

analyze_users() {
    # User-behavior view: request count per authenticated user, and how
    # many distinct source IPs each user appeared from.
    echo -e "\n=== 用户行为分析 ==="

    echo -e "\n1. 活跃用户:"
    # Field 3 is the user name; "-" means unauthenticated and is skipped.
    awk '{print $3}' extended_access.log | grep -v '^-$' | sort | uniq -c | sort -rn

    echo -e "\n2. 用户会话分析:"
    # Fix vs. original: `user_ips[user][ip]` is a gawk-4+ "array of
    # arrays" and is a syntax error on mawk/POSIX awk. Use SUBSEP
    # composite keys instead, which every awk implementation supports.
    awk '{
        ip = $1
        user = $3
        if (user != "-") {
            user_requests[user]++
            seen[user SUBSEP ip] = 1   # de-duplicated (user, ip) pairs
        }
    } END {
        # Count distinct IPs per user from the composite keys.
        for (key in seen) {
            split(key, parts, SUBSEP)
            ip_count[parts[1]]++
        }
        for (user in user_requests)
            printf "用户: %-10s 请求数: %-3d IP数: %d\n", user, user_requests[user], ip_count[user]
    }' extended_access.log | sort -k4 -nr
}

generate_report() {
    # Produce log_report.txt: summary counts, request-method distribution,
    # and average response time per HTTP status.
    echo -e "\n=== 分析报告生成 ==="

    # Report header. Fix vs. original: the heredoc delimiter must be
    # UNQUOTED here — with << 'EOF' the $(date) below was written to the
    # report literally instead of being expanded to a timestamp.
    cat > log_report.txt << EOF
# 网站访问日志分析报告
生成时间: $(date)

## 执行摘要
EOF

    # Summary counters; status classes are matched as " NXX " fields.
    {
        echo "总请求数: $(wc -l < extended_access.log)"
        echo "成功请求(2xx): $(grep -c ' 2[0-9][0-9] ' extended_access.log)"
        echo "客户端错误(4xx): $(grep -c ' 4[0-9][0-9] ' extended_access.log)"
        echo "服务器错误(5xx): $(grep -c ' 5[0-9][0-9] ' extended_access.log)"
        echo "平均响应时间: $(awk '{sum += $(NF)} END {printf "%.3f秒", sum/NR}' extended_access.log)"
    } >> log_report.txt

    # Detail sections
    echo -e "\n## 详细分析" >> log_report.txt

    echo -e "\n### 流量统计" >> log_report.txt
    awk -F'"' '{print $2}' extended_access.log | awk '{methods[$1]++} END {for(m in methods) printf "%s: %d\n", m, methods[m]}' >> log_report.txt

    echo -e "\n### 性能统计" >> log_report.txt
    # Average of the last field (response time) grouped by status ($9).
    awk '{
        status = $9
        response_time = $(NF)
        status_time[status] += response_time
        status_count[status]++
    } END {
        for(s in status_count) {
            avg = status_time[s] / status_count[s]
            printf "状态码 %s: 平均 %.3f秒 (%d次)\n", s, avg, status_count[s]
        }
    }' extended_access.log >> log_report.txt

    echo "报告已生成: log_report.txt"
}

# Run every analysis stage in order, then produce the summary report.
analyze_traffic
analyze_performance
analyze_security
analyze_users
generate_report

# Cleanup: remove the demo log; log_report.txt is kept for inspection.
rm -f extended_access.log

echo -e "\n=== 日志分析系统演示完成 ==="

6.2 数据转换和ETL管道

创建ETL管道:etl_pipeline.sh

bash 复制代码
#!/bin/bash

# ETL pipeline demo: extract -> transform -> load over a small
# pipe-delimited employee dataset created below.

echo "=== 数据转换和ETL管道 ==="

# Raw input: pipe-delimited records with a header row
# (id|name|age|salary|department|join_date).
cat > raw_data.csv << 'EOF'
id|name|age|salary|department|join_date
1|John Doe|30|50000|Engineering|2020-01-15
2|Jane Smith|25|45000|Marketing|2021-03-20
3|Bob Johnson|35|60000|Engineering|2019-11-10
4|Alice Brown|28|52000|Sales|2022-02-28
5|Charlie Wilson|42|75000|Management|2018-05-15
6|Diana Lee|26|48000|Engineering|2021-07-01
7|Edward Zhang|33|55000|Sales|2020-09-10
8|Fiona Chen|29|51000|Marketing|2022-01-05
EOF

# ETL管道函数
extract_data() {
    # Extract stage: verify the source file exists and holds at least one
    # data row beyond the header, then copy it to extracted_data.csv so
    # later stages only touch the copy.
    echo "=== 数据提取阶段 ==="

    # Source file must exist.
    [[ -f raw_data.csv ]] || { echo "错误: 数据文件不存在"; exit 1; }

    # Declaration separated from assignment so a failed command
    # substitution is not masked by `local`.
    local total_lines
    total_lines=$(wc -l < raw_data.csv)

    if (( total_lines < 2 )); then
        echo "错误: 数据文件为空或只有标题行"
        exit 1
    fi

    echo "数据文件验证通过,共 $total_lines 行"

    cp raw_data.csv extracted_data.csv
    echo "数据提取完成"
}

transform_data() {
    # Transform stage: clean fields, derive experience / salary grade /
    # bonus, write transformed_data.csv, then run null checks and stats.
    echo -e "\n=== 数据转换阶段 ==="

    # Fix vs. original: the original put comments after the "\" line
    # continuation ("… | \  # 注释"); the backslash then escaped a space,
    # so the pipeline contained a command literally named " " and failed
    # with "command not found". Ending a line with "|" continues it
    # naturally, and the comments live on their own lines. The useless
    # `cat` was dropped as well.
    # sed pass 1: header delimiters "|" -> ","
    # sed pass 2: header snake_case -> camelCase (\U is a GNU sed extension)
    # (the header is rewritten by awk's BEGIN block anyway; these two
    # passes are kept only as a sed demonstration)
    sed '1s/|/,/g' extracted_data.csv |
    sed '1s/\([a-z]\)_\([a-z]\)/\1\U\2/g' |
    awk -F'|' '
    BEGIN {
        OFS = ","
        print "ID,Name,Age,Salary,Department,JoinDate,Experience,SalaryGrade,Bonus"
    }
    NR > 1 {
        # Trim surrounding whitespace from name and department.
        gsub(/^[[:space:]]+|[[:space:]]+$/, "", $2)
        gsub(/^[[:space:]]+|[[:space:]]+$/, "", $5)

        # Years of experience = current year - join year
        # (strftime is a gawk extension).
        split($6, date_parts, "-")
        join_year = date_parts[1]
        current_year = strftime("%Y")
        experience = current_year - join_year

        # Salary grade: >=70000 -> A, >=55000 -> B, otherwise C.
        salary_grade = "C"
        if ($4 >= 70000) salary_grade = "A"
        else if ($4 >= 55000) salary_grade = "B"

        # Bonus: 10-20% of salary, growing with experience.
        bonus_rate = 0.1
        if (experience >= 3) bonus_rate = 0.15
        if (experience >= 5) bonus_rate = 0.2
        bonus = $4 * bonus_rate

        print $1, $2, $3, $4, $5, $6, experience, salary_grade, int(bonus)
    }' > transformed_data.csv

    echo "数据转换完成"

    # Quality check: report any empty/NULL fields.
    echo -e "\n数据质量检查:"
    echo "总记录数: $(($(wc -l < transformed_data.csv) - 1))"
    echo "空值检查:"
    awk -F, '
    NR > 1 {
        for(i=1; i<=NF; i++) {
            if($i == "" || $i == "NULL") {
                print "第 " NR " 行第 " i " 列为空"
            }
        }
    }' transformed_data.csv

    # Basic statistics: average salary/bonus and department headcounts.
    echo -e "\n数据统计:"
    awk -F, '
    NR > 1 {
        total_salary += $4
        total_bonus += $9
        count++
        dept[$5]++
    }
    END {
        printf "平均薪资: %.2f\n", total_salary/count
        printf "平均奖金: %.2f\n", total_bonus/count
        print "部门分布:"
        for(d in dept) {
            printf "  %-15s: %d人\n", d, dept[d]
        }
    }' transformed_data.csv
}

load_data() {
    # Load stage: emit a SQL schema, an INSERT script and a markdown
    # analysis report from transformed_data.csv. Nothing is executed
    # against a real database — the SQL files are simulation output.
    echo -e "\n=== 数据加载阶段 ==="
    
    # Target table definition (simulated).
    cat > database_schema.sql << 'EOF'
-- 员工表结构
CREATE TABLE employees (
    id INT PRIMARY KEY,
    name VARCHAR(100),
    age INT,
    salary DECIMAL(10,2),
    department VARCHAR(50),
    join_date DATE,
    experience INT,
    salary_grade CHAR(1),
    bonus DECIMAL(10,2)
);
EOF
    
    # One INSERT per data row; the '\'' sequences embed literal single
    # quotes in the generated SQL. NOTE(review): field values are not
    # SQL-escaped, so a value containing a quote would break the statement.
    awk -F, '
    NR > 1 {
        printf "INSERT INTO employees VALUES(%d, '\''%s'\'', %d, %.2f, '\''%s'\'', '\''%s'\'', %d, '\''%s'\'', %.2f);\n", 
            $1, $2, $3, $4, $5, $6, $7, $8, $9
    }' transformed_data.csv > load_data.sql
    
    echo "SQL加载脚本已生成:"
    head -3 load_data.sql
    echo "..."
    
    # Markdown report: summary, per-department, age-group and grade
    # breakdowns. The '$(date)' below deliberately closes the
    # single-quoted awk program so the shell expands the timestamp.
    echo -e "\n生成数据报表..."
    awk -F, '
    BEGIN {
        print "# 员工数据分析报告"
        print "生成时间: '$(date)'"
        print ""
    }
    NR == 1 { next }  # 跳过标题行
    
    {
        total_salary += $4
        total_bonus += $9
        count++
        
        # 部门统计
        dept_salary[$5] += $4
        dept_count[$5]++
        dept_bonus[$5] += $9
        
        # 年龄组统计
        if($3 < 25) age_group["<25"]++
        else if($3 < 30) age_group["25-29"]++
        else if($3 < 35) age_group["30-34"]++
        else if($3 < 40) age_group["35-39"]++
        else age_group[">=40"]++
        
        # 薪资等级统计
        grade_count[$8]++
    }
    END {
        print "## 执行摘要"
        printf "员工总数: %d\n", count
        printf "总薪资: %.2f\n", total_salary
        printf "总奖金: %.2f\n", total_bonus
        printf "平均薪资: %.2f\n", total_salary/count
        printf "平均奖金: %.2f\n", total_bonus/count
        print ""
        
        print "## 部门分析"
        for(dept in dept_salary) {
            avg_salary = dept_salary[dept] / dept_count[dept]
            avg_bonus = dept_bonus[dept] / dept_count[dept]
            printf "### %s\n", dept
            printf "人数: %d\n", dept_count[dept]
            printf "平均薪资: %.2f\n", avg_salary
            printf "平均奖金: %.2f\n", avg_bonus
            print ""
        }
        
        print "## 年龄分布"
        for(group in age_group) {
            printf "%s岁: %d人 (%.1f%%)\n", group, age_group[group], (age_group[group]/count)*100
        }
        print ""
        
        print "## 薪资等级分布"
        for(grade in grade_count) {
            printf "等级 %s: %d人\n", grade, grade_count[grade]
        }
    }' transformed_data.csv > analysis_report.md
    
    echo "数据分析报告已生成: analysis_report.md"
}

# Run the three ETL stages in order; each stage exits non-zero on failure.
extract_data
transform_data
load_data

# Remove intermediate artifacts; analysis_report.md is kept as the result.
rm -f raw_data.csv extracted_data.csv transformed_data.csv database_schema.sql load_data.sql

echo -e "\n=== ETL管道演示完成 ==="

7. 总结与进阶学习

7.1 工具对比与选择指南

graph TB A[文本处理任务] --> B{任务类型} B -->|快速搜索| C[grep] B -->|简单替换| D[sed] B -->|复杂处理| E[awk] C --> F[模式匹配<br/>文件过滤] D --> G[流编辑<br/>批量替换] E --> H[数据提取<br/>报表生成] F --> I[适用场景<br/>日志搜索<br/>文件过滤] G --> J[适用场景<br/>配置修改<br/>数据清洗] H --> K[适用场景<br/>数据分析<br/>报表生成] style A fill:#1e3a5f,color:#ffffff style B fill:#4a1e5f,color:#ffffff style C fill:#1e5f3a,color:#ffffff style D fill:#1e5f3a,color:#ffffff style E fill:#1e5f3a,color:#ffffff style F fill:#5f3a1e,color:#ffffff style G fill:#5f3a1e,color:#ffffff style H fill:#5f3a1e,color:#ffffff style I fill:#1e3a5f,color:#ffffff style J fill:#1e3a5f,color:#ffffff style K fill:#1e3a5f,color:#ffffff

7.2 创建学习检查清单

创建学习检查清单:learning_checklist.sh

bash 复制代码
#!/bin/bash

# Writes a self-assessment checklist (markdown) for the grep/sed/awk
# tutorial and prints follow-up study suggestions.

echo "=== 文本处理三剑客学习检查清单 ==="

# Quoted 'EOF' keeps the markdown content literal.
cat > learning_checklist.md << 'EOF'
# 文本处理三剑客学习检查清单

## grep 掌握程度检查
- [ ] 基础搜索和选项 (-i, -n, -v, -c)
- [ ] 正则表达式基础 (., *, +, ?, [])
- [ ] 字符类和预定义字符类
- [ ] 锚点和单词边界
- [ ] 分组和引用
- [ ] 扩展正则表达式
- [ ] 递归搜索和文件过滤
- [ ] 性能优化技巧

## sed 掌握程度检查
- [ ] 基本替换命令 (s///)
- [ ] 地址和范围指定
- [ ] 删除、插入、追加命令
- [ ] 模式空间和保持空间
- [ ] 流控制(分支、跳转)
- [ ] 多命令执行和脚本文件
- [ ] 高级替换技巧
- [ ] 原位编辑和备份

## awk 掌握程度检查
- [ ] 基本打印和字段处理
- [ ] 模式匹配和条件语句
- [ ] BEGIN 和 END 块
- [ ] 内置变量 (NR, NF, FS, OFS)
- [ ] 数组和关联数组
- [ ] 字符串和数学函数
- [ ] 控制结构 (if, for, while)
- [ ] 自定义函数
- [ ] 输入输出重定向

## 组合应用掌握程度检查
- [ ] 管道连接多个命令
- [ ] 复杂数据处理流程
- [ ] 性能优化和调试
- [ ] 错误处理和边界情况
- [ ] 实际项目应用

## 实战项目建议
1. 日志分析系统
2. 数据清洗管道
3. 配置文件管理工具
4. 报表生成系统
5. 监控告警脚本

## 进阶学习方向
- Perl 文本处理
- Python 数据处理 (pandas)
- jq (JSON 处理)
- xmlstarlet (XML 处理)
- 数据库查询优化
EOF

# Closing message and study suggestions, emitted in one heredoc.
cat <<'EOT'
学习检查清单已生成: learning_checklist.md

下一步学习建议:
1. 完成检查清单中的所有项目
2. 在实际工作中应用所学知识
3. 阅读官方文档和man页面
4. 参与开源项目贡献
5. 学习相关工具如 jq, xmllint 等
EOT

7.3 创建实用脚本库

创建实用脚本库:utility_scripts.sh

bash 复制代码
#!/bin/bash

# Utility script library: writes four standalone helper scripts
# (log analysis, data cleaning, config management, system monitoring)
# into the current directory and marks them executable.

echo "=== 创建文本处理实用脚本库 ==="

# 1. Log-analysis helper. The quoted 'EOF' keeps the heredoc literal, so
# every $-expansion below belongs to the generated script, not this one.
cat > analyze_logs.sh << 'EOF'
#!/bin/bash
# 日志分析脚本

LOG_FILE=${1:-access.log}

echo "分析日志文件: $LOG_FILE"

# 基本统计
echo "=== 基本统计 ==="
echo "总行数: $(wc -l < "$LOG_FILE")"
echo "独立IP数: $(awk '{print $1}' "$LOG_FILE" | sort -u | wc -l)"
echo "请求方法分布:"
awk -F'"' '{print $2}' "$LOG_FILE" | awk '{print $1}' | sort | uniq -c | sort -rn

# 状态码分析
echo -e "\n=== 状态码分析 ==="
awk '{print $9}' "$LOG_FILE" | sort | uniq -c | sort -rn

# 热门页面
echo -e "\n=== 热门页面 ==="
awk -F'"' '{print $2}' "$LOG_FILE" | awk '{print $2}' | sort | uniq -c | sort -rn | head -10

# 错误分析
echo -e "\n=== 错误分析 ==="
grep -E ' (4[0-9]{2}|5[0-9]{2}) ' "$LOG_FILE" | head -10
EOF

chmod +x analyze_logs.sh

# 2. Data-cleaning helper: strips blank lines and surrounding whitespace,
# normalizes field delimiters to commas, and replaces empty fields with
# NULL. Heredoc content is literal (quoted 'EOF').
cat > clean_data.sh << 'EOF'
#!/bin/bash
# 数据清洗脚本

INPUT_FILE=$1
OUTPUT_FILE=${2:-cleaned_data.csv}

if [[ -z "$INPUT_FILE" ]]; then
    echo "用法: $0 <输入文件> [输出文件]"
    exit 1
fi

echo "清洗数据文件: $INPUT_FILE -> $OUTPUT_FILE"

# 执行数据清洗
sed '
# 删除空行
/^$/d
# 删除行首行尾空格
s/^[[:space:]]*//
s/[[:space:]]*$//
# 标准化分隔符
s/[,;|][[:space:]]*/,/g
' "$INPUT_FILE" | \
awk -F, '
BEGIN {OFS=","}
NR == 1 {
    # 处理标题行
    print $0
    next
}
{
    # 数据验证和清理
    for(i=1; i<=NF; i++) {
        # 清理字段空格
        gsub(/^[[:space:]]+|[[:space:]]+$/, "", $i)
        # 处理空值
        if($i == "") $i = "NULL"
    }
    print $0
}' > "$OUTPUT_FILE"

echo "数据清洗完成: $OUTPUT_FILE"
echo "原始行数: $(wc -l < "$INPUT_FILE")"
echo "清洗后行数: $(wc -l < "$OUTPUT_FILE")"
EOF

chmod +x clean_data.sh

# 3. Config-management helper: a small key=value editor supporting
# list / get / set / delete. Fix vs. original: set_config used "/" as
# the sed delimiter, so setting any value containing "/" (e.g. a file
# path) broke the command; the generated script now uses "|".
cat > manage_config.sh << 'EOF'
#!/bin/bash
# 配置文件管理脚本

CONFIG_FILE=${1:-config.txt}
ACTION=$2
KEY=$3
VALUE=$4

usage() {
    echo "用法: $0 <配置文件> <动作> [键] [值]"
    echo "动作:"
    echo "  list       - 列出所有配置"
    echo "  get <key>  - 获取配置值"
    echo "  set <key> <value> - 设置配置值"
    echo "  delete <key> - 删除配置项"
}

list_config() {
    echo "=== 配置列表 ==="
    grep -v '^#' "$CONFIG_FILE" | grep '=' | while IFS= read -r line; do
        key=$(echo "$line" | cut -d'=' -f1)
        value=$(echo "$line" | cut -d'=' -f2-)
        printf "%-25s: %s\n" "$key" "$value"
    done
}

get_config() {
    local key=$1
    grep "^$key=" "$CONFIG_FILE" | cut -d'=' -f2-
}

set_config() {
    local key=$1
    local value=$2
    
    if grep -q "^$key=" "$CONFIG_FILE"; then
        # Update in place. "|" as the sed delimiter lets values contain
        # "/" (e.g. paths); values with "|" or "&" would still need
        # escaping before use.
        sed -i "s|^$key=.*|$key=$value|" "$CONFIG_FILE"
        echo "更新配置: $key=$value"
    else
        # Append a new entry.
        echo "$key=$value" >> "$CONFIG_FILE"
        echo "添加配置: $key=$value"
    fi
}

delete_config() {
    local key=$1
    sed -i "/^$key=/d" "$CONFIG_FILE"
    echo "删除配置: $key"
}

case "$ACTION" in
    "list")
        list_config
        ;;
    "get")
        if [[ -z "$KEY" ]]; then
            echo "错误: 需要指定键名"
            usage
            exit 1
        fi
        get_config "$KEY"
        ;;
    "set")
        if [[ -z "$KEY" || -z "$VALUE" ]]; then
            echo "错误: 需要指定键名和值"
            usage
            exit 1
        fi
        set_config "$KEY" "$VALUE"
        ;;
    "delete")
        if [[ -z "$KEY" ]]; then
            echo "错误: 需要指定键名"
            usage
            exit 1
        fi
        delete_config "$KEY"
        ;;
    *)
        usage
        exit 1
        ;;
esac
EOF

chmod +x manage_config.sh

# 4. System-monitoring helper: reports CPU/memory/disk/load and appends
# threshold alerts to a log file. NOTE(review): the generated script
# depends on `top`, `free`, `bc` and `nproc` being installed, writes to
# /var/log/system_monitor.log (normally needs root), and parses `top`
# output whose format varies between procps versions — verify on the
# target system before relying on it.
cat > system_monitor.sh << 'EOF'
#!/bin/bash
# 系统监控脚本

LOG_FILE="/var/log/system_monitor.log"
ALERT_THRESHOLD=80

# 日志函数
log_message() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" >> "$LOG_FILE"
}

# 检查CPU使用率
check_cpu() {
    local cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | cut -d'%' -f1)
    echo "CPU使用率: ${cpu_usage}%"
    
    if (( $(echo "$cpu_usage > $ALERT_THRESHOLD" | bc -l) )); then
        log_message "警告: CPU使用率过高 - ${cpu_usage}%"
        return 1
    fi
    return 0
}

# 检查内存使用率
check_memory() {
    local mem_info=$(free | grep Mem)
    local total_mem=$(echo "$mem_info" | awk '{print $2}')
    local used_mem=$(echo "$mem_info" | awk '{print $3}')
    local mem_usage=$(echo "scale=2; $used_mem * 100 / $total_mem" | bc)
    
    echo "内存使用率: ${mem_usage}%"
    
    if (( $(echo "$mem_usage > $ALERT_THRESHOLD" | bc -l) )); then
        log_message "警告: 内存使用率过高 - ${mem_usage}%"
        return 1
    fi
    return 0
}

# 检查磁盘使用率
check_disk() {
    local disk_usage=$(df / | awk 'NR==2{print $5}' | cut -d'%' -f1)
    echo "磁盘使用率: ${disk_usage}%"
    
    if [ "$disk_usage" -gt "$ALERT_THRESHOLD" ]; then
        log_message "警告: 磁盘使用率过高 - ${disk_usage}%"
        return 1
    fi
    return 0
}

# 检查系统负载
check_load() {
    local load_avg=$(uptime | awk -F'load average:' '{print $2}' | awk -F, '{print $1}' | tr -d ' ')
    local cpu_cores=$(nproc)
    
    echo "系统负载: $load_avg (CPU核心: $cpu_cores)"
    
    if (( $(echo "$load_avg > $cpu_cores" | bc -l) )); then
        log_message "警告: 系统负载过高 - $load_avg"
        return 1
    fi
    return 0
}

# 生成报告
generate_report() {
    echo "=== 系统监控报告 ==="
    echo "生成时间: $(date)"
    echo
    
    check_cpu
    check_memory
    check_disk
    check_load
    
    echo
    echo "最近告警:"
    tail -5 "$LOG_FILE" 2>/dev/null || echo "无告警记录"
}

# 主函数
main() {
    case "${1:-report}" in
        "report")
            generate_report
            ;;
        "cpu")
            check_cpu
            ;;
        "memory")
            check_memory
            ;;
        "disk")
            check_disk
            ;;
        "load")
            check_load
            ;;
        "log")
            tail -20 "$LOG_FILE" 2>/dev/null || echo "日志文件不存在"
            ;;
        *)
            echo "用法: $0 {report|cpu|memory|disk|load|log}"
            exit 1
            ;;
    esac
}

main "$@"
EOF

chmod +x system_monitor.sh

# Summary of the helper scripts generated above.
echo "实用脚本库创建完成:"
echo "  analyze_logs.sh   - 日志分析脚本"
echo "  clean_data.sh     - 数据清洗脚本"
echo "  manage_config.sh  - 配置管理脚本"
echo "  system_monitor.sh - 系统监控脚本"

echo -e "\n=== 文本处理三剑客教程完成 ==="
echo "现在您已经掌握了 grep, sed, awk 的核心用法和实战技巧!"
echo "建议在实际工作中不断练习和应用这些工具。"

通过本教程,您已经系统学习了 Linux 文本处理三剑客的完整知识体系。从基础用法到高级技巧,从单一工具使用到组合应用,您现在应该能够:

  1. 使用 grep 进行高效的文本搜索和过滤
  2. 使用 sed 进行流编辑和批量处理
  3. 使用 awk 进行复杂的数据处理和报表生成
  4. 将三个工具组合使用解决复杂问题
  5. 在实际项目中应用这些技能

继续练习和探索,这些工具将成为您日常工作中不可或缺的利器!

相关推荐
RisunJan1 小时前
Linux命令-exportfs命令(管理NFS服务器上共享文件系统)
linux·运维·服务器
动感小麦兜2 小时前
服务器搭建
linux·服务器·python
LCG元2 小时前
效率翻倍!10个让你爱不释手的 Linux 命令行"神器"
linux
BS_Li2 小时前
【Linux系统编程】Ext系列文件系统
android·linux·ext系列文件系统
q***01772 小时前
Linux 下安装 Golang环境
linux·运维·golang
企鹅侠客3 小时前
Linux性能调优使用strace来分析文件系统的性能问题
linux·运维·服务器
奔跑吧邓邓子4 小时前
CentOS 7性能飞升秘籍:实战系统优化与调优
linux·运维·centos·实战·系统优化·性能调优
qinyia4 小时前
WisdomSSH如何高效检查服务器状态并生成运维报告
linux·运维·服务器·数据库·人工智能·后端·ssh
laocooon5238578866 小时前
实现了一个新闻数据采集与分析系统python
linux·服务器·windows