Linux 文本处理三剑客(grep, sed, awk)核心用法与实战

Linux 文本处理三剑客(grep, sed, awk)核心用法与实战

1. 文本处理三剑客概述

1.1 工具定位与适用场景

```mermaid
graph TB
    A[文本处理需求] --> B{处理类型}
    B -->|模式搜索| C[grep]
    B -->|流编辑| D[sed]
    B -->|报表生成| E[awk]
    C --> F[快速过滤]
    D --> G[批量替换]
    E --> H[数据提取]
    F --> I[结果输出]
    G --> I
    H --> I
    style A fill:#1e3a5f,color:#ffffff
    style B fill:#4a1e5f,color:#ffffff
    style C fill:#1e5f3a,color:#ffffff
    style D fill:#1e5f3a,color:#ffffff
    style E fill:#1e5f3a,color:#ffffff
    style F fill:#5f3a1e,color:#ffffff
    style G fill:#5f3a1e,color:#ffffff
    style H fill:#5f3a1e,color:#ffffff
    style I fill:#1e3a5f,color:#ffffff
```

1.2 创建测试数据文件

创建基础测试数据文件:create_test_data.sh

bash 复制代码
#!/bin/bash

# create_test_data.sh — generate the sample data files used by every other
# tutorial script in this article. Re-running is safe: each heredoc
# overwrites its target file.
set -e

echo "=== 创建文本处理测试数据 ==="

# users.txt — one record per line: id,name,age,title,city,salary (UTF-8)
cat > users.txt << 'EOF'
1,张三,25,工程师,北京,50000
2,李四,30,经理,上海,80000
3,王五,28,设计师,广州,60000
4,赵六,35,总监,深圳,120000
5,钱七,22,实习生,杭州,30000
6,孙八,40,顾问,成都,90000
7,周九,26,开发,西安,55000
8,吴十,33,产品,武汉,70000
9,郑十一,29,测试,南京,52000
10,王十二,31,运维,长沙,58000
EOF

# access.log — web-server access log in combined log format
cat > access.log << 'EOF'
192.168.1.100 - - [10/Oct/2023:10:30:01 +0800] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0"
192.168.1.101 - - [10/Oct/2023:10:30:02 +0800] "POST /api/login HTTP/1.1" 401 567 "https://example.com" "Mozilla/5.0"
192.168.1.102 - - [10/Oct/2023:10:30:03 +0800] "GET /products.html HTTP/1.1" 200 7890 "https://example.com" "Chrome/91.0"
192.168.1.100 - - [10/Oct/2023:10:30:04 +0800] "GET /images/logo.png HTTP/1.1" 304 0 "https://example.com" "Mozilla/5.0"
192.168.1.103 - - [10/Oct/2023:10:30:05 +0800] "PUT /api/users/1 HTTP/1.1" 403 234 "https://example.com" "Firefox/89.0"
192.168.1.104 - - [10/Oct/2023:10:30:06 +0800] "GET /contact.html HTTP/1.1" 200 3456 "https://example.com" "Safari/14.0"
192.168.1.101 - - [10/Oct/2023:10:30:07 +0800] "DELETE /api/products/5 HTTP/1.1" 204 0 "https://example.com" "Mozilla/5.0"
192.168.1.105 - - [10/Oct/2023:10:30:08 +0800] "GET /about.html HTTP/1.1" 500 123 "https://example.com" "Chrome/92.0"
EOF

# config.txt — key=value application configuration with '#' comment lines
cat > config.txt << 'EOF'
# 数据库配置
database.host=localhost
database.port=3306
database.name=myapp
database.user=admin
database.password=secret123

# 应用配置
app.name=MyApplication
app.version=1.0.0
app.port=8080
app.debug=true

# 日志配置
log.level=INFO
log.file=/var/log/app.log
log.max_size=100MB

# 功能开关
feature.auth=true
feature.cache=false
feature.export=true
EOF

# multilang.txt — mixed English / Chinese / French sentences
cat > multilang.txt << 'EOF'
Hello world! 你好世界! Bonjour le monde!
This is a test. 这是一个测试。 C'est un test.
Programming is fun. 编程很有趣。 La programmation est amusante.
Linux is powerful. Linux很强大。 Linux est puissant.
Open source software. 开源软件。 Logiciel open source.
EOF

# sales.csv — CSV with header row: Date,Product,Category,Region,Sales,Quantity
cat > sales.csv << 'EOF'
Date,Product,Category,Region,Sales,Quantity
2023-10-01,Laptop,Electronics,North,50000,10
2023-10-01,Phone,Electronics,South,30000,15
2023-10-02,Desk,Furniture,East,15000,5
2023-10-02,Chair,Furniture,West,8000,8
2023-10-03,Monitor,Electronics,North,20000,4
2023-10-03,Keyboard,Electronics,South,5000,10
2023-10-04,Table,Furniture,East,12000,3
2023-10-04,Books,Education,West,3000,30
EOF

echo "测试数据文件创建完成:"
echo "  users.txt      - 用户数据"
echo "  access.log     - 访问日志"
echo "  config.txt     - 配置文件"
echo "  multilang.txt  - 多语言文本"
echo "  sales.csv      - 销售数据"

2. grep - 文本搜索专家

2.1 grep 基础用法

创建 grep 基础教程:grep_basics.sh

bash 复制代码
#!/bin/bash

# grep_basics.sh — walkthrough of the most common grep options.
# NOTE(review): expects users.txt / access.log / config.txt / multilang.txt
# in the current directory — run create_test_data.sh first.

echo "=== grep 基础用法 ==="

# 1. Basic fixed-pattern search
echo -e "\n1. 基础搜索:"
echo "搜索包含'工程师'的行:"
grep '工程师' users.txt

# 2. Case-insensitive search (-i)
echo -e "\n2. 忽略大小写搜索:"
echo "搜索包含'get'的行(忽略大小写):"
grep -i 'get' access.log

# 3. Prefix each match with its line number (-n)
echo -e "\n3. 显示行号:"
echo "搜索'北京'并显示行号:"
grep -n '北京' users.txt

# 4. Inverted match: print lines that do NOT contain the pattern (-v)
echo -e "\n4. 反向搜索(不包含指定内容):"
echo "搜索不包含'GET'的行:"
grep -v 'GET' access.log

# 5. Count matching lines (-c)
# NOTE(review): '200' matches anywhere on the line (e.g. inside a byte
# count), not just the HTTP status field; acceptable for this sample data.
echo -e "\n5. 统计匹配数量:"
echo "统计状态码为200的行数:"
grep -c '200' access.log

# 6. Print only the names of files containing a match (-l)
echo -e "\n6. 显示匹配文件名:"
echo "在多个文件中搜索:"
grep -l 'admin' *.txt

# 7. Recursive search through a directory tree (-r)
echo -e "\n7. 递归目录搜索:"
echo "在当前目录递归搜索'localhost':"
grep -r 'localhost' . 2>/dev/null || echo "搜索完成"

# 8. Whole-word match only (-w)
echo -e "\n8. 完整单词匹配:"
echo "搜索完整单词'test':"
grep -w 'test' multilang.txt

# 9. Show N lines of context around each match (-C)
echo -e "\n9. 显示上下文:"
echo "搜索'500'并显示前后2行:"
grep -C 2 '500' access.log

# 10. Print only the matching part of each line (-o)
echo -e "\n10. 只显示匹配部分:"
echo "只显示匹配的IP地址:"
grep -o '[0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+' access.log

# 11. Extended regular expressions (-E): alternation without backslashes
echo -e "\n11. 扩展正则表达式:"
echo "使用扩展正则搜索状态码:"
grep -E '(200|404|500)' access.log

# 12. Fixed-string search (-F): no regex interpretation, '.' is literal
echo -e "\n12. 固定字符串搜索:"
echo "搜索固定字符串'1.0.0':"
grep -F '1.0.0' config.txt

2.2 grep 高级用法与正则表达式

创建 grep 高级教程:grep_advanced.sh

bash 复制代码
#!/bin/bash

# grep_advanced.sh — grep with regular-expression features, from basic
# character classes up to PCRE lookarounds. Creates its own test file and
# removes it on completion.

echo "=== grep 高级用法 ==="

# Self-contained test data: one labelled sample pattern per line
cat > regex_test.txt << 'EOF'
email1: john.doe@example.com
email2: jane_smith123@company.co.uk
phone1: +1-555-123-4567
phone2: (555) 987-6543
date1: 2023-10-15
date2: 10/15/2023
ip1: 192.168.1.1
ip2: 10.0.0.255
url1: https://www.example.com/path
url2: http://localhost:8080/api/v1/users
html: <div class="container">Content</div>
json: {"name": "John", "age": 30, "active": true}
credit_card: 4111-1111-1111-1111
ssn: 123-45-6789
EOF

# 1. Everyday regular-expression patterns
echo -e "\n1. 基础正则表达式:"

echo "匹配邮箱地址:"
grep -E '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' regex_test.txt

echo -e "\n匹配IP地址:"
grep -E '([0-9]{1,3}\.){3}[0-9]{1,3}' regex_test.txt

echo -e "\n匹配日期(YYYY-MM-DD):"
grep -E '[0-9]{4}-[0-9]{2}-[0-9]{2}' regex_test.txt

# 2. Character classes
echo -e "\n2. 字符类:"

echo "匹配数字:"
grep '[0-9]' regex_test.txt | head -3

echo -e "\n匹配小写字母:"
grep '[a-z]' regex_test.txt | head -3

echo -e "\n匹配单词字符:"
grep '[[:alnum:]]' regex_test.txt | head -3

# 3. Quantifiers ({n}, +, *)
echo -e "\n3. 量词:"

echo "匹配3位数字:"
grep -E '[0-9]{3}' regex_test.txt

echo -e "\n匹配1个或多个数字:"
grep -E '[0-9]+' regex_test.txt | head -3

echo -e "\n匹配0个或多个字母:"
grep -E '[a-z]*' regex_test.txt | head -3

# 4. Grouping and backreferences
# NOTE(review): \1 backreferences with -E, plus \b and \w, are GNU grep
# extensions — POSIX ERE has no backreferences; verify on BSD/macOS grep.
echo -e "\n4. 分组和引用:"

echo "匹配重复单词:"
echo "hello hello world test test example" | grep -E '(\b\w+\b) \1'

# 5. Anchors (^ start of line, $ end of line, \b word boundary)
echo -e "\n5. 锚点:"

echo "匹配以'email'开头的行:"
grep '^email' regex_test.txt

echo -e "\n匹配以'.com'结尾的行:"
grep '\.com$' regex_test.txt

echo -e "\n匹配完整单词'json':"
grep '\bjson\b' regex_test.txt

# 6. Alternation
echo -e "\n6. 交替匹配:"

echo "匹配'http'或'https':"
grep -E 'https?' regex_test.txt

echo -e "\n匹配多种日期格式:"
grep -E '([0-9]{4}-[0-9]{2}-[0-9]{2}|[0-9]{2}/[0-9]{2}/[0-9]{4})' regex_test.txt

# 7. Lookaround assertions — require grep built with PCRE (-P);
# the fallback message covers greps without -P support.
echo -e "\n7. 环视断言(需要PCRE):"

echo "匹配后面跟着'@'的数字:"
grep -P '[0-9]+(?=@)' regex_test.txt 2>/dev/null || echo "PCRE不支持,使用其他方法"

# 8. More involved extraction patterns with -o
echo -e "\n8. 复杂模式匹配:"

echo "提取URL中的域名:"
grep -oE 'https?://[^/]+' regex_test.txt

echo -e "\n提取JSON字段值:"
grep -oE '"name": "[^"]+"' regex_test.txt

# 9. Combining multiple patterns (-e) and pipelines
echo -e "\n9. grep 组合技巧:"

echo "搜索多个模式:"
grep -e 'email' -e 'phone' regex_test.txt

echo -e "\n排除多个模式:"
grep -v -e 'email' -e 'phone' regex_test.txt

echo -e "\n管道组合使用:"
grep 'example' regex_test.txt | grep -v 'email'

# 10. Performance: prefer -F / simple classes over complex regexes
echo -e "\n10. 性能优化技巧:"

echo "使用固定字符串加速:"
time grep -F 'example.com' regex_test.txt

echo -e "\n使用简单字符类:"
time grep '[0-9]' regex_test.txt

# Remove the temporary test file
rm -f regex_test.txt

echo -e "\n=== grep 高级用法演示完成 ==="

2.3 grep 实战案例

创建 grep 实战脚本:grep_practical.sh

bash 复制代码
#!/bin/bash

# grep_practical.sh — practical grep case studies: log analysis, config
# inspection, data extraction, code scanning and light system admin.
# NOTE(review): expects users.txt / access.log / config.txt from
# create_test_data.sh in the current directory.

echo "=== grep 实战案例 ==="

# Case 1: log analysis
echo -e "\n案例1: 日志分析"

echo "1. 查找错误请求:"
grep -E '(404|500|403)' access.log

echo -e "\n2. 统计各状态码出现次数:"
grep -oE 'HTTP/1.1" [0-9]{3}' access.log | awk '{print $2}' | sort | uniq -c | sort -rn

echo -e "\n3. 查找特定IP的访问记录:"
# Escape the dots so '.' cannot match arbitrary characters
grep '192\.168\.1\.100' access.log

echo -e "\n4. 搜索特定时间段的日志:"
grep '10/Oct/2023:10:30:0[2-5]' access.log

# Case 2: configuration file handling
echo -e "\n案例2: 配置文件处理"

echo "1. 提取所有配置项(排除注释):"
grep -v '^#' config.txt | grep '='

echo -e "\n2. 查找数据库相关配置:"
grep -i 'database' config.txt

echo -e "\n3. 提取配置值:"
grep 'app.port' config.txt | grep -oE '[0-9]+'

echo -e "\n4. 查找启用的功能:"
grep 'feature.' config.txt | grep 'true'

# Case 3: data extraction
echo -e "\n案例3: 数据提取"

echo "1. 提取所有用户名:"
# Field 2 of users.txt is the name; extracting by field is exact, whereas
# the grep-only approach also picked up job titles.
cut -d, -f2 users.txt | head -5

echo -e "\n2. 查找高薪员工(薪资>70000):"
grep -E ',[0-9]{5,6}$' users.txt | awk -F, '$6 > 70000'

echo -e "\n3. 统计各城市员工数量:"
# Field 5 is the city; count occurrences per city
cut -d, -f5 users.txt | sort | uniq -c

# Case 4: code analysis (simulated)
echo -e "\n案例4: 代码分析(模拟)"

# Throwaway Python file used as the grep target
cat > sample_code.py << 'EOF'
#!/usr/bin/env python3
"""
示例代码文件
"""

import sys
import os
from typing import List

def calculate_sum(numbers: List[int]) -> int:
    """计算数字列表的总和"""
    total = 0
    for num in numbers:
        total += num
    return total

def read_file(filename: str) -> str:
    """读取文件内容"""
    try:
        with open(filename, 'r') as f:
            return f.read()
    except FileNotFoundError:
        print(f"错误: 文件 {filename} 不存在")
        return ""

class DataProcessor:
    """数据处理类"""
    
    def __init__(self, data: List[str]):
        self.data = data
    
    def process(self) -> List[str]:
        """处理数据"""
        result = []
        for item in self.data:
            # TODO: 实现处理逻辑
            processed = item.strip().upper()
            result.append(processed)
        return result

if __name__ == "__main__":
    # FIXME: 需要添加命令行参数解析
    numbers = [1, 2, 3, 4, 5]
    print(f"总和: {calculate_sum(numbers)}")
EOF

echo "1. 查找函数定义:"
grep -E '^def ' sample_code.py

echo -e "\n2. 查找类定义:"
grep -E '^class ' sample_code.py

echo -e "\n3. 查找TODO和FIXME注释:"
grep -E '(TODO|FIXME)' sample_code.py

echo -e "\n4. 查找导入语句:"
grep -E '^import|^from' sample_code.py

# Case 5: system administration
echo -e "\n案例5: 系统管理"

echo "1. 查找进程:"
# '[b]ash' matches 'bash' but not the grep command itself in the ps output
ps aux | grep '[b]ash' | head -5

echo -e "\n2. 检查服务状态:"
systemctl list-units | grep 'running' | head -5

echo -e "\n3. 查找大文件:"
find /tmp -type f -size +1M 2>/dev/null | head -5

# Case 6: network analysis
echo -e "\n案例6: 网络分析"

echo "1. 分析网络连接:"
netstat -tulpn 2>/dev/null | grep 'LISTEN' | head -5

# Remove the temporary code file
rm -f sample_code.py

echo -e "\n=== grep 实战案例演示完成 ==="

3. sed - 流编辑器大师

3.1 sed 基础用法

创建 sed 基础教程:sed_basics.sh

bash 复制代码
#!/bin/bash

# sed_basics.sh — the essential sed commands (s, d, p, i, a, c, w, r, y)
# demonstrated on a small self-created file; cleans up after itself.
# NOTE(review): the one-line 'i\text' / 'a\text' / 'c\text' forms and
# '-i.bak' as a single argument are GNU sed syntax; BSD/macOS sed differs.

echo "=== sed 基础用法 ==="

# Self-contained test file
cat > sed_test.txt << 'EOF'
Hello World
This is a test file.
Welcome to Linux sed tutorial.
Python programming is fun.
Java is also popular.
We are learning text processing.
EOF

# 1. Substitute first occurrence per line
echo -e "\n1. 基本替换:"
echo "将'is'替换为'IS':"
sed 's/is/IS/' sed_test.txt

# 2. Global substitution (g flag: every occurrence on each line)
echo -e "\n2. 全局替换:"
echo "全局将'is'替换为'IS':"
sed 's/is/IS/g' sed_test.txt

# 3. Substitution restricted to one line by address
echo -e "\n3. 指定行替换:"
echo "只在第2行替换:'is' -> 'IS':"
sed '2s/is/IS/g' sed_test.txt

# 4. Substitution over a line range
echo -e "\n4. 行范围替换:"
echo "在第2-4行替换:'is' -> 'IS':"
sed '2,4s/is/IS/g' sed_test.txt

# 5. Delete lines matching a pattern (d)
echo -e "\n5. 删除行:"
echo "删除包含'test'的行:"
sed '/test/d' sed_test.txt

# 6. Print only selected lines (-n suppresses default output, p prints)
echo -e "\n6. 打印特定行:"
echo "打印第3行:"
sed -n '3p' sed_test.txt

# 7. Several commands in one invocation (-e)
echo -e "\n7. 多命令执行:"
echo "替换并删除:"
sed -e 's/is/IS/g' -e '/Java/d' sed_test.txt

# 8. In-place editing with a .bak backup of the original
echo -e "\n8. 原位编辑(创建备份):"
cp sed_test.txt sed_test_backup.txt
sed -i.bak 's/Linux/UNIX/g' sed_test_backup.txt
echo "原文件:"
cat sed_test.txt | head -1
echo "修改后:"
cat sed_test_backup.txt | head -1

# 9. Insert a line before an address (i)
echo -e "\n9. 插入行:"
echo "在第2行前插入新行:"
sed '2i\---插入的行---' sed_test.txt

# 10. Append a line after an address (a)
echo -e "\n10. 追加行:"
echo "在第2行后追加新行:"
sed '2a\---追加的行---' sed_test.txt

# 11. Replace (change) a whole line (c)
echo -e "\n11. 修改行:"
echo "修改第3行:"
sed '3c\---修改的行内容---' sed_test.txt

# 12. Write matching lines to another file (w)
echo -e "\n12. 写入文件:"
echo "将包含'Python'的行写入新文件:"
sed -n '/Python/w python_lines.txt' sed_test.txt
cat python_lines.txt

# 13. Read another file's content after an address (r)
echo -e "\n13. 读取文件:"
echo "在第3行后读取其他文件内容:"
sed '3r sed_test_backup.txt' sed_test.txt | head -10

# 14. Transliterate characters (y), like tr
echo -e "\n14. 转换字符:"
echo "转换大小写:"
echo "hello world" | sed 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'

# Remove all temporary files, including the -i.bak backup
rm -f sed_test.txt sed_test_backup.txt sed_test_backup.txt.bak python_lines.txt

echo -e "\n=== sed 基础用法演示完成 ==="

3.2 sed 高级用法

创建 sed 高级教程:sed_advanced.sh

bash 复制代码
#!/bin/bash

# sed_advanced.sh — hold-space, branching, multi-line processing and sed
# script files, demonstrated on a record-oriented contact list.
# NOTE(review): -E (extended regex) and some label/branch behavior shown
# here assume GNU sed; verify on BSD sed before reuse.

echo "=== sed 高级用法 ==="

# Self-contained test data: blank-line-separated contact records
cat > advanced_sed_test.txt << 'EOF'
Name: John Doe
Age: 30
Email: john@example.com
Phone: 123-456-7890
Address: 123 Main St, City, State 12345

Name: Jane Smith
Age: 25
Email: jane.smith@company.com
Phone: (555) 987-6543
Address: 456 Oak Ave, Another City, State 67890

Name: Bob Johnson
Age: 35
Email: bob_j@test.org
Phone: 111.222.3333
Address: 789 Pine Rd, Different City, State 54321
EOF

# 1. Pattern space vs hold space (h copies, x exchanges)
echo -e "\n1. 模式空间和保持空间:"

echo "交换模式空间和保持空间:"
sed -n '1h; 2x; p' advanced_sed_test.txt

# 2. Address negation (! runs the command on NON-matching lines)
echo -e "\n2. 分支和跳转:"

echo "跳过包含'Email'的行:"
sed -n '/Email/!p' advanced_sed_test.txt

# 3. Multi-line processing: classic idiom — label 'a', append next line (N),
# branch back until last line ($!ba), then replace all newlines
echo -e "\n3. 多行处理:"

echo "将多行合并为一行:"
sed ':a; N; $!ba; s/\n/ /g' advanced_sed_test.txt

# 4. Substitution with groups and backreferences
echo -e "\n4. 高级替换:"

echo "使用分组和反向引用:"
echo "123-456-7890" | sed -E 's/([0-9]{3})-([0-9]{3})-([0-9]{4})/(\1) \2-\3/'

echo -e "\n条件替换:"
sed '/Phone/ s/[0-9]/X/g' advanced_sed_test.txt

# 5. Numbering: '=' prints line numbers, second sed joins number + line
echo -e "\n5. 循环和条件:"

echo "编号非空行:"
sed '/^$/!{=;d}' advanced_sed_test.txt | sed 'N;s/\n/ /'

# 6. Common file clean-up one-liners
echo -e "\n6. 文件处理技巧:"

echo "删除文件中的空白行:"
sed '/^$/d' advanced_sed_test.txt

echo -e "\n删除行尾空格:"
sed 's/[[:space:]]*$//' advanced_sed_test.txt

# 7. Extract-only patterns: -n plus s///p prints just the captured text
echo -e "\n7. 复杂模式匹配:"

echo "提取邮箱地址:"
sed -nE 's/.*Email: ([^ ]+).*/\1/p' advanced_sed_test.txt

echo -e "\n提取电话号码:"
sed -nE 's/.*Phone: ([0-9().-]+).*/\1/p' advanced_sed_test.txt

# 8. Address ranges: numeric and pattern-to-end-of-file
echo -e "\n8. 范围操作:"

echo "处理特定记录(第1-6行):"
sed -n '1,6p' advanced_sed_test.txt

echo -e "\n从模式开始到文件结束:"
sed -n '/Jane Smith/,$p' advanced_sed_test.txt

# 9. Labels and conditional branch: 't loop' jumps back while a
# substitution succeeded, replacing every 'a' one at a time
echo -e "\n9. 标签和跳转:"

echo "使用标签实现循环:"
echo "aaa bbb ccc" | sed ':loop s/a/X/; t loop'

# 10. Hold-space copy (h) and retrieve (g)
echo -e "\n10. 保持空间操作:"

echo "复制模式空间到保持空间:"
sed -n '1h; 1p; 2g; 2p' advanced_sed_test.txt

# 11. Running sed commands from a script file (-f)
echo -e "\n11. 流控制脚本:"

# Script file pairing each Name line with its Email line
cat > complex_script.sed << 'EOF'
# 复杂sed脚本示例
/Name:/ {
    h  # 复制到保持空间
    d  # 删除模式空间
}
/Email:/ {
    G  # 追加保持空间到模式空间
    s/\n/ - /  # 替换换行符
    p  # 打印
}
EOF

echo "执行复杂sed脚本:"
sed -n -f complex_script.sed advanced_sed_test.txt

# 12. Realistic transformations combining the above
echo -e "\n12. 实际应用案例:"

echo "格式化电话号码:"
sed -E 's/Phone: ([0-9]{3})[-.]([0-9]{3})[-.]([0-9]{4})/Phone: (\1) \2-\3/g' advanced_sed_test.txt

echo -e "\n提取姓名和邮箱:"
sed -nE '/Name:/{s/Name: //;h}; /Email:/{s/Email: //;G;s/\n/ : /p}' advanced_sed_test.txt

# Remove temporary files
rm -f advanced_sed_test.txt complex_script.sed

echo -e "\n=== sed 高级用法演示完成 ==="

3.3 sed 实战案例

创建 sed 实战脚本:sed_practical.sh

bash 复制代码
#!/bin/bash

# sed_practical.sh — practical sed case studies: logs, configs, CSV
# cleaning, code refactoring, formatting and batch file edits.
# NOTE(review): expects access.log / config.txt / sales.csv / users.txt /
# multilang.txt from create_test_data.sh in the current directory.

echo "=== sed 实战案例 ==="

# Case 1: log processing
echo -e "\n案例1: 日志处理"

echo "1. 提取特定时间段的日志:"
sed -n '/10\/Oct\/2023:10:30:0[2-5]/p' access.log

echo -e "\n2. 删除调试信息:"
sed '/DEBUG/d' access.log

echo -e "\n3. 替换敏感信息:"
# NOTE(review): the dots in '192.168.1.' are unescaped, so each matches
# any character; harmless on this data but escape them for real logs.
sed 's/192.168.1.[0-9]*/XXX.XXX.XXX.XXX/g' access.log

# Case 2: configuration file edits
echo -e "\n案例2: 配置文件修改"

echo "1. 修改配置项:"
sed 's/app.port=8080/app.port=9090/' config.txt

echo -e "\n2. 注释掉特定配置:"
sed '/feature.cache/s/^/# /' config.txt

echo -e "\n3. 取消注释:"
sed '/# database.host/s/^# //' config.txt

# Case 3: data cleaning
echo -e "\n案例3: 数据清洗"

echo "1. 标准化CSV格式:"
sed 's/, /,/g' sales.csv | sed 's/^[[:space:]]*//' | sed 's/[[:space:]]*$//'

echo -e "\n2. 删除空行:"
sed '/^$/d' sales.csv

echo -e "\n3. 转换日期格式:"
# '|' as the s-command delimiter avoids escaping the '/' in the output
sed 's|\([0-9]\{4\}\)-\([0-9]\{2\}\)-\([0-9]\{2\}\)|\2/\3/\1|g' sales.csv

# Case 4: code refactoring
echo -e "\n案例4: 代码重构"

# Throwaway Python file used as the refactoring target
cat > refactor_code.py << 'EOF'
def old_function_name():
    print("This is old function")
    
def another_old_function():
    print("Another old function")
    
# 调用旧函数
old_function_name()
another_old_function()
EOF

echo "1. 重命名函数:"
sed 's/old_function_name/new_function_name/g' refactor_code.py

echo -e "\n2. 添加日志:"
sed '/def /a\    print("Function called")' refactor_code.py

# Case 5: text formatting
echo -e "\n案例5: 文本格式化"

echo "1. 添加行号:"
# 'sed =' emits line numbers on their own lines; second sed joins pairs
sed = users.txt | sed 'N;s/\n/ /'

echo -e "\n2. 每N行添加分隔符:"
# NOTE(review): 'first~step' addressing is a GNU sed extension
sed '3~3a\---' users.txt

echo -e "\n3. 文本对齐:"
sed 's/^/    /' users.txt | head -3

# Case 6: batch file processing with in-place edits
echo -e "\n案例6: 批量文件处理"

# Generate several small files to edit
for i in {1..3}; do
    echo "File $i content" > "test_file_$i.txt"
    echo "version=1.0" >> "test_file_$i.txt"
done

echo "批量修改文件内容:"
for file in test_file_*.txt; do
    echo "处理文件: $file"
    sed -i 's/version=1.0/version=2.0/' "$file"
    cat "$file"
done

# Case 7: data extraction and transformation
echo -e "\n案例7: 数据提取和转换"

echo "1. 提取薪资大于60000的员工:"
sed -n '/,[0-9]\{5,\}$/p' users.txt | awk -F, '$6 > 60000'

echo -e "\n2. 生成SQL插入语句:"
# '1d' drops the header-like first row; six capture groups rebuild each
# record as an INSERT statement
sed '1d; s/\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\)/INSERT INTO users VALUES(\1, \"\2\", \3, \"\4\", \"\5\", \6);/' users.txt

# Case 8: more involved text transformation
echo -e "\n案例8: 复杂文本转换"

echo "转换多语言文本格式:"
sed -E 's/([^!])!([^!])/\1\n\2/g' multilang.txt

# Remove temporary files
rm -f refactor_code.py test_file_*.txt

echo -e "\n=== sed 实战案例演示完成 ==="

4. awk - 文本处理编程语言

4.1 awk 基础用法

创建 awk 基础教程:awk_basics.sh

bash 复制代码
#!/bin/bash

# awk_basics.sh — core awk concepts: fields, separators, patterns,
# built-in variables, BEGIN/END, arithmetic, control flow, arrays,
# string/math functions and printf formatting.
# NOTE(review): expects users.txt (id,name,age,title,city,salary) from
# create_test_data.sh in the current directory.

echo "=== awk 基础用法 ==="

# 1. Basic printing ($0 whole line, $N field N)
echo -e "\n1. 基本打印:"
echo "打印整个文件:"
awk '{print}' users.txt

echo -e "\n打印第一列:"
awk '{print $1}' users.txt

echo -e "\n打印多列:"
awk '{print $1, $3}' users.txt

# 2. Field separators (-F; a bracket expression allows several)
echo -e "\n2. 字段分隔符:"

echo "使用逗号分隔符:"
awk -F, '{print $2, $6}' users.txt

echo -e "\n使用多个分隔符:"
echo "apple,banana;cherry" | awk -F'[,;]' '{print $1, $2, $3}'

# 3. Pattern matching: /regex/ selects lines, expressions select fields
echo -e "\n3. 模式匹配:"

echo "匹配包含'北京'的行:"
awk '/北京/' users.txt

echo -e "\n匹配特定字段:"
awk -F, '$3 > 28' users.txt

# 4. Built-in variables: NR (record number), NF (field count), FILENAME
echo -e "\n4. 内置变量:"

echo "行号:"
awk '{print NR, $0}' users.txt | head -3

echo -e "\n字段数量:"
awk -F, '{print NF, $0}' users.txt | head -3

echo -e "\n文件名:"
awk 'END{print FILENAME}' users.txt

# 5. BEGIN runs before input, END after the last record
echo -e "\n5. BEGIN 和 END 块:"

echo "添加表头:"
awk -F, 'BEGIN {print "ID\t姓名\t年龄\t职位"} {print $1"\t"$2"\t"$3"\t"$4} END {print "=== 结束 ==="}' users.txt

# 6. User variables and per-record accumulation
echo -e "\n6. 变量和计算:"

echo "计算平均年龄:"
awk -F, '{sum += $3; count++} END {print "平均年龄:", sum/count}' users.txt

echo -e "\n薪资统计:"
awk -F, '{sum += $6; if($6 > max) max = $6} END {print "总薪资:", sum, "最高薪资:", max}' users.txt

# 7. if / else if / else
echo -e "\n7. 条件语句:"

echo "薪资分类:"
awk -F, '{
    if ($6 > 80000) 
        print $2, "高薪"
    else if ($6 > 50000) 
        print $2, "中薪"
    else 
        print $2, "低薪"
}' users.txt

# 8. for loop over field positions
echo -e "\n8. 循环:"

echo "遍历字段:"
awk -F, '{
    printf "行 %d: ", NR
    for(i=1; i<=NF; i++) 
        printf "[%s] ", $i
    print ""
}' users.txt | head -3

# 9. Associative arrays keyed by field value (classic group-by idiom)
echo -e "\n9. 数组:"

echo "按城市统计人数:"
awk -F, '{
    city[$5]++
} END {
    for(c in city) 
        print c, city[c]
}' users.txt

# 10. String functions: toupper, length
echo -e "\n10. 字符串函数:"

echo "字符串操作:"
awk -F, '{
    print "原始:", $2, "大写:", toupper($2), "长度:", length($2)
}' users.txt | head -3

# 11. Math functions: sqrt
echo -e "\n11. 数学函数:"

echo "数学运算:"
awk -F, '{
    print $2, "薪资:", $6, "平方根:", sqrt($6)
}' users.txt | head -3

# 12. printf-style column formatting
echo -e "\n12. 输出格式控制:"

echo "格式化输出:"
awk -F, 'BEGIN {printf "%-10s %-8s %-10s\n", "姓名", "年龄", "薪资"} 
{printf "%-10s %-8d %-10.2f\n", $2, $3, $6}' users.txt

echo -e "\n=== awk 基础用法演示完成 ==="

4.2 awk 高级用法

创建 awk 高级教程:awk_advanced.sh

bash 复制代码
#!/bin/bash

# awk_advanced.sh — advanced awk on an application-log sample: regex field
# separators, associative/multidimensional arrays, user functions, output
# redirection, getline and report generation.
# NOTE(review): several constructs here are gawk-only: true
# multidimensional arrays (stats[a][b], section 3) and the 3-argument
# match(s, r, arr) (sections 4 and 9). Plain POSIX awk / mawk will fail.

echo "=== awk 高级用法 ==="

# Self-contained test data: "date time LEVEL [Service] message k=v, ..."
cat > advanced_awk_test.txt << 'EOF'
2023-10-01 08:30:25 INFO [UserService] User login successful: user_id=123
2023-10-01 08:45:12 ERROR [PaymentService] Payment failed: amount=500.00, reason=insufficient_funds
2023-10-01 09:15:33 WARN [AuthService] Multiple failed login attempts: ip=192.168.1.100
2023-10-01 10:20:45 INFO [OrderService] New order created: order_id=456, amount=299.99
2023-10-01 11:05:17 ERROR [DatabaseService] Connection timeout: retry_count=3
2023-10-01 14:30:22 INFO [InventoryService] Stock updated: product_id=789, quantity=50
EOF

# 1. Regex field separators: '[][]' splits on literal brackets
echo -e "\n1. 复杂字段分割:"

echo "多字符分隔符:"
awk -F'[][]' '{print "服务:", $2, "消息:", $3}' advanced_awk_test.txt

echo -e "\n正则表达式分隔符:"
awk -F'[=,]' '{for(i=1;i<=NF;i++) if($i~/[a-z]_id/) print $i}' advanced_awk_test.txt

# 2. Associative arrays to count occurrences by key
echo -e "\n2. 关联数组:"

echo "统计日志级别:"
awk '{
    split($3, level, " ")
    levels[level[1]]++
} END {
    for(l in levels) 
        printf "%-6s: %d\n", l, levels[l]
}' advanced_awk_test.txt

# 3. Multidimensional arrays (gawk-only arr[a][b] syntax)
echo -e "\n3. 多维数组:"

echo "按服务和级别统计:"
awk '{
    split($0, parts, "[][]")
    service = parts[2]
    level = $3
    stats[service][level]++
} END {
    for(service in stats) {
        print "服务:", service
        for(level in stats[service]) {
            print "  ", level, ":", stats[service][level]
        }
    }
}' advanced_awk_test.txt

# 4. User-defined functions; RSTART/RLENGTH are set by match()
echo -e "\n4. 自定义函数:"

echo "使用自定义函数:"
awk '
function extract_number(str) {
    match(str, /[0-9]+(\.[0-9]+)?/)
    return substr(str, RSTART, RLENGTH)
}
{
    for(i=1; i<=NF; i++) {
        if($i ~ /amount=/) {
            amount = extract_number($i)
            print "金额:", amount
        }
    }
}' advanced_awk_test.txt

# 5. Range selection via string comparison on the HH:MM:SS field
echo -e "\n5. 模式范围:"

echo "处理特定时间范围:"
awk '$2 >= "09:00:00" && $2 <= "11:00:00"' advanced_awk_test.txt

# 6. Redirecting print output to files from inside awk
echo -e "\n6. 输出重定向:"

echo "按级别输出到不同文件:"
awk '
$3 ~ /INFO/ {print > "info.log"}
$3 ~ /ERROR/ {print > "error.log"} 
$3 ~ /WARN/ {print > "warn.log"}
' advanced_awk_test.txt

echo "INFO日志:"
cat info.log
echo "ERROR日志:"
cat error.log

# 7. Piping awk output into other commands
echo -e "\n7. 管道输出:"

echo "排序输出:"
awk '{print $3, $5}' advanced_awk_test.txt | sort

# 8. Running a shell command and reading its output via getline
echo -e "\n8. 系统命令集成:"

echo "在awk中执行系统命令:"
awk '{
    "date +%Y-%m-%d" | getline current_date
    close("date +%Y-%m-%d")
    if($1 == current_date) 
        print "今天日志:", $0
}' advanced_awk_test.txt

# 9. Extracting and aggregating numeric values (3-arg match, gawk-only)
echo -e "\n9. 复杂数据处理:"

echo "提取和计算数值:"
awk '{
    total = 0
    count = 0
    for(i=1; i<=NF; i++) {
        if($i ~ /amount=([0-9.]+)/) {
            match($i, /amount=([0-9.]+)/, arr)
            total += arr[1]
            count++
        }
    }
    if(count > 0) {
        print "总金额:", total, "平均金额:", total/count
    }
}' advanced_awk_test.txt

# 10. Report generation: header in BEGIN, counts per service, table in END
echo -e "\n10. 报表生成:"

echo "生成统计报表:"
awk '
BEGIN {
    printf "%-20s %-10s %-10s\n", "服务", "INFO", "ERROR"
    printf "%-20s %-10s %-10s\n", "---", "---", "---"
}
{
    split($0, parts, "[][]")
    service = parts[2]
    if($3 ~ /INFO/) info[service]++
    if($3 ~ /ERROR/) error[service]++
} 
END {
    for(service in info) {
        printf "%-20s %-10d %-10d\n", service, info[service], error[service]
    }
}' advanced_awk_test.txt

# 11. Data validation with a helper predicate function
echo -e "\n11. 数据验证:"

echo "验证数据格式:"
awk '
function is_valid_ip(ip) {
    return ip ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/
}
{
    for(i=1; i<=NF; i++) {
        if($i ~ /ip=/) {
            split($i, ip_parts, "=")
            ip = ip_parts[2]
            if(is_valid_ip(ip)) {
                print "有效IP:", ip
            } else {
                print "无效IP:", ip
            }
        }
    }
}' advanced_awk_test.txt

# Remove temporary files (including the per-level output files)
rm -f info.log error.log warn.log advanced_awk_test.txt

echo -e "\n=== awk 高级用法演示完成 ==="

4.3 awk 实战案例

创建 awk 实战脚本:awk_practical.sh

bash 复制代码
#!/bin/bash

# awk_practical.sh — practical awk case studies: sales aggregation, system
# monitoring, log analysis, config parsing, JSON generation and reports.
# NOTE(review): expects sales.csv / access.log / config.txt / users.txt /
# multilang.txt from create_test_data.sh. The 3-argument match() used in
# case 3 is gawk-only, and the \u escapes in case 6 are not valid awk
# regex syntax — that character count is unreliable; verify under gawk.

echo "=== awk 实战案例 ==="

# Case 1: sales data analysis (NR > 1 skips the CSV header)
echo -e "\n案例1: 销售数据分析"

echo "1. 按产品分类统计:"
awk -F, '
NR > 1 {
    products[$2] += $5
    quantity[$2] += $6
} 
END {
    printf "%-15s %-12s %-10s %-12s\n", "产品", "总销售额", "总数量", "平均单价"
    printf "%-15s %-12s %-10s %-12s\n", "---", "---", "---", "---"
    for(p in products) {
        avg = products[p] / quantity[p]
        printf "%-15s %-12.2f %-10d %-12.2f\n", p, products[p], quantity[p], avg
    }
}' sales.csv

echo -e "\n2. 按地区统计:"
awk -F, '
NR > 1 {
    regions[$4] += $5
}
END {
    print "=== 地区销售统计 ==="
    for(r in regions) {
        printf "%-10s: %.2f\n", r, regions[r]
    }
}' sales.csv

# Case 2: system monitoring report
echo -e "\n案例2: 系统监控报告"

# Simulated metrics: "CPU n% MEM n% DISK n% NET nKB/s"
cat > system_stats.txt << 'EOF'
CPU 25% MEM 45% DISK 78% NET 120KB/s
CPU 30% MEM 48% DISK 79% NET 150KB/s
CPU 28% MEM 46% DISK 78% NET 130KB/s
CPU 35% MEM 50% DISK 80% NET 200KB/s
CPU 40% MEM 52% DISK 81% NET 180KB/s
EOF

echo "系统资源分析:"
# Numeric context ("25%" + 0 == 25) lets awk average the raw fields
awk '{
    cpu_sum += $2
    mem_sum += $4  
    disk_sum += $6
    net_sum += $8
    count++
} 
END {
    print "=== 系统资源统计 ==="
    printf "CPU平均使用率: %.1f%%\n", cpu_sum/count
    printf "内存平均使用率: %.1f%%\n", mem_sum/count
    printf "磁盘平均使用率: %.1f%%\n", disk_sum/count
    printf "网络平均速度: %.1fKB/s\n", net_sum/count
}' system_stats.txt

# Case 3: log analysis
echo -e "\n案例3: 日志分析"

echo "1. HTTP状态码统计:"
# 3-arg match(s, r, arr) captures the status code into arr[1] (gawk-only)
awk '{
    match($0, /HTTP\/1\.1" ([0-9]{3})/, arr)
    if(arr[1] != "") {
        status_codes[arr[1]]++
    }
} 
END {
    print "=== HTTP状态码统计 ==="
    for(code in status_codes) {
        printf "状态码 %s: %d次\n", code, status_codes[code]
    }
}' access.log

echo -e "\n2. IP访问频率:"
awk '{
    ip = $1
    ips[ip]++
} 
END {
    print "=== IP访问频率 ==="
    for(ip in ips) {
        printf "%-15s: %d次\n", ip, ips[ip]
    }
}' access.log | sort -k2 -nr

# Case 4: configuration parsing (skip '#' comments, require key=value)
echo -e "\n案例4: 配置解析"

echo "解析配置文件:"
awk -F= '
/^[^#]/ && NF == 2 {
    config[$1] = $2
} 
END {
    print "=== 配置信息 ==="
    for(key in config) {
        printf "%-20s: %s\n", key, config[key]
    }
}' config.txt

# Case 5: data conversion — users.txt rows to a JSON array
echo -e "\n案例5: 数据转换"

echo "生成JSON格式:"
awk -F, '
BEGIN {
    print "["
}
NR > 1 {
    if(NR > 2) print ","
    printf "  {\n"
    printf "    \"id\": %s,\n", $1
    printf "    \"name\": \"%s\",\n", $2
    printf "    \"age\": %s,\n", $3
    printf "    \"position\": \"%s\",\n", $4
    printf "    \"city\": \"%s\",\n", $5
    printf "    \"salary\": %s\n", $6
    printf "  }"
}
END {
    print "\n]"
}' users.txt

# Case 6: multilingual text analysis (see NOTE at top re: \u escapes)
echo -e "\n案例6: 文本处理"

echo "多语言文本分析:"
awk '{
    # 统计中文字符
    chinese_chars = gsub(/[\\u4e00-\\u9fff]/, "&")
    # 统计英文字符
    english_words = gsub(/[a-zA-Z]+/, "&")
    # 统计法文字符
    french_chars = gsub(/[éèêëàâæçîïôœùûüÿ]/, "&")
    
    printf "行 %d: 中文%d个 英文%d个 法文%d个\n", NR, chinese_chars, english_words, french_chars
}' multilang.txt

# Case 7: salary analysis report with grading and age-group stats
echo -e "\n案例7: 复杂计算"

echo "薪资分析报告:"
awk -F, '
BEGIN {
    print "=== 薪资分析报告 ==="
    printf "%-10s %-8s %-10s %-12s\n", "姓名", "年龄", "薪资", "等级"
    printf "%-10s %-8s %-10s %-12s\n", "---", "---", "---", "---"
}
NR > 0 {
    salary = $6
    age = $3
    
    # 薪资等级
    if(salary >= 100000) grade = "A"
    else if(salary >= 70000) grade = "B" 
    else if(salary >= 50000) grade = "C"
    else grade = "D"
    
    # 统计
    total_salary += salary
    count++
    if(salary > max_salary) max_salary = salary
    if(salary < min_salary || min_salary == 0) min_salary = salary
    
    # 年龄组统计
    if(age < 25) age_group["<25"]++
    else if(age < 30) age_group["25-29"]++
    else if(age < 35) age_group["30-34"]++
    else age_group[">=35"]++
    
    printf "%-10s %-8d %-10d %-12s\n", $2, age, salary, grade
}
END {
    print "\n=== 统计摘要 ==="
    printf "员工总数: %d\n", count
    printf "平均薪资: %.2f\n", total_salary/count
    printf "最高薪资: %d\n", max_salary
    printf "最低薪资: %d\n", min_salary
    
    print "\n=== 年龄分布 ==="
    for(group in age_group) {
        printf "%s岁: %d人\n", group, age_group[group]
    }
}' users.txt

# Remove temporary files
rm -f system_stats.txt

echo -e "\n=== awk 实战案例演示完成 ==="

5. 三剑客组合应用

5.1 工具组合工作流

创建组合应用脚本:text_processing_workflow.sh

bash 复制代码
#!/bin/bash

# text_processing_workflow.sh — pipelines combining grep, sed and awk:
# log analysis, data cleaning, config documentation, monitoring and
# report generation.
# NOTE(review): expects the data files from create_test_data.sh. Workflow 1
# and workflow 3 use gawk-only features (3-arg match(), config[a][b]
# multidimensional arrays); workflow 4's \u escapes are not valid awk
# regex syntax — verify under gawk before relying on those counts.

echo "=== 文本处理三剑客组合应用 ==="

# Workflow 1: log analysis pipeline (grep filter -> awk extract -> sort/uniq)
echo -e "\n工作流1: 日志分析管道"

echo "分析错误请求并统计:"
grep -E '(404|500|403)' access.log | \
awk '{
    match($0, /([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+).*"([A-Z]+) ([^"]+).* ([0-9]{3})/, arr)
    print arr[1], arr[2], arr[3], arr[4]
}' | \
sort | \
uniq -c | \
sort -rn | \
head -10

# Workflow 2: data cleaning (sed normalize -> awk trim -> awk aggregate)
echo -e "\n工作流2: 数据清洗和转换"

echo "用户数据清洗和统计:"
sed 's/, /,/g' users.txt | \
awk -F, '
NR > 0 {
    gsub(/^[[:space:]]+|[[:space:]]+$/, "", $2)  # 清理姓名空格
    gsub(/^[[:space:]]+|[[:space:]]+$/, "", $4)  # 清理职位空格
    print $1","$2","$3","$4","$5","$6
}' | \
awk -F, '{
    # 按城市统计薪资
    city_salary[$5] += $6
    city_count[$5]++
} 
END {
    print "=== 各城市平均薪资 ==="
    for(city in city_salary) {
        avg = city_salary[city] / city_count[city]
        printf "%-8s: %.2f\n", city, avg
    }
}'

# Workflow 3: config file -> grouped documentation
# ('$(date)' is interpolated by the shell between the quoted awk segments)
echo -e "\n工作流3: 配置文件处理"

echo "生成配置文档:"
grep -v '^#' config.txt | \
sed 's/^# //' | \
awk -F= '{
    if(NF == 2) {
        category = substr($1, 1, index($1, ".")-1)
        key = substr($1, index($1, ".")+1)
        config[category][key] = $2
    }
} 
END {
    print "# 配置文档"
    print "生成时间: '$(date)'"
    print ""
    
    for(category in config) {
        print "## " category
        print ""
        for(key in config[category]) {
            printf "%-20s: %s\n", key, config[category][key]
        }
        print ""
    }
}'

# Workflow 4: multilingual text analysis (see NOTE at top re: \u escapes)
echo -e "\n工作流4: 复杂文本转换"

echo "多语言文本分析报告:"
cat multilang.txt | \
sed 's/! /!\n/g' | \
awk '{
    # 分离不同语言
    split($0, parts, " ")
    chinese_count = 0
    english_count = 0
    french_count = 0
    
    for(i in parts) {
        if(parts[i] ~ /[\u4e00-\u9fff]/) chinese_count++
        else if(parts[i] ~ /^[A-Za-z]/) english_count++
        else if(parts[i] ~ /[éèêëàâæçîïôœùûüÿ]/) french_count++
    }
    
    print "行", NR, "- 中文:", chinese_count, "英文:", english_count, "法文:", french_count
}'

# Workflow 5: monitoring pipeline with threshold alerts
echo -e "\n工作流5: 系统监控管道"

echo "模拟系统监控分析:"
# Generate random sample metrics
for i in {1..10}; do
    echo "CPU $((20 + RANDOM % 30))% MEM $((40 + RANDOM % 20))% DISK $((70 + RANDOM % 15))%"
done > monitor.log

cat monitor.log | \
awk '{
    gsub(/%/, "", $2)
    gsub(/%/, "", $4) 
    gsub(/%/, "", $6)
    
    cpu_sum += $2
    mem_sum += $4
    disk_sum += $6
    count++
    
    # 告警检测
    if($2 > 80) print "警告: CPU使用率过高 - "$2"%"
    if($4 > 85) print "警告: 内存使用率过高 - "$4"%"
    if($6 > 90) print "警告: 磁盘使用率过高 - "$6"%"
} 
END {
    print "=== 平均使用率 ==="
    printf "CPU: %.1f%%\n", cpu_sum/count
    printf "内存: %.1f%%\n", mem_sum/count
    printf "磁盘: %.1f%%\n", disk_sum/count
}'

# Workflow 6: full sales report (aggregate by date/product/category/region)
echo -e "\n工作流6: 销售数据报表"

awk -F, '
BEGIN {
    print "=== 销售数据分析报告 ==="
    print "生成时间: '$(date)'"
    print ""
}
NR == 1 {
    # 跳过标题行
    next
}
{
    # 数据统计
    date_sales[$1] += $5
    product_sales[$2] += $5
    category_sales[$3] += $5
    region_sales[$4] += $5
    total_sales += $5
    total_quantity += $6
    record_count++
}
END {
    # 日期分析
    print "## 按日期统计"
    for(date in date_sales) {
        printf "%-12s: %8.2f\n", date, date_sales[date]
    }
    print ""
    
    # 产品分析
    print "## 按产品统计"
    for(product in product_sales) {
        printf "%-15s: %8.2f\n", product, product_sales[product]
    }
    print ""
    
    # 分类分析
    print "## 按分类统计"
    for(category in category_sales) {
        printf "%-15s: %8.2f\n", category, category_sales[category]
    }
    print ""
    
    # 地区分析  
    print "## 按地区统计"
    for(region in region_sales) {
        printf "%-10s: %8.2f\n", region, region_sales[region]
    }
    print ""
    
    # 汇总信息
    print "## 汇总信息"
    printf "总销售额: %.2f\n", total_sales
    printf "总数量: %d\n", total_quantity
    printf "平均单价: %.2f\n", total_sales/total_quantity
    printf "记录数量: %d\n", record_count
}' sales.csv

# Remove temporary files
rm -f monitor.log

echo -e "\n=== 组合应用演示完成 ==="

5.2 性能优化与最佳实践

创建优化指南:performance_tips.sh

bash 复制代码
#!/bin/bash

# performance_tips.sh — timing comparisons of grep/sed/awk variants on a
# generated 10k-line file, plus best-practice and debugging snippets.
# The large test file is removed at the end of the script.

echo "=== 文本处理性能优化与最佳实践 ==="

# Build the test file with a single redirect of the whole loop:
# one open() instead of 10000, and '>' (not '>>') makes re-runs
# idempotent — previously the file doubled on every execution.
echo "创建大测试文件..."
for i in {1..10000}; do
    echo "Line $i: This is test data with some pattern $((i % 100)) and more content here."
done > large_test.txt

echo -e "\n1. grep 性能优化:"

echo "使用固定字符串搜索:"
time grep -F 'pattern 50' large_test.txt > /dev/null

echo -e "\n使用简单正则:"
time grep 'pattern 50' large_test.txt > /dev/null

echo -e "\n使用扩展正则:"
time grep -E 'pattern (50|51)' large_test.txt > /dev/null

echo -e "\n2. sed 性能优化:"

echo "使用简单替换:"
time sed 's/pattern/PATTERN/g' large_test.txt > /dev/null

echo -e "\n使用复杂替换:"
time sed -E 's/pattern ([0-9]+)/PATTERN \1/g' large_test.txt > /dev/null

echo -e "\n3. awk 性能优化:"

echo "使用字段分割:"
time awk '{print $3}' large_test.txt > /dev/null

echo -e "\n使用正则分割:"
time awk -F'[: ]' '{print $3}' large_test.txt > /dev/null

echo -e "\n4. 管道优化技巧:"

echo "减少管道数量:"
time cat large_test.txt | grep 'pattern' | sed 's/pattern/PATTERN/' > /dev/null

echo -e "\n合并处理命令:"
# One awk process replaces the grep|sed pipeline above
time awk '/pattern/ {gsub(/pattern/, "PATTERN"); print}' large_test.txt > /dev/null

echo -e "\n5. 内存使用优化:"

# Deliberate anti-example: forking grep once per line is orders of
# magnitude slower than a single grep over the file (timed next).
echo "流式处理大文件:"
time while IFS= read -r line; do
    echo "$line" | grep -q 'pattern 50' && echo "$line"
done < large_test.txt > /dev/null

echo -e "\n使用高效工具组合:"
time grep 'pattern 50' large_test.txt | head -100 > /dev/null

# Best-practice snippets
echo -e "\n6. 最佳实践示例:"

echo "错误处理:"
grep 'nonexistent' large_test.txt || echo "没有找到匹配内容"

echo -e "\n输出控制:"
awk 'NR % 1000 == 0 {print "进度:", NR}' large_test.txt

echo -e "\n资源清理:"
# Demonstration only: guarantees temp_file.txt is removed on any exit path
trap 'rm -f temp_file.txt' EXIT

echo -e "\n7. 调试技巧:"

echo "步骤调试:"
grep 'pattern 50' large_test.txt | \
sed 's/pattern/PATTERN/' | \
awk '{print "处理结果:", $0}' | \
head -3

echo -e "\n变量跟踪:"
# Progress messages go to stderr so stdout data stays clean
awk '{
    if(NR % 1000 == 0) {
        print "处理行数:", NR > "/dev/stderr"
    }
    print $0
}' large_test.txt > /dev/null

# Remove the generated test file
rm -f large_test.txt

echo -e "\n=== 性能优化指南完成 ==="

6. 综合实战项目

6.1 完整的日志分析系统

创建日志分析系统:log_analysis_system.sh

bash 复制代码
#!/bin/bash

# Complete log-analysis system demo.

echo "=== 日志分析系统 ==="

# Sample access log in combined-log-like format, one extra trailing field:
# ip - user [timestamp] "METHOD path proto" status bytes "referrer" "agent" resp_time
# The quoted 'EOF' delimiter writes the data out verbatim (no expansion).
cat > extended_access.log << 'EOF'
192.168.1.100 - john [10/Oct/2023:10:30:01 +0800] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0" 0.123
192.168.1.101 - jane [10/Oct/2023:10:30:02 +0800] "POST /api/login HTTP/1.1" 401 567 "https://example.com" "Mozilla/5.0" 0.456
192.168.1.102 - bob [10/Oct/2023:10:30:03 +0800] "GET /products.html HTTP/1.1" 200 7890 "https://example.com" "Chrome/91.0" 0.234
192.168.1.100 - john [10/Oct/2023:10:30:04 +0800] "GET /images/logo.png HTTP/1.1" 304 0 "https://example.com" "Mozilla/5.0" 0.078
192.168.1.103 - alice [10/Oct/2023:10:30:05 +0800] "PUT /api/users/1 HTTP/1.1" 403 234 "https://example.com" "Firefox/89.0" 0.345
192.168.1.104 - charlie [10/Oct/2023:10:30:06 +0800] "GET /contact.html HTTP/1.1" 200 3456 "https://example.com" "Safari/14.0" 0.189
192.168.1.101 - jane [10/Oct/2023:10:30:07 +0800] "DELETE /api/products/5 HTTP/1.1" 204 0 "https://example.com" "Mozilla/5.0" 0.267
192.168.1.105 - david [10/Oct/2023:10:30:08 +0800] "GET /about.html HTTP/1.1" 500 123 "https://example.com" "Chrome/92.0" 0.412
192.168.1.106 - eve [10/Oct/2023:10:30:09 +0800] "GET /admin/dashboard HTTP/1.1" 200 4567 "https://example.com" "Mozilla/5.0" 0.156
192.168.1.107 - frank [10/Oct/2023:10:30:10 +0800] "POST /api/orders HTTP/1.1" 201 789 "https://example.com" "Chrome/93.0" 0.298
EOF

# Analysis functions.
analyze_traffic() {
    # Traffic overview for extended_access.log: total requests, HTTP-method,
    # status-code and user-agent distributions.
    echo "=== 流量分析 ==="

    echo -e "\n1. 总请求数:"
    # Reading via stdin makes wc print only the count (no filename column),
    # replacing the original `wc -l file | awk '{print $1}'` chain.
    wc -l < extended_access.log

    echo -e "\n2. 请求方法分布:"
    # Field 2 of a quote-split line is `METHOD path proto`; its first word is
    # the HTTP method.  One awk pass replaces the original awk|awk pipeline.
    awk -F'"' '{split($2, req, " "); methods[req[1]]++}
               END {for (m in methods) print m, methods[m]}' extended_access.log | sort -k2 -nr

    echo -e "\n3. HTTP状态码分布:"
    awk '{print $9}' extended_access.log | sort | uniq -c | sort -rn

    echo -e "\n4. 用户代理分布:"
    awk -F'"' '{print $6}' extended_access.log | sort | uniq -c | sort -rn | head -5
}

analyze_performance() {
    # Response-time statistics; the response time is the last
    # whitespace-separated field ($NF) of each log line.
    echo -e "\n=== 性能分析 ==="

    echo -e "\n1. 平均响应时间:"
    awk '{response_time = $(NF); sum += response_time; count++} END {printf "%.3f秒\n", sum/count}' extended_access.log

    echo -e "\n2. 最慢的请求:"
    # Prefix each line with its response time, sort numerically descending,
    # take the top 3, then blank out the prefix column before printing.
    awk '{print $(NF), $0}' extended_access.log | sort -rn | head -3 | awk '{$1=""; print}'

    echo -e "\n3. 按端点统计平均响应时间:"
    # Split on double quotes: $2 is `METHOD path proto`, so parts[2] is the
    # request path (endpoint).  With -F'"' the last field is the text after
    # the final quote, whose leading token is the response time.
    awk -F'"' '{ 
        split($2, parts, " ")
        endpoint = parts[2]
        response_time = $(NF)
        total_time[endpoint] += response_time
        count[endpoint]++
    } END {
        for(ep in total_time) {
            avg = total_time[ep] / count[ep]
            printf "%-20s: %.3f秒 (%d次)\n", ep, avg, count[ep]
        }
    }' extended_access.log | sort -k2 -nr
}

analyze_security() {
    # Security-oriented views of the access log: failed logins, 403/500
    # responses, and hits under /admin.  Prints the matching raw log lines.
    local log_file="extended_access.log"

    echo -e "\n=== 安全分析 ==="

    echo -e "\n1. 失败登录尝试:"
    grep -E 'POST /api/login.*401' "$log_file"

    echo -e "\n2. 权限拒绝访问:"
    grep ' 403 ' "$log_file"

    echo -e "\n3. 服务器错误:"
    grep ' 500 ' "$log_file"

    echo -e "\n4. 管理员访问:"
    grep '/admin' "$log_file"
}

analyze_users() {
    # Per-user activity: request counts and number of distinct source IPs.
    echo -e "\n=== 用户行为分析 ==="

    echo -e "\n1. 活跃用户:"
    awk '{print $3}' extended_access.log | grep -v '^-$' | sort | uniq -c | sort -rn

    echo -e "\n2. 用户会话分析:"
    # The original used gawk-only arrays of arrays (user_ips[user][ip]).
    # A flat (user, ip) key works in any POSIX awk and yields the same
    # output: request count plus distinct-IP count per user.
    awk '{
        ip = $1
        user = $3
        if (user != "-") {
            user_requests[user]++
            if (!((user, ip) in seen)) {
                seen[user, ip] = 1
                ip_count[user]++
            }
        }
    } END {
        for (user in user_requests) {
            printf "用户: %-10s 请求数: %-3d IP数: %d\n", user, user_requests[user], ip_count[user]
        }
    }' extended_access.log | sort -k4 -nr
}

generate_report() {
    # Build log_report.txt: summary header plus traffic and performance stats.
    echo -e "\n=== 分析报告生成 ==="

    # Unquoted delimiter so $(date) is expanded into the report header (the
    # original quoted 'EOF' wrote the literal text "$(date)" into the file).
    cat > log_report.txt << EOF
# 网站访问日志分析报告
生成时间: $(date)

## 执行摘要
EOF

    # Summary counters, appended in one redirected group.
    {
        echo "总请求数: $(wc -l < extended_access.log)"
        echo "成功请求(2xx): $(grep -c ' 2[0-9][0-9] ' extended_access.log)"
        echo "客户端错误(4xx): $(grep -c ' 4[0-9][0-9] ' extended_access.log)"
        echo "服务器错误(5xx): $(grep -c ' 5[0-9][0-9] ' extended_access.log)"
        echo "平均响应时间: $(awk '{sum += $(NF)} END {printf "%.3f秒", sum/NR}' extended_access.log)"
    } >> log_report.txt

    # Detailed sections.
    echo -e "\n## 详细分析" >> log_report.txt

    echo -e "\n### 流量统计" >> log_report.txt
    awk -F'"' '{print $2}' extended_access.log | awk '{methods[$1]++} END {for(m in methods) printf "%s: %d\n", m, methods[m]}' >> log_report.txt

    echo -e "\n### 性能统计" >> log_report.txt
    # Average response time ($NF) grouped by HTTP status code ($9).
    awk '{
        status = $9
        response_time = $(NF)
        status_time[status] += response_time
        status_count[status]++
    } END {
        for(s in status_count) {
            avg = status_time[s] / status_count[s]
            printf "状态码 %s: 平均 %.3f秒 (%d次)\n", s, avg, status_count[s]
        }
    }' extended_access.log >> log_report.txt

    echo "报告已生成: log_report.txt"
}

# Run every analysis stage in order, then emit the report file.
analyze_traffic
analyze_performance
analyze_security
analyze_users
generate_report

# Remove the generated sample log (log_report.txt is left for inspection).
rm -f extended_access.log

echo -e "\n=== 日志分析系统演示完成 ==="

6.2 数据转换和ETL管道

创建ETL管道:etl_pipeline.sh

bash 复制代码
#!/bin/bash

# Data-transformation / ETL pipeline demo.

echo "=== 数据转换和ETL管道 ==="

# Source dataset: pipe-delimited records with a header row.  The quoted
# 'EOF' delimiter writes the data out verbatim (no expansion).
cat > raw_data.csv << 'EOF'
id|name|age|salary|department|join_date
1|John Doe|30|50000|Engineering|2020-01-15
2|Jane Smith|25|45000|Marketing|2021-03-20
3|Bob Johnson|35|60000|Engineering|2019-11-10
4|Alice Brown|28|52000|Sales|2022-02-28
5|Charlie Wilson|42|75000|Management|2018-05-15
6|Diana Lee|26|48000|Engineering|2021-07-01
7|Edward Zhang|33|55000|Sales|2020-09-10
8|Fiona Chen|29|51000|Marketing|2022-01-05
EOF

# ETL pipeline stages.
extract_data() {
    # Extract stage: validate raw_data.csv and stage a working copy as
    # extracted_data.csv.  Exits non-zero if the input is missing or empty.
    echo "=== 数据提取阶段 ==="

    if [[ ! -f raw_data.csv ]]; then
        echo "错误: 数据文件不存在"
        exit 1
    fi

    # Declare and assign separately so a wc failure is not masked by
    # `local`'s own (always zero) exit status.
    local line_count
    line_count=$(wc -l < raw_data.csv)
    if [[ $line_count -lt 2 ]]; then
        echo "错误: 数据文件为空或只有标题行"
        exit 1
    fi

    echo "数据文件验证通过,共 $line_count 行"

    cp raw_data.csv extracted_data.csv
    echo "数据提取完成"
}

transform_data() {
    # Transform stage: trim fields, derive experience / salary grade / bonus,
    # and write comma-separated transformed_data.csv.
    echo -e "\n=== 数据转换阶段 ==="

    # NOTE(review): the original piped through two seds whose inline comments
    # followed the line-continuation backslashes ("| \  # ..."), which breaks
    # the pipeline — the backslash escapes a space, so bash tries to run a
    # command named " ".  Those seds only rewrote the header line, which awk
    # discards anyway (BEGIN prints its own header), so they are dropped.
    # The current year is passed via -v because strftime() is a gawk
    # extension and unavailable in POSIX awk / mawk.
    awk -F'|' -v current_year="$(date +%Y)" '
    BEGIN {
        OFS = ","
        print "ID,Name,Age,Salary,Department,JoinDate,Experience,SalaryGrade,Bonus"
    }
    NR > 1 {
        # Trim surrounding whitespace from name and department.
        gsub(/^[[:space:]]+|[[:space:]]+$/, "", $2)
        gsub(/^[[:space:]]+|[[:space:]]+$/, "", $5)

        # Years of experience from the join date (YYYY-MM-DD).
        split($6, date_parts, "-")
        experience = current_year - date_parts[1]

        # Salary grade: A >= 70000, B >= 55000, otherwise C.
        salary_grade = "C"
        if ($4 >= 70000) salary_grade = "A"
        else if ($4 >= 55000) salary_grade = "B"

        # Bonus: 10% base, 15% after 3 years, 20% after 5 years.
        bonus_rate = 0.1
        if (experience >= 3) bonus_rate = 0.15
        if (experience >= 5) bonus_rate = 0.2
        bonus = $4 * bonus_rate

        print $1, $2, $3, $4, $5, $6, experience, salary_grade, int(bonus)
    }' extracted_data.csv > transformed_data.csv

    echo "数据转换完成"

    # Data-quality check: report empty / NULL cells.
    echo -e "\n数据质量检查:"
    echo "总记录数: $(($(wc -l < transformed_data.csv) - 1))"
    echo "空值检查:"
    awk -F, '
    NR > 1 {
        for(i=1; i<=NF; i++) {
            if($i == "" || $i == "NULL") {
                print "第 " NR " 行第 " i " 列为空"
            }
        }
    }' transformed_data.csv

    # Aggregate statistics over the transformed rows.
    echo -e "\n数据统计:"
    awk -F, '
    NR > 1 {
        total_salary += $4
        total_bonus += $9
        count++
        dept[$5]++
    } 
    END {
        printf "平均薪资: %.2f\n", total_salary/count
        printf "平均奖金: %.2f\n", total_bonus/count
        print "部门分布:"
        for(d in dept) {
            printf "  %-15s: %d人\n", d, dept[d]
        }
    }' transformed_data.csv
}

load_data() {
    # Load stage: emit the (simulated) DDL, SQL INSERT statements derived
    # from transformed_data.csv, and a markdown analysis report.
    echo -e "\n=== 数据加载阶段 ==="

    # Target schema; quoted 'EOF' keeps the SQL text literal.
    cat > database_schema.sql << 'EOF'
-- 员工表结构
CREATE TABLE employees (
    id INT PRIMARY KEY,
    name VARCHAR(100),
    age INT,
    salary DECIMAL(10,2),
    department VARCHAR(50),
    join_date DATE,
    experience INT,
    salary_grade CHAR(1),
    bonus DECIMAL(10,2)
);
EOF
    
    # One INSERT per data row; the '\'' sequences embed literal single
    # quotes inside the single-quoted awk program.
    awk -F, '
    NR > 1 {
        printf "INSERT INTO employees VALUES(%d, '\''%s'\'', %d, %.2f, '\''%s'\'', '\''%s'\'', %d, '\''%s'\'', %.2f);\n", 
            $1, $2, $3, $4, $5, $6, $7, $8, $9
    }' transformed_data.csv > load_data.sql
    
    echo "SQL加载脚本已生成:"
    head -3 load_data.sql
    echo "..."
    
    # Markdown report aggregated from the transformed data.  Note that
    # '$(date)' below steps out of the single-quoted awk program so the
    # shell expands the date at generation time.
    echo -e "\n生成数据报表..."
    awk -F, '
    BEGIN {
        print "# 员工数据分析报告"
        print "生成时间: '$(date)'"
        print ""
    }
    NR == 1 { next }  # skip the CSV header row
    
    {
        total_salary += $4
        total_bonus += $9
        count++
        
        # per-department totals
        dept_salary[$5] += $4
        dept_count[$5]++
        dept_bonus[$5] += $9
        
        # age-band histogram
        if($3 < 25) age_group["<25"]++
        else if($3 < 30) age_group["25-29"]++
        else if($3 < 35) age_group["30-34"]++
        else if($3 < 40) age_group["35-39"]++
        else age_group[">=40"]++
        
        # salary-grade histogram
        grade_count[$8]++
    }
    END {
        print "## 执行摘要"
        printf "员工总数: %d\n", count
        printf "总薪资: %.2f\n", total_salary
        printf "总奖金: %.2f\n", total_bonus
        printf "平均薪资: %.2f\n", total_salary/count
        printf "平均奖金: %.2f\n", total_bonus/count
        print ""
        
        print "## 部门分析"
        for(dept in dept_salary) {
            avg_salary = dept_salary[dept] / dept_count[dept]
            avg_bonus = dept_bonus[dept] / dept_count[dept]
            printf "### %s\n", dept
            printf "人数: %d\n", dept_count[dept]
            printf "平均薪资: %.2f\n", avg_salary
            printf "平均奖金: %.2f\n", avg_bonus
            print ""
        }
        
        print "## 年龄分布"
        for(group in age_group) {
            printf "%s岁: %d人 (%.1f%%)\n", group, age_group[group], (age_group[group]/count)*100
        }
        print ""
        
        print "## 薪资等级分布"
        for(grade in grade_count) {
            printf "等级 %s: %d人\n", grade, grade_count[grade]
        }
    }' transformed_data.csv > analysis_report.md
    
    echo "数据分析报告已生成: analysis_report.md"
}

# Run the three ETL stages in order.
extract_data
transform_data
load_data

# Remove intermediate artifacts (analysis_report.md is kept as the result).
rm -f raw_data.csv extracted_data.csv transformed_data.csv database_schema.sql load_data.sql

echo -e "\n=== ETL管道演示完成 ==="

7. 总结与进阶学习

7.1 工具对比与选择指南

graph TB A[文本处理任务] --> B{任务类型} B -->|快速搜索| C[grep] B -->|简单替换| D[sed] B -->|复杂处理| E[awk] C --> F[模式匹配
文件过滤] D --> G[流编辑
批量替换] E --> H[数据提取
报表生成] F --> I[适用场景
日志搜索
文件过滤] G --> J[适用场景
配置修改
数据清洗] H --> K[适用场景
数据分析
报表生成] style A fill:#1e3a5f,color:#ffffff style B fill:#4a1e5f,color:#ffffff style C fill:#1e5f3a,color:#ffffff style D fill:#1e5f3a,color:#ffffff style E fill:#1e5f3a,color:#ffffff style F fill:#5f3a1e,color:#ffffff style G fill:#5f3a1e,color:#ffffff style H fill:#5f3a1e,color:#ffffff style I fill:#1e3a5f,color:#ffffff style J fill:#1e3a5f,color:#ffffff style K fill:#1e3a5f,color:#ffffff

7.2 创建学习检查清单

创建学习检查清单:learning_checklist.sh

bash 复制代码
#!/bin/bash

# Generates learning_checklist.md, a self-assessment checklist for the
# grep/sed/awk material covered in this tutorial.

echo "=== 文本处理三剑客学习检查清单 ==="

# Quoted 'EOF': the markdown below is written out verbatim.
cat > learning_checklist.md << 'EOF'
# 文本处理三剑客学习检查清单

## grep 掌握程度检查
- [ ] 基础搜索和选项 (-i, -n, -v, -c)
- [ ] 正则表达式基础 (., *, +, ?, [])
- [ ] 字符类和预定义字符类
- [ ] 锚点和单词边界
- [ ] 分组和引用
- [ ] 扩展正则表达式
- [ ] 递归搜索和文件过滤
- [ ] 性能优化技巧

## sed 掌握程度检查
- [ ] 基本替换命令 (s///)
- [ ] 地址和范围指定
- [ ] 删除、插入、追加命令
- [ ] 模式空间和保持空间
- [ ] 流控制(分支、跳转)
- [ ] 多命令执行和脚本文件
- [ ] 高级替换技巧
- [ ] 原位编辑和备份

## awk 掌握程度检查
- [ ] 基本打印和字段处理
- [ ] 模式匹配和条件语句
- [ ] BEGIN 和 END 块
- [ ] 内置变量 (NR, NF, FS, OFS)
- [ ] 数组和关联数组
- [ ] 字符串和数学函数
- [ ] 控制结构 (if, for, while)
- [ ] 自定义函数
- [ ] 输入输出重定向

## 组合应用掌握程度检查
- [ ] 管道连接多个命令
- [ ] 复杂数据处理流程
- [ ] 性能优化和调试
- [ ] 错误处理和边界情况
- [ ] 实际项目应用

## 实战项目建议
1. 日志分析系统
2. 数据清洗管道
3. 配置文件管理工具
4. 报表生成系统
5. 监控告警脚本

## 进阶学习方向
- Perl 文本处理
- Python 数据处理 (pandas)
- jq (JSON 处理)
- xmlstarlet (XML 处理)
- 数据库查询优化
EOF

echo "学习检查清单已生成: learning_checklist.md"
echo ""
echo "下一步学习建议:"
echo "1. 完成检查清单中的所有项目"
echo "2. 在实际工作中应用所学知识"
echo "3. 阅读官方文档和man页面"
echo "4. 参与开源项目贡献"
echo "5. 学习相关工具如 jq, xmllint 等"

7.3 创建实用脚本库

创建实用脚本库:utility_scripts.sh

bash 复制代码
#!/bin/bash

# Builds a small library of reusable text-processing scripts; this section
# writes analyze_logs.sh (later sections create the remaining tools).

echo "=== 创建文本处理实用脚本库 ==="

# 1. Log-analysis script.  Quoted 'EOF' writes the body verbatim so the
# embedded $variables belong to the generated script, not to this one.
# The generated tool defaults to access.log when no argument is given and
# assumes combined-log-format input (status in $9, request in quotes).
cat > analyze_logs.sh << 'EOF'
#!/bin/bash
# 日志分析脚本

LOG_FILE=${1:-access.log}

echo "分析日志文件: $LOG_FILE"

# 基本统计
echo "=== 基本统计 ==="
echo "总行数: $(wc -l < "$LOG_FILE")"
echo "独立IP数: $(awk '{print $1}' "$LOG_FILE" | sort -u | wc -l)"
echo "请求方法分布:"
awk -F'"' '{print $2}' "$LOG_FILE" | awk '{print $1}' | sort | uniq -c | sort -rn

# 状态码分析
echo -e "\n=== 状态码分析 ==="
awk '{print $9}' "$LOG_FILE" | sort | uniq -c | sort -rn

# 热门页面
echo -e "\n=== 热门页面 ==="
awk -F'"' '{print $2}' "$LOG_FILE" | awk '{print $2}' | sort | uniq -c | sort -rn | head -10

# 错误分析
echo -e "\n=== 错误分析 ==="
grep -E ' (4[0-9]{2}|5[0-9]{2}) ' "$LOG_FILE" | head -10
EOF

chmod +x analyze_logs.sh

# 2. Data-cleaning script: strips blank lines and surrounding whitespace,
# normalises , ; | delimiters to commas, and replaces empty cells with NULL.
# NOTE(review): the generated sed also rewrites delimiter characters that
# appear inside quoted field values — acceptable for this demo, confirm
# before using on real CSV data.
cat > clean_data.sh << 'EOF'
#!/bin/bash
# 数据清洗脚本

INPUT_FILE=$1
OUTPUT_FILE=${2:-cleaned_data.csv}

if [[ -z "$INPUT_FILE" ]]; then
    echo "用法: $0 <输入文件> [输出文件]"
    exit 1
fi

echo "清洗数据文件: $INPUT_FILE -> $OUTPUT_FILE"

# 执行数据清洗
sed '
# 删除空行
/^$/d
# 删除行首行尾空格
s/^[[:space:]]*//
s/[[:space:]]*$//
# 标准化分隔符
s/[,;|][[:space:]]*/,/g
' "$INPUT_FILE" | \
awk -F, '
BEGIN {OFS=","}
NR == 1 {
    # 处理标题行
    print $0
    next
}
{
    # 数据验证和清理
    for(i=1; i<=NF; i++) {
        # 清理字段空格
        gsub(/^[[:space:]]+|[[:space:]]+$/, "", $i)
        # 处理空值
        if($i == "") $i = "NULL"
    }
    print $0
}' > "$OUTPUT_FILE"

echo "数据清洗完成: $OUTPUT_FILE"
echo "原始行数: $(wc -l < "$INPUT_FILE")"
echo "清洗后行数: $(wc -l < "$OUTPUT_FILE")"
EOF

chmod +x clean_data.sh

# 3. Configuration-file management script (list/get/set/delete of key=value
# entries).  Quoted 'EOF' keeps the embedded $variables literal so they
# belong to the generated script.
cat > manage_config.sh << 'EOF'
#!/bin/bash
# 配置文件管理脚本

CONFIG_FILE=${1:-config.txt}
ACTION=$2
KEY=$3
VALUE=$4

usage() {
    echo "用法: $0 <配置文件> <动作> [键] [值]"
    echo "动作:"
    echo "  list       - 列出所有配置"
    echo "  get <key>  - 获取配置值"
    echo "  set <key> <value> - 设置配置值"
    echo "  delete <key> - 删除配置项"
}

list_config() {
    echo "=== 配置列表 ==="
    grep -v '^#' "$CONFIG_FILE" | grep '=' | while IFS= read -r line; do
        key=$(echo "$line" | cut -d'=' -f1)
        value=$(echo "$line" | cut -d'=' -f2-)
        printf "%-25s: %s\n" "$key" "$value"
    done
}

get_config() {
    local key=$1
    grep "^$key=" "$CONFIG_FILE" | cut -d'=' -f2-
}

set_config() {
    local key=$1
    local value=$2
    
    if grep -q "^$key=" "$CONFIG_FILE"; then
        # Escape \ / & in the value so it is safe inside the sed s///
        # replacement (the original broke on values containing slashes,
        # e.g. filesystem paths).  Keys are assumed to be plain words.
        local esc=${value//\\/\\\\}
        esc=${esc//\//\\/}
        esc=${esc//&/\\&}
        sed -i "s/^$key=.*/$key=$esc/" "$CONFIG_FILE"
        echo "更新配置: $key=$value"
    else
        # 添加新配置
        echo "$key=$value" >> "$CONFIG_FILE"
        echo "添加配置: $key=$value"
    fi
}

delete_config() {
    local key=$1
    sed -i "/^$key=/d" "$CONFIG_FILE"
    echo "删除配置: $key"
}

case "$ACTION" in
    "list")
        list_config
        ;;
    "get")
        if [[ -z "$KEY" ]]; then
            echo "错误: 需要指定键名"
            usage
            exit 1
        fi
        get_config "$KEY"
        ;;
    "set")
        if [[ -z "$KEY" || -z "$VALUE" ]]; then
            echo "错误: 需要指定键名和值"
            usage
            exit 1
        fi
        set_config "$KEY" "$VALUE"
        ;;
    "delete")
        if [[ -z "$KEY" ]]; then
            echo "错误: 需要指定键名"
            usage
            exit 1
        fi
        delete_config "$KEY"
        ;;
    *)
        usage
        exit 1
        ;;
esac
EOF

chmod +x manage_config.sh

# 4. System-monitoring script: CPU / memory / disk / load checks with
# threshold alerts appended to a log file.
# NOTE(review): the generated script logs to /var/log/system_monitor.log,
# which normally requires root; it also depends on `bc` being installed and
# on the output formats of `top` and `free`, which vary between
# distributions — confirm before relying on it outside this demo.
cat > system_monitor.sh << 'EOF'
#!/bin/bash
# 系统监控脚本

LOG_FILE="/var/log/system_monitor.log"
ALERT_THRESHOLD=80

# 日志函数
log_message() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" >> "$LOG_FILE"
}

# 检查CPU使用率
check_cpu() {
    local cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | cut -d'%' -f1)
    echo "CPU使用率: ${cpu_usage}%"
    
    if (( $(echo "$cpu_usage > $ALERT_THRESHOLD" | bc -l) )); then
        log_message "警告: CPU使用率过高 - ${cpu_usage}%"
        return 1
    fi
    return 0
}

# 检查内存使用率
check_memory() {
    local mem_info=$(free | grep Mem)
    local total_mem=$(echo "$mem_info" | awk '{print $2}')
    local used_mem=$(echo "$mem_info" | awk '{print $3}')
    local mem_usage=$(echo "scale=2; $used_mem * 100 / $total_mem" | bc)
    
    echo "内存使用率: ${mem_usage}%"
    
    if (( $(echo "$mem_usage > $ALERT_THRESHOLD" | bc -l) )); then
        log_message "警告: 内存使用率过高 - ${mem_usage}%"
        return 1
    fi
    return 0
}

# 检查磁盘使用率
check_disk() {
    local disk_usage=$(df / | awk 'NR==2{print $5}' | cut -d'%' -f1)
    echo "磁盘使用率: ${disk_usage}%"
    
    if [ "$disk_usage" -gt "$ALERT_THRESHOLD" ]; then
        log_message "警告: 磁盘使用率过高 - ${disk_usage}%"
        return 1
    fi
    return 0
}

# 检查系统负载
check_load() {
    local load_avg=$(uptime | awk -F'load average:' '{print $2}' | awk -F, '{print $1}' | tr -d ' ')
    local cpu_cores=$(nproc)
    
    echo "系统负载: $load_avg (CPU核心: $cpu_cores)"
    
    if (( $(echo "$load_avg > $cpu_cores" | bc -l) )); then
        log_message "警告: 系统负载过高 - $load_avg"
        return 1
    fi
    return 0
}

# 生成报告
generate_report() {
    echo "=== 系统监控报告 ==="
    echo "生成时间: $(date)"
    echo
    
    check_cpu
    check_memory
    check_disk
    check_load
    
    echo
    echo "最近告警:"
    tail -5 "$LOG_FILE" 2>/dev/null || echo "无告警记录"
}

# 主函数
main() {
    case "${1:-report}" in
        "report")
            generate_report
            ;;
        "cpu")
            check_cpu
            ;;
        "memory")
            check_memory
            ;;
        "disk")
            check_disk
            ;;
        "load")
            check_load
            ;;
        "log")
            tail -20 "$LOG_FILE" 2>/dev/null || echo "日志文件不存在"
            ;;
        *)
            echo "用法: $0 {report|cpu|memory|disk|load|log}"
            exit 1
            ;;
    esac
}

main "$@"
EOF

chmod +x system_monitor.sh

# Summary of the generated tool set.
echo "实用脚本库创建完成:"
echo "  analyze_logs.sh   - 日志分析脚本"
echo "  clean_data.sh     - 数据清洗脚本"
echo "  manage_config.sh  - 配置管理脚本"
echo "  system_monitor.sh - 系统监控脚本"

echo -e "\n=== 文本处理三剑客教程完成 ==="
echo "现在您已经掌握了 grep, sed, awk 的核心用法和实战技巧!"
echo "建议在实际工作中不断练习和应用这些工具。"

通过本教程,您已经系统学习了 Linux 文本处理三剑客的完整知识体系。从基础用法到高级技巧,从单一工具使用到组合应用,您现在应该能够:

  1. 使用 grep 进行高效的文本搜索和过滤
  2. 使用 sed 进行流编辑和批量处理
  3. 使用 awk 进行复杂的数据处理和报表生成
  4. 将三个工具组合使用解决复杂问题
  5. 在实际项目中应用这些技能

继续练习和探索,这些工具将成为您日常工作中不可或缺的利器!

相关推荐
hkhkhkhkh1231 小时前
Linux设备节点基础知识
linux·服务器·驱动开发
HZero.chen3 小时前
Linux字符串处理
linux·string
张童瑶3 小时前
Linux SSH隧道代理转发及多层转发
linux·运维·ssh
汪汪队立大功1233 小时前
什么是SELinux
linux
石小千3 小时前
Linux安装OpenProject
linux·运维
柏木乃一3 小时前
进程(2)进程概念与基本操作
linux·服务器·开发语言·性能优化·shell·进程
Lime-30903 小时前
制作Ubuntu 24.04-GPU服务器测试系统盘
linux·运维·ubuntu
百年渔翁_肯肯3 小时前
Linux 与 Unix 的核心区别(清晰对比版)
linux·运维·unix
胡闹544 小时前
Linux查询防火墙放过的端口并额外增加需要通过的端口命令
linux·运维·windows
lc9991025 小时前
简洁高效的相机预览
android·linux