实验要求
lab1 的两个 bash 测试脚本的解释
shell
#!/bin/sh
#
# basic map-reduce test
#

# Extra flag handed to every `go build`; empty means a normal build.
RACE=

# uncomment this to run the tests with the Go race detector.
#RACE=-race

# -------------------------------------------------------
# Set up the test environment
# -------------------------------------------------------
# Start from a fresh, empty mr-tmp directory and work inside it.
rm -rf mr-tmp
mkdir mr-tmp || exit 1
cd mr-tmp || exit 1
rm -f mr-*

# -------------------------------------------------------
# Build the MapReduce plugins and main programs
# -------------------------------------------------------
# Build each application in mrapps as a Go plugin; abort on the first failure.
# $RACE is deliberately unquoted so that an empty value adds no argument.
for app in wc indexer mtiming rtiming crash nocrash
do
  (cd ../../mrapps && go build $RACE -buildmode=plugin "$app.go") || exit 1
done

# Build the master, the worker and the sequential reference program
# in the parent directory.
for prog in mrmaster mrworker mrsequential
do
  (cd .. && go build $RACE "$prog.go") || exit 1
done

# Set to 1 by any test section below that fails.
failed_any=0
# -------------------------------------------------------
# Word-count test (wc test)
# -------------------------------------------------------
# Produce the reference output with the sequential implementation.
../mrsequential ../../mrapps/wc.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-wc.txt
rm -f mr-out*

echo '***' Starting wc test.

# Start the master in the background; timeout caps its run time at 180s.
timeout -k 2s 180s ../mrmaster ../pg*txt &

# Give the master time to create its socket.
sleep 1

# Start three workers in the background.
for n in 1 2 3
do
  timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &
done

# With no operands, wait blocks until every background job started above
# (the master and all three workers) has finished.
wait

# Compare the merged, sorted output (empty lines dropped) with the reference.
sort mr-out* | grep . > mr-wc-all
if cmp mr-wc-all mr-correct-wc.txt
then
  echo '---' wc test: PASS
else
  echo '---' wc output is not the same as mr-correct-wc.txt
  echo '---' wc test: FAIL
  failed_any=1
fi

# Wait again for any remaining background jobs.
wait
wait
wait
# -------------------------------------------------------
# Indexer test
# -------------------------------------------------------
# Clear earlier files, then produce the reference index sequentially.
rm -f mr-*
../mrsequential ../../mrapps/indexer.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-indexer.txt
rm -f mr-out*

echo '***' Starting indexer test.

# Master in the background, then a one-second pause before the workers start.
timeout -k 2s 180s ../mrmaster ../pg*txt &
sleep 1

# One worker in the background and one in the foreground; the script
# continues as soon as the foreground worker exits.
timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so &
timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so

# Compare the merged, sorted output (empty lines dropped) with the reference.
sort mr-out* | grep . > mr-indexer-all
if ! cmp mr-indexer-all mr-correct-indexer.txt
then
  echo '---' indexer output is not the same as mr-correct-indexer.txt
  echo '---' indexer test: FAIL
  failed_any=1
else
  echo '---' indexer test: PASS
fi

# Collect the background master and worker.
wait
wait
# -------------------------------------------------------
# Map parallelism test
# -------------------------------------------------------
# Announce the test, as the wc, indexer and crash sections do.
echo '***' Starting map parallelism test.

# Remove output and worker files left over from the previous test.
rm -f mr-out* mr-worker*

# Master in the background, then a one-second pause before the workers start.
timeout -k 2s 180s ../mrmaster ../pg*txt &
sleep 1

# Two workers running the mtiming plugin: one in the background, one in
# the foreground. The checks below run once the foreground worker exits.
timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so &
timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so

# Count the output lines that start with "times-"; exactly 2 are expected
# (presumably one per worker, as the failure message below suggests).
NT=$(cat mr-out* | grep -c '^times-')

# The two checks are chained so that a single verdict is printed: a wrong
# worker count reports FAIL and is never followed by a PASS line.
if [ "$NT" != "2" ]
then
  echo '---' saw "$NT" workers rather than 2
  echo '---' map parallelism test: FAIL
  failed_any=1
elif cat mr-out* | grep '^parallel.* 2' > /dev/null
then
  # Some output line starts with "parallel" and contains " 2".
  echo '---' map parallelism test: PASS
else
  echo '---' map workers did not run in parallel
  echo '---' map parallelism test: FAIL
  failed_any=1
fi

# Collect the background master and worker.
wait ; wait
# -------------------------------------------------------
# Reduce parallelism test
# -------------------------------------------------------
# Announce the test, as the wc, indexer and crash sections do.
echo '***' Starting reduce parallelism test.

# Remove output and worker files left over from the previous test.
rm -f mr-out* mr-worker*

# Master in the background, then a one-second pause before the workers start.
timeout -k 2s 180s ../mrmaster ../pg*txt &
sleep 1

# Two workers running the rtiming plugin: one in the background, one in
# the foreground. The check below runs once the foreground worker exits.
timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so &
timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so

# Count the output lines of the form "<lowercase letter> 2..."; at least
# two such lines are required to pass.
NT=$(cat mr-out* | grep -c '^[a-z] 2')
if [ "$NT" -lt "2" ]
then
  echo '---' too few parallel reduces.
  echo '---' reduce parallelism test: FAIL
  failed_any=1
else
  echo '---' reduce parallelism test: PASS
fi

# Collect the background master and worker.
wait ; wait
# -------------------------------------------------------
# Crash recovery test
# -------------------------------------------------------
# Produce the reference output sequentially with the nocrash plugin.
../mrsequential ../../mrapps/nocrash.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-crash.txt
rm -f mr-out*

echo '***' Starting crash test.

# mr-done is a marker file created as soon as the master process ends
# (whether it finished or was stopped by timeout).
rm -f mr-done
(timeout -k 2s 180s ../mrmaster ../pg*txt ; touch mr-done ) &
sleep 1

# Start the first worker with the crash plugin.
timeout -k 2s 180s ../mrworker ../../mrapps/crash.so &

# Socket path; per the original note this mimics masterSock() in rpc.go.
SOCKNAME=/var/tmp/824-mr-$(id -u)

# Keep starting a replacement worker, one second after the previous one
# exits, for as long as the socket exists and the master has not finished.
restart_crash_workers() {
  while [ -e "$SOCKNAME" ] && [ ! -f mr-done ]
  do
    timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
    sleep 1
  done
}

# Two respawn loops in the background and one in the foreground.
restart_crash_workers &
restart_crash_workers &
restart_crash_workers

# With no operands, wait blocks until every remaining background job
# (master, first worker, both background loops) has finished.
wait

# -f: the socket may already be gone, which is not an error here.
rm -f "$SOCKNAME"

# Compare the merged, sorted output (empty lines dropped) with the reference.
sort mr-out* | grep . > mr-crash-all
if cmp mr-crash-all mr-correct-crash.txt
then
  echo '---' crash test: PASS
else
  echo '---' crash output is not the same as mr-correct-crash.txt
  echo '---' crash test: FAIL
  failed_any=1
fi
# -------------------------------------------------------
# Final summary
# -------------------------------------------------------
# Report the overall verdict; any value other than 0 counts as a failure
# and makes the script exit with status 1.
case "$failed_any" in
  0)
    echo '***' PASSED ALL TESTS
    ;;
  *)
    echo '***' FAILED SOME TESTS
    exit 1
    ;;
esac
流程图(按执行顺序):开始 → 初始化环境(清理并创建测试目录)→ 编译MapReduce插件 → 编译主程序 → 单词计数测试(生成基准输出 → 启动Master → 启动多个Worker → 比较输出结果)→ 索引测试(生成索引基准 → 启动Master → 启动Worker → 验证索引结果)→ Map并行性测试(启动Master → 启动两个mtiming Worker → 检查Worker数量 → 检查并行度)→ Reduce并行性测试(启动Master → 启动两个rtiming Worker → 检查并行Reduce数量)→ 崩溃恢复测试(生成正确输出基准 → 启动Master → 启动crash Worker → 循环重启Worker → 比较崩溃恢复结果)→ 所有测试通过?是:测试成功;否:测试失败。
以下是第二个脚本(循环多次运行 test-mr.sh 的脚本)的逐行备注:
bash
#!/usr/bin/env bash
- 指定使用 `bash` 解释器执行脚本(通过 `env` 在 `PATH` 中查找 `bash`)。
bash
# Require exactly one argument (used below as the number of trials);
# otherwise print a usage line and exit with status 1.
if [ $# -ne 1 ]; then
echo "Usage: $0 numTrials"
exit 1
fi
- 检查脚本的参数数量。
  - `if [ $# -ne 1 ]; then`:判断是否恰好传入了一个参数,`$#` 表示参数个数。
  - 如果参数数量不等于 1,输出用法提示并退出,`$0` 表示脚本名。
bash
# On INT (Ctrl-C): send INT to the process group whose ID is $pid (the
# leading minus makes kill target a group), then exit with status 1.
# $pid is expanded when the trap fires, so it refers to the current trial.
trap 'kill -INT -$pid; exit 1' INT
- 设置 `trap` 捕获 `INT` 信号(通常为 Ctrl+C)。
  - 捕获到信号时,向进程组 `$pid` 发送 `INT` 信号(`kill` 的目标写成负数 `-$pid` 表示进程组;`$pid` 是循环中后台启动的 `timeout ... ./test-mr.sh` 的 PID),然后以状态码 1 退出脚本。
bash
# Note: because the socketID is based on the current userID,
# ./test-mr.sh cannot be run in parallel
- 注释解释:因为 `socketID` 基于当前 `userID`,所以不能并行运行 `./test-mr.sh`,否则会冲突。
bash
# Number of trials, taken from the first command-line argument.
runs=$1
# Make sure the test script can be invoked as ./test-mr.sh.
chmod +x test-mr.sh
- `runs=$1`:将命令行参数赋值给变量 `runs`,表示测试的执行次数。
- `chmod +x test-mr.sh`:赋予 `test-mr.sh` 可执行权限。
bash
# Run the test script once per trial and stop at the first failing trial.
for i in $(seq 1 $runs); do
# Run in the background so that $! provides the PID used by the INT trap
# and by wait; timeout sends TERM after 900s and KILL 2s later if needed.
timeout -k 2s 900s ./test-mr.sh &
pid=$!
# wait returns the exit status of that job; non-zero means the trial failed.
if ! wait $pid; then
echo '***' FAILED TESTS IN TRIAL $i
exit 1
fi
done
- 循环执行指定次数的测试。
  - `for i in $(seq 1 $runs); do`:循环从 1 到 `$runs`,`i` 表示当前循环的次数。
  - `timeout -k 2s 900s ./test-mr.sh &`:启动 `test-mr.sh` 并在后台执行,限制其最长运行时间为 900 秒;超时后先发送 `SIGTERM` 信号,若 2 秒后仍未退出(`-k 2s`)再发送 `SIGKILL` 信号。
  - `pid=$!`:将最后一个后台进程的 PID 赋值给 `pid`。
  - `if ! wait $pid; then`:等待 `test-mr.sh` 执行完毕,如果返回值非零表示测试失败。
  - `echo '***' FAILED TESTS IN TRIAL $i`:如果测试失败,输出失败信息,并退出脚本。
bash
# Reached only when no trial exited early; $i still holds the last loop index.
echo '***' PASSED ALL $i TESTING TRIALS
- 如果循环结束且所有测试均通过,输出成功信息。