From 91a0514fab38ed7c82e1791d5ff9813a889ddaea Mon Sep 17 00:00:00 2001 From: Ruben Ayrapetyan Date: Thu, 14 Jan 2016 23:32:31 +0300 Subject: [PATCH] Estimate performance measurement inaccuracy in tools/perf.sh and tools/run-perf-test.sh JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan r.ayrapetyan@samsung.com --- tools/perf.sh | 169 +++++++++++++++++++++++++++++++-- tools/run-perf-test.sh | 207 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 350 insertions(+), 26 deletions(-) diff --git a/tools/perf.sh b/tools/perf.sh index 717fc510c..237450de7 100755 --- a/tools/perf.sh +++ b/tools/perf.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2014 Samsung Electronics Co., Ltd. +# Copyright 2014-2016 Samsung Electronics Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -30,17 +30,166 @@ fi perf_values=$( (( for i in `seq 1 1 $ITERS`; do time $ENGINE "$BENCHMARK"; if [ $? -ne 0 ]; then exit 1; fi; done ) 2>&1 ) | \ grep user | \ sed "$time_regexp" | \ - awk 'BEGIN { min_v = -1; } { v = $1 * 60 + $2; if (min_v == -1 || v < min_v) { min_v = v; }; s += v; n += 1; } END { print s / n, min_v; }'; - if [ ${PIPESTATUS[0]} -ne 0 ]; then exit 1; fi;); + awk '{ print ($1 * 60 + $2); }'; + if [ ${PIPESTATUS[0]} -ne 0 ]; then exit 1; fi; ); + +if [ "$PRINT_MIN" == "-min" ] +then + perf_values=$( echo "$perf_values" | \ + awk "BEGIN { + min_v = -1; + } + { + if (min_v == -1 || $1 < min_v) { + min_v = $1; + } + } + END { + print min_v + }" || exit 1; + ); + calc_status=$? +else + perf_values=$( echo "$perf_values" | \ + awk "BEGIN { + n = 0 + } + { + n++ + a[n] = \$1 + } + END { + # + # Values of 99% quantiles of two-sided t-distribution for given number of degrees of freedom + # + t_gamma_n_m1 [1] = 63.657 + t_gamma_n_m1 [2] = 9.9248 + t_gamma_n_m1 [3] = 5.8409 + t_gamma_n_m1 [4] = 4.6041 + t_gamma_n_m1 [5] = 4.0321 + t_gamma_n_m1 [6] = 3.7074 + t_gamma_n_m1 [7] = 3.4995 + t_gamma_n_m1 [8] = 3.3554 + t_gamma_n_m1 [9] = 3.2498 + t_gamma_n_m1 [10] = 3.1693 + t_gamma_n_m1 [11] = 3.1058 + t_gamma_n_m1 [12] = 3.0545 + t_gamma_n_m1 [13] = 3.0123 + t_gamma_n_m1 [14] = 2.9768 + t_gamma_n_m1 [15] = 2.9467 + t_gamma_n_m1 [16] = 2.9208 + t_gamma_n_m1 [17] = 2.8982 + t_gamma_n_m1 [18] = 2.8784 + t_gamma_n_m1 [19] = 2.8609 + t_gamma_n_m1 [20] = 2.8453 + t_gamma_n_m1 [21] = 2.8314 + t_gamma_n_m1 [22] = 2.8188 + t_gamma_n_m1 [23] = 2.8073 + t_gamma_n_m1 [24] = 2.7969 + t_gamma_n_m1 [25] = 2.7874 + t_gamma_n_m1 [26] = 2.7787 + t_gamma_n_m1 [27] = 2.7707 + t_gamma_n_m1 [28] = 2.7633 + t_gamma_n_m1 [29] = 2.7564 + t_gamma_n_m1 [30] = 2.75 + t_gamma_n_m1 [31] = 2.744 + t_gamma_n_m1 [32] = 2.7385 + t_gamma_n_m1 [33] = 2.7333 + t_gamma_n_m1 [34] = 2.7284 + t_gamma_n_m1 [35] = 2.7238 + t_gamma_n_m1 [36] = 2.7195 + t_gamma_n_m1 [37] = 2.7154 + t_gamma_n_m1 [38] = 2.7116 + t_gamma_n_m1 [39] = 2.7079 + t_gamma_n_m1 [40] = 2.7045 + t_gamma_n_m1 [41] = 2.7012 + t_gamma_n_m1 [42] = 2.6981 + t_gamma_n_m1 [43] = 2.6951 + t_gamma_n_m1 [44] = 2.6923 + t_gamma_n_m1 [45] = 2.6896 + t_gamma_n_m1 [46] = 2.687 + t_gamma_n_m1 [47] = 2.6846 + t_gamma_n_m1 [48] = 2.6822 + t_gamma_n_m1 [49] = 2.68 + t_gamma_n_m1 [50] = 2.6778 + + # + # Sort array of measurements + # + for (i = 2; i <= n; i++) { + j = i + k = a [j] + while (j > 1 && a [j - 1] > k) { + a [j] = a [j - 1] + j-- + } + a [j] = k + } + + # + # Remove 20% of lowest and 20% of highest values + # + n_20_percent = int (n / 5) + + for (i = 1; i <= n_20_percent; i++) { + delete a[n] + n-- + } + + for (i = 1; i <= n - n_20_percent; i++) { + a[i] = a[i + n_20_percent] + } + + n -= n_20_percent + + # + # Calculate average + # + sum = 0 + for (i = 1; i <= n; i++) { + sum += a[i] + } + + avg = sum / n + + if (n > 1) { + if (n - 1 <= 50) { + t_coef = t_gamma_n_m1 [n - 1] + } else { + # For greater degrees of freedom, values of corresponding quantiles + # are insignificantly less than the value. + # + # For example, the value for infinite number of freedoms is 2.5758 + # + # So, to reduce table size, we take this, greater value, + # overestimating inaccuracy for no more than 4%. + # + t_coef = t_gamma_n_m1 [50] + } + + # + # Calculate inaccuracy estimation + # + sum_delta_squares = 0 + for (i = 1; i <= n; i++) { + sum_delta_squares += (avg - a[i]) ^ 2 + } + + delta = t_coef * sqrt (sum_delta_squares / (n * (n - 1))) + + print avg, delta + } else { + print avg + } + } + " || exit 1; + ); + calc_status=$? +fi + +echo "$perf_values" if [ $? -ne 0 ]; then exit 1; fi; - -if [ "$PRINT_MIN" == "-min" ] -then - echo $perf_values | cut -d ' ' -f 2 -else - echo $perf_values | cut -d ' ' -f 1 -fi diff --git a/tools/run-perf-test.sh b/tools/run-perf-test.sh index 786536b53..cb9fcd5ee 100755 --- a/tools/run-perf-test.sh +++ b/tools/run-perf-test.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2014-2015 Samsung Electronics Co., Ltd. +# Copyright 2014-2016 Samsung Electronics Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,9 +26,9 @@ function exit_err() { exit 1 } -USAGE="Usage:\n sudo run.sh OLD_ENGINE NEW_ENGINE REPEATS TIMEOUT BENCH_FOLDER" +USAGE="Usage:\n sudo run.sh OLD_ENGINE NEW_ENGINE REPEATS TIMEOUT BENCH_FOLDER OUTPUT_FORMAT" -if [ "$#" -ne 5 ] +if [ "$#" -ne 6 ] then echo -e "${USAGE}" exit_err "Argument number mismatch..." @@ -39,6 +39,15 @@ ENGINE_NEW="$2" REPEATS="$3" TIMEOUT="$4" BENCH_FOLDER="$5" +OUTPUT_FORMAT="$6" + +if [ "${OUTPUT_FORMAT}" != "-m" ] +then + if [ "${OUTPUT_FORMAT}" != "-c" ] + then + exit_err "Please, use -m or -c as output format specifier" + fi +fi if [ "${REPEATS}" -lt 1 ] then @@ -54,13 +63,27 @@ perf_n=0 mem_n=0 perf_rel_mult=1.0 +perf_rel_inaccuracy_tmp=0 mem_rel_mult=1.0 +mem_rel_inaccuracy_tmp="-1" + +# Unicode "figure space" character +FIGURE_SPACE=$(echo -e -n "\xE2\x80\x87") + +# Unicode "approximately equal" character +APPROXIMATELY_EQUAL=$(echo -n -e "\xE2\x89\x88") function run-compare() { COMMAND=$1 PRE=$2 TEST=$3 + PRECISION=$4 + UNIT=$5 + + ABS_FP_FMT="%$((PRECISION + 4)).$((PRECISION))f$UNIT" + REL_FP_FMT="%0.3f" + REL_SHOW_PLUS_SIGN_FP_FMT="%+0.3f" OLD=$(timeout "${TIMEOUT}" ${COMMAND} "${ENGINE_OLD}" "${TEST}") || return 1 NEW=$(timeout "${TIMEOUT}" ${COMMAND} "${ENGINE_NEW}" "${TEST}") || return 1 @@ -68,20 +91,119 @@ function run-compare() #check result ! $OLD || ! $NEW || return 1 + OLD_value=$(echo "$OLD " | cut -d ' ' -f 1) + OLD_inaccuracy=$(echo "$OLD " | cut -d ' ' -f 2) + + NEW_value=$(echo "$NEW " | cut -d ' ' -f 1) + NEW_inaccuracy=$(echo "$NEW " | cut -d ' ' -f 2) + #calc relative speedup - rel=$(echo "${OLD}" "${NEW}" | awk '{print $2 / $1; }') + eval "rel_mult=\$${PRE}_rel_mult" + + rel=$(echo "${OLD_value}" "${NEW_value}" | awk '{ print $2 / $1; }') #increment n ((${PRE}_n++)) - #accumulate relative speedup - eval "rel_mult=\$${PRE}_rel_mult" + #calc percent to display + PERCENT=$(echo "$rel" | awk '{print (1.0 - $1) * 100; }') + + if [[ "$OLD_inaccuracy" != "" && "$NEW_inaccuracy" != "" ]] + then + DIFF=$(printf "$ABS_FP_FMT -> $ABS_FP_FMT" $OLD_value $NEW_value) + rel_inaccuracy=$(echo "$OLD_value $OLD_inaccuracy $NEW_value $NEW_inaccuracy" | \ + awk "{ + OLD_value=\$1 + OLD_inaccuracy=\$2 + NEW_value=\$3 + NEW_inaccuracy=\$4 + + rel_inaccuracy = (NEW_value / OLD_value) * sqrt ((OLD_inaccuracy / OLD_value) ^ 2 + (NEW_inaccuracy / NEW_value) ^ 2) + if (rel_inaccuracy < 0) { + rel_inaccuracy = -rel_inaccuracy + } + + print rel_inaccuracy + }") + PERCENT_inaccuracy=$(echo "$rel_inaccuracy" | awk '{ print $1 * 100.0 }') + + ext=$(echo "$PERCENT $PERCENT_inaccuracy" | \ + awk "{ + PERCENT=\$1 + PERCENT_inaccuracy=\$2 + + if (PERCENT > 0.0 && PERCENT > PERCENT_inaccuracy) { + print \"[+]\" + } else if (PERCENT < 0 && -PERCENT > PERCENT_inaccuracy) { + print \"[-]\" + } else { + print \"[$APPROXIMATELY_EQUAL]\" + } + }") + + if [[ $rel_inaccuracy_tmp -lt 0 ]] + then + return 1 + fi + + eval "rel_inaccuracy_tmp=\$${PRE}_rel_inaccuracy_tmp" + + rel_inaccuracy_tmp=$(echo "$rel $rel_inaccuracy $rel_inaccuracy_tmp" | \ + awk "{ + rel=\$1 + rel_inaccuracy=\$2 + rel_inaccuracy_tmp=\$3 + print rel_inaccuracy_tmp + (rel_inaccuracy / rel) ^ 2 + }") + + eval "${PRE}_rel_inaccuracy_tmp=\$rel_inaccuracy_tmp" + + PERCENT=$(printf "%8s %11s" $(printf "$REL_SHOW_PLUS_SIGN_FP_FMT%%" $PERCENT) $(printf "(+-$REL_FP_FMT%%)" $PERCENT_inaccuracy)) + PERCENT="$PERCENT : $ext" + + if [ "${OUTPUT_FORMAT}" == "-m" ] + then + WIDTH=42 + DIFF=$(printf "%s%s" "$DIFF" "$(printf "%$(($WIDTH - ${#DIFF}))s")") + PERCENT=$(printf "%s%s" "$(printf "%$(($WIDTH - ${#PERCENT}))s")" "$PERCENT") + + format="\`%s\`
\`%s\`" + else + format="%20s : %19s" + fi + else + ext="" + + if [[ "$OLD_inaccuracy" != "" || "$NEW_inaccuracy" != "" ]] + then + return 1; + fi + + DIFF=$(printf "$ABS_FP_FMT -> $ABS_FP_FMT" $OLD_value $NEW_value) + PERCENT=$(printf "$REL_SHOW_PLUS_SIGN_FP_FMT%%" $PERCENT) + + if [ "${OUTPUT_FORMAT}" == "-m" ] + then + WIDTH=20 + DIFF=$(printf "%s%s" "$DIFF" "$(printf "%$(($WIDTH - ${#DIFF}))s")") + PERCENT=$(printf "%s%s" "$(printf "%$(($WIDTH - ${#PERCENT}))s")" "$PERCENT") + + format="\`%s\`
\`%s\`" + else + format="%14s : %8s" + fi + fi + rel_mult=$(echo "$rel_mult" "$rel" | awk '{print $1 * $2;}') + eval "${PRE}_rel_mult=\$rel_mult" - #calc percent to display - percent=$(echo "$rel" | awk '{print (1.0 - $1) * 100; }') - printf "%28s" "$(printf "%6s->%6s (%3.3f)" "$OLD" "$NEW" "$percent")" + if [ "${OUTPUT_FORMAT}" == "-m" ] + then + printf "$format" "$DIFF" "$PERCENT" | sed "s/ /$FIGURE_SPACE/g" + else + printf "$format" "$DIFF" "$PERCENT" + fi } function run-test() @@ -89,11 +211,15 @@ function run-test() TEST=$1 # print only filename - printf "%40s | " "${TEST##*/}" - run-compare "./tools/rss-measure.sh" "mem" "${TEST}" || return 1 - printf " | " - run-compare "./tools/perf.sh ${REPEATS}" "perf" "${TEST}" || return 1 + if [ "${OUTPUT_FORMAT}" == "-m" ] + then + printf "%s | " "${TEST##*/}" + else + printf "%50s | " "${TEST##*/}" + fi + run-compare "./tools/rss-measure.sh" "mem" "${TEST}" 0 k || return 1 printf " | " + run-compare "./tools/perf.sh ${REPEATS}" "perf" "${TEST}" 3 s || return 1 printf "\n" } @@ -108,16 +234,65 @@ function run-suite() } date -printf "%40s | %28s | %28s |\n" "Benchmark" "RSS
(+ is better)" "Perf
(+ is better)" -printf "%40s | %28s | %28s |\n" "---------" "---" "----" + +if [ "${OUTPUT_FORMAT}" == "-m" ] +then + echo "Benchmark | RSS
(+ is better) | Perf
(+ is better)" + echo "---------: | --------- | ---------" +else + printf "%50s | %25s | %35s\n" "Benchmark" "RSS
(+ is better)" "Perf
(+ is better)" +fi run-suite "${BENCH_FOLDER}" mem_rel_gmean=$(echo "$mem_rel_mult" "$mem_n" | awk '{print $1 ^ (1.0 / $2);}') mem_percent_gmean=$(echo "$mem_rel_gmean" | awk '{print (1.0 - $1) * 100;}') +if [[ $mem_rel_inaccuracy_tmp != "-1" ]] +then + exit_err "Incorrect inaccuracy calculation for memory consumption geometric mean" +fi perf_rel_gmean=$(echo "$perf_rel_mult" "$perf_n" | awk '{print $1 ^ (1.0 / $2);}') perf_percent_gmean=$(echo "$perf_rel_gmean" | awk '{print (1.0 - $1) * 100;}') +if [[ "$perf_rel_inaccuracy_tmp" == "-1" ]] +then + exit_err "Incorrect inaccuracy calculation for performance geometric mean" +else + perf_percent_inaccuracy=$(echo "$perf_rel_gmean $perf_rel_inaccuracy_tmp $perf_n" | \ + awk "{ + perf_rel_gmean=\$1 + perf_rel_inaccuracy_tmp=\$2 + perf_n=\$3 + + print 100.0 * (perf_rel_gmean ^ (1.0 / perf_n) * sqrt (perf_rel_inaccuracy_tmp) / perf_n) + }") + perf_ext=$(echo "$perf_percent_gmean $perf_percent_inaccuracy" | \ + awk "{ + perf_percent_gmean=\$1 + perf_percent_inaccuracy=\$2 + + if (perf_percent_gmean > 0.0 && perf_percent_gmean > perf_percent_inaccuracy) { + print \"[+]\" + } else if (perf_percent_gmean < 0 && -perf_percent_gmean > perf_percent_inaccuracy) { + print \"[-]\" + } else { + print \"[$APPROXIMATELY_EQUAL]\" + } + }") + perf_percent_inaccuracy=$(printf "(+-%0.3f%%) : $perf_ext" $perf_percent_inaccuracy) +fi + +gmean_label_text="Geometric mean:" + +if [ "${OUTPUT_FORMAT}" == "-m" ] +then + mem_percent_gmean_text=$(printf "RSS reduction: \`%0.3f%%\`" "$mem_percent_gmean") + perf_percent_gmean_text=$(printf "Speed up: \`%0.3f%% %s\`" "$perf_percent_gmean" "$perf_percent_inaccuracy") + printf "%s | %s | %s\n" "$gmean_label_text" "$mem_percent_gmean_text" "$perf_percent_gmean_text" +else + mem_percent_gmean_text=$(printf "RSS reduction: %0.3f%%" "$mem_percent_gmean") + perf_percent_gmean_text=$(printf "Speed up: %0.3f%% %s" "$perf_percent_gmean" "$perf_percent_inaccuracy") + printf "%50s | %25s | %51s\n" "$gmean_label_text" "$mem_percent_gmean_text" "$perf_percent_gmean_text" +fi -printf "%40s | %28s | %28s |\n" "Geometric mean:" "RSS reduction: $mem_percent_gmean%" "Speed up: $perf_percent_gmean%" date