184 changes: 139 additions & 45 deletions Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift
@@ -41,8 +41,12 @@ import Glibc
let quietRunning = argumentExtractor.extractFlag(named: "quiet")
let noProgress = argumentExtractor.extractFlag(named: "no-progress")
let checkAbsoluteThresholdsPath = argumentExtractor.extractOption(named: "check-absolute-path")
let skipLoadingBenchmarks = argumentExtractor.extractFlag(named: "skip-loading-benchmark-targets")
let checkAbsoluteThresholds =
checkAbsoluteThresholdsPath.count > 0 ? 1 : argumentExtractor.extractFlag(named: "check-absolute")
let runCount = argumentExtractor.extractOption(named: "run-count")
let relative = argumentExtractor.extractFlag(named: "relative")
let range = argumentExtractor.extractFlag(named: "range")
let groupingToUse = argumentExtractor.extractOption(named: "grouping")
let metricsToUse = argumentExtractor.extractOption(named: "metric")
let timeUnits = argumentExtractor.extractOption(named: "time-units")
@@ -233,6 +237,8 @@ import Glibc
throw MyError.invalidArgument
}

var totalRunCount = 1
var skipLoadingBenchmarksFlagIsValid = skipLoadingBenchmarks == 0
if commandToPerform == .thresholds {
guard positionalArguments.count > 0,
let thresholdsOperation = ThresholdsOperation(rawValue: positionalArguments.removeFirst())
@@ -262,11 +268,30 @@ import Glibc
)
throw MyError.invalidArgument
}
if positionalArguments.count > 0 {
let usesExistingBaseline = positionalArguments.count > 0
if usesExistingBaseline {
shouldBuildTargets = false
}
break
let requestedRunCount = runCount.first.flatMap { Int($0) } ?? 1
/// These update the run count to 5 by default if it's left at 1.
/// The relative/range flags don't mean anything unless we run multiple times:
/// the benchmarks need to be run several times to calculate relative/range
/// thresholds that satisfy all benchmark runs.
if relative > 0 {
args.append("--wants-relative-thresholds")
if !usesExistingBaseline {
totalRunCount = requestedRunCount < 2 ? 5 : requestedRunCount
}
}
if range > 0 {
args.append("--wants-range-thresholds")
if !usesExistingBaseline {
totalRunCount = requestedRunCount < 2 ? 5 : requestedRunCount
}
}
case .check:
skipLoadingBenchmarksFlagIsValid = true
shouldBuildTargets = skipLoadingBenchmarks == 0
let validRange = 0...1
guard validRange.contains(positionalArguments.count) else {
print(
@@ -281,6 +306,19 @@
}
}

if !skipLoadingBenchmarksFlagIsValid {
print("")
print(
"Flag --skip-loading-benchmark-targets is only valid for 'thresholds check' operations."
Contributor Author:

As discussed in #327 and in other older issues, the only reason you might need to build benchmark targets in the thresholds check command is to load the threshold tolerances that are defined in code in configuration.thresholds.
For users who have moved to static threshold files with range/relative, this is pointless and only wastes time.
So I've added the option to skip building benchmark targets for thresholds check.
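
A minimal usage sketch, assuming the plugin's existing swift package benchmark thresholds check entry point (only the new flag name comes from this diff; combine it with however your setup already points at its static threshold files):

swift package benchmark thresholds check --skip-loading-benchmark-targets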

Contributor Author:

Checking the benchmark runs, this has reduced the "compare" step's CI runtime from 30s to 15s, so a 15s saving.
The package itself is very lightweight; it just has 2 benchmark targets to build, which isn't abnormal but does mean 2 targets get built.

Contributor Author:

In a pretty chunky project that step takes 68s for 1 benchmark target. I'd assume at least 30s of that will be shaved off with this change.

That CI takes 18 minutes though. These are all nice savings but aren't a big deal.

)
print("")
print(help)
print("")
print("Please visit https://github.com/ordo-one/package-benchmark for more in-depth documentation")
print("")
throw MyError.invalidArgument
}

if commandToPerform == .baseline {
guard positionalArguments.count > 0,
let baselineOperation = BaselineOperation(rawValue: positionalArguments.removeFirst())
@@ -463,53 +501,108 @@ import Glibc

var failedBenchmarkCount = 0

try withCStrings(args) { cArgs in
if debug > 0 {
print("To debug, start \(benchmarkToolName) in LLDB using:")
print("lldb \(benchmarkTool.string)")
print("")
print("Then launch \(benchmarkToolName) with:")
print("run \(args.dropFirst().joined(separator: " "))")
print("")
return
}
var allFailureCount = 0
let results: [Result<Void, Error>] = (0..<max(totalRunCount, 1))
.map { runIdx in
Comment on lines +505 to +506

This code creates an interim array using Array(0..<max(totalRunCount, 1)).map instead of using direct iteration. This violates the anti-pattern rule that states to avoid creating interim arrays when a direct loop can be used instead. The code should be refactored to use a standard for loop like 'for runIdx in 0..<max(totalRunCount, 1) { ... }' to avoid the unnecessary array creation.

Suggested change
let results: [Result<Void, Error>] = (0..<max(totalRunCount, 1))
.map { runIdx in
var results: [Result<Void, Error>] = []
for runIdx in 0..<max(totalRunCount, 1) {

Spotted by Graphite Agent (based on custom rule: Avoid iteration anti pattern for arrays)


// If we're running multiple times, we need to add the run count to the arguments
var args = args
if totalRunCount > 1 {
args += ["--run-number", "\(runIdx + 1)"]
if quietRunning == 0 {
print(
"""

var pid: pid_t = 0
var status = posix_spawn(&pid, benchmarkTool.string, nil, nil, cArgs, environ)

if status == 0 {
if waitpid(pid, &status, 0) != -1 {
// Ok, this sucks, but there is no way to get a C support target for plugins and
// the way the status is extracted portably is with macros - so we just need to
// reimplement the logic here in Swift according to the waitpid man page to
// get some nicer feedback on failure reason.
guard let waitStatus = ExitCode(rawValue: (status & 0xFF00) >> 8) else {
print("One or more benchmarks returned an unexpected return code \(status)")
throw MyError.benchmarkUnexpectedReturnCode
Running the command multiple times, round \(runIdx + 1) of \(totalRunCount)...
"""
)
}
switch waitStatus {
case .success:
break
case .baselineNotFound:
throw MyError.baselineNotFound
case .genericFailure:
print("One or more benchmark suites crashed during runtime.")
throw MyError.benchmarkCrashed
case .thresholdRegression:
throw MyError.benchmarkThresholdRegression
case .thresholdImprovement:
throw MyError.benchmarkThresholdImprovement
case .benchmarkJobFailed:
failedBenchmarkCount += 1
case .noPermissions:
throw MyError.noPermissions
}

return Result<Void, Error> {
try withCStrings(args) { cArgs in
/// We'll decrement this in the success path
allFailureCount += 1

if debug > 0 {
print("To debug, start \(benchmarkToolName) in LLDB using:")
print("lldb \(benchmarkTool.string)")
print("")
print("Then launch \(benchmarkToolName) with:")
print("run \(args.dropFirst().joined(separator: " "))")
print("")
return
}

var pid: pid_t = 0
var status = posix_spawn(&pid, benchmarkTool.string, nil, nil, cArgs, environ)

if status == 0 {
if waitpid(pid, &status, 0) != -1 {
// Ok, this sucks, but there is no way to get a C support target for plugins and
// the way the status is extracted portably is with macros - so we just need to
// reimplement the logic here in Swift according to the waitpid man page to
// get some nicer feedback on failure reason.
guard let waitStatus = ExitCode(rawValue: (status & 0xFF00) >> 8) else {
print("One or more benchmarks returned an unexpected return code \(status)")
throw MyError.benchmarkUnexpectedReturnCode
}
switch waitStatus {
case .success:
allFailureCount -= 1
Comment on lines +521 to +551

Logic bug with allFailureCount when debug mode is enabled. The counter is incremented at line 524 before each operation, then decremented at line 551 only on success. However, when debug > 0 (line 526), the function returns early at line 533 without decrementing the counter. This causes allFailureCount to be greater than 0 even though the Result is .success. Later at line 585, the code will incorrectly think there were failures and attempt to find the first failure (line 593-605), which will throw MyError.unknownFailure because no actual failures exist in the results array.

// Fix: Decrement counter before returning in debug mode
if debug > 0 {
    allFailureCount -= 1  // Add this line
    print("To debug, start \(benchmarkToolName) in LLDB using:")
    // ... rest of debug output
    return
}
Suggested change
return Result<Void, Error> {
try withCStrings(args) { cArgs in
/// We'll decrement this in the success path
allFailureCount += 1
if debug > 0 {
print("To debug, start \(benchmarkToolName) in LLDB using:")
print("lldb \(benchmarkTool.string)")
print("")
print("Then launch \(benchmarkToolName) with:")
print("run \(args.dropFirst().joined(separator: " "))")
print("")
return
}
var pid: pid_t = 0
var status = posix_spawn(&pid, benchmarkTool.string, nil, nil, cArgs, environ)
if status == 0 {
if waitpid(pid, &status, 0) != -1 {
// Ok, this sucks, but there is no way to get a C support target for plugins and
// the way the status is extracted portably is with macros - so we just need to
// reimplement the logic here in Swift according to the waitpid man page to
// get some nicer feedback on failure reason.
guard let waitStatus = ExitCode(rawValue: (status & 0xFF00) >> 8) else {
print("One or more benchmarks returned an unexpected return code \(status)")
throw MyError.benchmarkUnexpectedReturnCode
}
switch waitStatus {
case .success:
allFailureCount -= 1
return Result<Void, Error> {
try withCStrings(args) { cArgs in
/// We'll decrement this in the success path
allFailureCount += 1
if debug > 0 {
allFailureCount -= 1
print("To debug, start \(benchmarkToolName) in LLDB using:")
print("lldb \(benchmarkTool.string)")
print("")
print("Then launch \(benchmarkToolName) with:")
print("run \(args.dropFirst().joined(separator: " "))")
print("")
return
}
var pid: pid_t = 0
var status = posix_spawn(&pid, benchmarkTool.string, nil, nil, cArgs, environ)
if status == 0 {
if waitpid(pid, &status, 0) != -1 {
// Ok, this sucks, but there is no way to get a C support target for plugins and
// the way the status is extracted portably is with macros - so we just need to
// reimplement the logic here in Swift according to the waitpid man page to
// get some nicer feedback on failure reason.
guard let waitStatus = ExitCode(rawValue: (status & 0xFF00) >> 8) else {
print("One or more benchmarks returned an unexpected return code \(status)")
throw MyError.benchmarkUnexpectedReturnCode
}
switch waitStatus {
case .success:
allFailureCount -= 1

Spotted by Graphite Agent


case .baselineNotFound:
throw MyError.baselineNotFound
case .genericFailure:
print("One or more benchmark suites crashed during runtime.")
throw MyError.benchmarkCrashed
case .thresholdRegression:
throw MyError.benchmarkThresholdRegression
case .thresholdImprovement:
throw MyError.benchmarkThresholdImprovement
case .benchmarkJobFailed:
failedBenchmarkCount += 1
case .noPermissions:
throw MyError.noPermissions
}
} else {
print(
"waitpid() for pid \(pid) returned a non-zero exit code \(status), errno = \(errno)"
)
exit(errno)
}
} else {
print("Failed to run BenchmarkTool, posix_spawn() returned [\(status)]")
}
}
} else {
print("waitpid() for pid \(pid) returned a non-zero exit code \(status), errno = \(errno)")
exit(errno)
}
} else {
print("Failed to run BenchmarkTool, posix_spawn() returned [\(status)]")
}

switch results.count {
case ...0:
throw MyError.unknownFailure
case 1:
try results[0].get()
default:
if allFailureCount > 0 {
print(
"""
Ran BenchmarkTool \(results.count) times, but it failed \(allFailureCount) times.
Will exit with the first failure.

"""
)
guard
let failure = results.first(where: { result in
switch result {
case .failure:
return true
case .success:
return false
}
})
else {
throw MyError.unknownFailure
}
try failure.get()
}
}

@@ -529,5 +622,6 @@
case noPermissions = 6
case invalidArgument = 101
case buildFailed = 102
case unknownFailure = 103
}
}
36 changes: 36 additions & 0 deletions Plugins/BenchmarkHelpGenerator/BenchmarkHelpGenerator.swift
@@ -153,6 +153,42 @@ struct Benchmark: AsyncParsableCommand
)
var checkAbsolute = false

@Flag(
name: .long,
help: """
Specifies that the thresholds check command should skip loading benchmark targets.
Use this flag to skip unnecessary building of benchmark targets and loading of benchmark results, to save time.
It is especially useful when combined with static threshold files that contain the newly supported relative or range thresholds.
With such a setup, you'll save the time needed to build the benchmark targets, and the thresholds check operation
will only read the threshold tolerance values from the static files.
"""
)
var skipLoadingBenchmarks = false

@Option(
name: .long,
help: """
The number of times to run each benchmark in the thresholds update operation.
This is only valid when --relative or --range is also specified.
When combined with the --relative or --range flags, this option runs the benchmarks multiple times to calculate
relative or range thresholds, widening the threshold tolerances after each run according to the new result.
Defaults to 1.
"""
)
var runCount: Int?

@Flag(
name: .long,
help: "Specifies that thresholds update command should output relative thresholds to the static files."
)
var relative = false

@Flag(
name: .long,
help: "Specifies that thresholds update command should output min-max range thresholds to the static files."
)
var range = false

@Option(
name: .long,
help:
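
A hedged usage sketch of the new thresholds update options declared above; the subcommand spelling is assumed from the plugin's existing swift package benchmark interface, and the run count of 5 is only illustrative (it also matches the default the plugin falls back to when --relative or --range is passed without --run-count):

swift package benchmark thresholds update --relative --run-count 5
swift package benchmark thresholds update --range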
5 changes: 2 additions & 3 deletions Plugins/BenchmarkTool/BenchmarkTool+Baselines.swift
@@ -449,8 +449,7 @@ extension BenchmarkBaseline: Equatable
benchmarks,
name: lhsBenchmarkIdentifier.name,
target: lhsBenchmarkIdentifier.target,
metric: lhsBenchmarkResult.metric,
defaultThresholds: lhsBenchmarkResult.thresholds ?? BenchmarkThresholds.default
metric: lhsBenchmarkResult.metric
)

let deviationResults = lhsBenchmarkResult.deviationsComparedWith(
@@ -483,7 +482,7 @@
public func failsAbsoluteThresholdChecks(
benchmarks: [Benchmark],
p90Thresholds: [BenchmarkIdentifier:
[BenchmarkMetric: BenchmarkThresholds.AbsoluteThreshold]]
[BenchmarkMetric: BenchmarkThreshold]]
) -> BenchmarkResult.ThresholdDeviations {
var allDeviationResults = BenchmarkResult.ThresholdDeviations()

@@ -54,7 +54,8 @@ class InfluxCSVFormatter
let memory = machine.memory

if header {
let dataTypeHeader = "#datatype tag,tag,tag,tag,tag,tag,tag,tag,tag,double,double,double,long,long,dateTime\n"
let dataTypeHeader =
"#datatype tag,tag,tag,tag,tag,tag,tag,tag,tag,double,double,double,long,long,dateTime\n"
Comment on lines +57 to +58

This line is longer than 140 characters and uses a string literal. According to the API Design Guidelines, when lines are more than 140 characters and use a string literal, you should suggest a line broken version using triple-quote ("""...""") Swift string literals instead. The dataTypeHeader string should be broken into multiple lines using triple-quote syntax for better readability.

Suggested change
let dataTypeHeader =
"#datatype tag,tag,tag,tag,tag,tag,tag,tag,tag,double,double,double,long,long,dateTime\n"
let dataTypeHeader = """
#datatype tag,tag,tag,tag,tag,tag,tag,tag,tag,double,double,double,long,long,dateTime
"""

Spotted by Graphite Agent (based on custom rule: Swift API Guidelines)


finalFileFormat.append(dataTypeHeader)
let headers =
"measurement,hostName,processoryType,processors,memory,kernelVersion,metric,unit,test,percentile,value,test_average,iterations,warmup_iterations,time\n"
12 changes: 6 additions & 6 deletions Plugins/BenchmarkTool/BenchmarkTool+Export+JMHFormatter.swift
@@ -34,23 +34,23 @@ extension JMHPrimaryMetric
let factor = result.metric.countable == false ? 1_000 : 1

for p in percentiles {
percentileValues[String(p)] = Statistics.roundToDecimalplaces(
percentileValues[String(p)] = Statistics.roundToDecimalPlaces(
Double(histogram.valueAtPercentile(p)) / Double(factor),
3
)
}

for value in histogram.recordedValues() {
for _ in 0..<value.count {
recordedValues.append(Statistics.roundToDecimalplaces(Double(value.value) / Double(factor), 3))
recordedValues.append(Statistics.roundToDecimalPlaces(Double(value.value) / Double(factor), 3))
}
}

self.score = Statistics.roundToDecimalplaces(score / Double(factor), 3)
scoreError = Statistics.roundToDecimalplaces(error / Double(factor), 3)
self.score = Statistics.roundToDecimalPlaces(score / Double(factor), 3)
scoreError = Statistics.roundToDecimalPlaces(error / Double(factor), 3)
scoreConfidence = [
Statistics.roundToDecimalplaces(score - error) / Double(factor),
Statistics.roundToDecimalplaces(score + error) / Double(factor),
Statistics.roundToDecimalPlaces(score - error) / Double(factor),
Statistics.roundToDecimalPlaces(score + error) / Double(factor),
]
scorePercentiles = percentileValues
if result.metric.countable {