Benchmarking Guide¶
This guide covers performance testing and optimisation techniques for Olla.
Quick Start¶
# Run all benchmarks
make bench
# Run specific benchmarks
go test -bench=BenchmarkProxy ./internal/adapter/proxy/
# With memory profiling
go test -bench=. -benchmem ./...
# Run for specific duration
go test -bench=. -benchtime=10s ./...
Writing Benchmarks¶
Basic Benchmark Structure¶
func BenchmarkEndpointSelection(b *testing.B) {
// Setup - not timed
endpoints := generateEndpoints(100)
selector := NewPrioritySelector()
// Reset timer after setup
b.ResetTimer()
// Report allocations
b.ReportAllocs()
// Run benchmark
for i := 0; i < b.N; i++ {
selector.Select(endpoints)
}
}
Sub-benchmarks¶
func BenchmarkBalancers(b *testing.B) {
endpoints := generateEndpoints(100)
b.Run("Priority", func(b *testing.B) {
selector := NewPrioritySelector()
b.ResetTimer()
for i := 0; i < b.N; i++ {
selector.Select(endpoints)
}
})
b.Run("RoundRobin", func(b *testing.B) {
selector := NewRoundRobinSelector()
b.ResetTimer()
for i := 0; i < b.N; i++ {
selector.Select(endpoints)
}
})
}
Table-Driven Benchmarks¶
func BenchmarkPayloadSizes(b *testing.B) {
sizes := []struct {
name string
size int
}{
{"1KB", 1024},
{"10KB", 10 * 1024},
{"100KB", 100 * 1024},
{"1MB", 1024 * 1024},
}
for _, tc := range sizes {
b.Run(tc.name, func(b *testing.B) {
data := generatePayload(tc.size)
b.ResetTimer()
b.SetBytes(int64(tc.size))
for i := 0; i < b.N; i++ {
processPayload(data)
}
})
}
}
Key Benchmarks¶
Proxy Engine Comparison¶
Compare Sherpa vs Olla performance:
# Run proxy benchmarks
go test -bench=BenchmarkProxyComparison -benchmem \
./internal/adapter/proxy/
# Example output:
# BenchmarkProxyComparison/Sherpa-8 10000 115623 ns/op 4096 B/op 42 allocs/op
# BenchmarkProxyComparison/Olla-8 12000 98456 ns/op 3072 B/op 35 allocs/op
Load Balancer Performance¶
# Test balancer strategies
go test -bench=BenchmarkBalancer -benchmem \
./internal/adapter/balancer/
Concurrent Performance¶
func BenchmarkConcurrentStats(b *testing.B) {
stats := NewStats()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
stats.IncrementRequests()
stats.RecordLatency(100 * time.Millisecond)
}
})
}
Memory Profiling¶
Allocation Analysis¶
# Generate memory profile (profiling flags only work with a single package)
go test -bench=. -benchmem -memprofile=mem.prof ./internal/adapter/proxy/
# Analyse allocations
go tool pprof -alloc_space mem.prof
# Show top allocators
(pprof) top10
# View specific function
(pprof) list functionName
Escape Analysis¶
# Show escape analysis
go build -gcflags="-m -m" ./...
# Example output:
# ./internal/adapter/stats/collector.go:42: moved to heap: data
# ./internal/adapter/stats/collector.go:43: inlining call to atomic.AddInt64
CPU Profiling¶
Generate CPU Profile¶
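# Profile a single package, running each benchmark for 30 seconds
# (-cpuprofile cannot be combined with multiple packages such as ./...)
go test -bench=. -cpuprofile=cpu.prof -benchtime=30s ./internal/adapter/proxy/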
# Analyse profile
go tool pprof cpu.prof
# Interactive commands:
(pprof) top # Show top functions
(pprof) web # Open in browser
(pprof) list function # Show source
Profile Running Service¶
import _ "net/http/pprof"
// In main.go
go func() {
log.Println(http.ListenAndServe("localhost:6060", nil))
}()
Optimisation Techniques¶
1. Reduce Allocations¶
// Bad - allocates on each call
func processRequest() []byte {
return make([]byte, 1024)
}
// Good - uses pool
var bufferPool = sync.Pool{
New: func() interface{} {
buf := make([]byte, 1024)
return &buf
},
}
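func processRequest(dst io.Writer, src io.Reader) error {
bufPtr := bufferPool.Get().(*[]byte)
// Put only after the last use - never hand a pooled buffer back to the caller,
// because another goroutine may Get and overwrite it
defer bufferPool.Put(bufPtr)
_, err := io.CopyBuffer(dst, src, *bufPtr)
return err
}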
2. Minimise Interface Conversions¶
// Bad - interface conversion overhead
func process(v interface{}) {
str := v.(string)
// use str
}
// Good - concrete type
func process(str string) {
// use str directly
}
3. Pre-allocate Slices¶
// Bad - grows dynamically
var results []Result
for _, item := range items {
results = append(results, process(item))
}
// Good - pre-allocated
results := make([]Result, 0, len(items))
for _, item := range items {
results = append(results, process(item))
}
4. Use Atomic Operations¶
// Bad - mutex for counter
type Counter struct {
mu sync.Mutex
value int64
}
func (c *Counter) Inc() {
c.mu.Lock()
c.value++
c.mu.Unlock()
}
// Good - atomic operation
type Counter struct {
value int64
}
func (c *Counter) Inc() {
atomic.AddInt64(&c.value, 1)
}
Benchmark Targets¶
Performance Goals¶
Component | Target | Measurement |
---|---|---|
Request Latency | < 5ms overhead | p99 latency |
Throughput | > 10K req/s | Single core |
Memory | < 100MB | Under load |
Allocations | < 50/request | Steady state |
Critical Path Benchmarks¶
Focus on these hot paths:
- Endpoint Selection: < 100ns
- Health Checking: < 1ms
- Stats Collection: < 50ns overhead
- Request Forwarding: < 1ms overhead
Continuous Benchmarking¶
GitHub Actions¶
name: Benchmarks
on:
pull_request:
paths:
- 'internal/**'
- 'pkg/**'
jobs:
benchmark:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-go@v4
with:
go-version: '1.24'
- name: Run benchmarks
run: |
go test -bench=. -benchmem -count=3 \
-benchtime=10s ./... | tee new.txt
- name: Compare with main
run: |
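# actions/checkout fetches only the PR ref by default, so fetch main first
git fetch --depth=1 origin main
git checkout FETCH_HEAD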
go test -bench=. -benchmem -count=3 \
-benchtime=10s ./... | tee old.txt
# Install benchstat
go install golang.org/x/perf/cmd/benchstat@latest
# Compare results
benchstat old.txt new.txt
Local Comparison¶
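# Benchmark current branch (stash first so only committed changes are measured)
git stash
go test -bench=. -count=5 ./... > new.txt
# Benchmark main branch
git checkout main
go test -bench=. -count=5 ./... > old.txt
# Return to your branch and restore stashed changes (skip pop if nothing was stashed)
git checkout -
git stash pop
# Compare
benchstat old.txt new.txt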
Analysis Tools¶
benchstat¶
Statistical comparison of benchmarks:
# Install
go install golang.org/x/perf/cmd/benchstat@latest
# Compare
benchstat old.txt new.txt
# Example output:
# name old time/op new time/op delta
# Proxy-8 105µs ± 2% 95µs ± 1% -9.52% (p=0.000 n=10+10)
pprof Web UI¶
# Start web interface
go tool pprof -http=:8080 cpu.prof
# Opens browser with:
# - Flame graph
# - Top functions
# - Source view
# - Call graph
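trace Tool¶
# Capture an execution trace (one package at a time)
go test -bench=. -trace=trace.out ./internal/adapter/proxy/
# Open the trace viewer in a browser
go tool trace trace.out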
Best Practices¶
1. Benchmark Hygiene¶
// Package-level sink - storing the result here stops the compiler
// from eliminating the benchmarked call
var sink int
func BenchmarkFunction(b *testing.B) {
// Setup outside timer
data := prepareData()
// Reset after setup
b.ResetTimer()
// Report metrics
b.ReportAllocs()
b.SetBytes(int64(len(data)))
// Prevent compiler optimisations
var result int
for i := 0; i < b.N; i++ {
result = function(data)
}
sink = result
}
2. Realistic Workloads¶
// Bad - unrealistic
func BenchmarkEmpty(b *testing.B) {
for i := 0; i < b.N; i++ {
processEmpty("")
}
}
// Good - representative data
func BenchmarkRealistic(b *testing.B) {
requests := loadRealRequests()
b.ResetTimer()
for i := 0; i < b.N; i++ {
processRequest(requests[i%len(requests)])
}
}
3. Stable Environment¶
- Close unnecessary applications
- Disable CPU frequency scaling
- Use consistent hardware
- Run multiple iterations
# Disable CPU scaling (Linux)
sudo cpupower frequency-set --governor performance
# Run stable benchmark
go test -bench=. -count=10 -benchtime=10s
Common Pitfalls¶
Compiler Optimisations¶
// Bad - result discarded, may be optimised away
func BenchmarkBad(b *testing.B) {
for i := 0; i < b.N; i++ {
expensiveOperation()
}
}
// Good - store result in a package-level sink so the call cannot be optimised away
var sink int
func BenchmarkGood(b *testing.B) {
var result int
for i := 0; i < b.N; i++ {
result = expensiveOperation()
}
sink = result
}
Timer Pollution¶
// Bad - includes setup in timing
func BenchmarkBad(b *testing.B) {
for i := 0; i < b.N; i++ {
data := generateData() // Timed!
processData(data)
}
}
// Good - setup outside loop
func BenchmarkGood(b *testing.B) {
data := generateData()
b.ResetTimer()
for i := 0; i < b.N; i++ {
processData(data)
}
}
Next Steps¶
- Review Testing Guide for test patterns
- See Technical Patterns for optimisation techniques
- Check current benchmarks in `internal/adapter/proxy/`
- Run `make bench` to establish baseline