27.3 Advanced Compilation Techniques
Modern JIT compilers employ sophisticated optimization techniques to generate highly efficient native code.
Intrinsic Methods
// Intrinsic Methods
/**
 * Console walkthrough of JVM intrinsic methods.
 *
 * <p>{@link #printIntrinsicsConcepts()} prints a static reference sheet;
 * {@link #demonstrateIntrinsics()} calls a few library methods that the
 * sheet lists as intrinsic candidates and prints their results.
 */
public class IntrinsicMethods {

    /**
     * Writes every given line to {@code System.out}, one {@code println}
     * call per entry, preserving order.
     */
    private static void emit(String... lines) {
        for (String line : lines) {
            System.out.println(line);
        }
    }

    /**
     * Prints the explanatory text about intrinsics: what they are, why they
     * are used, the common categories, and the vectorized string notes.
     */
    public static void printIntrinsicsConcepts() {
        emit("=== INTRINSIC METHODS ===");

        emit(
            "\n--- WHAT ARE INTRINSICS? ---",
            "Special methods replaced with optimized assembly",
            "JVM recognizes method signature and substitutes implementation");

        emit(
            "\n--- WHY USE INTRINSICS? ---",
            "✓ Hand-tuned assembly code",
            "✓ CPU-specific optimizations",
            "✓ SIMD instructions",
            "✓ 10-100x faster than Java implementation");

        emit("\n--- COMMON INTRINSICS ---");

        emit(
            "\n1. MATH OPERATIONS",
            " Math.sqrt(x) → sqrtsd instruction",
            " Math.sin(x) → fsin instruction",
            " Math.cos(x) → fcos instruction",
            " Math.abs(x) → Bitwise operation",
            " Math.min/max → Conditional move");

        emit(
            "\n2. STRING OPERATIONS",
            " String.indexOf() → Optimized search (SIMD)",
            " String.equals() → Vectorized comparison",
            " String.compareTo() → Optimized comparison",
            " String.hashCode() → Fast hash computation");

        emit(
            "\n3. ARRAY OPERATIONS",
            " System.arraycopy() → Memory copy (SIMD)",
            " Arrays.equals() → Vectorized comparison",
            " Arrays.fill() → Optimized fill");

        emit(
            "\n4. OBJECT OPERATIONS",
            " Object.hashCode() → Identity hash",
            " Object.clone() → Optimized copy");

        emit(
            "\n5. UNSAFE OPERATIONS",
            " Unsafe.getInt() → Direct memory access",
            " Unsafe.compareAndSwap() → CPU CAS instruction");

        emit(
            "\n6. CRYPTOGRAPHIC",
            " AES encrypt/decrypt → AES-NI instructions",
            " SHA digest → SHA-NI instructions");

        emit(
            "\n--- VECTORIZED STRING OPERATIONS ---",
            "String.indexOf() with AVX2:",
            " • Searches 32 characters simultaneously",
            " • ~30x faster than scalar loop",
            "\nString.equals() with AVX:",
            " • Compares 32 bytes at once",
            " • ~10x faster than byte-by-byte");
    }

    /**
     * Example: exercises one math, one string and one array library call
     * and prints what each produced, followed by a short summary.
     */
    public static void demonstrateIntrinsics() {
        emit("\n=== INTRINSIC DEMONSTRATION ===");

        // Math: square root of a fixed input.
        final double radicand = 16.0;
        System.out.println("Math.sqrt(16) = " + Math.sqrt(radicand));

        // String: substring search inside a fixed greeting.
        final String greeting = "Hello, World!";
        System.out.println("indexOf(\"World\") = " + greeting.indexOf("World"));

        // Array: bulk copy of five ints into a fresh array of the same size.
        final int[] original = {1, 2, 3, 4, 5};
        final int[] duplicate = new int[5];
        System.arraycopy(original, 0, duplicate, 0, 5);
        emit("Array copied using intrinsic");

        emit(
            "\n✓ All operations use intrinsic implementations",
            "✓ No Java bytecode interpretation",
            "✓ Direct CPU instruction execution");
    }
}
// Intrinsic Configuration
/**
 * Documentation-only holder: declares no fields or methods. Its body is a
 * single comment listing JVM command lines for inspecting and disabling
 * intrinsics.
 */
class IntrinsicConfiguration {
/*
Each command below is wrapped over several lines for readability; join the
lines into one command (or add shell line continuations) before running it.

# List available intrinsics
java -XX:+UnlockDiagnosticVMOptions
-XX:+PrintIntrinsics MyApp
# Disable specific intrinsic (for testing)
# NOTE(review): DisableIntrinsic is believed to be a diagnostic HotSpot flag, so this
# command probably also needs -XX:+UnlockDiagnosticVMOptions, and the intrinsic ID
# (_indexOf) may differ between JDK versions -- confirm against the target JDK.
java -XX:DisableIntrinsic=_indexOf MyApp
# Control inline decisions for intrinsics
# NOTE(review): as written these flags print compilation/inlining decisions rather
# than change them -- confirm the heading matches the intent.
java -XX:+PrintCompilation
-XX:+UnlockDiagnosticVMOptions
-XX:+PrintInlining MyApp
*/
}
Speculative Optimization and Uncommon Traps
// Speculative Optimizations
/**
 * Console walkthrough of speculative JIT optimizations, plus two tiny call
 * sites used to illustrate monomorphic versus polymorphic dispatch.
 */
public class SpeculativeOptimizations {

    /**
     * Writes every given line to {@code System.out}, one {@code println}
     * call per entry, preserving order.
     */
    private static void emit(String... lines) {
        for (String line : lines) {
            System.out.println(line);
        }
    }

    /**
     * Prints the explanatory text: the concept, type speculation, null check
     * elimination, branch prediction, uncommon traps and deoptimization.
     */
    public static void printSpeculativeOptimizations() {
        emit("=== SPECULATIVE OPTIMIZATIONS ===");

        emit(
            "\n--- CONCEPT ---",
            "Optimize based on observed behavior",
            "Assume common case, deoptimize if assumption fails");

        emit(
            "\n--- TYPE SPECULATION ---",
            "\nExample:",
            " interface Shape { double area(); }",
            " ",
            " double calculateArea(Shape shape) {",
            " return shape.area(); // Virtual call",
            " }",
            "\nProfile observation:",
            " 99% of calls: shape is Circle",
            "\nSpeculative optimization:",
            " double calculateArea(Shape shape) {",
            " if (shape.getClass() == Circle.class) {",
            " // Inlined Circle.area()",
            " return 3.14159 * radius * radius;",
            " } else {",
            " // Uncommon trap → deoptimize",
            " return shape.area();",
            " }",
            " }");

        emit(
            "\n--- NULL CHECK ELIMINATION ---",
            "\nOriginal:",
            " void process(Object obj) {",
            " obj.toString(); // Implicit null check",
            " obj.hashCode(); // Another null check",
            " }",
            "\nProfile: obj is never null",
            "\nOptimized:",
            " void process(Object obj) {",
            " // Single null check at entry",
            " if (obj == null) uncommon_trap();",
            " // Subsequent checks eliminated",
            " obj.toString();",
            " obj.hashCode();",
            " }");

        emit(
            "\n--- BRANCH PREDICTION ---",
            "\nExample:",
            " if (condition) {",
            " fastPath(); // Taken 99.9%",
            " } else {",
            " slowPath(); // Taken 0.1%",
            " }",
            "\nOptimization:",
            " • Optimize for fastPath",
            " • Inline fastPath()",
            " • slowPath becomes uncommon trap",
            " • CPU branch predictor optimized");

        emit(
            "\n--- UNCOMMON TRAPS ---",
            "\nWhat happens when speculation fails:",
            " 1. Trap triggered",
            " 2. Execution transfers to interpreter",
            " 3. JVM records trap reason",
            " 4. Method marked for recompilation",
            " 5. Recompile with updated profile",
            " 6. New code without failed speculation",
            "\nTrap reasons:",
            " • class_check: Type speculation failed",
            " • null_check: Unexpected null",
            " • range_check: Array bounds violation",
            " • div0_check: Division by zero",
            " • unstable_if: Branch profile changed");

        emit(
            "\n--- DEOPTIMIZATION ---",
            "\nProcess:",
            " 1. Save current state",
            " 2. Restore interpreter state",
            " 3. Invalidate compiled code",
            " 4. Continue in interpreter",
            " 5. Reprofile method",
            " 6. Recompile with new profile",
            "\nCost:",
            " • Single deopt: ~1-10ms",
            " • Frequent deopts: Significant overhead");
    }

    /** Single-method callback used by the two call-site examples below. */
    interface Processor {
        void process(int value);
    }

    /** Processor whose {@code process} body is intentionally empty. */
    static class FastProcessor implements Processor {
        @Override
        public void process(int value) {
            // Fast implementation
        }
    }

    /**
     * Example: monomorphic call site (good for speculation). Forwards
     * {@code value} to {@code proc.process}.
     */
    public static void monomorphicExample(Processor proc, int value) {
        // If the receiver is always FastProcessor, the JIT is expected to inline this call.
        proc.process(value);
    }

    /**
     * Example: polymorphic call site (bad for speculation). Forwards
     * {@code value} to {@code proc.process}; the code is the same as the
     * monomorphic example, only the intended mix of receivers differs.
     */
    public static void polymorphicExample(Processor proc, int value) {
        // If the receiver type varies (Fast, Slow, Other), no speculation is expected.
        proc.process(value);
    }
}
// Observing Deoptimizations
/**
 * Documentation-only holder: declares no fields or methods. Its body is a
 * single comment listing JVM command lines for observing deoptimizations.
 */
class DeoptimizationObservation {
/*
Each command below is wrapped over several lines for readability; join the
lines into one command (or add shell line continuations) before running it.

# Log deoptimizations
# NOTE(review): TraceDeoptimization may only be available in debug builds on some
# JDK versions -- confirm with `java -XX:+UnlockDiagnosticVMOptions -XX:+PrintFlagsFinal`.
java -XX:+UnlockDiagnosticVMOptions
-XX:+LogCompilation
-XX:+TraceDeoptimization MyApp
# Print uncommon traps
# NOTE(review): PrintDeoptimization could not be confirmed as an existing HotSpot
# flag; verify it (or a unified-logging equivalent) on the target JDK before relying on it.
java -XX:+UnlockDiagnosticVMOptions
-XX:+PrintCompilation
-XX:+PrintDeoptimization MyApp
# Disable speculative optimizations (testing)
java -XX:-UseTypeSpeculation MyApp
*/
}
Lock Optimizations
// Lock Optimization Techniques
/**
 * Console walkthrough of JVM lock optimizations, plus two small methods
 * shaped like the lock elision and lock coarsening samples it prints.
 */
public class LockOptimizations {

    /**
     * Prints the explanatory text for five techniques: lock elision, lock
     * coarsening, biased locking, adaptive spinning and lightweight locking.
     */
    public static void printLockOptimizations() {
        System.out.println("=== LOCK OPTIMIZATIONS ===");

        // --- 1. Lock elision ---
        System.out.println("\n--- 1. LOCK ELISION ---");
        System.out.println("Remove unnecessary locks on non-escaping objects");
        System.out.println("\nExample:");
        // The sample ends with "return sb.toString();", so it must be declared to
        // return String (it previously said void, which is not valid Java).
        System.out.println(" String method() {");
        System.out.println(" StringBuffer sb = new StringBuffer();");
        System.out.println(" sb.append(\"hello\"); // synchronized");
        System.out.println(" sb.append(\" world\"); // synchronized");
        System.out.println(" return sb.toString();");
        System.out.println(" }");
        System.out.println("\nOptimization:");
        System.out.println(" • sb doesn't escape method");
        System.out.println(" • Single-threaded access guaranteed");
        System.out.println(" • JIT removes all synchronization");
        System.out.println(" ✓ Zero lock overhead");

        // --- 2. Lock coarsening ---
        System.out.println("\n--- 2. LOCK COARSENING ---");
        System.out.println("Merge adjacent locks into single lock");
        System.out.println("\nOriginal:");
        System.out.println(" synchronized(obj) { operation1(); }");
        System.out.println(" synchronized(obj) { operation2(); }");
        System.out.println(" synchronized(obj) { operation3(); }");
        System.out.println("\nOptimized:");
        System.out.println(" synchronized(obj) {");
        System.out.println(" operation1();");
        System.out.println(" operation2();");
        System.out.println(" operation3();");
        System.out.println(" }");
        System.out.println("\nBenefit:");
        System.out.println(" ✓ Single lock acquire/release");
        System.out.println(" ✓ Reduced lock overhead");

        // --- 3. Biased locking ---
        System.out.println("\n--- 3. BIASED LOCKING ---");
        System.out.println("Optimize for single-threaded access pattern");
        System.out.println("\nConcept:");
        System.out.println(" • Object \"biased\" to first thread");
        System.out.println(" • Subsequent locks by same thread are free");
        System.out.println(" • No CAS operations needed");
        System.out.println("\nPhases:");
        System.out.println(" 1. Unbiased: Normal locking");
        System.out.println(" 2. Biased: Fast path for biased thread");
        System.out.println(" 3. Revoked: Multiple threads, back to normal");
        System.out.println("\n⚠ Deprecated in Java 15, removed in Java 18");
        System.out.println(" Reason: Complex, rarely beneficial with modern hardware");

        // --- 4. Adaptive spinning ---
        System.out.println("\n--- 4. ADAPTIVE SPINNING ---");
        System.out.println("Spin instead of blocking for short waits");
        System.out.println("\nStrategy:");
        System.out.println(" • If lock likely free soon: Spin");
        System.out.println(" • If lock held long: Block");
        System.out.println(" • Adaptive: Learn from past behavior");
        System.out.println("\nBenefit:");
        System.out.println(" ✓ Avoid thread context switch overhead");
        System.out.println(" ✓ Better for short critical sections");

        // --- 5. Lightweight locking ---
        System.out.println("\n--- 5. LIGHTWEIGHT LOCKING ---");
        System.out.println("Fast path for uncontended locks");
        System.out.println("\nMechanism:");
        System.out.println(" • Use CAS to acquire lock");
        System.out.println(" • No OS calls for uncontended case");
        System.out.println(" • Inflate to heavyweight if contended");
        System.out.println("\nPerformance:");
        System.out.println(" • Uncontended: ~1-5 CPU cycles");
        System.out.println(" • Contended: Fallback to OS mutex");
    }

    /**
     * Example: lock elision candidate. Builds a greeting with a method-local
     * {@link StringBuffer} whose reference never leaves this method.
     *
     * @return the string {@code "Hello World"}
     */
    public String lockElisionExample() {
        // sb doesn't escape - its synchronized appends are candidates for elision
        StringBuffer sb = new StringBuffer();
        sb.append("Hello");
        sb.append(" ");
        sb.append("World");
        return sb.toString();
    }

    /**
     * Example: lock coarsening. Prints {@code 1}, {@code 2} and {@code 3},
     * each inside its own back-to-back {@code synchronized} block on the
     * same monitor.
     *
     * @param lock monitor object to synchronize on; a {@code null} value
     *             makes the first {@code synchronized} statement throw
     *             {@link NullPointerException}
     */
    public void lockCoarseningExample(Object lock) {
        // Three adjacent regions on one monitor: candidates for coarsening
        synchronized (lock) {
            System.out.println("1");
        }
        synchronized (lock) {
            System.out.println("2");
        }
        synchronized (lock) {
            System.out.println("3");
        }
    }
}
// Lock Configuration
/**
 * Documentation-only holder: declares no fields or methods. Its body is a
 * single comment listing JVM command lines related to lock behavior.
 */
class LockConfiguration {
/*
Each command below is wrapped over several lines for readability; join the
lines into one command (or add shell line continuations) before running it.

# Disable biased locking (Java 15+, already deprecated)
# NOTE(review): on JDKs where biased locking has been removed this flag is
# presumably ignored or rejected -- confirm on the target JDK.
java -XX:-UseBiasedLocking MyApp
# Tune adaptive spinning
# NOTE(review): UseSpinning and PreBlockSpin are believed to be obsolete on
# recent HotSpot releases -- confirm they are still accepted before using them.
java -XX:+UseSpinning
-XX:PreBlockSpin=10 MyApp
# Monitor lock statistics
# NOTE(review): PrintBiasedLockingStatistics presumably only exists on JDKs that
# still ship biased locking, and may need diagnostic options unlocked -- confirm.
java -XX:+PrintBiasedLockingStatistics MyApp
*/
}
Constant Folding and Dead Code Elimination
// Compile-Time Optimizations
/**
 * Console walkthrough of constant folding, constant propagation, dead code
 * elimination and unreachable code elimination, plus two small methods
 * shaped like the printed samples.
 */
public class CompileTimeOptimizations {

    /**
     * Writes every given line to {@code System.out}, one {@code println}
     * call per entry, preserving order.
     */
    private static void emit(String... lines) {
        for (String line : lines) {
            System.out.println(line);
        }
    }

    /**
     * Prints before/after code samples for each of the four optimizations.
     */
    public static void printOptimizations() {
        emit(
            "=== CONSTANT FOLDING ===",
            "\nOriginal code:",
            " int x = 2 + 3;",
            " int y = x * 10;",
            " int z = y / 5;",
            "\nAfter constant folding:",
            " int x = 5;",
            " int y = 50;",
            " int z = 10;",
            "\nFinal optimization:",
            " int z = 10;");

        emit(
            "\n=== CONSTANT PROPAGATION ===",
            "\nOriginal:",
            " final int SIZE = 100;",
            " int[] array = new int[SIZE];",
            " for (int i = 0; i < SIZE; i++) {",
            " array[i] = i * SIZE;",
            " }",
            "\nAfter propagation:",
            " int[] array = new int[100];",
            " for (int i = 0; i < 100; i++) {",
            " array[i] = i * 100;",
            " }");

        emit(
            "\n=== DEAD CODE ELIMINATION ===",
            "\nOriginal:",
            " int compute(boolean flag) {",
            " int x = 10; // Used",
            " int y = 20; // Never used",
            " int z = 30; // Used",
            " return x + z;",
            " }",
            "\nOptimized:",
            " int compute(boolean flag) {",
            " int x = 10;",
            " int z = 30;",
            " return x + z;",
            " }",
            "\nFurther optimization:",
            " int compute(boolean flag) {",
            " return 40;",
            " }");

        emit(
            "\n=== UNREACHABLE CODE ELIMINATION ===",
            "\nOriginal:",
            " void method() {",
            " if (true) {",
            " doSomething();",
            " } else {",
            " neverExecuted(); // Unreachable",
            " }",
            " }",
            "\nOptimized:",
            " void method() {",
            " doSomething();",
            " }");
    }

    /**
     * Example: constant folding. Evaluates an expression made only of
     * integer literals.
     *
     * @return {@code (10 + 20) * 3 / 6}, i.e. 15
     */
    public static int constantFoldingExample() {
        // Literal-only arithmetic: 30 * 3 = 90, 90 / 6 = 15
        return (10 + 20) * 3 / 6;
    }

    /**
     * Example: dead code elimination. Declares two locals that are never
     * read and returns twice the argument.
     *
     * @param x value to double (ordinary {@code int} arithmetic, so large
     *          magnitudes wrap)
     * @return {@code x * 2}
     */
    public static int deadCodeExample(int x) {
        // The two never-read locals are the point of the example.
        int neverReadA = 100; // Eliminated
        int neverReadB = 200; // Eliminated
        return x * 2;
    }
}
Profile Pollution and Warmup
// Warmup and Profile Quality
/**
 * Console walkthrough of JIT warmup phases, profile pollution and common
 * benchmarking pitfalls.
 */
public class WarmupAndProfiling {

    /**
     * Writes every given line to {@code System.out}, one {@code println}
     * call per entry, preserving order.
     */
    private static void emit(String... lines) {
        for (String line : lines) {
            System.out.println(line);
        }
    }

    /**
     * Prints the explanatory text: the three warmup phases, the profile
     * pollution problem and remedies, benchmarking do/don't samples, and
     * the JMH recommendation.
     */
    public static void printWarmupConcepts() {
        emit("=== WARMUP IMPORTANCE ===");

        emit(
            "\n--- WARMUP PHASES ---",
            "\n1. COLD START (0-1000 invocations)",
            " • Interpreter only",
            " • Collecting initial profile",
            " • Slowest performance",
            "\n2. WARMUP (1000-15000 invocations)",
            " • C1 compilation (Tier 3)",
            " • Building comprehensive profile",
            " • Moderate performance",
            "\n3. STEADY STATE (15000+ invocations)",
            " • C2 compilation (Tier 4)",
            " • Peak optimization",
            " • Maximum performance");

        emit(
            "\n--- PROFILE POLLUTION ---",
            "\nProblem:",
            " Startup code pollutes profile with atypical behavior",
            "\nExample:",
            " • Initialization creates different object types",
            " • Steady state uses only one type",
            " • Profile shows polymorphic call site",
            " • JIT can't optimize for monomorphic case",
            "\nSolution:",
            " • Run warmup with representative data",
            " • Separate initialization from hot path",
            " • Use -XX:CompileCommand=exclude for init code");

        emit(
            "\n--- BENCHMARKING PITFALLS ---",
            "\n❌ DON'T: Single iteration",
            " long start = System.nanoTime();",
            " method(); // Not warmed up!",
            " long time = System.nanoTime() - start;",
            "\n✓ DO: Proper warmup",
            " // Warmup: 15000 iterations",
            " for (int i = 0; i < 15000; i++) {",
            " method();",
            " }",
            " ",
            " // Now measure",
            " long start = System.nanoTime();",
            " for (int i = 0; i < 1000; i++) {",
            " method();",
            " }",
            " long time = (System.nanoTime() - start) / 1000;");

        emit(
            "\n--- USE JMH FOR ACCURATE BENCHMARKS ---",
            "Java Microbenchmark Harness handles:",
            " ✓ Proper warmup",
            " ✓ Multiple iterations",
            " ✓ Statistical analysis",
            " ✓ Dead code elimination detection",
            " ✓ Constant folding detection");
    }
}
Best Practices
- Understand intrinsics: Use library methods that map to CPU instructions.
- Keep call sites monomorphic: Limit polymorphism in hot code.
- Allow proper warmup: Run representative workload before measuring.
- Monitor deoptimizations: Frequent deopts indicate profile pollution.
- Use final for constants: Enables constant folding and propagation.
- Leverage escape analysis: Objects that never escape a method can be scalar-replaced, avoiding heap allocation (often loosely described as stack allocation).
- Trust lock optimizations: JVM often eliminates synchronization overhead.
- Use JMH for benchmarking: Accurate measurement with proper warmup.
- Avoid profile pollution: Separate initialization from steady-state code.
- Monitor compilation: Use -XX:+PrintCompilation to understand JIT behavior.