Collectors and Advanced Operations · JAVA

6.3 Collectors and Advanced Stream Operations

The Collectors utility class provides powerful tools for accumulating stream elements into collections, computing statistics, grouping data, and building complex aggregations. Mastering collectors enables elegant, efficient data transformations.

Basic Collectors

toList, toSet, toCollection:

// NOTE: a stream can be consumed only once. Each statement below is an independent
// example and needs its own fresh stream; `stream` is a placeholder, not one shared instance.
List<String> list = stream.collect(Collectors.toList());
Set<String> set = stream.collect(Collectors.toSet());

// Custom collection type: the supplier passed to toCollection picks the concrete class
LinkedList<String> linkedList = stream
    .collect(Collectors.toCollection(LinkedList::new));

// Same idea with a TreeSet as the target collection
TreeSet<String> treeSet = stream
    .collect(Collectors.toCollection(TreeSet::new));

toMap: Build maps from streams

record Product(String id, String name, BigDecimal price) {}

List<Product> products = List.of(
    new Product("P001", "Laptop", BigDecimal.valueOf(1200)),
    new Product("P002", "Mouse", BigDecimal.valueOf(25))
);

// Simple map: id -> product
Map<String, Product> byId = products.stream()
    .collect(Collectors.toMap(
        Product::id,
        p -> p
    ));

// Map with custom value: id -> name
Map<String, String> idToName = products.stream()
    .collect(Collectors.toMap(
        Product::id,
        Product::name
    ));

// Handle duplicates with merge function
Map<String, BigDecimal> merged = products.stream()
    .collect(Collectors.toMap(
        Product::name,
        Product::price,
        BigDecimal::add  // merge function for duplicate keys
    ));

// Specify map type
TreeMap<String, Product> treeMap = products.stream()
    .collect(Collectors.toMap(
        Product::id,
        p -> p,
        (p1, p2) -> p1,  // merge function (required)
        TreeMap::new     // supplier for map type
    ));

Joining Collectors

joining: Concatenate strings

List<String> words = List.of("Java", "is", "awesome");

// Simple join
String simple = words.stream()
    .collect(Collectors.joining()); // "Javaisawesome"

// With delimiter
String withDelimiter = words.stream()
    .collect(Collectors.joining(" ")); // "Java is awesome"

// With prefix and suffix
String formatted = words.stream()
    .collect(Collectors.joining(", ", "[", "]")); // "[Java, is, awesome]"

// Real-world: CSV generation
record Person(String name, int age, String city) {}

String csv = people.stream()
    .map(p -> String.format("%s,%d,%s", p.name(), p.age(), p.city()))
    .collect(Collectors.joining("\n", "Name,Age,City\n", ""));

Grouping Collectors

groupingBy: Group elements by classifier

record Employee(String name, String department, int salary) {}

List<Employee> employees = List.of(
    new Employee("Alice", "IT", 70000),
    new Employee("Bob", "HR", 60000),
    new Employee("Charlie", "IT", 80000),
    new Employee("Diana", "HR", 65000)
);

// Simple grouping
Map<String, List<Employee>> byDept = employees.stream()
    .collect(Collectors.groupingBy(Employee::department));
// {IT=[Alice, Charlie], HR=[Bob, Diana]}

// Grouping with custom downstream collector - count per group
Map<String, Long> countByDept = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.counting()
    ));
// {IT=2, HR=2}

// Sum salaries by department
Map<String, Integer> salaryByDept = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.summingInt(Employee::salary)
    ));
// {IT=150000, HR=125000}

// Average salary by department
Map<String, Double> avgSalaryByDept = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.averagingInt(Employee::salary)
    ));
// {IT=75000.0, HR=62500.0}

// Get names by department
Map<String, List<String>> namesByDept = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.mapping(
            Employee::name,
            Collectors.toList()
        )
    ));
// {IT=[Alice, Charlie], HR=[Bob, Diana]}

// Highest paid employee per department
Map<String, Optional<Employee>> highestPaidByDept = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.maxBy(Comparator.comparingInt(Employee::salary))
    ));

// Multi-level grouping
record Sale(String region, String product, int quantity) {}

Map<String, Map<String, Integer>> salesByRegionAndProduct = sales.stream()
    .collect(Collectors.groupingBy(
        Sale::region,
        Collectors.groupingBy(
            Sale::product,
            Collectors.summingInt(Sale::quantity)
        )
    ));
// {East={ProductA=100, ProductB=150}, West={ProductA=80}}

partitioningBy: Split into two groups by predicate

// Partition by condition (strictly greater than 65000, so Diana at exactly 65000 lands in false)
Map<Boolean, List<Employee>> partitioned = employees.stream()
    .collect(Collectors.partitioningBy(e -> e.salary() > 65000));
// {false=[Bob, Diana], true=[Alice, Charlie]}

// Partition with downstream collector
Map<Boolean, Long> countByHighSalary = employees.stream()
    .collect(Collectors.partitioningBy(
        e -> e.salary() > 65000,
        Collectors.counting()
    ));
// {false=2, true=2}

Statistical Collectors

Numeric summaries:

// Summing
int totalSalary = employees.stream()
    .collect(Collectors.summingInt(Employee::salary));

// Averaging
double avgSalary = employees.stream()
    .collect(Collectors.averagingInt(Employee::salary));

// Summary statistics
IntSummaryStatistics stats = employees.stream()
    .collect(Collectors.summarizingInt(Employee::salary));

System.out.println("Count: " + stats.getCount());
System.out.println("Sum: " + stats.getSum());
System.out.println("Min: " + stats.getMin());
System.out.println("Max: " + stats.getMax());
System.out.println("Average: " + stats.getAverage());

Advanced Collectors

reducing: General-purpose reduction

// Sum using reducing
Optional<Integer> sum = employees.stream()
    .map(Employee::salary)
    .collect(Collectors.reducing(Integer::sum));

// With identity value
Integer sumWithIdentity = employees.stream()
    .map(Employee::salary)
    .collect(Collectors.reducing(0, Integer::sum));

// With mapper and combiner
Integer totalSalary = employees.stream()
    .collect(Collectors.reducing(
        0,                      // identity
        Employee::salary,       // mapper
        Integer::sum           // combiner
    ));

collectingAndThen: Transform the result

// Get immutable list
List<String> immutableNames = employees.stream()
    .map(Employee::name)
    .collect(Collectors.collectingAndThen(
        Collectors.toList(),
        Collections::unmodifiableList
    ));

// Count and format
String countMessage = employees.stream()
    .collect(Collectors.collectingAndThen(
        Collectors.counting(),
        count -> String.format("Total employees: %d", count)
    ));

teeing: Combine two collectors (Java 12+)

// Holder for the two aggregates. Both components are primitives, so neither can be null
// and no boxed value is kept around.
record Stats(long count, int sum) {}

// Compute count and sum simultaneously: teeing feeds every element to both collectors,
// then passes the two results (Long count, Integer sum) to the Stats constructor,
// which unboxes them.
Stats stats = employees.stream()
    .collect(Collectors.teeing(
        Collectors.counting(),
        Collectors.summingInt(Employee::salary),
        Stats::new
    ));

// Compute count, sum, and average in a single pass.
// The average is a plain double division; no rounding or scale is applied.
record Average(long count, int sum, double average) {}

Average avg = employees.stream()
    .collect(Collectors.teeing(
        Collectors.counting(),
        Collectors.summingInt(Employee::salary),
        (count, sum) -> new Average(
            count,
            sum,
            // guard: an empty stream yields count == 0, so avoid dividing by zero
            count > 0 ? (double) sum / count : 0.0
        )
    ));

filtering: Filter within collector (Java 9+)

// Count high-salary employees (salary > 65000) in each department.
// The filter is applied inside each group, after grouping, so a department with no
// matching employee still appears in the map with a count of 0.
Map<String, Long> highSalaryCount = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.filtering(
            e -> e.salary() > 65000,
            Collectors.counting()
        )
    ));
// {IT=2, HR=0}

flatMapping: FlatMap within collector (Java 9+)

record Team(String name, List<String> members) {}

List<Team> teams = List.of(
    new Team("Alpha", List.of("Alice", "Bob")),
    new Team("Beta", List.of("Charlie", "Diana"))
);

// Get all unique members
Set<String> allMembers = teams.stream()
    .collect(Collectors.flatMapping(
        team -> team.members().stream(),
        Collectors.toSet()
    ));

Custom Collectors

Create your own collectors for specialized aggregations:

import java.util.stream.Collector;

class CustomCollectors {
    /**
     * Returns a collector that joins strings with {@code delimiter} while keeping the
     * result at most {@code maxLength} characters long, delimiters included.
     *
     * <p>A string is appended only if it, together with the delimiter that would precede
     * it, still fits. A string that does not fit is skipped; a later, shorter string may
     * still be appended.
     *
     * <p>When partial results are merged (parallel streams), a right-hand partial that
     * does not fit is dropped as a whole, and an empty partial contributes nothing, so no
     * stray delimiter is produced.
     *
     * @param maxLength maximum length of the joined result, in characters
     * @param delimiter separator placed between consecutive appended strings
     * @return a collector producing the length-limited joined string
     */
    public static Collector<String, ?, String> toStringWithLimit(
        int maxLength,
        String delimiter
    ) {
        return Collector.<String, StringBuilder, String>of(
            StringBuilder::new,                                    // supplier
            (sb, s) -> appendIfFits(sb, s, maxLength, delimiter),  // accumulator
            (left, right) -> {                                     // combiner
                // An empty partial has nothing to add; skipping it avoids a trailing delimiter.
                if (right.length() > 0) {
                    appendIfFits(left, right, maxLength, delimiter);
                }
                return left;
            },
            StringBuilder::toString                                // finisher
        );
    }

    /**
     * Appends {@code piece} to {@code sb}, preceded by {@code delimiter} when {@code sb}
     * is non-empty, but only if the resulting length stays within {@code maxLength}.
     */
    private static void appendIfFits(
        StringBuilder sb,
        CharSequence piece,
        int maxLength,
        String delimiter
    ) {
        int separatorLength = sb.length() > 0 ? delimiter.length() : 0;
        // Sum in long so three int lengths cannot overflow before the comparison.
        long lengthAfterAppend = (long) sb.length() + separatorLength + piece.length();
        if (lengthAfterAppend <= maxLength) {
            if (separatorLength > 0) {
                sb.append(delimiter);
            }
            sb.append(piece);
        }
    }

    /**
     * Returns a collector that gathers elements into an unmodifiable list.
     *
     * <p>The result is produced by {@link List#copyOf}, which rejects {@code null}
     * elements with a {@link NullPointerException}.
     *
     * @param <T> element type
     * @return a collector producing an unmodifiable list of the stream's elements
     */
    public static <T> Collector<T, ?, List<T>> toImmutableList() {
        return Collectors.collectingAndThen(
            Collectors.toList(),
            List::copyOf
        );
    }
}

// Usage
String limited = words.stream()
    .collect(CustomCollectors.toStringWithLimit(20, ", "));

Real-World Example: Sales Analytics

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.time.*;
import java.util.*;
import java.util.stream.Collector;
import java.util.stream.Collectors;

/**
 * A single sale as consumed by {@code SalesAnalytics}.
 *
 * @param id        identifier of the sale (e.g. {@code "S001"} in the usage example)
 * @param productId identifier of the product sold; used as a grouping key
 * @param category  product category; used as a grouping key
 * @param date      date of the sale; bucketed by year-month for trends
 * @param amount    monetary amount of the sale; summed as revenue
 * @param quantity  number of units; summed per region
 * @param region    sales region; used as a grouping key
 */
record Sale(
    String id, String productId, String category,
    LocalDate date, BigDecimal amount, int quantity, String region) {
}

/**
 * Aggregations over a fixed list of {@link Sale} records: revenue by category, product,
 * region and month, plus a threshold-based partition.
 *
 * <p>Every query streams the same snapshot of sales taken at construction time.
 */
class SalesAnalytics {
    private final List<Sale> sales;

    /**
     * Creates an analytics view over the given sales.
     *
     * @param sales the sales to analyse; copied, so later changes to the caller's list
     *              are not observed
     * @throws NullPointerException if {@code sales} or any of its elements is null
     */
    public SalesAnalytics(List<Sale> sales) {
        this.sales = List.copyOf(sales);
    }

    /**
     * Total revenue by category.
     *
     * @return map from category to the sum of sale amounts in that category
     */
    public Map<String, BigDecimal> getRevenueByCategory() {
        return sales.stream()
            .collect(Collectors.groupingBy(Sale::category, sumOfAmounts()));
    }

    /**
     * Top N products by revenue.
     *
     * @param n maximum number of entries to return
     * @return product-id/revenue entries, highest revenue first; products with equal
     *         revenue are ordered by product id so the result is deterministic
     * @throws IllegalArgumentException if {@code n} is negative
     */
    public List<Map.Entry<String, BigDecimal>> getTopProducts(int n) {
        Map<String, BigDecimal> revenueByProduct = sales.stream()
            .collect(Collectors.groupingBy(Sale::productId, sumOfAmounts()));

        return revenueByProduct.entrySet().stream()
            .sorted(Map.Entry.<String, BigDecimal>comparingByValue().reversed()
                .thenComparing(Map.Entry.<String, BigDecimal>comparingByKey()))
            .limit(n)
            .toList();
    }

    // Sales statistics by region
    record RegionStats(
        String region,
        long saleCount,
        BigDecimal totalRevenue,
        BigDecimal avgSaleAmount,
        int totalQuantity
    ) {}

    /** Revenue and unit totals for one group; intermediate result of the inner teeing. */
    private record RevenueAndQuantity(BigDecimal revenue, int quantity) {}

    /** All raw totals for one region before the average is derived. */
    private record RegionTotals(long saleCount, BigDecimal revenue, int quantity) {}

    /**
     * Per-region statistics.
     *
     * @return one {@link RegionStats} per region, ordered by total revenue, highest first;
     *         the average sale amount is rounded to 2 decimal places, half-up
     */
    public List<RegionStats> getRegionStatistics() {
        Map<String, RegionTotals> totalsByRegion = sales.stream()
            .collect(Collectors.groupingBy(Sale::region, regionTotals()));

        return totalsByRegion.entrySet().stream()
            .map(e -> toRegionStats(e.getKey(), e.getValue()))
            .sorted(Comparator.comparing(RegionStats::totalRevenue).reversed())
            .toList();
    }

    /**
     * Monthly revenue trend.
     *
     * @return map from year-month to revenue, sorted by month (backed by a {@link TreeMap})
     */
    public Map<YearMonth, BigDecimal> getMonthlyRevenue() {
        return sales.stream()
            .collect(Collectors.groupingBy(
                sale -> YearMonth.from(sale.date()),
                TreeMap::new,  // Sorted by month
                sumOfAmounts()
            ));
    }

    /**
     * Category performance by region.
     *
     * @return map from region to (category to revenue)
     */
    public Map<String, Map<String, BigDecimal>> getCategoryRevenueByRegion() {
        return sales.stream()
            .collect(Collectors.groupingBy(
                Sale::region,
                Collectors.groupingBy(Sale::category, sumOfAmounts())
            ));
    }

    // Partition sales by threshold
    record SalesPartition(
        List<Sale> highValue,
        List<Sale> lowValue,
        BigDecimal highValueTotal,
        BigDecimal lowValueTotal
    ) {}

    /**
     * Splits the sales into those at or above {@code threshold} and those below it.
     *
     * @param threshold amount a sale must reach (inclusive) to count as high value
     * @return both groups together with the summed amount of each
     */
    public SalesPartition partitionByThreshold(BigDecimal threshold) {
        Map<Boolean, List<Sale>> partitioned = sales.stream()
            .collect(Collectors.partitioningBy(
                sale -> sale.amount().compareTo(threshold) >= 0
            ));

        List<Sale> highValue = partitioned.get(true);
        List<Sale> lowValue = partitioned.get(false);

        return new SalesPartition(
            highValue,
            lowValue,
            totalAmount(highValue),
            totalAmount(lowValue)
        );
    }

    /** Collector that adds up {@link Sale#amount()}, starting from zero. */
    private static Collector<Sale, ?, BigDecimal> sumOfAmounts() {
        return Collectors.reducing(BigDecimal.ZERO, Sale::amount, BigDecimal::add);
    }

    /** Sum of the amounts of the given sales; zero for an empty list. */
    private static BigDecimal totalAmount(List<Sale> subset) {
        return subset.stream().collect(sumOfAmounts());
    }

    /** Collector that gathers sale count, revenue and unit quantity in a single pass. */
    private static Collector<Sale, ?, RegionTotals> regionTotals() {
        Collector<Sale, ?, RevenueAndQuantity> revenueAndQuantity = Collectors.teeing(
            sumOfAmounts(),
            Collectors.summingInt(Sale::quantity),
            RevenueAndQuantity::new
        );
        return Collectors.teeing(
            Collectors.counting(),
            revenueAndQuantity,
            (Long count, RevenueAndQuantity totals) ->
                new RegionTotals(count, totals.revenue(), totals.quantity())
        );
    }

    /** Derives the average sale amount and builds the public statistics record. */
    private static RegionStats toRegionStats(String region, RegionTotals totals) {
        // Guard kept from the original logic, although a grouped region has at least one sale.
        BigDecimal average = totals.saleCount() > 0
            ? totals.revenue().divide(
                BigDecimal.valueOf(totals.saleCount()),
                2,
                RoundingMode.HALF_UP
              )
            : BigDecimal.ZERO;
        return new RegionStats(
            region,
            totals.saleCount(),
            totals.revenue(),
            average,
            totals.quantity()
        );
    }
}

// Usage example
/**
 * Builds four sample sales, runs each analytics query on them, and prints the results
 * to standard output.
 */
void demonstrateSalesAnalytics() {
    var analytics = new SalesAnalytics(List.of(
        new Sale("S001", "P001", "Electronics", LocalDate.of(2025, 1, 15),
                 BigDecimal.valueOf(1200), 1, "East"),
        new Sale("S002", "P002", "Clothing", LocalDate.of(2025, 1, 16),
                 BigDecimal.valueOf(50), 2, "West"),
        new Sale("S003", "P001", "Electronics", LocalDate.of(2025, 2, 10),
                 BigDecimal.valueOf(1200), 1, "East"),
        new Sale("S004", "P003", "Books", LocalDate.of(2025, 2, 12),
                 BigDecimal.valueOf(30), 5, "East")
    ));

    // Revenue by category
    System.out.println("Revenue by category: " + analytics.getRevenueByCategory());

    // Top products (at most three)
    System.out.println("Top products: " + analytics.getTopProducts(3));

    // Region statistics, one line per region
    for (SalesAnalytics.RegionStats stat : analytics.getRegionStatistics()) {
        System.out.printf("Region: %s, Sales: %d, Revenue: $%s%n",
            stat.region(), stat.saleCount(), stat.totalRevenue());
    }

    // Monthly trends
    System.out.println("Monthly revenue: " + analytics.getMonthlyRevenue());
}

Collector Performance Tips

Choose the right downstream collector

// If you only need count, don't collect to list first
// Bad
long count = stream.collect(Collectors.toList()).size();

// Good
long count = stream.collect(Collectors.counting());
// Even better
long count = stream.count();

Use specialized collectors for primitives

// Avoid boxing
int sum = stream.collect(Collectors.summingInt(Item::quantity));
// vs
Integer sum = stream.map(Item::quantity).reduce(0, Integer::sum);

Avoid unnecessary intermediate collections

// Inefficient
List<String> temp = stream.collect(Collectors.toList());
Set<String> result = new HashSet<>(temp);

// Efficient
Set<String> result = stream.collect(Collectors.toSet());

Use Collectors.toMap carefully with duplicates

// Will throw if duplicate keys exist
Map<String, User> map = users.stream()
    .collect(Collectors.toMap(User::email, u -> u));

// Handle duplicates explicitly
Map<String, User> map = users.stream()
    .collect(Collectors.toMap(
        User::email,
        u -> u,
        (existing, replacement) -> existing // keep first
    ));

Common Patterns

Frequency counting:

Map<String, Long> wordFrequency = words.stream()
    .collect(Collectors.groupingBy(
        word -> word,
        Collectors.counting()
    ));

Finding min/max in groups:

Map<String, Optional<Employee>> highestPaidByDept = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.maxBy(Comparator.comparingInt(Employee::salary))
    ));

Boolean logic on groups:

// A department maps to true only when every one of its employees is active.
// NOTE(review): relies on an Employee.isActive() accessor; the Employee record shown in
// the grouping section has only name, department and salary — confirm the type used here.
Map<String, Boolean> allActiveByDept = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.mapping(
            Employee::isActive,
            // AND-fold of the per-employee flags, starting from true
            Collectors.reducing(true, Boolean::logicalAnd)
        )
    ));