6.3 Collectors and Advanced Stream Operations
The Collectors utility class provides powerful tools for accumulating stream elements into collections, computing statistics, grouping data, and building complex aggregations. Mastering collectors enables elegant, efficient data transformations.
Basic Collectors
toList, toSet, toCollection:
// NOTE: `stream` is not defined in this snippet; presumably each statement
// uses a fresh Stream<String>, because a stream supports only one terminal
// operation such as collect() - confirm before pasting these lines together.
// toList()/toSet() make no promise about the concrete List/Set type returned.
List<String> list = stream.collect(Collectors.toList());
Set<String> set = stream.collect(Collectors.toSet());
// Custom collection type
// toCollection takes a supplier, so the caller picks the concrete class.
LinkedList<String> linkedList = stream
.collect(Collectors.toCollection(LinkedList::new));
// TreeSet::new gives a set sorted by the elements' natural ordering.
TreeSet<String> treeSet = stream
.collect(Collectors.toCollection(TreeSet::new));
toMap: Build maps from streams
// Sample data shared by the toMap examples below.
record Product(String id, String name, BigDecimal price) {}

List<Product> products = List.of(
    new Product("P001", "Laptop", BigDecimal.valueOf(1200)),
    new Product("P002", "Mouse", BigDecimal.valueOf(25))
);

// id -> product: the element itself becomes the value.
// The two-argument toMap throws IllegalStateException on a duplicate key.
Map<String, Product> byId = products.stream()
    .collect(Collectors.toMap(Product::id, product -> product));

// id -> name: any function of the element can supply the value.
Map<String, String> idToName = products.stream()
    .collect(Collectors.toMap(Product::id, Product::name));

// name -> summed price: the third argument merges values that share a key.
Map<String, BigDecimal> merged = products.stream()
    .collect(Collectors.toMap(
        Product::name,
        Product::price,
        (runningTotal, price) -> runningTotal.add(price)
    ));

// Choosing the map implementation needs the four-argument overload,
// so a merge function must be supplied as well (here: keep the first value).
TreeMap<String, Product> treeMap = products.stream()
    .collect(Collectors.toMap(
        Product::id,
        product -> product,
        (first, second) -> first,
        TreeMap::new
    ));
Joining Collectors
joining: Concatenate strings
List<String> words = List.of("Java", "is", "awesome");

// No separator at all
String simple = words.stream().collect(Collectors.joining());
// "Javaisawesome"

// Separator between elements
String withDelimiter = words.stream().collect(Collectors.joining(" "));
// "Java is awesome"

// Separator plus a prefix and a suffix around the whole result
String formatted = words.stream().collect(Collectors.joining(", ", "[", "]"));
// "[Java, is, awesome]"

// Real-world: CSV generation
// The prefix carries the header row; each person becomes one data row.
// NOTE: `people` is not defined in this snippet - presumably a List<Person>.
record Person(String name, int age, String city) {}

String csv = people.stream()
    .map(person -> "%s,%d,%s".formatted(person.name(), person.age(), person.city()))
    .collect(Collectors.joining("\n", "Name,Age,City\n", ""));
Grouping Collectors
groupingBy: Group elements by classifier
// Sample data shared by the groupingBy examples below.
record Employee(String name, String department, int salary) {}

List<Employee> employees = List.of(
    new Employee("Alice", "IT", 70000),
    new Employee("Bob", "HR", 60000),
    new Employee("Charlie", "IT", 80000),
    new Employee("Diana", "HR", 65000)
);

// Classifier only: every key maps to the list of elements that produced it.
Map<String, List<Employee>> byDept = employees.stream()
    .collect(Collectors.groupingBy(Employee::department));
// {IT=[Alice, Charlie], HR=[Bob, Diana]}

// A downstream collector replaces the per-group list - here a count.
Map<String, Long> countByDept = employees.stream()
    .collect(Collectors.groupingBy(Employee::department, Collectors.counting()));
// {IT=2, HR=2}

// Total salary per department
Map<String, Integer> salaryByDept = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.summingInt(Employee::salary)));
// {IT=150000, HR=125000}

// Mean salary per department
Map<String, Double> avgSalaryByDept = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.averagingInt(Employee::salary)));
// {IT=75000.0, HR=62500.0}

// mapping() transforms each element before the inner collector sees it.
Map<String, List<String>> namesByDept = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.mapping(Employee::name, Collectors.toList())));
// {IT=[Alice, Charlie], HR=[Bob, Diana]}

// Highest paid employee per department; maxBy yields an Optional per group.
Comparator<Employee> bySalary = Comparator.comparingInt(Employee::salary);
Map<String, Optional<Employee>> highestPaidByDept = employees.stream()
    .collect(Collectors.groupingBy(Employee::department, Collectors.maxBy(bySalary)));
// Multi-level grouping
// The outer groupingBy keys on region; its downstream is another groupingBy
// that keys on product and sums quantity: region -> (product -> total quantity).
// NOTE: `sales` is not defined in this snippet; presumably a List<Sale> - confirm.
record Sale(String region, String product, int quantity) {}
Map<String, Map<String, Integer>> salesByRegionAndProduct = sales.stream()
.collect(Collectors.groupingBy(
Sale::region,
Collectors.groupingBy(
Sale::product,
Collectors.summingInt(Sale::quantity)
)
));
// Shape of the result (actual values depend on the contents of `sales`):
// {East={ProductA=100, ProductB=150}, West={ProductA=80}}
partitioningBy: Split into two groups by predicate
// Partition by condition
// partitioningBy always produces both keys, false and true.
// Diana earns exactly 65000, so she does not pass the strict `> 65000` test.
Map<Boolean, List<Employee>> partitioned = employees.stream()
    .collect(Collectors.partitioningBy(e -> e.salary() > 65000));
// {false=[Bob, Diana], true=[Alice, Charlie]}

// Partition with downstream collector
// The downstream collector is applied to each of the two partitions.
Map<Boolean, Long> countByHighSalary = employees.stream()
    .collect(Collectors.partitioningBy(
        e -> e.salary() > 65000,
        Collectors.counting()
    ));
// {false=2, true=2}
Statistical Collectors
Numeric summaries:
// Summing
// summingInt produces an Integer, auto-unboxed into the int variable.
int totalSalary = employees.stream()
.collect(Collectors.summingInt(Employee::salary));
// Averaging
// averagingInt produces a Double (0 when there are no elements).
double avgSalary = employees.stream()
.collect(Collectors.averagingInt(Employee::salary));
// Summary statistics
// summarizingInt gathers count, sum, min, max and average in a single pass.
IntSummaryStatistics stats = employees.stream()
.collect(Collectors.summarizingInt(Employee::salary));
System.out.println("Count: " + stats.getCount());
System.out.println("Sum: " + stats.getSum());
System.out.println("Min: " + stats.getMin());
System.out.println("Max: " + stats.getMax());
System.out.println("Average: " + stats.getAverage());
Advanced Collectors
reducing: General-purpose reduction
// Sum using reducing
// One-argument form: there is no identity, so the result is an Optional
// that is empty when the stream has no elements.
Optional<Integer> sum = employees.stream()
.map(Employee::salary)
.collect(Collectors.reducing(Integer::sum));
// With identity value
// The identity (0) is returned as-is for an empty stream, so no Optional is needed.
Integer sumWithIdentity = employees.stream()
.map(Employee::salary)
.collect(Collectors.reducing(0, Integer::sum));
// With mapper and combiner
// Three-argument form: maps each Employee to its salary, then folds the
// salaries, which removes the separate map() step used above.
// NOTE: `totalSalary` is also declared in the Statistical Collectors example;
// rename one of them if both snippets end up in the same scope.
Integer totalSalary = employees.stream()
.collect(Collectors.reducing(
0, // identity
Employee::salary, // mapper
Integer::sum // combiner
));
collectingAndThen: Transform the result
// Collect names, then wrap the finished list so callers cannot modify it.
List<String> immutableNames = employees.stream()
    .map(Employee::name)
    .collect(Collectors.collectingAndThen(
        Collectors.toList(),
        Collections::unmodifiableList));

// Count the elements, then turn the count into a message.
String countMessage = employees.stream()
    .collect(Collectors.collectingAndThen(
        Collectors.counting(),
        total -> "Total employees: %d".formatted(total)));
teeing: Combine two collectors (Java 12+)
record Stats(long count, Integer sum) {}

// teeing feeds every element to both collectors and merges the two results,
// so count and sum come out of a single pass.
Stats stats = employees.stream()
    .collect(Collectors.teeing(
        Collectors.counting(),
        Collectors.summingInt(Employee::salary),
        Stats::new));

// Same two collectors, but the merge step also derives the average.
record Average(long count, int sum, double average) {}

Average avg = employees.stream()
    .collect(Collectors.teeing(
        Collectors.counting(),
        Collectors.summingInt(Employee::salary),
        (headCount, salarySum) -> {
            // Guard against dividing by zero when the stream is empty.
            double mean = 0.0;
            if (headCount > 0) {
                mean = (double) salarySum / headCount;
            }
            return new Average(headCount, salarySum, mean);
        }));
filtering: Filter within collector (Java 9+)
// Count high-salary employees in every department (not just IT).
// Filtering inside the downstream collector, instead of calling
// Stream.filter before grouping, keeps departments with no match: they map to 0.
Map<String, Long> highSalaryCount = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.filtering(
            e -> e.salary() > 65000,
            Collectors.counting()
        )
    ));
// {IT=2, HR=0}
flatMapping: FlatMap within collector (Java 9+)
record Team(String name, List<String> members) {}

List<Team> teams = List.of(
    new Team("Alpha", List.of("Alice", "Bob")),
    new Team("Beta", List.of("Charlie", "Diana"))
);

// Each team expands to a stream of its members; the Set drops duplicates.
Set<String> allMembers = teams.stream()
    .collect(Collectors.flatMapping(t -> t.members().stream(), Collectors.toSet()));
Custom Collectors
Create your own collectors for specialized aggregations:
import java.util.stream.Collector;
/**
 * Hand-written collectors for aggregations the JDK does not provide.
 */
class CustomCollectors {

    /**
     * Returns a collector that joins strings with {@code delimiter} while
     * keeping the result at most {@code maxLength} characters long.
     *
     * <p>An element is appended only if it, together with the delimiter that
     * must precede it, still fits; otherwise it is skipped and later (shorter)
     * elements may still be appended. When partial results are combined (which
     * a parallel stream may do), the right-hand partial is appended as a whole
     * if it fits and dropped otherwise.
     *
     * @param maxLength maximum length of the joined result, delimiters included
     * @param delimiter text placed between appended elements
     * @return a collector producing the length-limited joined string
     * @throws NullPointerException if {@code delimiter} is null
     */
    public static Collector<String, ?, String> toStringWithLimit(
        int maxLength,
        String delimiter
    ) {
        if (delimiter == null) {
            throw new NullPointerException("delimiter");
        }
        return Collector.of(
            StringBuilder::new,                                      // supplier
            (sb, s) -> appendIfFits(sb, s, maxLength, delimiter),    // accumulator
            (left, right) -> {                                       // combiner
                // An empty partial holds nothing to join; appending it would
                // only leave a dangling delimiter at the end of `left`.
                if (right.length() > 0) {
                    appendIfFits(left, right, maxLength, delimiter);
                }
                return left;
            },
            StringBuilder::toString                                  // finisher
        );
    }

    /**
     * Appends {@code piece} to {@code target}, preceded by {@code delimiter}
     * when {@code target} is non-empty, but only if the result stays within
     * {@code maxLength}.
     */
    private static void appendIfFits(
        StringBuilder target,
        CharSequence piece,
        int maxLength,
        String delimiter
    ) {
        int separatorLength = target.length() > 0 ? delimiter.length() : 0;
        // Sum in long so three int lengths cannot overflow.
        long lengthAfterAppend = (long) target.length() + separatorLength + piece.length();
        if (lengthAfterAppend > maxLength) {
            return;
        }
        if (separatorLength > 0) {
            target.append(delimiter);
        }
        target.append(piece);
    }

    /**
     * Returns a collector that gathers the stream's elements into an
     * unmodifiable list (built with {@link List#copyOf}).
     *
     * @param <T> element type
     * @return a collector producing an unmodifiable {@code List<T>}
     * @throws NullPointerException at collection time if any element is null,
     *         because {@code List.copyOf} rejects nulls
     */
    public static <T> Collector<T, ?, List<T>> toImmutableList() {
        return Collectors.collectingAndThen(
            Collectors.toList(),
            List::copyOf
        );
    }
}
// Usage
// Passes 20 as maxLength and ", " as delimiter. With the `words` list from the
// joining examples ("Java", "is", "awesome") the result is "Java, is, awesome".
String limited = words.stream()
.collect(CustomCollectors.toStringWithLimit(20, ", "));
Real-World Example: Sales Analytics
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.time.*;
import java.util.*;
import java.util.stream.Collector;
import java.util.stream.Collectors;
/**
 * One sale transaction.
 *
 * @param id        sale identifier
 * @param productId identifier of the product sold
 * @param category  product category used for grouping
 * @param date      date of the sale
 * @param amount    monetary amount of the sale
 * @param quantity  number of units sold
 * @param region    region used for grouping
 */
record Sale(
    String id,
    String productId,
    String category,
    LocalDate date,
    BigDecimal amount,
    int quantity,
    String region
) {}

/**
 * Aggregation queries over a fixed set of sales, built on {@link Collectors}.
 */
class SalesAnalytics {
    private final List<Sale> sales;

    /**
     * Creates an analytics object over a snapshot of {@code sales}; later
     * changes to the caller's list do not affect the results.
     *
     * @param sales the sales to analyse
     * @throws NullPointerException if {@code sales} or any element is null
     */
    public SalesAnalytics(List<Sale> sales) {
        this.sales = List.copyOf(sales);
    }

    /** Collector that adds up {@link Sale#amount()}, yielding zero for no sales. */
    private static Collector<Sale, ?, BigDecimal> totalAmount() {
        return Collectors.reducing(BigDecimal.ZERO, Sale::amount, BigDecimal::add);
    }

    /** Adds up the amounts of the given sales. */
    private static BigDecimal totalOf(List<Sale> group) {
        return group.stream().collect(totalAmount());
    }

    /**
     * Total revenue by category.
     *
     * @return map from category to the sum of its sale amounts
     */
    public Map<String, BigDecimal> getRevenueByCategory() {
        return sales.stream()
            .collect(Collectors.groupingBy(Sale::category, totalAmount()));
    }

    /**
     * Top N products by revenue.
     *
     * @param n maximum number of entries to return
     * @return (productId, revenue) entries, highest revenue first
     * @throws IllegalArgumentException if {@code n} is negative
     */
    public List<Map.Entry<String, BigDecimal>> getTopProducts(int n) {
        return sales.stream()
            .collect(Collectors.groupingBy(Sale::productId, totalAmount()))
            .entrySet().stream()
            .sorted(Map.Entry.<String, BigDecimal>comparingByValue().reversed())
            .limit(n)
            .toList();
    }

    /**
     * Sales statistics for one region.
     *
     * @param region        region name
     * @param saleCount     number of sales in the region
     * @param totalRevenue  sum of sale amounts
     * @param avgSaleAmount totalRevenue / saleCount, scale 2, rounded half-up
     * @param totalQuantity sum of quantities
     */
    record RegionStats(
        String region,
        long saleCount,
        BigDecimal totalRevenue,
        BigDecimal avgSaleAmount,
        int totalQuantity
    ) {}

    /** Revenue and quantity accumulated together by the inner teeing step. */
    private record RevenueAndQuantity(BigDecimal revenue, int quantity) {}

    /** Per-region totals before the region name and the average are attached. */
    private record RegionTotals(long saleCount, BigDecimal revenue, int quantity) {
        RegionStats toStats(String region) {
            BigDecimal average = saleCount > 0
                ? revenue.divide(BigDecimal.valueOf(saleCount), 2, RoundingMode.HALF_UP)
                : BigDecimal.ZERO;
            return new RegionStats(region, saleCount, revenue, average, quantity);
        }
    }

    /** Collector computing count, revenue and quantity in a single pass. */
    private static Collector<Sale, ?, RegionTotals> regionTotals() {
        // Split into two steps with explicit types; teeing combines two
        // collectors at a time, so three results need one nested teeing.
        Collector<Sale, ?, RevenueAndQuantity> revenueAndQuantity = Collectors.teeing(
            totalAmount(),
            Collectors.summingInt(Sale::quantity),
            RevenueAndQuantity::new);
        return Collectors.teeing(
            Collectors.counting(),
            revenueAndQuantity,
            (count, totals) -> new RegionTotals(count, totals.revenue(), totals.quantity()));
    }

    /**
     * Sales statistics by region.
     *
     * @return one {@link RegionStats} per region, highest total revenue first
     */
    public List<RegionStats> getRegionStatistics() {
        return sales.stream()
            .collect(Collectors.groupingBy(Sale::region, regionTotals()))
            .entrySet().stream()
            .map(entry -> entry.getValue().toStats(entry.getKey()))
            .sorted(Comparator.comparing(RegionStats::totalRevenue).reversed())
            .toList();
    }

    /**
     * Monthly revenue trend.
     *
     * @return map from year-month to revenue, sorted by month (a TreeMap)
     */
    public Map<YearMonth, BigDecimal> getMonthlyRevenue() {
        return sales.stream()
            .collect(Collectors.groupingBy(
                sale -> YearMonth.from(sale.date()),
                TreeMap::new, // Sorted by month
                totalAmount()
            ));
    }

    /**
     * Category performance by region.
     *
     * @return map from region to (category to revenue)
     */
    public Map<String, Map<String, BigDecimal>> getCategoryRevenueByRegion() {
        return sales.stream()
            .collect(Collectors.groupingBy(
                Sale::region,
                Collectors.groupingBy(Sale::category, totalAmount())
            ));
    }

    /**
     * Result of splitting the sales at a threshold.
     *
     * @param highValue      sales whose amount is at or above the threshold
     * @param lowValue       sales whose amount is below the threshold
     * @param highValueTotal sum of the amounts in highValue
     * @param lowValueTotal  sum of the amounts in lowValue
     */
    record SalesPartition(
        List<Sale> highValue,
        List<Sale> lowValue,
        BigDecimal highValueTotal,
        BigDecimal lowValueTotal
    ) {}

    /**
     * Partitions sales by threshold.
     *
     * @param threshold amount at or above which a sale counts as high value
     * @return both partitions together with their totals
     */
    public SalesPartition partitionByThreshold(BigDecimal threshold) {
        Map<Boolean, List<Sale>> partitioned = sales.stream()
            .collect(Collectors.partitioningBy(
                sale -> sale.amount().compareTo(threshold) >= 0
            ));
        List<Sale> highValue = partitioned.get(true);
        List<Sale> lowValue = partitioned.get(false);
        return new SalesPartition(highValue, lowValue, totalOf(highValue), totalOf(lowValue));
    }
}
// Usage example
// Builds a small data set and prints the result of each analytics query.
void demonstrateSalesAnalytics() {
    var analytics = new SalesAnalytics(List.of(
        new Sale("S001", "P001", "Electronics", LocalDate.of(2025, 1, 15),
            BigDecimal.valueOf(1200), 1, "East"),
        new Sale("S002", "P002", "Clothing", LocalDate.of(2025, 1, 16),
            BigDecimal.valueOf(50), 2, "West"),
        new Sale("S003", "P001", "Electronics", LocalDate.of(2025, 2, 10),
            BigDecimal.valueOf(1200), 1, "East"),
        new Sale("S004", "P003", "Books", LocalDate.of(2025, 2, 12),
            BigDecimal.valueOf(30), 5, "East")
    ));

    // Revenue by category
    System.out.println("Revenue by category: " + analytics.getRevenueByCategory());

    // Top products
    System.out.println("Top products: " + analytics.getTopProducts(3));

    // Region statistics, one line per region
    for (SalesAnalytics.RegionStats region : analytics.getRegionStatistics()) {
        System.out.printf("Region: %s, Sales: %d, Revenue: $%s%n",
            region.region(), region.saleCount(), region.totalRevenue());
    }

    // Monthly trends
    System.out.println("Monthly revenue: " + analytics.getMonthlyRevenue());
}
Collector Performance Tips
Choose the right downstream collector
// If you only need count, don't collect to list first
// Bad
long count = stream.collect(Collectors.toList()).size();
// Good
long count = stream.collect(Collectors.counting());
// Even better
long count = stream.count();
Use specialized collectors for primitives
// Avoid boxing
int sum = stream.collect(Collectors.summingInt(Item::quantity));
// vs
Integer sum = stream.map(Item::quantity).reduce(0, Integer::sum);
Avoid unnecessary intermediate collections
// Inefficient
List<String> temp = stream.collect(Collectors.toList());
Set<String> result = new HashSet<>(temp);
// Efficient
Set<String> result = stream.collect(Collectors.toSet());
Use Collectors.toMap carefully with duplicates
// Will throw if duplicate keys exist
Map<String, User> map = users.stream()
.collect(Collectors.toMap(User::email, u -> u));
// Handle duplicates explicitly
Map<String, User> map = users.stream()
.collect(Collectors.toMap(
User::email,
u -> u,
(existing, replacement) -> existing // keep first
));
Common Patterns
Frequency counting:
// Group equal words together and count the size of each group.
Map<String, Long> wordFrequency = words.stream()
    .collect(Collectors.groupingBy(w -> w, Collectors.counting()));
Finding min/max in groups:
// maxBy yields an Optional for each group because a reduction without an
// identity has no value to return for zero elements.
Comparator<Employee> salaryOrder = Comparator.comparingInt(Employee::salary);
Map<String, Optional<Employee>> highestPaidByDept = employees.stream()
    .collect(Collectors.groupingBy(Employee::department, Collectors.maxBy(salaryOrder)));
Boolean logic on groups:
// true for a department only when every one of its employees is active:
// each employee is mapped to its isActive() flag and the flags are AND-ed,
// starting from the identity `true`.
// NOTE: this calls Employee::isActive, so it assumes an Employee type that
// exposes isActive().
Map<String, Boolean> allActiveByDept = employees.stream()
    .collect(Collectors.groupingBy(
        Employee::department,
        Collectors.reducing(true, Employee::isActive, Boolean::logicalAnd)));