21.2 Locale-Based Operations and CLDR Integration

Building on fundamental locale understanding, this section explores practical operations that depend on locale information, including sorting, collation, currency codes, and integration with the Unicode Common Locale Data Repository (CLDR).

Collation and Locale-Based Sorting

Different locales have different rules for sorting strings, especially with special characters and accents.

// Collation and sorting with locales
import java.text.Collator;
import java.util.Arrays;
import java.util.Locale;

// Creating collators for different locales
Collator englishCollator = Collator.getInstance(Locale.ENGLISH);
Collator germanCollator = Collator.getInstance(Locale.GERMANY);
Collator spanishCollator = Collator.getInstance(Locale.forLanguageTag("es-ES")); // shown for illustration; not used below

// Example strings with accents and special characters
String[] words = {"café", "carrot", "apple", "über", "éclair", "zulu"};

// Sort using English collation (on a copy, so the original array keeps its order)
String[] englishSorted = words.clone();
Arrays.sort(englishSorted, englishCollator);
System.out.println("English sort: " + Arrays.toString(englishSorted));
// Output: English sort: [apple, café, carrot, éclair, über, zulu]
// Accented letters sort next to their base letters; a plain String.compareTo sort
// would instead place "éclair" and "über" after "zulu".

// Sort using German collation
String[] germanSorted = words.clone();
Arrays.sort(germanSorted, germanCollator);
System.out.println("German sort: " + Arrays.toString(germanSorted));
// Output: German sort: [apple, café, carrot, éclair, über, zulu]
// For this word list the order matches the English result: "über" sorts under 'u' in both.

// Collation strength levels
// PRIMARY: Ignores accents and case differences (a = à = A)
// SECONDARY: Considers accents but ignores case (a ≠ à = À)
// TERTIARY: Considers both accents and case (default)
// IDENTICAL: Considers all differences including internal Unicode values

// Note: setStrength mutates the collator, so every later comparison made with
// englishCollator uses the new strength.
englishCollator.setStrength(Collator.PRIMARY);
System.out.println("Compare 'café' vs 'cafe' (PRIMARY): " + 
    englishCollator.compare("café", "cafe")); // 0 (equal)

englishCollator.setStrength(Collator.SECONDARY);
System.out.println("Compare 'café' vs 'cafe' (SECONDARY): " + 
    englishCollator.compare("café", "cafe")); // ≠ 0 (different)

// Case-insensitive collation
// (the strength is already SECONDARY here; it is set again so this example reads on its own)
englishCollator.setStrength(Collator.SECONDARY);
System.out.println("Compare 'Hello' vs 'hello': " + 
    englishCollator.compare("Hello", "hello")); // 0 (case ignored)

Currency and Locale

Currency information is deeply tied to locale. Java provides access to currency codes, symbols, and formatting information.

// Currency operations with locales
import java.util.Currency;
import java.util.Locale;
import java.text.NumberFormat;

// Getting currency for a locale (the locale's country selects the currency)
Currency usCurrency = Currency.getInstance(Locale.US); // USD
Currency euroCurrency = Currency.getInstance(Locale.FRANCE); // EUR
Currency jpyCurrency = Currency.getInstance(Locale.JAPAN); // JPY

System.out.println("US Currency: " + usCurrency.getCurrencyCode()); // USD
System.out.println("Euro Currency: " + euroCurrency.getCurrencyCode()); // EUR
System.out.println("JPY Currency: " + jpyCurrency.getCurrencyCode()); // JPY

// Currency symbols
// getSymbol() without an argument uses the default display locale, so its output
// changes from machine to machine. Passing the locale keeps the example reproducible.
System.out.println("US Symbol: " + usCurrency.getSymbol(Locale.US)); // $
System.out.println("US Symbol in German locale: " + 
    usCurrency.getSymbol(Locale.GERMANY)); // typically $ with CLDR data; other locale data may give US$ or USD

System.out.println("Euro Symbol: " + euroCurrency.getSymbol(Locale.FRANCE)); // €

// Number of fraction digits
System.out.println("USD fraction digits: " + usCurrency.getDefaultFractionDigits()); // 2
System.out.println("JPY fraction digits: " + jpyCurrency.getDefaultFractionDigits()); // 0

// Formatting currency amounts
// (double keeps the example short; use BigDecimal for real monetary values)
double price = 1234.56;
NumberFormat usFormat = NumberFormat.getCurrencyInstance(Locale.US);
NumberFormat gerFormat = NumberFormat.getCurrencyInstance(Locale.GERMANY);
NumberFormat jpFormat = NumberFormat.getCurrencyInstance(Locale.JAPAN);

System.out.println("US Format: " + usFormat.format(price)); // $1,234.56
System.out.println("German Format: " + gerFormat.format(price)); // 1.234,56 € (the space is a non-breaking space)
System.out.println("JP Format: " + jpFormat.format(price)); // ¥1,235 (may print the full-width sign ￥, depending on locale data)

// The "no currency" code
// java.util.Currency only models ISO 4217 codes. "XXX" is the ISO 4217 code reserved for
// transactions that involve no currency; it is not a way to define a custom currency.
// Codes outside ISO 4217 (for example "BTC") make getInstance throw IllegalArgumentException by default.
Currency noCurrency = Currency.getInstance("XXX");

// All available currencies
java.util.Set<Currency> allCurrencies = Currency.getAvailableCurrencies();
System.out.println("Total currencies: " + allCurrencies.size());

// Find currencies by code
Currency gbp = Currency.getInstance("GBP");
// Ask for the English name explicitly; getDisplayName() alone follows the default display locale.
System.out.println("GBP: " + gbp.getDisplayName(Locale.ENGLISH)); // British Pound

Language Display Names

Locales can provide display names for themselves and other locales, useful for language selection menus.

// Language display names and menus
import java.util.Locale;
import java.util.TreeMap;
import java.util.Map;

// Building a language selection menu
// TreeMap keeps the entries sorted by display name (plain String order is fine
// for these capitalized English names).
Map<String, Locale> languageMenu = new TreeMap<>();

Locale[] baseLanguages = {
    Locale.ENGLISH,
    Locale.FRENCH,
    Locale.GERMAN,
    Locale.ITALIAN,
    Locale.SPANISH,
    Locale.JAPANESE,
    Locale.CHINESE,
    Locale.KOREAN,
    // forLanguageTag replaces the Locale constructors, which are deprecated in recent JDKs
    Locale.forLanguageTag("pt-BR"),
    Locale.forLanguageTag("ru")
};

// Add display name in English
for (Locale lang : baseLanguages) {
    String displayName = lang.getDisplayLanguage(Locale.ENGLISH);
    languageMenu.put(displayName, lang);
}

System.out.println("Language Menu (English):");
languageMenu.forEach((name, locale) -> 
    System.out.println("  " + name + " (" + locale.toLanguageTag() + ")")
);

// Add display name in native language (better UX)
// Native names mix scripts, accents and letter case ("Deutsch", "español", "日本語"),
// so the keys are ordered with a Collator rather than by raw character values.
Map<String, Locale> nativeLanguageMenu = new TreeMap<>(
    java.text.Collator.getInstance(Locale.ROOT)
);

for (Locale lang : baseLanguages) {
    String displayName = lang.getDisplayLanguage(lang); // Display in native language
    nativeLanguageMenu.put(displayName, lang);
}

System.out.println("\nLanguage Menu (Native Names):");
nativeLanguageMenu.forEach((name, locale) -> 
    System.out.println("  " + name)
);

// Full locale display name with country
Locale[] locales = {Locale.US, Locale.UK, Locale.FRANCE, Locale.GERMANY, Locale.JAPAN};

System.out.println("\nFull Locale Display Names:");
for (Locale loc : locales) {
    // Each name is rendered in the locale's own language,
    // e.g. "English (United States)", "français (France)", "Deutsch (Deutschland)"
    String displayName = loc.getDisplayName(loc);
    System.out.println("  " + displayName);
}

ISO Code and Locale Lookup

This section shows how to work with ISO 639 (language), ISO 3166 (country), and ISO 4217 (currency) codes.

// ISO codes and locale lookup
import java.util.Locale;
import java.util.Currency;

/**
 * Helpers for resolving locales and currencies from ISO 639 (language),
 * ISO 3166 (country) and ISO 4217 (currency) codes.
 */
class IsoCodeLookup {
    /**
     * Finds an available locale by its ISO 639 language code (2 or 3 letter).
     *
     * <p>The first match in {@link Locale#getAvailableLocales()} is returned. That array has
     * no specified order, so the result may be the language-only locale or any regional variant.
     *
     * @param languageCode language code to look for, e.g. {@code "en"}
     * @return a locale whose language equals {@code languageCode}, or {@code null} if none is available
     */
    public static Locale getLocaleByLanguageCode(String languageCode) {
        for (Locale candidate : Locale.getAvailableLocales()) {
            if (candidate.getLanguage().equals(languageCode)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Finds an available locale by its ISO 3166 country code (2 letter).
     *
     * <p>The first match in {@link Locale#getAvailableLocales()} is returned, so for a country
     * with several languages the language of the result is not predictable.
     *
     * @param countryCode country code to look for, e.g. {@code "FR"}
     * @return a locale whose country equals {@code countryCode}, or {@code null} if none is available
     */
    public static Locale getLocaleByCountryCode(String countryCode) {
        for (Locale candidate : Locale.getAvailableLocales()) {
            if (candidate.getCountry().equals(countryCode)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Returns the ISO 4217 currency code (3 letter) used in the locale's country.
     *
     * @param locale locale whose country selects the currency
     * @return the currency code, or {@code null} when the locale has no valid country
     *         or its territory has no currency
     */
    public static String getCurrencyForLocale(Locale locale) {
        try {
            Currency currency = Currency.getInstance(locale);
            // getInstance returns null instead of throwing for territories
            // without a currency (such as Antarctica), so guard the dereference.
            return currency == null ? null : currency.getCurrencyCode();
        } catch (IllegalArgumentException ignored) {
            // The locale's country is not a supported ISO 3166 code (e.g. a language-only locale).
            return null;
        }
    }

    /**
     * Builds a locale from a language code and a country code.
     *
     * @param language ISO 639 language code
     * @param country ISO 3166 country code
     * @return the locale for the given language and country
     */
    public static Locale createLocaleFromCodes(String language, String country) {
        return new Locale(language, country);
    }
}

// Using ISO code lookup
// The two locale lookups return the first match found in Locale.getAvailableLocales(),
// whose order is unspecified, so the locales shown below are examples rather than guarantees.
System.out.println("Language code 'en': " + 
    IsoCodeLookup.getLocaleByLanguageCode("en")); // e.g. en_US (any locale whose language is "en")

System.out.println("Language code 'de': " + 
    IsoCodeLookup.getLocaleByLanguageCode("de")); // e.g. de_DE (any locale whose language is "de")

System.out.println("Country code 'FR': " + 
    IsoCodeLookup.getLocaleByCountryCode("FR")); // e.g. fr_FR (any locale whose country is "FR")

System.out.println("Currency for US: " + 
    IsoCodeLookup.getCurrencyForLocale(Locale.US)); // USD

System.out.println("Currency for Japan: " + 
    IsoCodeLookup.getCurrencyForLocale(Locale.JAPAN)); // JPY

Locale-Aware String Operations

String operations like uppercase/lowercase conversion differ across locales.

// Locale-specific string operations
import java.util.Locale;

// Uppercase with locale
String text = "istanbul";
Locale turkish = Locale.forLanguageTag("tr");
String englishUpper = text.toUpperCase(Locale.ENGLISH); // ISTANBUL
String turkishUpper = text.toUpperCase(turkish); // İSTANBUL

System.out.println("English uppercase: " + englishUpper);
System.out.println("Turkish uppercase: " + turkishUpper);

// The difference: In Turkish, 'i' becomes 'İ' (with dot) and 'ı' becomes 'I' (without dot)

// Lowercase with locale
// The mapping is mirrored when lowercasing: Turkish turns 'I' into dotless 'ı'.
String upperText = "ISTANBUL";
String englishLower = upperText.toLowerCase(Locale.ENGLISH); // istanbul
String turkishLower = upperText.toLowerCase(turkish); // ıstanbul (dotless ı)

System.out.println("English lowercase: " + englishLower);
System.out.println("Turkish lowercase: " + turkishLower);

// Case conversion utilities
/** Case-conversion helpers that take the target locale explicitly. */
class LocaleAwareStringUtils {
    /**
     * Lowercases {@code value} with the rules of {@code locale}, producing a key
     * that can be used for case-insensitive comparison.
     *
     * <p>Keys are only comparable when they were built with the same locale.
     *
     * @param value text to normalize; must not be null
     * @param locale locale whose case-mapping rules apply
     * @return the lowercased text
     */
    public static String toCaseInsensitiveKey(String value, Locale locale) {
        return value.toLowerCase(locale);
    }

    /**
     * Uppercases the first character of {@code value} and lowercases the rest.
     *
     * @param value text to convert; null and empty values are returned unchanged
     * @param locale locale whose case-mapping rules apply
     * @return the converted text
     */
    public static String toDisplayCase(String value, Locale locale) {
        if (value == null || value.isEmpty()) {
            return value;
        }
        // Split after the first code point rather than the first char, so a leading
        // supplementary character (a surrogate pair) is uppercased as a whole.
        int firstEnd = value.offsetByCodePoints(0, 1);
        return value.substring(0, firstEnd).toUpperCase(locale)
            + value.substring(firstEnd).toLowerCase(locale);
    }
}

// Capitalize only the first letter of the whole string, using English case rules
String name = "john smith";
String displayCased = LocaleAwareStringUtils.toDisplayCase(name, Locale.ENGLISH); // John smith
System.out.println("Display case: " + displayCased);

Locale Variations by Region

Locales that share a language can still follow different regional conventions.

// Regional variations
import java.util.Locale;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.text.NumberFormat;

Date now = new Date();

// English variations
Locale[] englishLocales = {Locale.US, Locale.UK, Locale.CANADA,
                           Locale.forLanguageTag("en-AU"),
                           Locale.forLanguageTag("en-NZ")};

// locale.getDisplayName() labels each line in the default display locale's language;
// the formatted value itself follows the locale being demonstrated.
System.out.println("Date format across English locales:");
for (Locale locale : englishLocales) {
    DateFormat df = DateFormat.getDateInstance(DateFormat.LONG, locale);
    String formatted = df.format(now);
    System.out.println("  " + locale.getDisplayName() + ": " + formatted);
}

// Spanish variations
Locale[] spanishLocales = {Locale.forLanguageTag("es-ES"),
                           Locale.forLanguageTag("es-MX"),
                           Locale.forLanguageTag("es-AR")};

System.out.println("\nNumber format across Spanish locales:");
double number = 1234.56;
for (Locale locale : spanishLocales) {
    NumberFormat nf = NumberFormat.getInstance(locale);
    String formatted = nf.format(number);
    System.out.println("  " + locale.getDisplayName() + ": " + formatted);
}

// Portuguese variations
// Both locales are built with forLanguageTag; the Locale constructors are deprecated in recent JDKs.
Locale[] portugueseLocales = {Locale.forLanguageTag("pt-PT"),
                              Locale.forLanguageTag("pt-BR")};

System.out.println("\nCurrency format across Portuguese locales:");
for (Locale locale : portugueseLocales) {
    NumberFormat nf = NumberFormat.getCurrencyInstance(locale);
    String formatted = nf.format(number);
    System.out.println("  " + locale.getDisplayName() + ": " + formatted);
}

CLDR (Common Locale Data Repository) Concepts

The JDK has bundled CLDR locale data since Java 8, and CLDR has been the default locale data source since Java 9 (JEP 252). Understanding CLDR helps with advanced localization.

// CLDR integration and concepts
import java.util.Locale;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Calendar;

// CLDR provides data for:
// 1. Number formats (separators, grouping)
// 2. Date/time patterns (order of components)
// 3. Collation (sorting rules)
// 4. Calendar systems (when weeks start, etc.)
// 5. Plural rules (singular, plural, dual, etc.)
// 6. Transliterations

// Week start varies by locale
Locale us = Locale.US;
Locale france = Locale.FRANCE;

Calendar usCalendar = Calendar.getInstance(us);
Calendar frCalendar = Calendar.getInstance(france);

System.out.println("US first day of week: " + 
    usCalendar.getFirstDayOfWeek()); // 1 = Calendar.SUNDAY

System.out.println("France first day of week: " + 
    frCalendar.getFirstDayOfWeek()); // 2 = Calendar.MONDAY

// Pattern variation based on locale
// The patterns are written out explicitly to show the component order used in each region.
// "dd" and "MM" zero-pad day and month; single letters ("d", "M") would print 15/1/25.
SimpleDateFormat usFormat = new SimpleDateFormat("M/d/yy", Locale.US); // 1/15/25
SimpleDateFormat frFormat = new SimpleDateFormat("dd/MM/yy", Locale.FRANCE); // 15/01/25

Calendar cal = Calendar.getInstance();
cal.set(2025, Calendar.JANUARY, 15); // months are zero-based, hence the named constant
System.out.println("US date format: " + usFormat.format(cal.getTime()));
System.out.println("France date format: " + frFormat.format(cal.getTime()));

// Number pattern variation
/** Exposes pieces of a locale's number-formatting data. */
class LocaleDataInfo {
    /**
     * Describes the decimal and grouping separators used by {@code locale}.
     *
     * @param locale locale whose number symbols are reported
     * @return text of the form {@code Decimal: '.', Grouping: ','}
     */
    public static String getNumberFormatPattern(Locale locale) {
        // Read the symbols directly instead of casting NumberFormat.getInstance(locale)
        // to DecimalFormat: the factory is not guaranteed to return a DecimalFormat.
        java.text.DecimalFormatSymbols symbols =
            java.text.DecimalFormatSymbols.getInstance(locale);

        return String.format("Decimal: '%c', Grouping: '%c'",
                           symbols.getDecimalSeparator(), symbols.getGroupingSeparator());
    }
}

System.out.println("US number format: " + 
    LocaleDataInfo.getNumberFormatPattern(Locale.US)); // Decimal: '.', Grouping: ','

System.out.println("Germany number format: " + 
    LocaleDataInfo.getNumberFormatPattern(Locale.GERMANY)); // Decimal: ',', Grouping: '.'

Best Practices

  • Use Collator for locale-aware sorting: Never use simple string comparison for user-facing sort operations.
  • Be aware of regional variations: Same language code may have different conventions in different countries.
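  • Reuse Collator instances: They are relatively expensive to create. Currency.getInstance already returns one shared instance per currency, so Currency objects need no extra caching.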
  • Handle currency formatting locale-aware: Currency symbol placement and separators vary significantly.
  • Test with multiple locales: Especially when sorting, formatting, or comparing strings.
  • Use ISO codes for data interchange: Language, country, and currency codes are consistent across systems.
  • Account for locale changes: Applications may need to support user locale changes mid-session.
  • Document locale limitations: Note which locales are fully supported vs. partially supported.