#!/usr/bin/perl
# Case mappings read from UnicodeData.txt, keyed by the code point field
# exactly as it appears in the file (an upper-case hex string such as "0041").
my %uv = ();    # code point => field 12 (uppercase mapping), when non-empty
my %lv = ();    # code point => field 13 (lowercase mapping), when non-empty

# Three-argument open with a lexical handle: the mode cannot be altered by
# the file name, and the handle cannot be confused with another bareword.
open(my $unicodata_fh, '<', 'UnicodeData.txt')
    || die "cannot find UnicodeData.txt: $!";

# Read the file line by line.  (An empty "while ()" condition is always
# true in Perl, so the loop must name the handle it reads from.)
while (my $line = <$unicodata_fh>) {
    # Strip only the line terminator (LF or CRLF); chop would eat the last
    # real character of a final line that has no trailing newline.
    $line =~ s/\r?\n\z//;

    # A negative limit keeps trailing empty fields, so the field positions
    # stay fixed even when the last columns of a record are empty.
    my @entry = split(/;/, $line, -1);

    my $codepoint = $entry[0];
    next unless defined $codepoint && $codepoint ne "";

    # Short or malformed lines may not have these fields at all.
    my $upper = defined $entry[12] ? $entry[12] : "";
    my $lower = defined $entry[13] ? $entry[13] : "";

    $uv{$codepoint} = $upper unless $upper eq "";
    $lv{$codepoint} = $lower unless $lower eq "";
}

# Close the handle that was actually opened above.
close($unicodata_fh);
# Second pass: walk the block ranges listed in Blocks.txt.  For every block
# that contains at least one code point with an entry in %uv or %lv, append
# four "const ...=" string declarations to $data and two test calls
# (testUpper / testLower) to $code.
open ( UNICODEBLOCKS , "< Blocks.txt")
|| die "cannot find Blocks.txt";
# $data accumulates the generated "const" declarations.
my $data = "";
# NOTE(review): the next line looks truncated.  The initial value of $code,
# the definition of $ucp used in the pattern below, and the header of the
# loop whose body follows are not present in this copy of the file.
# Presumably a here-document and a "while" reading UNICODEBLOCKS were lost --
# TODO restore from the upstream source before running this script.
my $code = <) {
# Drops the final character of $_ (expected to be the line's newline).
chop;
# NOTE(review): $ucp is not defined anywhere in the visible source; it is
# presumably a regex fragment matching a hex code point -- confirm.
# NOTE(review): the ".." between the two captures is unescaped, so it matches
# any two characters, not only two literal dots.
if (/^($ucp)..($ucp); (.+)$/) { # If the line looks like this pattern
# (example: 0000..007F; Basic Latin)
# $first, $last and $name are assigned without "my", so they are package
# globals rather than lexicals.
$first = $1;
$last = $2;
# Copy the block name and delete spaces and hyphens from the copy; the
# result is used below as the prefix of the generated constant names.
($name = $3) =~ s/[ -]//g;
# Scan the block's range and stop at the first code point that has an
# upper- or lower-case entry.
my $blockHasCasing = 0;
for (hex($first) .. hex($last)) {
# Same formatting as the keys of %uv / %lv: upper-case hex, at least 4 digits.
my $codepoint = sprintf("%04X", $_);
if (defined $uv{$codepoint} || defined $lv{$codepoint}) {
$blockHasCasing = 1;
last;
}
}
# Blocks without any case mapping produce no output at all.
if ($blockHasCasing) {
print("printing data for $name\n");
# Each of the four strings starts as the opening of a declaration of the
# form: const <Name>To<Upper|Lower><Input|Expected>="
my $toUpperInput = "const " . $name . "ToUpperInput=\"";
my $toUpperExpected = "const " . $name . "ToUpperExpected=\"";
my $toLowerInput = "const " . $name . "ToLowerInput=\"";
my $toLowerExpected = "const " . $name . "ToLowerExpected=\"";
# $u is also a package global (no "my").
foreach $u (hex($first) .. hex($last)) {
# Code points below 0x20 are left out of all four strings.
unless ($u < 0x20) {
my $codepoint = sprintf("%04X", $u);
# Input strings get a backslash-u escape for the code point itself; the
# Expected strings get the mapped value when one exists, otherwise the
# code point unchanged.
# NOTE(review): "%04X" yields more than four hex digits above 0xFFFF, so
# the escape is longer than four digits for such code points -- confirm
# that the consumer of test_casing.data handles this.
$toUpperInput .= "\\u$codepoint";
$toUpperExpected .= (exists $uv{$codepoint}) ? "\\u$uv{$codepoint}" : "\\u$codepoint";
$toLowerInput .= "\\u$codepoint";
$toLowerExpected .= (exists $lv{$codepoint}) ? "\\u$lv{$codepoint}" : "\\u$codepoint" ;
}
}
# Close each string literal and terminate the declaration.
$toUpperInput .= "\";\n";
$toUpperExpected .= "\";\n";
$toLowerInput .= "\";\n";
$toLowerExpected .= "\";\n";
# The four declarations are appended to $data, each followed by a blank line.
$data .= $toUpperInput . "\n" . $toUpperExpected . "\n" .
$toLowerInput . "\n" . $toLowerExpected . "\n";
# One testUpper and one testLower call per block, referring to the
# constants declared above by name.
$code .= " testUpper(" . $name . "ToUpperInput, " .
$name . "ToUpperExpected);\n";
$code .= " testLower(" . $name . "ToLowerInput, " .
$name . "ToLowerExpected);\n";
}
}
}
close(UNICODEBLOCKS);
# Append the final check and the closing brace to the generated test code.
$code .= " do_check_true(gPassed);\n}\n";

# Write the declarations followed by the test calls to test_casing.data in
# the current directory.  Three-argument open with a lexical handle keeps
# the mode separate from the file name; $! reports why the open failed.
open(my $out_fh, '>', 'test_casing.data')
    || die "cannot open output file: $!";

print {$out_fh} $data
    or die "cannot write output file: $!";
print {$out_fh} $code
    or die "cannot write output file: $!";

# Output is buffered, so a failed write (e.g. disk full) may only be
# reported when the handle is closed; check it instead of ignoring it.
close($out_fh)
    || die "cannot close output file: $!";