From 80b82c506c747a32d551545abcd5be9dff8a9a03 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 11 Apr 2015 18:27:25 +0000 Subject: *** empty log message *** --- src/gentables | 307 ++++ src/table/big5.h | 2436 ++++++++++++++++++++++++++++++ src/table/gb2312_1980_0.h | 2229 ++++++++++++++++++++++++++++ src/table/iso8859_8.h | 31 + src/table/jis0201_1976_0.h | 20 + src/table/ksc5601_1987_0.h | 3494 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 8517 insertions(+) create mode 100755 src/gentables create mode 100644 src/table/big5.h create mode 100644 src/table/gb2312_1980_0.h create mode 100644 src/table/iso8859_8.h create mode 100644 src/table/jis0201_1976_0.h create mode 100644 src/table/ksc5601_1987_0.h diff --git a/src/gentables b/src/gentables new file mode 100755 index 0000000..262169d --- /dev/null +++ b/src/gentables @@ -0,0 +1,307 @@ +#!/opt/bin/perl + +# the generated tables mostly have NOT been checked so far! + +use v5.8.2; +use utf8; +use Encode; +use Encode::JP; +use Encode::CN; +use Encode::KR; +use Encode::HanExtra; +use Encode::JIS2K; + +my $gen; + +$TO_UNICODE = 0; # also generate to_unicode tables + +sub linear { + my ($a, $l, $h, $b) = @_; + for ($l .. $h) { + return unless defined $a->[$_] && $a->[$_] == $_ + ($b - $l); + } + 1; +} + +sub wrap { + my $res = " "; + $res .= "$1\n " while $_[0] =~ /\G(.{90}\S*?)\s+/gc; + $res .= "$1" if $_[0] =~ /\G(.*)$/; + $res; +} + +my $last_tab_full; # hack + +sub gentab { + my ($enc, $l, $h, $f, $rep) = @_; + $last_tab_full = 0; + + " = {\n" + . (wrap join ", ", + map +(sprintf "0x$f", + defined $enc->[$_] ? $enc->[$_] : $last_tab_full++ * 0 + $rep + ), $l..$h) + . "\n};\n"; +} + +sub gen { + my ($enc, $base, $ch) = @_; + + my ($fun, $tab); + my (@t1, @t2); + + for (255, 159, 127, 126, 125) { + if (linear $enc, 0, $_, 0) { + undef $enc->[$_] for 0..$_; + $fun .= sprintf "if (· <= 0x%04x) return ·;\n", $_; + } + } + + for (126, 127, 128) { + if (linear $enc, $_, 159, $_) { + undef $enc->[$_] for $_..159; + $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return ·;\n", $_, 159; + } + } + + for (126, 127) { + next unless defined $enc->[$_]; + $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $_, delete $enc->[$_]; + } + + my @map; + my @map2; + + for (0 .. $#$enc) { + if (defined $enc->[$_]) { + $map[$_] = $enc->[$_]; + $map2[$_ >> 8]++; + } + } + + for my $p (0..255) { + if ($map2[$p]) { + my $b = $p << 8; + + my ($l, $h); + for my $i (0..255) { $l = $i, last if defined $map[$b + $i]; } + for my $i (0..255) { $h = 255 - $i, last if defined $map[$b + 255 - $i]; } + + if ($map2[$p] <= 5) { + for ($l .. $h) { + next unless defined $enc->[$b + $_]; + $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $b + $_, $enc->[$b + $_]; + } + } elsif (linear $enc, $b + $l, $b + $h, $enc->[$b + $l]) { + my $dif = $enc->[$b + $l] - ($b + $l); + $dif = $dif < 0 ? sprintf "- 0x%04x", -$dif : sprintf "+ 0x%04x", $dif; + $fun .= sprintf "if (· >= 0x%04x && 0x%04x >= ·) return · %s;\n", $b + $l, $b + $h, $dif; + } elsif ($map2[$p] <= 5) { # defunct + $fun .= "switch (·)\n {\n"; + for ($l .. $h) { + next unless defined $enc->[$b + $_]; + $fun .= sprintf " case 0x%04x: return 0x%04x;\n", $b + $_, $enc->[$b + $_]; + } + $fun .= " }\n"; + } else { + my ($i, $t, $f) = (0, "uint8_t", "%02x"); + for ($l .. $h) { + if ($enc->[$b + $_] > 255) { + ($i, $t, $f) = (1, "uint16_t", "%04x"); + last; + } + } + + $i ? push @t2, [$p, $l, $h, $t, $f] + : push @t1, [$b, $l, $h, $t, $f]; + } + + } + } + + if (@t2 <= 3) { + push @t1, @t2; + @t2 = (); + } + + for (@t1) { + my ($b, $l, $h, $t, $f) = @$_; + my $rep = $b + $l == 0 ? 128 : 0; + $tab .= "static const $t $base\_$b\[]" . gentab $enc, $b + $l, $b + $h, $f; + $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x)\n" + . " return %s$base\_$b\[· - 0x%04x];\n", + $b + $l, $b + $h, + ($last_tab_full ? sprintf "$base\_$b\[· - 0x%04x] == $rep ? NOCHAR : ", $b + $l : ""), + $b + $l; + } + + if (@t2) { + my ($min, $max) = (255, 0); + my ($l, $h) = (255, 0); + for (@t2) { + my ($p, $L, $H, $t, $f) = @$_; + $max = $p if $p > $max; + $min = $p if $p < $min; + $l = $L if $L < $l; + $h = $H if $H > $h; + } + + $fun .= "uint8_t l = ·;\n"; + $fun .= "uint16_t h = · >> 8;\n"; + + if ($h - $l < 200) { + my $d = $h - $l + 1; + + my @enc; + for $p ($min .. $max) { + for $i ($l .. $h) { + $enc[($p - $min) * $d + $i - $l] = $enc->[$p * 256 + $i]; + } + } + $tab .= "static const uint16_t $base\_m[]" . gentab \@enc, 0, $#enc, "%04x"; + + if ($last_tab_full) { + $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n" + . " && 0x%02x <= l && l <= 0x%02x)\n" + . " return $base\_m\[h * 0x%02x + l - 0x%04x]\n" + . " ? $base\_m\[h * 0x%02x + l - 0x%04x]\n" + . " : NOCHAR;\n", + $min, $max, $l, $h, + ($d, $min * $d + $l) x 2; + } else { + $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n" + . " && 0x%02x <= l && l <= 0x%02x)\n" + . " return $base\_m\[h * 0x%02x + l - 0x%04x];\n", + $min, $max, $l, $h, + $d, + $min * $d + $l; + } + } else { + my @tab = (0) x ($max - $min); + for (@t2) { + my ($p, undef, undef, $t, $f) = @$_; + $tab .= "static const $t $base\_$p\[]" . gentab $enc, $p * 256 + $l, $p * 256 + $h, $f, 0; + $tab[$p - $min] = "$base\_$p"; + } + + $tab .= "const uint16_t *$base\_i[] = {\n" + . (wrap join ", ", @tab) + . "\n};\n\n"; + + $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n" + . " && 0x%02x <= l && l <= 0x%02x\n" + . " && $base\_i[h - 0x%02x])\n" + . " return $base\_i\[h - 0x%02x][l - 0x%02x]\n" + . " ? $base\_i\[h - 0x%02x][l - 0x%02x]\n" + . " : NOCHAR;\n", + $min, $max, $l, $h, + $min, ($min, $l) x 2; + } + } + + $fun .= "return NOCHAR;\n"; + + $fun =~ s/·/$ch/g; + + ($tab, $fun); +} + +while () { + my ($group, $base, $cs, $type) = split /\s+/; + + my @enc1; + my @enc2; + + for (0 .. 65535) { + my $enc = encode $cs, (chr $_), Encode::FB_QUIET; + + if (length $enc) { + my $code = hex unpack "H*", $enc; + $enc1[$_] = $code; + $enc2[$code] = $_ unless defined $enc2[$code]; + } + } + + my ($tab1, $fun1); + my ($tab2, $fun2); + + ($tab1, $fun1) = gen \@enc1, "$base\_f", "unicode"; + ($tab2, $fun2) = gen \@enc2, "$base\_t", "enc" if $TO_UNICODE; + + $fun1 =~ s/^/ /gm; + $fun2 =~ s/^/ /gm; + + print "$base\n"; + open OUT, ">table/$base.h" or die; + + print OUT <