NAME UTF8::R2 - makes UTF-8 scripting easy for enterprise use SYNOPSIS use UTF8::R2; use UTF8::R2 ver.sion; # match or die use UTF8::R2 qw( RFC3629 ); # m/./ matches RFC3629 codepoint (default) use UTF8::R2 qw( RFC2279 ); # m/./ matches RFC2279 codepoint use UTF8::R2 qw( WTF8 ); # m/./ matches WTF-8 codepoint use UTF8::R2 qw( RFC3629.ja_JP ); # optimized RFC3629 for ja_JP use UTF8::R2 qw( WTF8.ja_JP ); # optimized WTF-8 for ja_JP use UTF8::R2 qw( %mb ); # multibyte regex by %mb use UTF8::R2 qw( *mb ); # multibyte regex by %mb, and mb::* subroutines DESCRIPTION The UTF8::R2 module provides minimal UTF-8 subroutines for a stable scripting environment, using no utf8 pragma and no UTF-8 flag. # on use UTF8::R2 qw( RFC2279 ); # m/./ means # beautiful concept in young days # https://www.ietf.org/rfc/rfc2279.txt 'RFC2279' => qr{(?>@{[join('', qw( [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF] | [\xC2-\xDF][\x80-\xBF] | [\xE0-\xEF][\x80-\xBF][\x80-\xBF] | [\xF0-\xF4][\x80-\xBF][\x80-\xBF][\x80-\xBF] | [\x00-\xFF] ))]})}x, # on use UTF8::R2; # or use UTF8::R2 qw( RFC3629 ); # m/./ means # https://tools.ietf.org/rfc/rfc3629.txt 'RFC3629' => qr{(?>@{[join('', qw( [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF] | [\xC2-\xDF][\x80-\xBF] | [\xE0-\xE0][\xA0-\xBF][\x80-\xBF] | [\xE1-\xEC][\x80-\xBF][\x80-\xBF] | [\xED-\xED][\x80-\x9F][\x80-\xBF] | [\xEE-\xEF][\x80-\xBF][\x80-\xBF] | [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] | [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] | [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] | [\x00-\xFF] ))]})}x, # or use UTF8::R2 qw( WTF8 ); # m/./ means # http://simonsapin.github.io/wtf-8/ 'WTF8' => qr{(?>@{[join('', qw( [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF] | [\xC2-\xDF][\x80-\xBF] | [\xE0-\xE0][\xA0-\xBF][\x80-\xBF] | [\xE1-\xEF][\x80-\xBF][\x80-\xBF] | [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] | [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] | [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] | [\x00-\xFF] ))]})}x, # or use UTF8::R2 qw( RFC3629.ja_JP ); # m/./ means # 
optimized RFC3629 for ja_JP 'RFC3629.ja_JP' => qr{(?>@{[join('', qw( [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF] | [\xE1-\xEC][\x80-\xBF][\x80-\xBF] | [\xC2-\xDF][\x80-\xBF] | [\xEE-\xEF][\x80-\xBF][\x80-\xBF] | [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] | [\xE0-\xE0][\xA0-\xBF][\x80-\xBF] | [\xED-\xED][\x80-\x9F][\x80-\xBF] | [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] | [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] | [\x00-\xFF] ))]})}x, # or use UTF8::R2 qw( WTF8.ja_JP ); # m/./ means # optimized WTF-8 for ja_JP 'WTF8.ja_JP' => qr{(?>@{[join('', qw( [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF] | [\xE1-\xEF][\x80-\xBF][\x80-\xBF] | [\xC2-\xDF][\x80-\xBF] | [\xE0-\xE0][\xA0-\xBF][\x80-\xBF] | [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] | [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] | [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] | [\x00-\xFF] ))]})}x, SUBROUTINES VERY USEFUL UTF-8 CODEPOINT FEATURE UTF8::R2::length($_) UTF8::R2::qr(qr/ utf8_regex_here . \D \H \N \R \S \V \W \b \d \h \s \v \w \x{UTF8hex} [ \D \H \S \V \W \b \d \h \s \v \w \x{UTF8hex} \x{UTF8hex}-\x{UTF8hex} [:POSIX:] [:^POSIX:] ] ? + * {n} {n,} {n,m} /imsxo) # no /gc UTF8::R2::split(qr/$utf8regex/imsxo, $_, 3) UTF8::R2::substr($_, 0, 5) UTF8::R2::tr($_, 'ABC', 'XYZ', 'cdsr') use UTF8::R2 qw(%mb); $_ =~ $mb{qr/$utf8regex/imsxo} # no /gc $_ =~ m<\G$mb{qr/$utf8regex/imsxo}>gc $_ =~ s<$mb{qr/before/imsxo}><after>egr DEPENDENCIES perl version 5.005_03 to newest perl INSTALLATION perl Makefile.PL make make test make install AUTHOR INABA Hitoshi This software was made with reference to the software and documents written by the following hackers and other persons. I am thankful to all of them. Larry Wall, Perl http://www.perl.org/ LICENSE AND COPYRIGHT This software is free software; you can redistribute it and/or modify it under the same terms as Perl itself. See perlartistic. SEE ALSO http://search.cpan.org/~ina/ http://backpan.perl.org/authors/id/I/IN/INA/