package HTML::Entities::Numbered;
use strict;
use HTML::Entities::Numbered::Table;
use base qw(Exporter);
use vars qw($VERSION @EXPORT %DECIMALS %ENTITIES);
@EXPORT = qw(
name2decimal name2hex name2decimal_xml name2hex_xml decimal2name hex2name
);
$VERSION = '0.04';
BEGIN { %ENTITIES = reverse %DECIMALS }
sub name2decimal {
my $content = shift;
$content =~ s/(&[a-z0-9]+;)/_convert2num($1, '%d;')/ieg;
return $content;
}
sub name2hex {
my $content = shift;
$content =~ s/(&[a-z0-9]+;)/_convert2num($1, '%X;')/ieg;
return $content;
}
sub name2decimal_xml {
my $content = shift;
$content =~ s{(&(?:(lt|gt|amp|quot|apos)|[a-z0-9]+);)}
{ $2 ? $1 : _convert2num($1, '%d;') }ieg;
return $content;
}
sub name2hex_xml {
my $content = shift;
$content =~ s{(&(?:(lt|gt|amp|quot|apos)|[a-z0-9]+);)}
{ $2 ? $1 : _convert2num($1, '%X;') }ieg;
return $content;
}
sub decimal2name {
my $content = shift;
$content =~ s/(\d+;)/_convert2name($1)/ieg;
return $content;
}
sub hex2name {
my $content = shift;
$content =~ s/([a-f0-9]+;)/_convert2name($1)/ieg;
return $content;
}
sub _convert2num {
my ( $reference, $format ) = @_;
my ($name) = $reference =~ /^&([a-z0-9]+);$/i;
return exists $DECIMALS{$name}
? sprintf( $format, $DECIMALS{$name} )
: $reference;
}
sub _convert2name {
my $reference = shift;
my ( $is_hex, $decimal ) = $reference =~ /^(x?)([a-f0-9]+);$/i;
$decimal = sprintf( '%d', ( $is_hex ? hex($decimal) : $decimal ) );
return exists $ENTITIES{$decimal}
? sprintf( '&%s;', $ENTITIES{$decimal} )
: $reference;
}
1;
__END__
=head1 NAME
HTML::Entities::Numbered - Conversion of numbered HTML entities
=head1 SYNOPSIS
use HTML::Entities::Numbered;
$html = 'Hi Honey♥';
# convert named HTML entities to numbered (decimal)
$decimal = name2decimal($html); # Hi Honey♥
# to numbered (hexadecimal)
$hex = name2hex($html); # Hi Honey♥
$content = 'Copyright © Larry Wall';
# convert numbered HTML entities (decimal) to named
$name1 = decimal2name($content); # Copyright © Larry Wall
$content = 'Copyright © Larry Wall';
# convert numbered HTML entitites (hexadecimal) to named
$name2 = hex2name($content); # Copyright © Larry Wall
$xml = '"Give me ¥10,000" > cherie♠';
# convert named HTML entities to numbered
# except valid XML entities (decimal)
$decimal = name2decimal_xml($xml); # "Give me ¥10,000"
# > cherie♠
# to numbered except valid XML entities (hexdecimal)
$hex = name2hex_xml($xml); # "Give me ¥10,000"
# > cherie♠
=head1 DESCRIPTION
HTML::Entities::Numbered is a content conversion filter for named HTML
entities (symbols, mathmetical symbols, Greek letters, Latin letters,
etc.).
When an argument of C or C contains some
B HTML entities, they will be replaced to numbered HTML
entities. And when an argument of C or
C contains some B numbered HTML entities,
they will be replaced to numbered HTML entities B (the excepted "valid XML entities" are the following five
entities: C<<>, C<>>, C<&>, C<">, C<'>).
By the same token, when an argument of C or
C contains some B numbered HTML entities, they
will be replaced to named HTML entities.
(the exception "valid XML entities" means the following five entities:
C<<>, C<>>, C<&>, C<">, C<'>)
On version 0.03, the entities hash table is imported from
L (with obsolete class
C for older releases of Perl).
At the moment, 0.04 (or later) is included
L to import HTML entities table, and
thereby we do not need to have L (included in
L distribution).
This may be also useful for making valid XML (corrects the undefined
entity references, and enhanced by addition of functions conform to
the XML).
=head1 FUNCTIONS
Following all functions are exported by default.
=over 4
=item * name2decimal
Some included named HTML entities in argument of C
will be replaced to decimal numbered HTML entities.
=item * name2hex
Some included named HTML entities in argument of C
will be replaced to hexadecimal numbered HTML entities.
=item * decimal2name
Some include decimal numbered HTML entities in argument of
C will be replaced to named HTML entities
(If they're nameable).
=item * hex2name
Some include hexadecimal numbered HTML entities in argument of
C will be replaced to named HTML entities
(If they're nameable).
=item * name2decimal_xml
Some included named HTML entities in argument of C
will be replaced to decimal numbered HTML entities B.
=item * name2hex_xml
Some included named HTML entities in argument of C
will be replaced to hexadecimal numbered HTML entities B.
=back
If you'd prefer not to import them functions into the caller's
namespace, you can call them as below:
use HTML::Entities::Numbered ();
$decimal = HTML::Entities::Numbered::name2decimal($str);
$hex = HTML::Entities::Numbered::name2hex($str);
$named1 = HTML::Entities::Numbered::decimal2name($str);
$named2 = HTML::Entities::Numbered::hex2name($str);
$decimal = HTML::Entities::Numbered::name2decimal_xml($str);
$hex = HTML::Entities::Numbered::name2hex_xml($str);
=head1 AUTHOR
Koichi Taniguchi Etaniguchi@livedoor.jpE
Develop triggered by IKEBE Tomohiro Eikebe@cpan.orgE
Many thanks to Tatsuhiko Miyagawa Emiyagawa@cpan.orgE
=head1 COPYRIGHT
Copyright (c) 2004 Koichi Taniguchi. Japan. All rights reserved.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
L,
L
=cut