Server IP : 85.214.239.14 / Your IP : 18.119.117.231 Web Server : Apache/2.4.62 (Debian) System : Linux h2886529.stratoserver.net 4.9.0 #1 SMP Tue Jan 9 19:45:01 MSK 2024 x86_64 User : www-data ( 33) PHP Version : 7.4.18 Disable Function : pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,pcntl_unshare, MySQL : OFF | cURL : OFF | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : OFF Directory : /usr/share/perl5/XML/SAX/PurePerl/ |
Upload File : |
# $Id$

package XML::SAX::PurePerl; # NB, not ::EncodingDetect!

use strict;
use warnings;

# encoding_detect($parser, $reader)
#
# Inspects the leading bytes returned by $reader->data and compares them
# against the byte signatures listed below (byte order marks, and the
# encoded forms of a leading "<" / "<?").
#
# Parameters:
#   $parser - accepted for interface compatibility; not used in this sub.
#   $reader - object providing data(), move_along($n) and
#             set_encoding($name).
#
# On the first matching signature:
#   * if the signature is a byte order mark, $reader->move_along() is
#     called with the mark's length in bytes;
#   * if the signature implies an encoding, $reader->set_encoding() is
#     called with that encoding's name.
# If nothing matches, a warning is emitted and the reader is not touched.
#
# Returns: nothing (bare return) in every case.
sub encoding_detect {
    my ($parser, $reader) = @_;

    my $data = $reader->data;

    # Undefined data can match no signature; normalise it so the
    # comparisons below never operate on undef.
    $data = '' unless defined $data;

    # Each entry: [ pattern, bytes to skip (0 = none), encoding (undef = none) ].
    # ORDER MATTERS: the first match wins, so a longer signature must come
    # before any shorter signature that is a prefix of it (for example
    # FF FE 00 00 before FF FE).
    my @signatures = (
        # 00-led signatures: UCS-4 byte order marks, then "<" / "<?" forms.
        [ qr/\A\x00\x00\xFE\xFF/, 4, 'UCS-4BE'    ],    # BO-UCS4-be
        [ qr/\A\x00\x00\xFF\xFE/, 4, 'UCS-4-2143' ],    # BO-UCS-4-2143
        [ qr/\A\x00\x00\x00\x3C/, 0, 'UCS-4BE'    ],
        [ qr/\A\x00\x00\x3C\x00/, 0, 'UCS-4-2143' ],
        [ qr/\A\x00\x3C\x00\x00/, 0, 'UCS-4-3412' ],
        [ qr/\A\x00\x3C\x00\x3F/, 0, 'UTF-16BE'   ],

        # Byte order marks starting with FF FE / FE FF / EF BB BF.
        [ qr/\A\xFF\xFE\x00\x00/, 4, 'UCS-4LE'    ],    # BO-UCS-4LE
        [ qr/\A\xFF\xFE/,         2, 'UTF-16LE'   ],
        [ qr/\A\xFE\xFF\x00\x00/, 4, 'UCS-4-3412' ],
        [ qr/\A\xFE\xFF/,         2, 'UTF-16BE'   ],
        [ qr/\A\xEF\xBB\xBF/,     3, 'UTF-8'      ],    # UTF-8 BOM

        # 3C-led signatures with interleaved zero bytes.
        [ qr/\A\x3C\x00\x00\x00/, 0, 'UCS-4LE'    ],
        [ qr/\A\x3C\x00\x3F\x00/, 0, 'UTF-16LE'   ],

        # Any other input starting with "<" (this includes "<?", "<?x" and
        # "<?xm"): accepted without calling any reader method.
        [ qr/\A\x3C/, 0, undef ],

        # Leading whitespace, then "<" followed by a byte other than "?":
        # accepted without calling any reader method.
        # NOTE(review): whitespace followed by "<?" is not matched here and
        # therefore falls through to the warning below -- confirm intended.
        [ qr/\A[\x20\x09\x0A\x0D]+\x3C[^\x3F]/, 0, undef ],

        [ qr/\A\x4C\x6F\xA7\x94/, 0, 'EBCDIC' ],
    );

    for my $signature (@signatures) {
        my ($pattern, $skip_bytes, $encoding) = @{$signature};
        next unless $data =~ $pattern;

        $reader->move_along($skip_bytes) if $skip_bytes;
        $reader->set_encoding($encoding) if defined $encoding;
        return;
    }

    warn("Unable to recognise encoding of this document");
    return;
}

1;