Server IP : 85.214.239.14 / Your IP : 18.191.147.146 Web Server : Apache/2.4.62 (Debian) System : Linux h2886529.stratoserver.net 4.9.0 #1 SMP Tue Jan 9 19:45:01 MSK 2024 x86_64 User : www-data ( 33) PHP Version : 7.4.18 Disable Function : pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,pcntl_unshare, MySQL : OFF | cURL : OFF | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : OFF Directory : /lib/python3/dist-packages/chardet/cli/ |
Upload File : |
"""
Script which takes one or more file paths and reports on their detected
encodings

Example::

    % chardetect somefile someotherfile
    somefile: windows-1252 with confidence 0.5
    someotherfile: ascii with confidence 1.0

If no paths are provided, it takes its input from stdin.

"""

import argparse
import sys
from typing import Iterable, List, Optional

from .. import __version__
from ..universaldetector import UniversalDetector


def description_of(
    lines: Iterable[bytes],
    name: str = "stdin",
    minimal: bool = False,
    should_rename_legacy: bool = False,
) -> Optional[str]:
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :param minimal: If true, return only the detected encoding (which may be
                    ``None`` when nothing was detected) instead of a full
                    descriptive sentence.
    :type minimal: ``bool``
    :param should_rename_legacy: Should we rename legacy encodings to
                                 their more modern equivalents?
    :type should_rename_legacy: ``bool``
    :returns: ``"<name>: <encoding> with confidence <confidence>"``,
              ``"<name>: no result"`` when no encoding was detected, or just
              the bare encoding value when ``minimal`` is set.
    """
    u = UniversalDetector(should_rename_legacy=should_rename_legacy)
    for line in lines:
        line = bytearray(line)
        u.feed(line)
        # shortcut out of the loop to save reading further - particularly
        # useful if we read a BOM.
        if u.done:
            break
    u.close()
    result = u.result
    if minimal:
        return result["encoding"]
    if result["encoding"]:
        return f'{name}: {result["encoding"]} with confidence {result["confidence"]}'
    return f"{name}: no result"


def main(argv: Optional[List[str]] = None) -> None:
    """
    Handles command line arguments and gets things started.

    Each input file opened by argparse is closed once its description has
    been printed; standard input is left open because this function does not
    own it.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description=(
            "Takes one or more file paths and reports their detected encodings"
        )
    )
    parser.add_argument(
        "input",
        help="File whose encoding we would like to determine. (default: stdin)",
        type=argparse.FileType("rb"),
        nargs="*",
        default=[sys.stdin.buffer],
    )
    parser.add_argument(
        "--minimal",
        help="Print only the encoding to standard output",
        action="store_true",
    )
    parser.add_argument(
        "-l",
        "--legacy",
        help="Rename legacy encodings to more modern ones.",
        action="store_true",
    )
    parser.add_argument(
        "--version", action="version", version=f"%(prog)s {__version__}"
    )
    args = parser.parse_args(argv)

    for f in args.input:
        # argparse.FileType hands back stdin (or its binary buffer) for "-"
        # and for the default; that stream must not be closed here.
        is_stdin = f is sys.stdin or f is getattr(sys.stdin, "buffer", None)
        try:
            if f.isatty():
                print(
                    "You are running chardetect interactively. Press "
                    "CTRL-D twice at the start of a blank line to signal the "
                    "end of your input. If you want help, run chardetect "
                    "--help\n",
                    file=sys.stderr,
                )
            print(
                description_of(
                    f, f.name, minimal=args.minimal, should_rename_legacy=args.legacy
                )
            )
        finally:
            # FileType opens every path at parse time and never closes it,
            # so release each handle as soon as we are done with it.
            if not is_stdin:
                f.close()


if __name__ == "__main__":
    main()