@echo off
:: filename: nonasc.btm  (4DOS, 4NT batch file)
::   author: Eric Pement
::     date: 1997-05-24 -- 2011-10-04
::      ver: 1.43
:: requires: getopts, od, sed
::  purpose:
::   First use is to see if there are any graphics or control
::   characters in a file or input stream. The only control chars
::   permitted are CR, LF, and TAB. 0x1a (^Z) is rejected by default;
::   with -z it is allowed only if it is the last char of the file.
::
::   If the file is plain text, exit code 0 is issued. Any graphics
::   chars (hex 80-FF) or control codes (NUL, BS, BEL, FF, ESC, DEL,
::   etc.) will be displayed for the first 2 matching lines, and an
::   exit code of 1 will be issued.
::
::   Second use is as a plain hex/ascii dump of the entire file.
::   Numeric option can control the number of output lines. Switch
::   can define TAB chars as invalid and locate them in the file.
::
::   Update switches
::     -z  permit ^Z (0x1a) when it is the last byte of the input
::     -r  treat CR (0x0d) as an invalid character
::
:: Revisions:
::   2011-10-04 - exit if no args and no piped input
::   2010-03-24 - required pgms no longer use %@search[..]
::   2008-07-09 - add inverse video highlight, used -v switch in od

setlocal
unalias *
if "%1" == "/?" .or. "%1" == "-?" .or. "%1" == "--help" goto help
:: set HILITE=ON

:: Check for required programs
:: Cannot use %@search[..] because it is not supported in TCC_LE
( which od | ffind /E"external|alias|batch" /B >NUL ) || ( echo ERROR! 'od' is NOT on the PATH! Exiting... %+ quit)
(which sed | ffind /E"external|alias|batch" /B >NUL ) || ( echo ERROR! 'sed' is NOT on the PATH! Exiting... %+ quit)

:: Parse switch options using "getopts" (free 4DOS option parser at www.pement.org)
set F0=%@truename[%0]
set chr_opts=d t r z
set num_opts=Y
call getopts.btm %[%@option[ParameterChar]]
if %? == 1 goto help
for %a in (%rev_args) do shift /%a
for %c in (%options) do gosub parse
:: End the "getopts" block.
:: Note: Any and all switches have been removed from the command tail.

if "%1" eq "" .and. %_pipe == 0 goto noargs

iff defined N .and. %N LT 1 then
   :: A numeric option cannot be zero (-0) or negative (--32)
   beep
   echo Error! Numeric option -n (%n) must be 1 or greater!
   echo Quitting ...
   goto end
endiff

iff "%dump" == "TRUE" then
   :: If -d is used and -n was not defined, dump the entire file
   :: echo DIAG: -d was used so dump is true, and N was not defined
   iff not defined N then
      :: Syntax for od (octal dump, GNU version):
      ::   -An   suppress printing offset numbers before each line
      ::   -tx1  type x = hex, 1 = 1 byte per char
      ::   -a    include ASCII equivalents below each line (same as -t a)
      ::   -N i  only read i (integer) number of bytes
      ::   -v    verbose; do not use '*' to mark line duplication
      ::   -w i  output width of i bytes/line (i defaults to 16 chars)
      :: The sed command doubles every space on the hex line so the hex
      :: codes line up with the ASCII names, then joins the ASCII line
      :: and appends a blank line.
      od -An -tx1 -a -v %$ | sed "s/ /  /g;N;s/$/\n/"
      :: od -An -tx1 -a %$ | sed "/^\*$/{g;b;};s/ /  /g;N;s/$/\n/;h"
   else
      :: echo DIAG: -d was used so dump is TRUE, and N was defined as %N
      :: Limit the output to N lines which = N * 16 bytes.
      od -An -tx1 -a -v -N %@eval[%N * 16] %$ | sed "s/ /  /g;N;s/$/\n/"
      :: od -An -tx1 -a -N %@eval[%N * 16] %$ | sed "/^\*$/{g;b;};s/ /  /g;N;s/$/\n/;h"
   endiff
else
   :: Just check the file for unprintable characters.
   :: -d was not used. Set N to 2 unless the user chooses differently
   if not defined N set N=2
   :: echo DIAG: -d was NOT used, and N is set to %N
   gosub build_sed_script
   :: Script logic:
   ::   od runs through the entire file and passes the output to sed,
   ::   which looks for forbidden characters. If there are none, the
   ::   output file _NON_ASC.DEL will be empty (0 bytes). If there are
   ::   illegal chars, display the first N lines of the file and then
   ::   delete the sed script and the temp file _NON_ASC.DEL
   ::
   od -An -tx1 -a -v %$ | sed -nf NONASC.SED >_NON_ASC.DEL
   iff %@filesize[_NON_ASC.DEL] gt 0 then
      :: If no file on command line, print "[stdin]"; else, print [filename].
      echo %@if[isfile %1,"%1",[stdin]] contains control codes or graphics characters!
      echo od -An -tx1 -a %@if[isfile %1,"%1",[stdin]]: (octdump -no_offset -hexcodes -ascii)
      :: multiply by 3 because each line of "display" is actually 3 lines:
      :: one hex, one ascii, one cosmetic blank line
      sed "%@eval[%N * 3]q" _NON_ASC.DEL | type
      del /q NONASC.SED _NON_ASC.DEL
      :: issue exit code 1
      quit 1
   else
      :: The file looks clean to me.
      echo %@if[isfile %1,"%1",[stdin]] contains only printable ASCII chars%and_no%.
      del /q NONASC.SED _NON_ASC.DEL
   endiff
endiff
goto end

:noargs
:: generate 3 audible beeps at the console
beep 800 8 2 6 800 8 2 6 800 10
echo `-----[ ERROR! No file to look for, nor any values piped from STDIN! ]-----`
echo.
:: Then show the help message below

:help
TEXT
NONASC.BTM v1.43 - Look for non-printable chars in file or input.
Valid chars include 20h-7E, TAB, CR, and LF. Control codes or graphic
chars are considered "non-ASCII". The input file is not changed.
Exit code 0 if pure ASCII, or 1 otherwise.

Usage: nonasc [-options] [filename]

Only 1 filename allowed. If filename is omitted, read from stdin.
If no switches are used, look for non-printing characters and display
the first 2 lines of non-printing chars, if any. Hits are shown in
reverse video.

Options:
  -d     Dump the input file in both hex and ASCII to the screen
         without checking for invalid characters. Works on pure binary
         files. Long files should be piped through a file pager (e.g.,
         "less"). If the -{n} switch is used, limit the output to {n}
         lines.
  -{n}   Display {n} lines of input, where {n} is an integer (default: 2)
         If -d is omitted, display {n} lines of invalid input.
         If -d is used, display {n} lines beginning at top of file.
         Numeric switch must be separate from other options! Thus,
         "-d -5" and "-7 -t" are valid, but "-d5" or "-7t" are not.
  -t     Include TAB (0x09) as invalid char. -t and -d are incompatible.
         If both are present, -t will be ignored.
  -r     Include CR (0x0D) as invalid char.
  -z     Allow Ctrl-Z (0x1A), but only at EOF.
  -?, --help   Display this help message
ENDTEXT
goto end

:: ---------------------------------------------------------------------
:: :parse - called once per switch letter returned by getopts (in %c).
:: Records each switch in an environment variable read by the main flow
:: and by :build_sed_script.
:: ---------------------------------------------------------------------
:parse
switch %c
case d
   set dump=TRUE
case t
   :: "9" is spliced into the sed character class so hex 09 is rejected
   set no_tab=9
   set and_no= (and no TABs!)
case r
   :: "d" is spliced into the sed character class so hex 0d is rejected
   set no_cr=d
case z
   set z_at_eof=TRUE
endswitch
return

:: ---------------------------------------------------------------------
:: :build_sed_script - writes NONASC.SED in the current directory.
:: The script reads "od -An -tx1 -a -v" output (alternating hex rows and
:: ASCII rows), tests each hex row for forbidden byte values, and prints
:: the offending hex row + ASCII row + a blank line, with the offending
:: hex codes wrapped in ANSI reverse-video escapes.
:: ---------------------------------------------------------------------
:build_sed_script
:: NOTE(review): the space-doubling "s" command at the end of the first
:: TEXT block is emitted as an active command, matching the dump-mode
:: pipeline above. Confirm it suits the od build in use.
TEXT >nonasc.sed
# Note that changing each space to 2 spaces on odd-numbered lines
# causes the hex codes to line up perfectly with the ASCII values
# underneath them. A blank line was added with "G" to make the
# output easier to read. The hex values a-f must be entered in
# lower-case to match the output of the od utility.
#
# NB: od output may contain only a single asterisk, which means the
# line is a duplicate of the line above it. So we use the hold space
# in sed to retain a copy of the previous line.
#
# NO LONGER NEEDED NOW THAT I KNOW ABOUT THE -v SWITCH!
#  /^\*$/{ g;      # IF asterisk in col 1, replace patt sp with hold sp
#          b;      # Now we're done. Jump to end of script.
#  };              # ELSE,
#
   s/ /  /g;       # For odd-numbered lines, change each space to 2.
                   # Converts " 23 21 2f 62 69 6e 2f 73 ..." to
                   # "  23  21  2f  62  69  6e  2f  73 ..."
ENDTEXT

:: If line has illegal hex codes (below), branch to label :sub
:: ILLEGAL: hex 00-08, 0b (VerticalTab), 0c (FF), 0e-0f
::          hex 09 (TAB) may optionally be invalid
::          hex 0d (CR)  may optionally be invalid
::          hex 0a (LF) is okay
:: -t and -r switches are set here
echo /0[0-8%no_tab%bc%no_cr%ef]/{ >> nonasc.sed

TEXT >>nonasc.sed
   s//\x1b[7m&\x1b[0m/g
   b sub
}
/1[0-9b-f]/{            # hex 10-19, 1b (ESC) through 1f; 1a is handled below
   s//\x1b[7m&\x1b[0m/g
   b sub
}
ENDTEXT

iff "%z_at_eof" == "TRUE" then
   echo NB: -z switch was used, permitting one Ctrl-Z only at EOF
   :: Permit ^Z if it is the very last byte of the file.
   :: on the last line, alert only if 0x1a is NOT the last byte
   :: The trailing "b" ends the cycle once the hex row and its ASCII row
   :: have been consumed by "N". Without it the script would fall through
   :: to "n", discard the next hex row untested, and leave later cycles
   :: starting on ASCII rows.
   :: NOTE(review): on the final row, a 7f or high-bit byte that shares
   :: the row with a valid trailing ^Z is not reported.
TEXT >>nonasc.sed
/1a/ {
   N;
   $! { s/1a/\x1b[7m&\x1b[0m/g; s/$/\n/; h;p; }
   # The last line will contain "sub", not "1a"
   $ { /sub /{ s/$/\n/;h;p; }; }
   b
}
ENDTEXT
else
   :: DEFAULT: forbid ^Z everywhere
   :: on the last line, alert if 0x1a is anywhere on the line
TEXT >>nonasc.sed
/1a/ {
   s//\x1b[7m&\x1b[0m/g
   b sub
}
ENDTEXT
endiff

TEXT >>nonasc.sed
/7f/ {                  # hex 7F (DEL)
   s//\x1b[7m&\x1b[0m/g
   b sub
}
/[89a-f][0-9a-f]/ {     # hex 80-FF (any high-bit character)
   s//\x1b[7m&\x1b[0m/g
   b sub
}
# Else, line has no forbidden characters, so:
n;                      # skip the even-numbered lines (with ASCII chars)
b;                      # branch to end of this script

: sub
N;s/$/\n/;              # Get Next line, add blank line
                        # h, copy to hold space removed
p;                      # Print the result to stdout
#
# ENDIFF
# ---end of sed script---
ENDTEXT
return

:end
:: and_no is set by -t, so it is cleared here with the other work variables
unset /q f0 chr_opts num_opts options rev_args dump n no_tab no_cr z_at_eof and_no
endlocal
quit 0