#! /usr/bin/perl -w # # checkbashisms.perl # # Version: 2.0.0.2 # Date: 30th January 2011 # # (C) Copyright 1998-2003 Richard Braakman, Josip Rodin and Julian Gilbey # Additional programming by Mark Hobley # # This script is based on source code taken from the lintian project # # This program can be redistributed under the terms of version 2 of the # GNU General Public Licence as published by the Free Software Foundation # use strict; use Getopt::Long; sub init_hashes; (my $progname = $0) =~ s|.*/||; my $usage = <<"EOF"; Usage: $progname [-n] [-f] [-x] script ... or: $progname --help or: $progname --version This script performs basic checks for the presence of bashisms in /bin/sh scripts. EOF my $version = <<"EOF"; This is $progname version 2.0.0.1 (C) Copyright 1998-2003 Richard Braakman, Josip Rodin and Julian Gilbey Additional programming by Mark Hobley EOF my ($opt_echo, $opt_force, $opt_extra, $opt_posix); my ($opt_help, $opt_version); ## ## handle command-line options ## $opt_help = 1 if int(@ARGV) == 0; GetOptions("help|h" => \$opt_help, "version|v" => \$opt_version, "newline|n" => \$opt_echo, "force|f" => \$opt_force, "extra|x" => \$opt_extra, "posix|p" => \$opt_posix, ) or die "Usage: $progname [options] filelist\nRun $progname --help for more details\n"; if ($opt_help) { print $usage; exit 0; } if ($opt_version) { print $version; exit 0; } $opt_echo = 1 if $opt_posix; my $status = 0; my $makefile = 0; my (%bashisms, %string_bashisms, %singlequote_bashisms); my $LEADIN = qr'(?:(?:^|[`&;(|{])\s*|(?:if|then|do|while|shell)\s+)'; init_hashes; foreach my $filename (@ARGV) { my $check_lines_count = -1; if (!$opt_force) { $check_lines_count = script_is_evil_and_wrong($filename); } if ($check_lines_count == 0 or $check_lines_count == 1) { warn "script $filename does not appear to be a /bin/sh script; skipping\n"; next; } if ($check_lines_count != -1) { warn "script $filename appears to be a shell wrapper; only checking the first " . 
"$check_lines_count lines\n";
    }

    # Open the script on the bareword handle C; on failure record the
    # error in the exit status and move on to the next file.
    unless (open C, '<', "$filename") {
        warn "cannot open script $filename for reading: $!\n";
        $status |= 2;
        next;
    }

    # Per-file scanner state.
    my $cat_string = "";          # heredoc terminator currently awaited
    my $cat_indented = 0;         # set when the heredoc was opened with <<-
    my $quote_string = "";        # quote character of an open multi-line string
    my $last_continued = 0;       # makefile: previous line ended in a backslash
    my $continued = 0;            # makefile: this line ends in a backslash
    my $found_rules = 0;          # makefile: a rule line has been seen
    my $buffered_orig_line = "";  # original text of pending continuation lines
    my $buffered_line = "";       # processed text of pending continuation lines

    # Read the script line by line from C.  The readline operator is
    # required here: a bare "while ()" never reads anything and loops
    # forever with $_ undefined.
    while (<C>) {
        # For shell wrappers only the first $check_lines_count lines are
        # examined (-1 means "check everything").
        next unless ($check_lines_count == -1 or $. <= $check_lines_count);

        if ($. == 1) { # This should be an interpreter line
            if (m,^\#!\s*(\S+),) {
                my $interpreter = $1;

                # Rebuild the pattern tables whenever we switch between
                # makefile mode and shell mode.
                if ($interpreter =~ m,/make$,) {
                    init_hashes if !$makefile++;
                    $makefile = 1;
                } else {
                    init_hashes if $makefile--;
                    $makefile = 0;
                }
                next if $opt_force;

                if ($interpreter !~ m,/(sh|ash|hsh|posh)$,) {
                    warn "script $filename does not appear to be a /bin/sh script\n";
                }
            } else {
                warn "script $filename does not appear to have a \#! interpreter line\n";
            }
        }

        chomp;
        my $orig_line = $_;

        # We want to remove end-of-line comments, so need to skip
        # comments that appear inside balanced pairs
        # of single or double quotes

        # Remove comments in the "quoted" part of a line that starts
        # in a quoted block? The problem is that we have no idea
        # whether the program interpreting the block treats the
        # quote character as part of the comment or as a quote
        # terminator. We err on the side of caution and assume it
        # will be treated as part of the comment.
        # s/^(?:.*?[^\\])?$quote_string(.*)$/$1/ if $quote_string ne "";

        # skip comment lines
        if (m,^\s*\#, && $quote_string eq '' && $buffered_line eq '' && $cat_string eq '') {
            next;
        }

        # Remove quoted strings so we can more easily ignore comments
        # inside them
        s/(^|[^\\](?:\\\\)*)\'(?:\\.|[^\\\'])+\'/$1''/g;
        s/(^|[^\\](?:\\\\)*)\"(?:\\.|[^\\\"])+\"/$1""/g;

        # If the remaining string contains what looks like a comment,
        # eat it. In either case, swap the unmodified script line
        # back in for processing.
if (m/(?:^|[^[\\])[\s\&;\(\)](\#.*$)/) { $_ = $orig_line; s/\Q$1\E//; # eat comments } else { $_ = $orig_line; } # Handle line continuation if (!$makefile && $cat_string eq '' && m/\\$/) { chop; $buffered_line .= $_; $buffered_orig_line .= $orig_line . "\n"; next; } if ($buffered_line ne '') { $_ = $buffered_line . $_; $orig_line = $buffered_orig_line . $orig_line; $buffered_line =''; $buffered_orig_line =''; } if ($makefile) { $last_continued = $continued; if (/[^\\]\\$/) { $continued = 1; } else { $continued = 0; } # Don't match lines that look like a rule if we're in a # continuation line before the start of the rules if (/^[\w%-]+:+\s.*?;?(.*)$/ and !($last_continued and !$found_rules)) { $found_rules = 1; $_ = $1 if $1; } # Fixes for makefiles by Raphael Geissert last if m%^\s*(override\s|export\s)?\s*SHELL\s*:?=\s*(/bin/)?bash\s*%; # Remove "simple" target names s/^[\w%.-]+(?:\s+[\w%.-]+)*::?//; s/^\t//; s/(?|<|;|\Z)/ and m/$LEADIN(\.\s+[^\s;\`:]+\s+([^\s;]+))/) { if ($2 =~ /^(\&|\||\d?>|<)/) { # everything is ok ; } else { $found = 1; $match = $1; $explanation = "sourced script with arguments"; output_explanation($filename, $orig_line, $explanation); } } # Remove "quoted quotes". They're likely to be inside # another pair of quotes; we're not interested in # them for their own sake and removing them makes finding # the limits of the outer pair far easier. $line =~ s/(^|[^\\\'\"])\"\'\"/$1/g; $line =~ s/(^|[^\\\'\"])\'\"\'/$1/g; while (my ($re,$expl) = each %singlequote_bashisms) { if ($line =~ m/($re)/) { $found = 1; $match = $1; $explanation = $expl; output_explanation($filename, $orig_line, $explanation); } } my $re='(?); } } # $cat_line contains the version of the line we'll check # for heredoc delimiters later. Initially, remove any # spaces between << and the delimiter to make the following # updates to $cat_line easier. 
my $cat_line = $line; $cat_line =~ s/(<\<-?)\s+/$1/g; # Ignore anything inside single quotes; it could be an # argument to grep or the like. $line =~ s/(^|[^\\\"](?:\\\\)*)\'(?:\\.|[^\\\'])+\'/$1''/g; # As above, with the exception that we don't remove the string # if the quote is immediately preceeded by a < or a -, so we # can match "foo <<-?'xyz'" as a heredoc later # The check is a little more greedy than we'd like, but the # heredoc test itself will weed out any false positives $cat_line =~ s/(^|[^<\\\"-](?:\\\\)*)\'(?:\\.|[^\\\'])+\'/$1''/g; $re='(?); } } while (my ($re,$expl) = each %string_bashisms) { if ($line =~ m/($re)/) { $found = 1; $match = $1; $explanation = $expl; output_explanation($filename, $orig_line, $explanation); } } # We've checked for all the things we still want to notice in # double-quoted strings, so now remove those strings as well. $line =~ s/(^|[^\\\'](?:\\\\)*)\"(?:\\.|[^\\\"])+\"/$1""/g; $cat_line =~ s/(^|[^<\\\'-](?:\\\\)*)\"(?:\\.|[^\\\"])+\"/$1""/g; while (my ($re,$expl) = each %bashisms) { if ($line =~ m/($re)/) { $found = 1; $match = $1; $explanation = $expl; output_explanation($filename, $orig_line, $explanation); } } # Only look for the beginning of a heredoc here, after we've # stripped out quoted material, to avoid false positives. if ($cat_line =~ m/(?:^|[^<])\<\<(\-?)\s*(?:[\\]?(\w+)|[\'\"](.*?)[\'\"])/) { $cat_indented = ($1 && $1 eq '-')? 1 : 0; $cat_string = $2; $cat_string = $3 if not defined $cat_string; } } } warn "error: $filename: Unterminated heredoc found, EOF reached. Wanted: <$cat_string>\n" if ($cat_string ne ''); warn "error: $filename: Unterminated quoted string found, EOF reached. Wanted: <$quote_string>\n" if ($quote_string ne ''); warn "error: $filename: EOF reached while on line continuation.\n" if ($buffered_line ne ''); close C; } exit $status; sub output_explanation { my ($filename, $line, $explanation) = @_; warn "possible bashism in $filename line $. 
($explanation):\n$line\n";
    $status |= 1;
}

# Detects files that merely look like /bin/sh scripts: wrappers that
# re-exec themselves under another interpreter, and dpatch files.
#
# Parameter:
#   $filename - path of the script to inspect.
#
# Returns:
#   -1  if the file cannot be opened or nothing suspicious was found;
#   otherwise ($. - 1) for the line that matched, i.e. the number of
#   lines that precede it.  The caller skips the file when this is 0 or 1
#   and otherwise checks only that many leading lines.
#
# Lines starting with '#' and empty lines are ignored; scanning stops
# once more than 55 other lines have been examined.
sub script_is_evil_and_wrong {
    my ($filename) = @_;
    my $ret = -1;

    # lintian's version of this function aborts if the file
    # can't be opened, but we simply return as the next
    # test in the calling code handles reporting the error
    # itself
    open(my $in, '<', $filename) or return $ret;

    my $i = 0;
    my $var = "0";          # name of the variable holding $0 ("0" itself by default)
    my $backgrounded = 0;   # set once a "foo $0 $@ &" style line has been seen
    local $_;

    # The readline operator is essential: a bare "while ()" would loop
    # forever without ever reading a line of the script.
    #
    # In the patterns below the literal "{" of the ${1+...} expansion is
    # written as \{ because newer perls warn about (or reject) an
    # unescaped literal left brace in a regex; it matches the same text.
    while (<$in>) {
        chomp;
        next if /^#/o;
        next if /^$/o;
        last if (++$i > 55);

        if (m~
            # the exec should either be "eval"ed or a new statement
            (^\s*|\beval\s*[\'\"]|(;|&&|\b(then|else))\s*)

            # eat anything between the exec and $0
            exec\s*.+\s*

            # optionally quoted executable name (via $0)
            .?\$$var.?\s*

            # optional "end of options" indicator
            (--\s*)?

            # Match expressions of the form '${1+$@}', '${1:+"$@"',
            # '"${1+$@', "$@", etc where the quotes (before the dollar
            # sign(s)) are optional and the second (or only if the $1
            # clause is omitted) parameter may be $@ or $*.
            #
            # Finally the whole subexpression may be omitted for scripts
            # which do not pass on their parameters (i.e. after re-execing
            # they take their parameters (and potentially data) from stdin
            .?(\$\{1:?\+.?)?(\$(\@|\*))?~x) {
            $ret = $. - 1;
            last;
        } elsif (/^\s*(\w+)=\$0;/) {
            # The script copies $0 into a variable; look for that name
            # in later exec lines instead.
            $var = $1;
        } elsif (m~
            # Match scripts which use "foo $0 $@ &\nexec true\n"
            # Program name
            \S+\s+

            # As above
            .?\$$var.?\s*
            (--\s*)?
            .?(\$\{1:?\+.?)?(\$(\@|\*))?.?\s*\&~x) {
            $backgrounded = 1;
        } elsif ($backgrounded and m~
            # the exec should either be "eval"ed or a new statement
            (^\s*|\beval\s*[\'\"]|(;|&&|\b(then|else))\s*)
            exec\s+true(\s|\Z)~x) {
            $ret = $. - 1;
            last;
        } elsif (m~\@DPATCH\@~) {
            $ret = $. - 1;
            last;
        }
    }
    close $in;
    return $ret;
}

sub init_hashes {
    my $LEADIN = qr'(?:(^|[`&;(|{])\s*|(if|then|do|while|shell)\s+)';
    %bashisms = (
        qr'(?:^|\s+)function \w+(\s|\(|\Z)' => q<'function' is useless>,
        $LEADIN . 
qr'select\s+\w+' => q<'select' is not portable>, qr'(test|-o|-a)\s*[^\s]+\s+==\s' => q, qr'\[\s+[^\]]+\s+==\s' => q, qr'\s\|\&' => q, qr'[^\\\$]\{([^\s\\\}]*?,)+[^\\\}\s]*\}' => q, qr'\{\d+\.\.\d+\}' => q, qr'(?:^|\s+)\w+\[\d+\]=' => q, $LEADIN . qr'read\s+(?:-[a-qs-zA-Z\d-]+)' => q, $LEADIN . qr'read\s*(?:-\w+\s*)*(?:\".*?\"|[\'].*?[\'])?\s*(?:;|$)' => q, $LEADIN . qr'echo\s+(-n\s+)?-n?en?\s' => q, $LEADIN . qr'exec\s+-[acl]' => q, $LEADIN . qr'let\s' => q, qr'(? q<'((' should be '$(('>, qr'(?:^|\s+)(\[|test)\s+-a' => q, qr'\&>' => qword 2\>&1>, qr'(<\&|>\&)\s*((-|\d+)[^\s;|)}`&\\\\]|[^-\d\s]+(? qword 2\>&1>, $LEADIN . qr'kill\s+-[^sl]\w*' => q, $LEADIN . qr'trap\s+["\']?.*["\']?\s+.*[1-9]' => q, $LEADIN . qr'trap\s+["\']?.*["\']?\s+.*ERR' => q, qr'\[\[(?!:)' => q, qr'/dev/(tcp|udp)' => q, $LEADIN . qr'alias\s' => q, $LEADIN . qr'unalias\s' => q, $LEADIN . qr'builtin\s' => q, $LEADIN . qr'caller\s' => q, $LEADIN . qr'complete\s' => q, $LEADIN . qr'compgen\s' => q, $LEADIN . qr'declare\s' => q, $LEADIN . qr'dirs(\s|\Z)' => q, $LEADIN . qr'disown\s' => q, $LEADIN . qr'enable\s' => q, $LEADIN . qr'export\s+-[^p]' => q, $LEADIN . qr'export\s+.+=' => q, $LEADIN . qr'mapfile\s' => q, $LEADIN . qr'readarray\s' => q, $LEADIN . qr'readonly\s+-[af]' => q, $LEADIN . qr'(push|pop)d(\s|\Z)' => q<(push|pop)d>, $LEADIN . qr'set\s+-[BHT]+' => q, $LEADIN . qr'shopt(\s|\Z)' => q, $LEADIN . qr'suspend\s' => q, $LEADIN . qr'time\s' => q