#!/usr/bin/perl

=begin metadata

Name: grep
Description: search for regular expressions and print
Author: Tom Christiansen, tchrist@perl.com
Author: Greg Bacon, gbacon@itsc.uah.edu
Author: Paul Grassie
License: perl

=end metadata

=cut

###
### DON'T FORGET TO UPDATE THE POD IF YOU ADD TO THE HISTORY!!!
###

# tcgrep: tom christiansen's rewrite of grep
# v1.0: Thu Sep 30 16:24:43 MDT 1993
# v1.1: Fri Oct  1 08:33:43 MDT 1993
#
# Revision by Greg Bacon <gbacon@cs.uah.edu>
# Fixed up highlighting for those of us trapped in terminfo
# implemented -f
# v1.2: Fri Jul 26 13:37:02 CDT 1996
#
# Revision by Greg Bacon <gbacon@cs.uah.edu>
# Avoid super-inefficient matching (almost twice as fast! :-)
# v1.3: Sat Aug 30 14:21:47 CDT 1997
#
# Revision by Paul Grassie <grassie@worldnet.att.net>
# Removed vestiges of Perl4, made strict
# v1.4: Mon May 18 16:17:48 EDT 1998
#
# Revision by Greg Bacon <gbacon@cs.uah.edu>
# Add fgrep functionality for PPT
# v1.5: Mon Mar  8 12:05:29 CST 1999
#
# Revision by Bill Benedetto <bbenedetto@goodyear.com>
# Added CLOSE after each file is done to reset counters
# v1.6: Sat Mar 20 23:05:35 1999
#
# Revision by Paul Johnson <paul@pjcj.net>
# Add support for bzip2 and zip files
# File names are printed by default when using -r and supplying no directory
# v1.7: Thu May 27 00:25:30 BST 1999

use strict;

use File::Basename qw(basename);
use File::Spec;
use File::Temp qw();
use Getopt::Std;

use constant EX_MATCHED => 0;
use constant EX_NOMATCH => 1;
use constant EX_FAILURE => 2;

our $VERSION = '1.019';

$| = 1;                   # autoflush output

my $Me = basename($0);
my $Errors = 0;
my $Grand_Total = 0;
my $Matches;
my $Mult = '';

my %Compress = (
	'z'   => 'zcat <',        # for uncompressing
	'gz'  => 'zcat <',
	'Z'   => 'zcat <',
	'bz2' => 'bzcat <',
	'zip' => 'unzip -c',
);

my ($opt, $matcher) = parse_args();    # get command line options and patterns
matchfile( $opt, $matcher, @ARGV );   # process files

exit(EX_FAILURE) if $Errors;
exit(EX_MATCHED) if $Grand_Total;
exit(EX_NOMATCH);

###################################

sub VERSION_MESSAGE {
	print "$Me version $VERSION\n";
	exit EX_MATCHED;
}

sub usage {
	die <<EOF;
usage: $Me [-acFgHhIiLlnqRrsTuvwxZ] [-e pattern] [-A NUM] [-B NUM] [-C NUM]
           [-m NUM] [-f pattern-file] [pattern] [file...]

Options:
	-A   show lines after each matching line
	-a   treat binary files as plain text files
	-B   show lines before each matching line
	-C   show lines of context around each matching line
	-c   give count of lines matching
	-e   expression (for exprs beginning with -)
	-F   search for fixed strings (disable regular expressions)
	-f   file with expressions
	-g   highlight matches
	-H   show filenames
	-h   hide filenames
	-I   ignore binary files
	-i   case insensitive
	-L   list filenames which do not match
	-l   list filenames matching
	-m   limit total matches per file
	-n   number lines
	-q   quiet; nothing is written to standard output
	-r   recursive on directories or dot if none
	-s   suppress errors for failed file and dir opens
	-T   trace files as opened
	-u   underline matches
	-v   invert search sense (lines that DON'T match)
	-w   word boundaries only
	-x   exact matches only
	-Z   force grep to behave as zgrep
EOF
	}

###################################
sub run_tput {
	my $arg = shift;
	my $term = $ENV{'TERM'} || 'vt100';

	open my $oldout, '>&', STDOUT or die "$Me: can't save stdout: $!\n";
	my $tmp = File::Temp->new;
	open STDOUT, '>&', $tmp or die "$Me: can't redirect stdout: $!\n";
	system 'tput', '-T', $term, $arg;
	seek $tmp, 0, 0 or die "$Me: can't rewind temporary file: $!\n";
	my $bin = <$tmp>;
	close $tmp or die "$Me: can't close temporary file: $!\n";
	open STDOUT, '>&', $oldout or die "$Me: can't restore stdout: $!\n";
	return $bin;
}

sub parse_args {
	my (%opt, $pattern, @patterns, $SO, $SE);

	my $cls_grep = sub {
		for my $pattern (@patterns) {
			$Matches++ if m/$pattern/;
		}
	};
	my $cls_grep_v = sub {
		for my $pattern (@patterns) {
			$Matches += !/$pattern/;
		}
	};
	my $cls_grep_g = sub {
		for my $pattern (@patterns) {
			$Matches += s/($pattern)/${SO}$1${SE}/g;
		}
	};
	my $cls_fgrep = sub {
		for my $pattern (@patterns) {
			$Matches++ if index($_, $pattern) != -1;
		}
	};
	my $cls_fgrep_v = sub {
		for my $pattern (@patterns) {
			$Matches++ if index($_, $pattern) == -1;
		}
	};
	my $cls_fgrep_i = sub {
		my $s = lc $_;
		for my $pattern (@patterns) {
			$Matches++ if index($s, lc $pattern) != -1;
		}
	};
	my $cls_fgrep_iv = sub {
		my $s = lc $_;
		for my $pattern (@patterns) {
			$Matches++ if index($s, lc $pattern) == -1;
		}
	};
	my $cls_fgrep_x = sub {
		my $s = $_;
		chomp $s;
		for my $pattern (@patterns) {
			$Matches++ if $s eq $pattern;
		}
	};
	my $cls_fgrep_xv = sub {
		my $s = $_;
		chomp $s;
		for my $pattern (@patterns) {
			$Matches++ if $s ne $pattern;
		}
	};
	my $cls_fgrep_xi = sub {
		my $s = lc $_;
		chomp $s;
		for my $pattern (@patterns) {
			$Matches++ if $s eq lc($pattern);
		}
	};
	my $cls_fgrep_xiv = sub {
		my $s = lc $_;
		chomp $s;
		for my $pattern (@patterns) {
			$Matches++ if $s ne lc($pattern);
		}
	};

	# multiple -e/-f options
	my $opt_f = 0;
	my @tmparg;
	while (@ARGV) {
		my $arg = shift @ARGV;
		if ($arg =~ s/\A\-e//) {
			$pattern = length($arg) ? $arg : shift(@ARGV);
			usage() unless defined $pattern;
			push @patterns, $pattern;
		}
		elsif ($arg =~ s/\A\-f//) {
			$opt_f = 1;
			my $file = length($arg) ? $arg : shift(@ARGV);
			usage() unless defined $file;
			die "$Me: $file: is a directory\n" if -d $file;
			my $fh;
			open($fh, '<', $file) or die "$Me: Can't open '$file': $!\n";
			my $line;
			while (defined($line = <$fh>)) {
				chomp $line;
				push @patterns, $line;
			}
			close $fh;

		}
		else {
			push @tmparg, $arg;
		}
	}
	@ARGV = @tmparg;

	getopts('A:aB:C:cFgHhIiLlm:nqRrsTuvwxZ', \%opt) or usage();

	$opt{'r'} ||= $opt{'R'};
	if (defined $opt{'m'} && $opt{'m'} !~ m/\A[0-9]+\z/) {
		die "$Me: invalid max count\n";
	}
	foreach my $o (qw(A B C)) {
		if (defined $opt{$o} && $opt{$o} !~ m/\A[0-9]+\z/) {
			die "$Me: bad context count for -$o\n";
		}
	}
	if (defined $opt{'C'}) {
		$opt{'A'} = $opt{'B'} = $opt{'C'};
	}
	$opt{'l'} = 0 if $opt{'L'};
	my $no_re = $opt{F} || ( $Me =~ /\bfgrep\b/ );

	if ($opt_f && scalar(@patterns) == 0) { # empty -f file allowed
		exit EX_NOMATCH;
	}
	unless (@patterns) {
		$pattern = shift @ARGV;
		usage() unless defined $pattern;
		push @patterns, $pattern;
	}
	unless ($no_re) {
		foreach $pattern (@patterns) {
			eval { 'foo' =~ /$pattern/, 1 }
				or die "$Me: bad pattern: $@\n";
		}
	}
	if ($opt{'H'}) {
		$Mult = 1;
	}
	elsif ($opt{'h'}) {
		$Mult = 0;
	}
	else {
		$Mult = $opt{'r'} || (scalar(@ARGV) > 1);
	}
	@ARGV = ($opt{'r'} ? '.' : '-') unless @ARGV;

	if ($opt{'g'} || $opt{'u'}) {    # highlight or underline
		my $terminal;

		eval {                     # try to look up escapes for stand-out
			require POSIX;         # or underline via Term::Cap
			require Term::Cap;

			my $termios = POSIX::Termios->new();
			$termios->getattr;
			my $ospeed = $termios->getospeed;

			$terminal = Tgetent Term::Cap { TERM => undef, OSPEED => $ospeed };
			};

		unless ($@) {    # if successful, get escapes for either
			local $^W = 0;    # stand-out (-g) or underlined (-u)
			( $SO, $SE ) =
				$opt{'g'}
				? ( $terminal->Tputs('so'), $terminal->Tputs('se') )
				: ( $terminal->Tputs('us'), $terminal->Tputs('ue') );
			}
		else {                # if use of Term::Cap fails,
			if ($opt{'g'}) {
				$SO = run_tput('smso');
				$SE = run_tput('rmso');
			}
			else {
				$SO = run_tput('smul');
				$SE = run_tput('rmul');
			}
		}
	}

	if ($no_re) {
		if ($opt{'g'} || $opt{'u'} || $opt{'w'}) {
			die "$Me: -g, -u and -w are incompatible with -F\n";
		}
		if ($opt{'x'}) { # exact match
			if ($opt{'i'}) {
				$matcher = $opt{'v'} ? $cls_fgrep_xiv : $cls_fgrep_xi;
			}
			else { # case sensitive
				$matcher = $opt{'v'} ? $cls_fgrep_xv : $cls_fgrep_x;
			}
		}
		else { # regular match
			if ($opt{'i'}) {
				$matcher = $opt{'v'} ? $cls_fgrep_iv : $cls_fgrep_i;
			}
			else { # case sensitive
				$matcher = $opt{'v'} ? $cls_fgrep_v : $cls_fgrep;
			}
		}
		return (\%opt, $matcher);
	}

	# mumble mumble DeMorgan mumble mumble
	if ( $opt{v} ) {
		@patterns = join '|', map "(?:$_)", @patterns;
		}

	if ( $opt{i} ) {
		@patterns = map {"(?i)$_"} @patterns;
		}

	$opt{w}   && ( @patterns = map { '(?:\b|(?!\w))' . $_ . '(?:\b|(?<!\w))' } @patterns );
	$opt{'x'} && ( @patterns = map {"^$_\$"} @patterns );
	$opt{'g'} ||= $opt{'u'};

	foreach (@patterns) {s(/)(\\/)g}

	if    ($opt{'g'}) { $matcher = $cls_grep_g; }
	elsif ($opt{'v'}) { $matcher = $cls_grep_v; }
	else              { $matcher = $cls_grep;   }

	return ( \%opt, $matcher );
	}

###################################

sub matchfile {
	$opt     = shift;    # reference to option hash
	$matcher = shift;    # reference to matching sub

	my ( $file, $total, $name );
	local ($_);
	$total = 0;

FILE: while ( defined( $file = shift(@_) ) ) {
		my $is_binary = 0;
		my $compressed = 0;

		if ( $file eq '-' ) {
			$name = '<STDIN>';
			}
		elsif ( -d $file ) {
			if ( -l $file && @ARGV != 1 ) {
				warn qq($Me: "$file" is a symlink to a directory\n)
					if $opt->{T};
				next FILE;

				}
			if ( !$opt->{r} ) {
				warn qq($Me: "$file" is a directory\n);
				next FILE;
				}
			my $dh;
			unless (opendir $dh, $file) {
				unless ( $opt->{'s'} ) {
					warn "$Me: can't opendir $file: $!\n";
					$Errors++;
					}
				next FILE;
				}
			my @list;
			for (readdir $dh) {
				next if $_ eq '.' or $_ eq '..';
				push @list, File::Spec->catfile($file, $_);
				}
			closedir $dh;
			matchfile( $opt, $matcher, @list );    # process files
			next FILE;
			}
		else {
			$name = $file;
			unless ( -e $file ) {
				warn qq($Me: "$file" does not exist\n) unless $opt->{'s'};
				$Errors++;
				next FILE;
				}
			unless ( -f $file || $opt->{a} ) {
				warn qq($Me: skipping non-plain file "$file"\n) if $opt->{T};
				next FILE;
				}

			my ($ext) = $file =~ /\.([^.]+)$/;
			if ($opt->{'Z'} && defined $ext && exists $Compress{$ext}) {
				$file       = "$Compress{$ext} $file |";
				$compressed = 1;
				}
			$is_binary = 1 if -B $file;
			}

		warn "$Me: checking $name\n" if $opt->{'T'};

		my $fh;
		if ( $file eq '-' ) {
			$fh = *STDIN;
			}
		else {
			my $ok;
			if ($compressed) {
				$ok = open $fh, $file;    # pipe
				}
			else {
				$ok = open $fh, '<', $file;
				}
			if (!$ok) {
				warn "$Me: $file: $!\n" unless $opt->{'s'};
				$Errors++;
				next FILE;
				}
			if ($is_binary) {
				next FILE if $opt->{'I'};
				binmode $fh;
				}
			}

		$total = $Matches = 0;
		my $ctx_a = 0;
		my @ctx_b;

	LINE: while (<$fh>) {
			my $ctxb_dump = 0;
			if (defined $opt->{'m'}) { # maximum may be zero
				last LINE if $total >= $opt->{'m'} && $ctx_a == 0;
			}
			$Matches = 0;

			##############
			&{$matcher}();    # do it! (check for matches)
			##############

			if (!$Matches && defined($opt->{'B'})) {
				push @ctx_b, $_;
				shift @ctx_b if scalar(@ctx_b) > $opt->{'B'};
			}
			if ($ctx_a > 0) {
				$ctx_a--; # show context line
			} elsif (!$Matches) {
				next LINE;
			}

			if ($Matches && @ctx_b) {
				$ctxb_dump = 1;
				my $n = $. - $opt->{'B'};
				foreach my $bline (@ctx_b) {
					print($n++, '-') if $opt->{'n'};
					print $bline;
				}
				@ctx_b = ();
			}

			$total += $Matches;
			last FILE if $opt->{'q'}; # single match for all files
			last LINE if $opt->{'L'}; # one match is enough

			if (defined $opt->{'A'} && $Matches) {
				unless (defined($opt->{'m'}) && $opt->{'m'} < $total) {
					$ctx_a = $opt->{'A'};
				}
			}
			if ($opt->{'l'}) {
				print $name, "\n";
				last LINE;
			}
			if ($is_binary && !$opt->{'a'}) {
				warn "$Me: $file: binary file matches\n";
				last LINE; # single match for binfile unless -a
			}
			unless ($opt->{'c'}) {
				print($name, ':') if $Mult;
				if ($opt->{'n'}) {
					print $.;
					if ($Matches) {
						print ':';
					} else {
						print '-';
					}
				}
				print $_;
			}
			if ($ctx_a == 0 && !$Matches) {
				print "--\n";
			}
			if ($ctxb_dump && $ctx_a == 0) {
				print "--\n";
			}
			}
			close $fh;
		}
	continue {
		if ($opt->{'c'}) {
			print($name, ':') if $Mult;
			print $total, "\n";
		}
		if ($opt->{'L'} && !$total) {
			print $name, "\n";
		}
		}
	$Grand_Total += $total;
	}

__END__

=pod

=head1 NAME

grep - search for regular expressions and print

=head1 SYNOPSIS

    grep [-acFgHhIiLlnqRrsTuvwxZ] [-e pattern] [-A NUM] [-B NUM] [-C NUM]
         [-m NUM] [-f pattern-file] [pattern] [file ...]

=head1 DESCRIPTION

B<grep> searches for lines in files
that satisfy the criteria specified by the user-supplied patterns.
Because this B<grep> is a Perl program, the user has full access to
Perl's rich regular expression engine.  See L<perlre>.

The first argument after the options (assuming the user did not specify
the B<-e> option or the B<-f> option) is taken as I<pattern>.
If the user does not supply a list of file or directory names to
search, B<tcgrep> will attempt to search its standard input.

With no arguments, B<grep> will output its option list and exit.

=head1 OPTIONS

The following options are accepted:

=over 4

=item B<-m> I<NUM>

Display a maximum of NUM matches per file.

=item B<-A> I<NUM>

Display NUM lines of context after each matching line.

=item B<-a>

List matching lines from binary files as if they were plain text files.

=item B<-B> I<NUM>

Display NUM lines of context before each matching line.

=item B<-C> I<NUM>

Display NUM lines of context surrounding each matching line.

=item B<-c>

Output only the count of the matching lines.

=item B<-e> I<pattern>

Treat I<pattern> as a pattern.  This option is most useful when
I<pattern> starts with a C<-> and the user wants to avoid confusing
the option parser.

The B<-f> option supercedes the B<-e> option.

=item B<-F>

B<fgrep> mode.  Disable regular expressions and perform Boyer-Moore
searches.  (Whether it lives up to the 'f' in B<fgrep> is another
issue).

=item B<-f> I<pattern-file>

Treat I<pattern-file> as a newline-separated list of patterns to use
as search criteria.

the B<-f> option supercedes the B<-e> option.

=item B<-g>

Highlight matches.  This option causes B<grep> to attempt to use
your terminal's stand-out (emboldening) functionality to highlight
those portions of each matching line that actually
triggered the match.  This feature is very similar to the way the
less(1) pager highlights matches.  See also B<-u>.

=item B<-H>

Always include filename headers for matching lines.

=item B<-h>

Hide filenames.  Only print matching lines.

=item B<-I>

Ignore binary files.

=item B<-i>

Ignore case while matching.  This means, for example, that the pattern
C<unix> would match C<unix> as well as C<UniX> (plus the other fourteen
possible capitalizations).  This corresponds to the C</i> Perl regular
expression switch.  See L<perlre>.

=item B<-L>

List files which do no match any pattern.  This option takes precedence
over B<-l>.

=item B<-l>

List files containing matches.  This option tells B<grep> not to
print any matches but only the names of the files containing matches.
This option implies the B<-1> option.

=item B<-n>

Number lines.  Before outputting a given match, B<grep>
will first output its line corresponding to the
value of the Perl magic scalar $. (whose documentation is in L<perlvar>).

=item B<-s>

Suppress diagnostic messages to the standard error.

=item B<-R> and B<-r>

Recursively scan directories.  This option causes B<grep> to
descend directories in a left-first, depth-first manner and search
for matches in the files of each directory it encounters.  The
presence of B<-r> implies a file argument of F<.>, the current
directory, if the user does not provide filenames on the command line.
See L<"EXAMPLES">.

=item B<-q>

Quiet mode.  Do not write to the standard output.  This option would
be useful from a shell script, for example, if you are only interested
in whether or not there exists a match for a pattern.

=item B<-T>

Trace files as processed.  This causes B<grep> to send diagnostic
messages to the standard error when skipping symbolic links to directories,
when skipping directories because the user did not give the B<-r> switch,
when skipping non-plain files (see L<perlfunc/-f>),
when skipping non-text files (see L<perlfunc/-T>), and
when opening a file for searching

=item B<-u>

Underline matches.  This option causes B<grep> to attempt to use
your terminal's underline functionality to underline those portions of
each matching line that actually triggered the match.
See also B<-H>.

=item B<-v>

Invert the sense of the match, i.e. print those lines
that do B<not> match.  When using this option in conjunction with B<-f>,
keep in mind that the entire set of patterns are grouped together in
one pattern for the purposes of negation.  See L<"EXAMPLES">.

=item B<-w>

Matches must start and end at word boundaries.  This is currently
implemented by surrounding each pattern with a pair of C<\b>, the
Perl regular expression word boundary metasequence.  See L<perlre>
for the precise definition of C<\b>.

=item B<-x>

Exact matches only.  The pattern must match the entire line.

=item B<-Z>

Operate in zgrep mode.  Compressed file arguments are decompressed before
searching for the pattern.

=back

=head1 ENVIRONMENT

The TERM variable is used for output formatting in the -g and -u options.

=head1 EXAMPLES

Search all files under F</etc/init.d> for a particular pattern:

    % grep -r tcgrep /etc/init.d

Use of B<-v> and B<-f> options in conjunction with one another:

    % cat fruits
    pomme
    banane
    poire
    % cat pats
    pomme
    poire
    % grep -vf pats fruits
    banane

=head1 TODO

=over 4

=item *

Add more cool examples. :-)

=item *

Perhaps allow the user to provide an exclusion pattern for skipping over
files whose names match the pattern.

=back

=head1 AUTHOR

B<tcgrep> was written by Tom Christiansen with updates by Greg Bacon
and Paul Grassie.

=head1 COPYRIGHT and LICENSE

Copyright (c) 1993-1999. Tom Christiansen.

This program is free and open software. You may use, copy, modify, distribute,
and sell this program (and any modified variants) in any way you wish,
provided you do not restrict others from doing the same.

=cut
