#! /usr/bin/perl -w
#
# psh, copyright (c) 1997 Neil Moore <neil@cs.uky.edu>
# portions copyright (c) 1994 Hal Pomeranz <pomeranz@netcom.com>
#
# This program may be distributed under the terms of the GNU General
# Public License.
#
# version 0.0.3 - 4 December 1997:
#  * Folded &first_tail into &shell_parse.  Now &parseline calls
#    &shell_parse, rather than the individual &do_* subs doing so.
#  * Unknown commands are no longer parsed as Perl.
#  * &parsefile now stops parsing upon encountering /^__END__$/.
#  * Removed the -e option: file execution is now performed on
#    the first non-option argument, with the remaining arguments
#    as members of @ARGV.
#  * Do argument processing by hand, rather than using Getopt::Mixed.
#
# version 0.0.2 - 23 November 1997:
#  * Made `set' and `export' mean the same thing as in bash.
#  * perl 5.004_04 still crashes when certain environment variables
#    are reset.  gdb says that the segfault occurs in free().
#
# version 0.0.1 - 25 August 1997:
#  * First numbered version.
#  * do_parseprompt() now handles backslashes in a somewhat normal
#    fashion.
#  * do_alias(), do_command(), and do_set() now shell_parse() their args.
#  * do_set() no longer eval()s the right-hand side of assignments.  Now
#    that we have backticks, this is not necessary.
#  * On a related note, do_setq() and the setq command are gone.
#  * Removed `prog !<foo>' -- `command <foo>=' offers the same functionality.
#
# Finally, a Perl-based shell.  This program is VERY alpha, and things
# may disappear from beneath your feet between versions.  You should
# really keep in contact with the author if you plan on using this very
# much (though I don't know why you'd want to).
#
# This program requires the following modules, available from CPAN:
#   * Term::Readline
#   * Term::Readline::Gnu
#
# It also requires other modules, which come with Perl.  Version 5.004
# or later is required.  I have only tested it with 5.004_02, so earlier
# patchlevels of 5.004 may not work.
#
# TODO:
#  * Allow more command-line options.
#  * Allow multi-line input (a single command spanning lines).
#  * Modularize the whole thing.
#  * Support job control, or at least change process groups.
#  * Support piping and redirection (this is going to be a pain to get
#    right, but will be worth it; this is the single greatest obstacle
#    to this becoming a real shell).
#  * Figure out why modifying $ENV{...} causes core dumps.
#  * Change shell_parse to use non-greedy regexps.
#  * Document it.
#  * Make it readable.
#
# If you have any suggestions, please email the author.

use Term::ReadLine;
use Carp;
use English;
use IO::File;
use Cwd;
use strict;
use vars qw($rdline $prompt $commands $descriptions $interactive $lastdir);

$interactive = 1;

# Hash of command names (keys) and associated subs (values).  subs are called
# with one parameter: the tail of the command line.
$commands = {
	'exit'      => \&do_exit,
	cd          => \&do_cd,
	set         => \&do_set,
	echo        => \&do_echo,
	prog        => \&do_prog,
	alias       => \&do_alias,
	command     => \&do_command,
	export      => \&do_export,
	type        => \&do_type,
	which       => \&do_which,
	builtin     => \&do_builtin,
	'!'         => \&do_runprog,
	':'         => \&shell_parse,
	# due to the way Perl works, you can get an eval by beginning the
	# line with `+'.
};
    
$descriptions = {
	'exit'      => "builtin function (do_exit)",
	cd          => "builtin function (do_cd)",
	set         => "builtin function (do_set)",
	echo        => "builtin function (do_echo)",
	prog        => "builtin function (do_prog)",
	alias       => "builtin function (do_alias)",
	command     => "builtin function (do_command)",
	export      => "builtin function (do_export)",
	type        => "builtin function (do_type)",
	which       => "builtin function (do_which)",
	'!'         => "builtin function (do_runprog)",
	':'         => "builtin function (shell_parse)",
};

sub interpolate {
	my $word = shift;
	
	local $WARNING = 0;
	no strict;
		
	return eval "qq($word)";
}

sub backtick_parse {                 # used for backtick evaluation
	my $code = shift;
	my $pid;
	my $result = "";
	my $sleep_count = 0;
	
	local $_;
	
	do {
		$pid = open(CHILD, "-|");
		unless (defined $pid) {
			warn "cannot fork: $!";
			die "bailing out" if $sleep_count++ > 6;
			sleep 10;
		}
	} until defined $pid;
	
        unless ($pid) {
		parseline($code);
		exit;
	} else {
		if (defined ($_ = <CHILD>)) {
			chomp;
			$result .= $_;
		}
		while (<CHILD>) {
			chomp;
			$result .= " " . $_;
		}
		close CHILD;
	}
	
	return $result;
}

# Parse a line into shell words.  This is basically Text::ParseWords::shellwords,
# except that interpolation is done for barewords and words in double quotes, and
# that backticks cause evaluation.  Quoting is also a bit different.
#
# Note that interpolation is done differently depending on whether or not the
# variable to be interpolated is in quotes.  $var is split into words, while
# "$var" is not.  The @rest and $rest variables exist to allow carry-over.
sub shell_parse {
	local($_) = join('', @_);
	
	my ($snippet, @words, $field, $rest, @rest);
	
	$rest = '';
	@rest = ();
	
	s/^\s+//;
	while ($_ ne '') {
		$field = '';
		for (;;) {
			if (@rest) {
				push @words, $field;
				$rest = pop @rest;
				push @words, @rest;
				@rest = ();
				$field = $rest;
			}
			if (s/^\"(([^\"\\]|\\.)*)\"//) {
				$snippet = interpolate $1;
			} elsif (/^\"/) {
				print STDERR "Unmatched double quote: $_\n";
				return "";
			} elsif (s/^\'(([^\'\\]|\\.)*)\'//) {
				($snippet = $1) =~ s,\\([\'\\]),$1,g;
			} elsif (/^\'/) {
				print STDERR "Unmatched single quote: $_\n";
				return "";
			} elsif (s/^\`(([^\`\\]|\\.)*)\`//) {
				local $WARNING = 0;
				no strict;
				($snippet = $1) =~ s|\\(.)|$1|g;
				$snippet = backtick_parse $snippet;
			} elsif (/^\`/) {
				print STDERR "Unmatched backtick: $_\n";
				return "";
			} elsif (s/^\\(.)//) {
				$snippet = $1;
			} elsif (s/^([^\s\\\'\"]+)//) {
				$snippet = $1;
				
				($snippet, @rest) = glob($snippet);
				
				unless (@rest) {
					local $_ = interpolate $snippet;
					($snippet, @rest) = split;
					if (!defined $snippet) {
						$snippet = "";
					}
				}
			} else {
				s/^\s+//;
				last;
			}
			
			$field .= $snippet;
		}
		push @words, $field;
	}	
	return @words;
}


# Translate backslashed prompt characters.  This sub is called with the
# backslashed character only (i.e., the backslash should not be included).
#
# The following translations are supported so far:
#   \w => current working directory.
#   \$ => ``#'' if you are root, ``$'' otherwise.
#
# Any unrecognised escape is treated at the escaped character itself (i.e.,
# `\\' becomes `\', and `\P' becomes `P'.
#
sub prompt_backslash {
	my $char=shift;

	if ($char eq "w") {
		my $cwd = cwd;
		$cwd =~ s/^$::HOME/~/;
		return $cwd;
	}

	if ($char eq "\$") {
		return $> ? "\$" : "#";
	}

	return $char;
}

# Parse a prompt string into its final version.  Handles prompt escapes
# with prompt_backslash() (q.v.).
sub do_parseprompt {
	my $pstr = shift;
	
	$pstr =~ s|\\(.)|prompt_backslash($1)|ge;

	return $pstr;
}


sub do_cd {
	my $savedir = cwd;
	my $dir = $_[-1];
	if (defined $dir) {
		if ($dir eq "-") {
			unless ($lastdir) {
				print STDERR "cd: lastdir not set.\n";
				return;
			}
			chdir $lastdir and ($lastdir=$savedir, return);
			print STDERR "cd: $lastdir: $!\n";
		} else {
			chdir $dir and ($lastdir=$savedir, return);
			print STDERR "cd: $dir: $!\n";
		}
	} else {
		# We could do chdir() with no arguments, but we want
		# to use $::HOME, not $ENV{HOME} (one of them may have
		# changed).
		chdir $::HOME and ($lastdir=$savedir, return);
		print STDERR "cd: $::HOME: $!\n";
	}
}

# Export an environment variable.  For some reason, modifying an already-
# existing environment variable core dumps (whether done here or somewhere
# else).
sub do_export {
	local $_;
		
	foreach (@_) {
		if (s/^!//) {
			delete $ENV{$_} if (exists $ENV{$_});
		}
		elsif (/^([^\d\W]\w*)=(.*)/s) {
			local $WARNING = 0;
			no strict;
			$ENV{$1} = $2;
		} elsif (/[^\d\W]\w*/) {
			no strict;
			if (defined $$_) {
				$ENV{$_} = $$_;
			}
		} else {
			print STDERR "export: `$_': not a valid identifier\n";
		}
	}
}


# Set an environment variable.
sub do_set {
	local $_;
	
	if ($#_ < 0) {
		print map { "$_=$ENV{$_}\n" } sort keys %ENV;
		return 0;
	}

	foreach (@_) {
		if (/^([^\d\W]\w*)=(.*)/s) {
			local $WARNING = 0;
			no strict;
			$$1 = $2;
		} elsif (/^(.+)=.*$/s) {
			print STDERR "set: `$1': not a valid identifier\n";
		}
	}
}


# Then, my friend, you die..
sub do_exit {
	local $WARNING = 0;
	print "exit\n";
	exit $_[-1];
}


# Print things, doing shell parsing.
sub do_echo {
#	my $tail = shift;

	print "@_\n";
}

# Mark a command as being or not being a program.  Programs are always
# executed via system().
#
# `prog foo' is the same as `alias foo=foo', except for the description
# given by `type foo'.  This will not hold true later, when alias is
# modified to treat the expansion as a psh expression, rather than a
# command.
sub do_prog {
	local $_;
	
	foreach (@_) {
		my $cname = $_;
		
		$commands->{$_} = sub {
			my @t = @_;
			do_runprog($cname, @t);
		};
		
		$descriptions->{$_} = "program";
	}
}


# Add an alias.
#
# This needs to be changed to run parseline() rather than do_runprog().
# First, though, there needs to be a way to prevent recursion.  Also,
# we need to implement the Bourne shell behaviour of alias-expanding
# the *next* word when the last character of the alias is ` '.
sub do_alias {
	local $_;
	
	foreach (@_) {
		if (/^(.+?)=(.*)$/s) {
			my $als = $2;
			$commands->{$1} = sub {
				my @t = @_;
				do_runprog(shell_parse($als), @t);
			};
			$descriptions->{$1} = "alias for `$als'";
		} else {
			if (exists($descriptions->{$_})
			    && $descriptions->{$_} =~ /^alias/) {
				$descriptions->{$_} =~ /^alias for \`(.*)\'$/;
				print "alias $_='" . $1 . "'\n";
			} else {
				print STDERR "alias: `$_': Homey don't grok that\n";
			}
		}
	}
}

# Add a command.
sub do_command {
	local $_;
	
	foreach (@_) {
		if (/^(.+?)=(.*)$/s) {
			my $rhs = $2;
			if ($rhs) {
				$commands->{$1} = sub { eval "$rhs" };
				$descriptions->{$1} = "command: `$2'";
			} else {
				delete $commands->{$1};
				delete $descriptions->{$1};
			}
		} else {
			if (exists($descriptions->{$_})
			    && $descriptions->{$_} =~ /^command/) {
				$descriptions->{$_} =~ /^command: \`(.*)\'$/;
				print "command $_='" . $1 . "'\n";
			} else {
				print STDERR "command: `$_': Homey don't grok that\n";
			}
		}
	}
}


# Change a command back to a builtin.
sub do_builtin {
	local $_;
	
      SWITCH: foreach (@_) {
	      if ($_ eq "builtin") {
		      print STDERR "psh: I'm not going to ask why you "
			  . "re-builtin `builtin'.\n";
	      }
	      if (/^(  exit
		     | cd
		     | set
		     | echo
		     | prog
		     | alias
		     | command
		     | export
		     | type
		     | which
		     | builtin
		  )$/x) {
		      eval "\$commands->{$_} = \\&do_$_";
		      $descriptions->{$_} = "builtin function (do_$_)";
	      } elsif ($_ eq "!") {
		      $commands->{$_} = \&do_runprog;
		      $descriptions->{$_} = "builtin function (do_runprog)";
	      } elsif ($_ eq ":") {
		      $commands->{$_} = \&do_shellparse;
		      $descriptions->{$_} = "builtin function (do_shellparse)";
	      } else {
		      print STDERR "psh: `$_': never was a cornflake girl "
			  .  "(perhaps you mispelled something?).\n";
	      }
      }
}

# Run a program, doing alias and variable expansion.  This is used for !,
# for programs marked with prog, for programs executed with an absolute
# or relative path, and for programs executed because the line wouldn't
# parse as Perl.
sub do_runprog {
	system @_;
}

# Describe a command -- indicates whether it is a program, alias, command,
# builtin, etc., and what it evaluates to.
sub do_type {
	local $_;
	
	my $desc;
	
	foreach (@_) {
		if (exists $descriptions->{$_}) {
			$desc = $descriptions->{$_};
			if ($desc eq "program") {
				if (my $path = find_prog($_)) {
					$desc = "program in $path";
				}
			}
		} else {			
			if (my $path = find_prog($_)) {
				$desc = "program in $path";
			}
		}

		if (defined $desc) {
			print "$_: $desc\n";
		} else {
			print STDERR "type: $_: not found\n";
		}
	}
}

# Locate programs in $PATH, and print the results.
sub do_which {
	local $_;
	
	foreach (@_) {
		if (defined (my $path = find_prog($_))) {
			print "$path\n";
		}
	}
}

# Find a program in $PATH.
sub find_prog {
	local $_;
	
	my $cmd = shift;
	foreach (split /:/, $::PATH) {
		if (-x "$_/$cmd" && -f "$_/$cmd") { return "$_/$cmd" }
	}
	return undef;
}


sub parseline {
	local $_ = shift;
	return unless $_ || $_ eq "0";
	
	if (/^!!(.*)$/s) {
		system ($1 || "/bin/sh");
		return;
	}
	
	s/^!(\S.*)/! $1/s;

	return if /^\#/;
	
	my $command = $_;
	
	my ($cmd, @tail) = shell_parse($_);
	
	if (exists $commands->{$cmd}) {
		&{$commands->{$cmd}}(@tail);
		return;
	}
	
	if (find_prog $cmd) {
		do_runprog($cmd, @tail);
		return;
	}
	
	print STDERR "Unknown command: $cmd\n";
}

sub parsefile {
	local $_;
	
	my $filename = shift;
	my $script = new IO::File;
	unless ($script->open("<$filename")) {
		print(STDERR "psh: squirrels ran away with $filename: $!\n");
		return 0;
	}
	
	while (<$script>) {
		chomp;
		last if /^__END__$/;
		parseline $_;
	}
	
	$script->close;
	return 1;
}

sub parseopts {
	while ($_ = shift @ARGV) {
		if ($_ eq "-c") {
			$interactive = 0;
			parseline(shift @ARGV);
		} else {     # first non-option argument
			$interactive = 0;
			exit !parsefile($_);
		}
	}
}


################################
# Execution begins here.
################################

# Import environment variables into our namespace
foreach (keys %ENV) {
	no strict;
	$$_ = $ENV{$_};
}

# If the prompt isn't set, set it.
if (!defined $::PS1) {
	$::PS1 = '\w\$ ';
}

# And the path.
if (!defined $::PATH) {
	$::PATH = "/bin:/usr/bin:/usr/local/bin:.";
}

parseopts();

if (!$interactive) {
	exit 0;
}

# Source the .pshrc file.  This file may contain arbitrary psh code.
if (-f "$::HOME/.pshrc") {
	parsefile "$::HOME/.pshrc";
}

# The primary ReadLine object.
$rdline = new Term::ReadLine 'psh';

# The main loop.
MAIN: while (defined ($_ = $rdline->readline(do_parseprompt($::PS1)))) {
	chomp;
	parseline $_;
}
