#!/usr/bin/perl
require 5.003;
#
# Copyright (c) 1998 Gabor Egressy, gabor@vmunix.com
# All rights reserved. All wrongs reversed.
# This program is free software; you can redistribute
# and/or modify it under the same terms as Perl itself.
#
use Getopt::Std;
use strict;
use vars qw( %opts $lines $page $cr );

# subroutines
sub main_sub;
sub read_file;
sub reformat;
sub get_length;
sub justify;
sub output_line;
sub check_lines;
sub rem_lead;

# Reduce $0 to the bare program name so messages do not show the full path.
my @path = split /\// , $0;
$0 = $path[ $#path ];

# Short usage text shared by the error paths below.  The flag list mirrors
# the getopts specification (there is no -a flag).
my $usage = "Usage:\t$0 [-h]\n" .
    "$0 [-FIMPSTbcefjps] [-i num] [-m num] [-n num] [-t num] [-w num] [file]\n";

# Parse the command line; getopts itself reports the offending flag.
# The print must be terminated before exit: if exit is part of print's
# argument list the program terminates before anything is printed.
if( ! getopts "FIMPSTbcefhi:jm:n:pst:w:" , \%opts ) {
    print $usage;
    exit;
}

# -h is only valid on its own.
if( exists $opts{'h'} && keys %opts > 1 ) {
    print "-h cannot be be combined with other flags, it\'s used for help\n";
    print $usage;
    exit;
}

# Full help text.
if( exists $opts{'h'} ) {
    print <<"END";
This program is a significantly more powerful version of the fold
utility with a lot more functionality. In fact, it is more of a
formatting utility. If multiple files are specified on the command
line, an empty line is inserted between each. Can justify text, format
line by line, by paragraph or the entire input as one paragraph. Print
name of file, page number. Add carriage returns, form feed. Do block
indent, specify left margin width. Squeeze out extra lines, squeeze out
extra spaces. Add indentation to first line of paragraph. See the
different flags to get a better idea. Output always goes to stdout
unless input files are specified on the command line and -I flag is
used. This paragraph was formatted with the -pj options of this
utility. Some options cannot be used with others.

Usage: $0 [-FIMPSTbcefjps] [-i num] [-m num] [-n num] [-t num] [-w num] [file]
  -F    Print a formfeed after each page; page size is specified with -n
  -I    In place edit the file; only works if files are specified on
        the command line; no need to redirect output
  -M    Format in merge mode; the entire input is considered to be one
        paragraph; all lines are merged into a single paragraph
        exclusive of -p
  -P    Print the page number after every n - 2 lines of text; number
        of lines is specified with -n
  -S    Squeeze out extra empty lines
  -T    Do not expand tabs; by default tabs are expanded according to
        -t tabstops
  -b    Block indent, keeps the same indent on every line as the first
  -c    Force a carriage return; useful when printing to a printer, or
        want to convert from Unix to DOS
  -e    A paragraph is delimited by an empty line only; this also turns
        on -p option for paragraph formatting; see -p
  -f    Print the name of the file; only works if input isn't read from
        stdin
  -in   Set first line indentation to 'n' spaces
  -j    Justify the right side to align at -w characters; cannot be
        used with -T or -w0; inserts extra interstitial(between words)
        space to expand the line
  -mn   Set left margin to 'n' spaces; default is 0
  -nn   Set page size to 'n'; default is 59; minimum is 24; default
        should be good for a printer
  -p    Format in paragraph mode; paragraph defined as a bunch of lines
        of text followed by an empty line or a line that begins with
        white space(indented line), whichever comes first; exclusive
        of -M; see -e
  -s    Squeeze out extra interstitial white space(between words);
        except at start of line
  -tn   Set tabstops to 'n'; default is 8
  -wn   Set text width to 'n'; default is 72; if -w0 is specified then
        lines will not be wrapped; -w0 cannot be used with -j option
END
    exit;
}

# In-place editing needs a named file to replace.
if( exists $opts{'I'} && @ARGV == 0 ) {
    print "-I cannot be used when reading from stdin\n";
    exit;
}

# Merge mode treats all input as one paragraph, so it conflicts with the
# paragraph modes.
if( exists $opts{'M'} && ( exists $opts{'p'} || exists $opts{'e'} ) ) {
    print "-M and (-p or -e) are exclusive\nUse -h to get help\n";
    exit;
}
# -t: tab stop width, a positive integer (default 8).
if( exists $opts{'t'} && ( $opts{'t'} !~ /^\d+$/ || $opts{'t'} == 0 ) ) {
    print "-t takes an integer argument; argument must be greater than 0\n";
    exit;
}
$opts{'t'} = 8 unless exists $opts{'t'};

# -w: text width, a non-negative integer (default 72; 0 disables wrapping).
if( exists $opts{'w'} && $opts{'w'} !~ /^\d+$/ ) {
    print "-w takes an integer argument\n";
    exit;
}
$opts{'w'} = 72 unless exists $opts{'w'};

# -n: page size, an integer of at least 24 (default 59).
if( exists $opts{'n'} && ( $opts{'n'} !~ /^\d+$/ || $opts{'n'} < 24 ) ) {
    print "-n takes an integer argument; argument must be greater than 23\n";
    exit;
}
$opts{'n'} = 59 unless exists $opts{'n'};

# -m: left margin, a non-negative integer (default 0).
if( exists $opts{'m'} && $opts{'m'} !~ /^\d+$/ ) {
    print "-m takes an integer argument\n";
    exit;
}
$opts{'m'} = 0 unless exists $opts{'m'};

# -i: first line indentation; no default, its presence is tested later.
if( exists $opts{'i'} && $opts{'i'} !~ /^\d+$/ ) {
    print "-i takes an integer argument\n";
    exit;
}

# Cross-option consistency checks.
if( exists $opts{'j'} && ( exists $opts{'T'} || $opts{'w'} == 0 ) ) {
    print "The -j option is exclusive of -T or -w0\nUse -h to get help\n";
    exit;
}
if( $opts{'w'} > 0 && $opts{'m'} >= $opts{'w'} ) {
    print "-m must be less than -w, unless -w is 0\n";
    exit;
}
if( exists $opts{'i'} && $opts{'w'} > 0 &&
    $opts{'m'} + $opts{'i'} >= $opts{'w'} ) {
    print "-m + -i must be less than -w, unless -w is 0\n";
    exit;
}

# Implied options: -e turns on paragraph mode, -j turns on squeezing.
$opts{'p'} = 1 if exists $opts{'e'} && ! exists $opts{'p'};
$opts{'s'} = 1 if exists $opts{'j'};

$| = 1;                                 # unbuffered stdout
$lines = 0;                             # lines printed on the current page
$page = 1;                              # current page number
$cr = exists $opts{'c'} ? "\r" : '';    # optional CR before every newline

main_sub( @ARGV );

#
# pseudo main routine to cut down on global vars
#
# Formats stdin when no files are given; otherwise formats each named file
# in turn.  With -I the output for a file is written to "<file>.~~~" and
# renamed over the original once it has been written successfully.
#
# parameters
#   argv - command line args
#
# returns
#   nothing
#
sub main_sub {
    my @argv = @_;
    my ( $file , $indent , $in_place );

    if( @argv == 0 ) {
        read_file( \*STDIN );
        check_lines( 1 );
        return;
    }

    while( defined( $file = shift @argv ) ) {
        # NOTE(review): two-argument open interprets mode characters in
        # $file; kept because the script declares "require 5.003".
        unless( open FILE , $file ) {
            warn "$0: cannot open $file: $!\n";
            next;
        }

        # Padding that centres the file name within the text width.
        if( length( $file ) < $opts{'w'} ) {
            $indent = ' ' x ( ( $opts{'w'} - length( $file ) ) / 2 );
        }
        else {
            $indent = '';
        }

        $in_place = 0;
        if( exists $opts{'I'} ) {
            unless( open OUTPUT , ">$file.~~~" ) {
                warn "$0: cannot create $file.~~~: $!\n";
                close FILE;
                next;
            }
            select OUTPUT;
            $in_place = 1;
        }

        if( exists $opts{'f'} ) {
            print $indent , $file , "$cr\n$cr\n";
            $lines = 2;
        }

        read_file( \*FILE );
        close FILE;

        # Emit the trailing page number / form feed while the output handle
        # is still open, so it reaches the file in -I mode.
        check_lines( 1 );

        if( $in_place ) {
            select STDOUT;
            # Replace the original only if everything was flushed to disk.
            if( close OUTPUT ) {
                rename "$file.~~~" , $file
                    or warn "$0: cannot replace $file: $!\n";
            }
            else {
                warn "$0: error writing $file.~~~: $!\n";
                unlink "$file.~~~";
            }
        }

        if( @argv > 0 ) {
            $lines = 0;
            $page = 1;
            # The separator between files belongs to the stdout stream only;
            # in -I mode every file is self-contained.
            unless( exists $opts{'I'} ) {
                if( exists $opts{'F'} ) {
                    print "\f";
                }
                else {
                    print "$cr\n";
                }
            }
        }
    }
}

#
# reads the file and calls reformat() as appropriate
#
# Three modes: -M hands the whole input to reformat() as one paragraph,
# the default formats line by line, and -p/-e collects paragraphs first.
#
# parameters
#   fh - the file handle to read from
#
# returns
#   nothing
#
sub read_file {
    my $fh = shift;
    local ( $_ );
    my ( @lines , $was_empty );

    # all lines mode format
    if( exists $opts{'M'} ) {
        @lines = <$fh>;
        @lines = rem_lead( @lines );
        reformat( @lines );
    }
    # single line format
    elsif( ! exists $opts{'p'} ) {
        while( <$fh> ) {
            if( /^\s*$/ ) {
                # with -S only the first of a run of empty lines is printed
                unless( exists $opts{'S'} && $was_empty ) {
                    print "$cr\n";
                    check_lines( 0 );
                }
                $was_empty = 1;
            }
            else {
                s/\s+$//;
                reformat( $_ );
                $was_empty = 0;
            }
        }
    }
    # paragraph format
    else {
        while( <$fh> ) {
            # Empty line with no paragraph pending.  $was_empty is never
            # set true in this mode, so with -S such lines are dropped.
            if( /^\s*$/ && @lines == 0 &&
                ( ! exists $opts{'S'} || $was_empty ) ) {
                print "$cr\n";
                check_lines( 0 );
            }
            # End of the pending paragraph: an empty line, or (without -e)
            # an indented line as well.
            if( @lines > 0 &&
                ( ( exists $opts{'e'} && /^\s*$/ ) ||
                  ( ! exists $opts{'e'} && ( /^\s+\S/ || /^\s*$/ ) ) ) ) {
                @lines = rem_lead( @lines ) if exists $opts{'e'};
                reformat( @lines );
                @lines = ();
                if( /^\s*$/ ) {
                    print "$cr\n";
                    check_lines( 0 );
                }
            }
            unless( /^\s*$/ ) {
                s/\s+$/ /;
                push @lines , $_;
                $was_empty = 0;
            }
        }
        # Flush the last paragraph; it gets the same -e clean-up as the
        # paragraphs that were terminated by an empty line.
        if( @lines > 0 ) {
            @lines = rem_lead( @lines ) if exists $opts{'e'};
            reformat( @lines );
        }
    }
}
#
# formats the data sent to it
#
# The lines are joined, split into words and white space, and re-flowed
# to the -w width behind the -m margin.  White space at the very start is
# kept (tabs expanded unless -T); with -s all other white space collapses
# to a single space.
#
# parameters
#   @_ - the lines to reformat
#
# returns
#   nothing, prints the lines instead
#
sub reformat {
    my $text = join '' , @_;
    my @words = split /(\s+)/ , $text;
    my $margin = ' ' x $opts{'m'};
    my $line = $margin;
    my $lead = '';
    my ( $word , $white );

    # split yields an empty leading field when the text starts with space
    shift @words if @words && length( $words[ 0 ] ) == 0;

    # -i replaces (or supplies) the leading white space of the first line
    if( exists $opts{'i'} ) {
        unshift @words , ' ' if ! @words || $words[ 0 ] !~ /^\s+$/;
        $words[ 0 ] = ' ' x $opts{'i'};
    }

    # -b repeats the first line's indent on every wrapped line
    $lead = $words[ 0 ]
        if exists $opts{'b'} && $opts{'w'} > 0 &&
           @words && $words[ 0 ] =~ /^\s+$/;

    for $word ( @words ) {
        if( $word =~ /^\s+$/ ) {
            # -s squeezes white space except at the start of the line,
            # i.e. while the line still holds nothing but the margin.
            if( exists $opts{'s'} && length( $line ) != length( $margin ) ) {
                $line .= ' ';
                next;
            }
            for $white ( split // , $word ) {
                if( $white eq ' ' || $white eq "\n" ) {
                    $line .= ' ';
                }
                elsif( $white eq "\t" ) {
                    if( exists $opts{'T'} ) {
                        $line .= "\t";
                    }
                    else {
                        # pad to the next tab stop
                        $line .= ' ' x
                            ( $opts{'t'} - ( length( $line ) % $opts{'t'} ) );
                    }
                }
                # any other white space character is dropped
            }
        }
        elsif( $opts{'w'} == 0 || $line =~ /^\s*$/ ||
               get_length( $line ) + length( $word ) <= $opts{'w'} ) {
            # the word fits, wrapping is off, or the line is still empty
            # (an over-long word must go somewhere)
            $line .= $word;
        }
        else {
            output_line( $line , 1 );
            $line = $margin . $lead . $word;
        }
    }

    # the final line of a paragraph is never justified
    output_line( $line , 0 ) if $line =~ /\S/;
}

#
# computes the length of a line, accounts for tabstops if -T is present
#
# parameters
#   line - the line whose length needs computing
#
# returns
#   length of line
#
sub get_length {
    my $line = shift;
    my ( $len , $ch );

    # no tabs: the display width is the character count
    return length( $line ) unless $line =~ tr/\t//;

    $len = 0;
    for $ch ( split // , $line ) {
        if( $ch eq "\t" ) {
            $len += $opts{'t'} - ( $len % $opts{'t'} );
        }
        else {
            ++$len;
        }
    }
    $len;
}

#
# justifies text to both margins
#
# The missing width is spread evenly over the gaps between words; what is
# left over goes one space at a time to the right-most gaps.
#
# parameters
#   line - the line to justify
#
# returns
#   the line
#
sub justify {
    my $line = shift;
    my @words = split /\s+/ , $line;

    shift @words if @words && length( $words[ 0 ] ) == 0;
    return $line if @words < 2;

    my $gaps = @words - 1;
    my $spaces = $opts{'w'} - length( $line );
    return $line if $spaces <= 0;       # already at or beyond the width

    my $space = ' ' x int( $spaces / $gaps );
    $spaces %= $gaps;
    $line =~ s/(\S) /$1 $space/g;

    if( $spaces ) {
        my @parts = split /(\s+)/ , $line;
        my $i;
        shift @parts if length( $parts[ 0 ] ) == 0;
        # white space sits at every second index counting back from the
        # one before the last word
        for( $i = $#parts - 1 ; $spaces && $i >= 0 ; $i -= 2 , --$spaces ) {
            $parts[ $i ] .= ' ';
        }
        $line = join '' , @parts;
    }
    $line;
}

#
# just checks to see if certain options are set and calls subroutines as
# needed, then it prints the line
#
# parameters
#   line    - line to print
#   justify - true if the line may be justified (-j)
#
# returns
#   nothing
#
sub output_line {
    my ( $line , $justify ) = @_;

    $line =~ s/\s+$//s;
    if( $justify && exists $opts{'j'} && ! exists $opts{'T'} &&
        $opts{'w'} > 0 ) {
        $line = justify( $line );
    }
    print $line , "$cr\n";
    check_lines( 0 );
}

#
# check if we need to print page number or form feed
#
# parameters
#   $_[ 0 ] - 0 if we're in the middle of file, 1 at the end
#
# returns
#   nothing
#
sub check_lines {
    my $at_end = $_[ 0 ];
    my ( $indent , $output , $page_full );

    # a call from the middle of the file accounts for one printed line
    ++$lines unless $at_end;

    $page_full = ( exists $opts{'P'} && $lines == $opts{'n'} - 2 ) ||
                 ( exists $opts{'F'} && $lines == $opts{'n'} );
    return unless $page_full || ( $at_end && $lines > 0 );

    if( exists $opts{'P'} ) {
        # centred "Page N" footer preceded by an empty line
        $output = sprintf "Page %d" , $page;
        $indent = ( $opts{'w'} - length( $output ) ) / 2;
        print "$cr\n";
        print ' ' x $indent;
        print "$output$cr\n";
        ++$page;
    }
    print "\f" if exists $opts{'F'};
    $lines = 0;
}

#
# remove leading and trailing white space from all lines except first
#
# Trailing white space is replaced by a single space on every line; the
# first line keeps its leading white space.
#
# parameters
#   @lines - lines from which to remove leading white space
#
# returns
#   the modified lines
#
sub rem_lead {
    my @lines = @_;
    my $i;

    # nothing to do; also avoids creating an undefined first element
    return @lines unless @lines;

    $lines[ 0 ] =~ s/\s+$/ /;
    for $i ( 1 .. $#lines ) {
        $lines[ $i ] =~ s/^\s+//;
        $lines[ $i ] =~ s/\s+$/ /;
    }
    @lines;
}