#!/usr/bin/perl -w # View an MSDN web page as a man page. For documentation, run this script # through pod2man. # Copyright (C) 2001 Nick Duffek # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free # Software Foundation; either version 2 of the License, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # For a copy of the GNU General Public License, browse one of these URLs: # # http://www.gnu.org/licenses/gpl.html # http://nick.duffek.com/software/gpl.html # # or write to the Free Software Foundation, Inc., 59 Temple Place - Suite # 330, Boston, MA 02111-1307, USA. # Version number. $version = "msman version 0.3"; # Directories in which to store cached HTML and man pages. $cachedir = "/usr/local/share/msman"; $htmldir = "$cachedir/html"; $mandir = "$cachedir/man"; # Conversion engines, in order of preference. @converters = # docbook2man does the best job. (['docbook', \&docbook_init], # pod2man works fairly well. ['pod', \&pod_init]); # Regular expressions matching less desirable msdn.microsoft.com search hits, # sorted by increasing desirability. @srchjunk = ('/wce.*/', # Windows CE '/mapi/', # MAPI '/com/'); # COM # Path to temporary directory, created and deleted on the fly. $tmpdir = "/tmp/msman-tmp"; sub usage { my $me = $0; $me =~ s/^.*\///; $_[0] && print STDERR "$me: $_[0]\n"; print STDERR <<";"; Usage: $me [-cdehinoqs] -c force conversion -d turn on debugging -e only try conversion using (docbook or pod) -h display this message -i read HTML page from -n do not display page -o write man page to -q run quietly -s
use section number
(default 3) -v display the version number ; exit 1; } use Getopt::Long; Getopt::Long::Configure('bundling'); Getopt::Long::GetOptions('force|c' => \$opt_force, 'debug|d' => \$opt_debug, 'engine|e:s' => \$opt_engine, 'help|h' => \$opt_help, 'infile|i:s' => \$opt_infile, 'nodisplay|n' => \$opt_nodisplay, 'outfile|o:s' => \$opt_outfile, 'quiet|q' => \$opt_quiet, 'section|s:s' => \$opt_section, 'version|v' => \$opt_version); $opt_help && &usage(); $opt_version && &version; @ARGV == 1 || &usage("wrong number of arguments"); $arg_name = $ARGV[0]; use File::Path; &msman; exit 0; # Display this program's version number, then exit. sub version { print "$version\n"; exit 0; } # Download, convert, and display MSDN page $arg_name. sub msman { if ($opt_force || !-e &outpath) { my $in = &input; my($i, @cnvs); if (!$opt_engine) { @cnvs = @converters; } else { @cnvs = grep { $_->[0] eq $opt_engine } @converters; @cnvs || &usage("unrecognized engine \"$opt_engine\""); } for ($i = 0; $i < @cnvs; $i++) { my $cnv = &convert_init($cnvs[$i][1]) || next; &convert($cnv, $in); &output($cnv) || next; last; } $i < @cnvs || die "no conversion engine found\n"; } $opt_nodisplay || &display; } # Read the HTML page and return its contents. sub input { my $htmlpath = "$htmldir/$arg_name.html"; if ($opt_infile) { $in = &readfile($opt_infile); } elsif (!$opt_force && -e $htmlpath) { $in = &readfile($htmlpath); } else { use LWP::UserAgent; use HTTP::Request; my $ua = new LWP::UserAgent; $ua->agent("$version " . $ua->agent); # Search for $arg_name at microsoft.com. my $srchhost = "http://search.microsoft.com"; my $srchurl = "$srchhost/us/dev/default.asp?" . join("&", "qu=$arg_name", "boolean=ALL", "nq=NEW", "so=RECCNT", "p=1", "ig=01", map { sprintf "i=%02d", $_ } (0..51)); my @urls; # Iterate through pages of 10 search results each. for (my $i = 1;; $i++) { &info("Querying " . ($opt_debug ? $srchurl : $srchhost) . ($i > 1 ? 
" page $i" : "")); my($req, $res) = &webget($ua, $srchurl); # Find hits whose titles are exactly $arg_name. @urls = $res->content =~ m!]*href="([^\"]+)"[^<>]*>$arg_name!g; $opt_debug && &writefile("msman-dbg$i.html", $res->content); # Choose among multiple candidates. if (@urls > 1) { my @weighted = map { my $i; for ($i = 0; $i < @srchjunk; $i++) { last if /$srchjunk[$i]/; } [$i, $_]; } @urls; @urls = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @weighted; } last if @urls; # Try the next 10 hits. if ($res->content !~ m!]*href="([^\"]+)" ([^<]|<(font|img))*right-arrow!x) { &webdie($req, $res, "unable to find $arg_name in search results"); } $srchurl = "$srchhost$1"; $srchurl =~ s/&/&/g; } # Follow the best search hit. &info("Fetching frame"); ($req, $res) = &webget($ua, $urls[0]); # The returned page should be a frame layout, with the page we want # referenced in the right-hand frame. if ($res->content !~ m!fraRightFrame.*url=([^\s\042]+)!) { &webdie($req, $res, "unable to find page frame"); } my $url = "http://msdn.microsoft.com$1"; &info("Fetching page"); ($req, $res) = &webget($ua, $url); $in = $res->content; # Store the page in the cache. -d $htmldir || File::Path::mkpath($htmldir, 0, 0775) || die "mkpath $htmldir: $!"; open(HTML, ">$htmlpath") || die "error opening $htmlpath: $!\n"; print(HTML "\n", $in) || die "write $htmlpath: $!\n"; close(HTML) || die "close $htmlpath: $!\n"; } # Simplify regex parsing. $in =~ s/\r//g; $in; } # Attempt to get URL $_[1] using user agent $_[0]. If successful, return the # request and result objects, else exit with an appropriate error message. sub webget { my($ua, $url) = @_; my $req = HTTP::Request->new(GET => $url); my $res = $ua->request($req); if (!$res->is_success) { &webdie($req, $res, "error fetching URL: ", $res->message); } ($req, $res); } # URL request $_[0] has failed with result $_[1]. Print message @_[2..], # then exit. 
sub webdie { my($req, $res, @msg) = @_; if ($opt_debug) { foreach my $key (keys %{$res->headers}) { print STDERR "header $key => " . $res->headers->{$key} . "\n"; } print STDERR "code = <" . $res->code . ">\n"; &writefile("/tmp/msman-dbg.html", $res->content); } die join('', @msg) . "\n"; } # Convert the document in conversion object $_[0] from intermediate to man # format. Return whether the conversion was successful. sub output { my $cvt = $_[0]; &info("Converting $cvt->{Name} to man"); my $out = &{$cvt->{ToMan}}($cvt, &outsect) || return 0; # Create the man page cache directory if needed. if (!$opt_outfile) { my $dir = &outddir; -d $dir || File::Path::mkpath($dir, 0, 0775) || die "mkpath $dir: $!"; } &writefile(&outpath, $out); 1; } # Return the man page section. sub outsect { $opt_section || 3; } # Return the directory portion of the default man page path. sub outddir { "$mandir/man" . &outsect; } # Return the path to the man page. sub outpath { $opt_outfile || do { my $section = &outsect; &outddir . "/$arg_name.$section"; }; } # If intermediate format initializer $_[0] is valid, return a conversion # object initialized by $_[0]. Otherwise, return false. sub convert_init { my $init = $_[0]; my %cnv = # Accumulated output. (Out => [], # Whether we're within the SYNOPSIS section. InSynopsis => 0); &{$init}(\%cnv) ? \%cnv : 0; } # Return whether DocBook can be used as an intermediate format, and if so, # initialize conversion object $_[0] accordingly. sub docbook_init { my $cvt = $_[0]; # Check whether nsgmls is available. &findbin("nsgmls") || return 0; # Check for docbook2man.pl. system("sgmlspl docbook2man-spec.pl /dev/null 2>/dev/null"); &run_postmortem() || return 0; # For displaying informational messages. $cvt->{Name} = 'DocBook'; # DocBook-to-man conversion function. $cvt->{ToMan} = \&docbook_toman; # Initial text to output. $cvt->{Preamble} = <<";"; $arg_name 3 $arg_name ; # Final text to output. 
$cvt->{Postlude} = sub { $_[0]{DbInRefSect1} && "\n\n" }; # Whether we're within a RefSect1. $cvt->{DbInRefSect1} = 0; # Formatting directives before the NAME section's first line, before # its second and subsequent lines, and after its second and # subsequent lines. $cvt->{AfterName1} = "\n\n"; $cvt->{BeforeName2} = "\n\n\n"; $cvt->{AfterName2} = "\n"; # Formatting directives before and after the SYNOPSIS section. $cvt->{BeforeSynopsis} = "\n\nSynopsis\n\n"; $cvt->{AfterSynopsis} = "\n"; # Function to convert its entity argument to DocBook format. $cvt->{Entity} = \&docbook_entity; # Function to convert its tag, begin, and attributes arguments to # DocBook format. $cvt->{Tag} = \&docbook_tag; # HTML tags versus DocBook equivalents. Each equivalent is one of: # string: an equivalent DocBook begin and end tag # array: a pair of equivalent DocBook begin/end tags or tag lists # code: a function that outputs DocBook code for HTML tag $_[0] $cvt->{DbHtmlRpl} = { UL => ItemizedList, OL => OrderedList, DL => VariableList, DT => [[VarListEntry, Term], undef], DD => [[ListItem, Para], [ListItem, VarListEntry]], B => Function, I => Replaceable, P => [Para, undef], TR => [Para, undef], LI => [[ListItem, Para]], PRE => \&docbook_html_rpl_PRE, H4 => \&docbook_html_rpl_H4 }; # "BR => Para" yields extra vertical space e.g. at the ends of lists. # Em-dash and en-dash. $cvt->{Mdash} = "—"; $cvt->{Ndash} = "–"; 1; } # Return the result of converting entity $_[1] to DocBook format. sub docbook_entity { my($cvt, $entity) = @_; # docbook2man doesn't handle nbsp. if ($entity eq 'nbsp') { " "; } else { "&$entity;"; } } # Return the result of converting tag $_[1], begin indicator $_[2], and # attributes $_[3] to DocBook format. sub docbook_tag { my($cvt, $tag, $begin, $attr) = @_; my $rpl = $cvt->{DbHtmlRpl}{$tag} || return ''; my @tags; if (!ref $rpl) { @tags = ($rpl); } elsif (ref($rpl) eq ARRAY) { my $tags = @{$rpl} == 1 ? $rpl->[0] : $rpl->[!$begin] || return ''; @tags = ref($tags) ? 
@{$tags} : ($tags); if (!$begin && @{$rpl} == 1) { @tags = reverse @tags; } } else { return &{$rpl}($cvt, $tag, $begin, $attr); } join('', map { "<" . ($begin ? "" : "/") . "$_>" } @tags); } # Return the result of converting tag $_[1] = PRE, begin indicator $_[2], and # attributes $_[3] to DocBook format. sub docbook_html_rpl_PRE { my($cvt, $tag, $begin, $attr) = @_; if ($cvt->{InSynopsis}) { ''; } else { "<" . ($begin ? "" : "/") . "ProgramListing>\n"; } } # Return the result of converting tag $_[1] = H4, begin indicator $_[2], and # attributes $_[3] to DocBook format. sub docbook_html_rpl_H4 { my($cvt, $tag, $begin, $attr) = @_; if (!$begin) { "\n"; } else { my $out = $cvt->{DbInRefSect1} ? "\n\n" : ""; $cvt->{DbInRefSect1} = 1; $out . "\n"; } } # Return the result of converting the DocBook text in conversion object $_[0] # to man section $_[1] format. sub docbook_toman { my($cvt, $section) = @_; # docbook2man infers the output file name from the man page name, so run # it in a temporary directory. my $prevwd = &pushdtmp; # Delete empty paragraphs, which can accumulate as artifacts of the # translation process. my $out = join('', @{$cvt->{Out}}); $out =~ s!(<Para>\s*)+(<Para>)!$2!gm; $out =~ s!<Para>\s*</Para>!!gm; open(NSGMLS, "| nsgmls | sgmlspl docbook2man-spec.pl") || die "fork nsgmls: $!"; print(NSGMLS $out) || die "write nsgmls: $!"; close(NSGMLS) || die "close nsgmls: $!"; &run_postmortem("nsgmls"); my $file = "$arg_name." . &outsect; $out = &readfile($file); unlink $file, 'manpage.links', 'manpage.log', 'manpage.refs'; &popdtmp($prevwd); # Delete empty .SH directive generated from <Title>, which is # needed in NAME section to avoid docbook2man warning. $out =~ s/^\.SH ""\n//m; # Delete "name \-" inserted by docbook2man. $out =~ s/^$arg_name \\- //m; $out; } # Return whether POD can be used as an intermediate format, and if so, # initialize conversion object $_[0] accordingly. sub pod_init { my $cvt = $_[0]; # Check whether pod2man is available. 
&findbin("pod2man") || return 0; # For displaying informational messages. $cvt->{Name} = 'POD'; # POD-to-man conversion function. $cvt->{ToMan} = \&pod_toman; # Number of subsequent output operations that should begin new # paragraphs. $cvt->{PodPar} = 0; # Whether one or more non-paragraph output newlines are pending. $cvt->{PodNls} = 0; # Whether no output has been emitted yet. $cvt->{PodBof} = 1; # Whether all output has been emitted. $cvt->{PodEof} = 0; # Whether to convert subsequent output to upper case. $cvt->{PodUpcase} = 0; # Whether we're within a
    # <PRE> section.
    $cvt->{PodInPRE} = 0;

    # Whether to prepend whitespace to each line.
    $cvt->{PodPreWs} = 0;

    # Function to modify its string parameter immediately before it gets
    # output.
    $cvt->{OutFilter} = \&pod_outfilter;

    # Formatting directives before the NAME section.  Add name with hyphen to
    # avoid warnings from some versions of pod2man.
    $cvt->{BeforeName1} = sub {
	&pod_startpar($_[0], "=head1 NAME\n\n$arg_name - ") };

    # Formatting directives before the SYNOPSIS section.
    $cvt->{BeforeSynopsis} = sub {
	&pod_par($_[0], "=head1 SYNOPSIS") };

    # Function to convert its entity argument to POD format.
    $cvt->{Entity} = \&pod_entity;

    # Entity names versus literal translations.
    $cvt->{PodEntityRpl} = {
	'nbsp' => ' ',		# not handled by
	'amp' => '&'		# pod2man doesn't parse E<> in indented lines
    };

    # Function to convert its tag, begin, and attributes arguments to
    # POD format.
    $cvt->{Tag} = \&pod_tag;

    # Em-dash and en-dash.
    $cvt->{Mdash} = "--";
    $cvt->{Ndash} = "-";

    # Stack of nested lists currently being converted.
    $cvt->{PodLists} = [];

    # Final text to output.
    $cvt->{Postlude} = sub { $cvt->{PodEof} = 1; "\n"; };

    1;
}

# Flag conversion object $_[0] so that the next emitted text starts a new
# paragraph, then hand back text $_[1] (possibly undefined) untouched.
sub pod_startpar {
    my $cvt  = shift;
    my $text = shift;

    # One pending paragraph break; pod_outfilter counts it down again when
    # non-empty text is emitted.
    $cvt->{PodPar} = 1;

    return $text;
}

# Flag conversion object $_[0] so that text $_[1] forms a paragraph of its
# own, then hand back $_[1] untouched.
sub pod_par {
    my $cvt  = shift;
    my $text = shift;

    # Two pending paragraph breaks: one before this text and one before
    # whatever is emitted after it.
    $cvt->{PodPar} = 2;

    return $text;
}

# Return the POD spelling of HTML entity $_[1] (given without the
# surrounding "&" and ";"), consulting conversion object $_[0].
sub pod_entity {
    my ($cvt, $entity) = @_;

    # Numeric entities arrive as "#NNN"; POD's E<> wants just the number.
    $entity =~ s/\A#//;

    # Entities listed in PodEntityRpl are replaced by literal text; all
    # others become a POD escape.
    my $literal = $cvt->{PodEntityRpl}{$entity};
    return $literal ? $literal : "E<$entity>";
}

# Return the result of converting HTML tag $_[1] (upper-case name), begin
# indicator $_[2] (true for an opening tag), and attributes $_[3] to POD
# format, updating the state in conversion object $_[0] along the way.
# The attributes are accepted for interface symmetry but not examined.
sub pod_tag {
    my ($cvt, $tag, $begin, $attr) = @_;

    # Preformatted text: remember whether we are inside it.
    if ($tag eq 'PRE') {
        $cvt->{PodInPRE} = $begin;

        # Within the synopsis nothing else changes, and the (false) result
        # of this test is what gets returned.
        my $outside_synopsis = !$cvt->{InSynopsis};
        return $outside_synopsis unless $outside_synopsis;

        # Elsewhere, switch line indentation on or off and start a new
        # paragraph.
        $cvt->{PodPreWs} = $begin;
        return pod_startpar($cvt);
    }

    # List containers push or pop an entry on the stack of open lists.
    if ($tag eq 'UL' || $tag eq 'OL' || $tag eq 'DL') {
        if (!$begin) {
            pop @{$cvt->{PodLists}};
            return pod_par($cvt, "=back");
        }
        push(@{$cvt->{PodLists}}, { Type => $tag, Count => 0 });
        return pod_par($cvt, "=over 4");
    }

    # Italic and bold map directly onto I<> and B<>.
    if ($tag eq 'I' || $tag eq 'B') {
        # pod2man doesn't correctly handle bold/italic spanning newlines
        # in the synopsis, so drop the formatting there.
        return "" if $cvt->{InSynopsis};
        return $begin ? "$tag<" : ">";
    }

    # Section headings become upper-cased =head1 paragraphs.
    if ($tag eq 'H4') {
        if ($begin) {
            $cvt->{PodUpcase} = 2;
            return pod_startpar($cvt, "=head1 ");
        }
        $cvt->{PodUpcase} = 0;
        return pod_startpar($cvt);
    }

    # Every remaining tag is only significant when it opens.
    return '' unless $begin;

    # Paragraph-like tags.
    if ($tag eq 'BR' || $tag eq 'P' || $tag eq 'TR' || $tag eq 'DD') {
        return pod_startpar($cvt);
    }

    # List items take their bullet style from the innermost open list.
    if ($tag eq 'LI' || $tag eq 'DT') {
        @{$cvt->{PodLists}} || fatal("item outside list");
        my $innermost = $cvt->{PodLists}[-1];
        my $kind = $innermost->{Type};

        return pod_par($cvt, "=item *") if $kind eq 'UL';
        return pod_par($cvt, "=item " . ++$innermost->{Count} . ".")
            if $kind eq 'OL';
        return pod_startpar($cvt, "=item ");
    }

    return '';
}

# Modify output string $_[1] as appropriate for POD and return the text to
# append to the output.  Whitespace state is read from and written back to
# conversion object $_[0]:
#   PodInPRE   true inside a PRE section, where leading whitespace is kept
#   PodNls     number of newlines held back from earlier output
#   PodPar     number of upcoming outputs that must begin a new paragraph
#   PodBof     true until the first non-empty text has been emitted
#   PodEof     true once trailing newlines must no longer be held back
#   PodUpcase  above 1: counts down once per emitted text; at 1: text is
#              converted to upper case
#   PodPreWs   true when each line after a newline gets a leading space
sub pod_outfilter {
    my($cvt, $str) = @_;

    # Delete whitespace at the beginning of lines, except within PRE
    # sections.  This prevents extra blank lines from some versions of
    # pod2man.  A newline is prepended first when one is pending, so that
    # leading whitespace at the very start of $str is caught as well.
    if (!$cvt->{PodInPRE}) {
        if ($cvt->{PodNls} || $cvt->{PodPar}) {
            $str = "\n" . $str;
        }
        $str =~ s/\n[ \t]+/\n/g;
    }

    # Strip leading newlines, remembering that at least one was seen.
    $str =~ /\A(\n*)/;
    my $nls = length $1;
    $cvt->{PodNls} ||= $nls;
    $str = substr($str, $nls);

    # Emit non-paragraph newlines compressed to 1.
    if (!$cvt->{PodPar}) {
        $str = ($cvt->{PodNls} ? "\n" : "") . $str;
    }

    # Emit paragraph separators as double newlines, except at the very
    # start of the output.
    elsif (!$cvt->{PodBof}) {
        $str = "\n\n" . $str;
    }

    # Postpone trailing newlines until the next output.
    if (!$cvt->{PodEof}) {
        $str =~ /(\n*)\Z/;
        $cvt->{PodNls} = length $1;
        $str = substr($str, 0, length($str) - $cvt->{PodNls});
    }

    # Update the bookkeeping only when something is actually emitted.  Test
    # the length rather than the truth of $str: the text "0" is real output
    # even though Perl considers it false.
    if (length $str) {
        $cvt->{PodPar} && $cvt->{PodPar}--;
        $cvt->{PodBof} = 0;
        if ($cvt->{PodUpcase} > 1) {
            $cvt->{PodUpcase}--;
        }
        elsif ($cvt->{PodUpcase}) {
            $str = uc $str;
        }
        if ($cvt->{PodPreWs}) {
            # Indent every non-empty continuation line by one space so that
            # pod2man treats it as verbatim text.
            $str =~ s/\n([^\n])/\n $1/gm;
        }
    }
    return $str;
}

# Return the result of converting the POD text in conversion object $_[0]
# to man section $_[1] format.  Exits via run_postmortem() if pod2man does
# not terminate normally with status 0.
sub pod_toman {
    my($cvt, $section) = @_;

    # pod2man infers the man page name from the input file path, so run
    # it in a temporary directory.
    my $prevwd = pushdtmp();
    writefile($arg_name, @{$cvt->{Out}});

    # Run pod2man through a list-form pipe so that no shell is involved:
    # a page name or section containing spaces, quotes, or other shell
    # metacharacters is passed through as a single literal argument.
    my @cmd = ('pod2man', '--lax', "--section=$section", '--center', ' ',
               '--release', ' ', $arg_name);
    open(my $pod2man, '-|', @cmd) || die "fork pod2man: $!";
    my $out = do { local $/ = undef; <$pod2man> };
    defined $out or $out = '';

    # close() waits for pod2man and stores its exit status in $?, which
    # run_postmortem() inspects; its own return value is not needed.
    close($pod2man);
    run_postmortem("pod2man");

    unlink $arg_name;
    popdtmp($prevwd);

    # Delete name with hyphen that we added to the NAME section.  Quote the
    # name so that regex metacharacters in it are matched literally.
    $out =~ s/^\Q$arg_name\E \\- //m;

    return $out;
}

# Warn of malformed input: report the $_[2] characters of string $_[0]
# starting at offset $_[1] on standard error.  Return true if that range
# runs past the end of the string (so the caller can stop parsing), else
# the empty string.
sub html_malformed {
    my($str, $off, $len) = @_;

    # Clip the excerpt to the end of the string; if nothing is left to
    # show, report an empty excerpt.
    my $eof = $off + $len > length $str;
    if ($eof) {
        $len = length($str) - $off;
        if ($len <= 0) {
            ($str, $off, $len) = ("", 0, 5);
        }
    }

    my $msg = "malformed input: \"" . substr($str, $off, $len) . "\"";

    # No err() subroutine is visible in this file, so calling it
    # unconditionally would die with "Undefined subroutine".  Use it when
    # one exists; otherwise write the warning to standard error directly.
    if (defined &err) {
        err($msg);
    } else {
        print STDERR "$msg\n";
    }

    return $eof || '';
}

# Convert HTML input $_[1] according to conversion object $_[0].
#
# The text is scanned for "&", "<", and the two byte codes Microsoft uses
# for dashes.  Literal text between those markers is passed to out()
# unchanged; entities and tags are translated by the conversion object's
# Entity and Tag functions; the dash bytes are replaced by its Ndash and
# Mdash strings.  Malformed input is reported through html_malformed().
sub html_convert {
    my($cvt, $html) = @_;
    my $lastpos = 0;

    for ($lastpos = 0; $html =~ m!([&<\226\227])!g; $lastpos = pos $html) {
        # Save the marker now; later matches overwrite $1.
        my $marker = $1;
        my $thispos = pos $html;
        my $str = substr($html, $lastpos, $thispos - $lastpos - 1);

        # Literal text.
        if (length $str) {
            out($cvt, $str);
        }

        # Entities.  The match is anchored with \G so that only a name
        # immediately following the "&" is accepted, and uses /c so that a
        # failed match leaves pos() alone instead of restarting the scan
        # from the beginning of the document.
        if ($marker eq '&') {
            if ($html =~ m!\G([A-Za-z][A-Za-z0-9]*|\#\d+);!gc) {
                out($cvt, &{$cvt->{Entity}}($cvt, $1));
            } else {
                # A bare "&": warn, then keep it as a literal ampersand.
                html_malformed($html, $thispos - 1, 6);
                out($cvt, &{$cvt->{Entity}}($cvt, 'amp'));
            }
            next;
        }

        # Microsoft uses special byte codes for en-dash and em-dash.
        if ($marker eq "\226") {
            out($cvt, $cvt->{Ndash});
            next;
        }
        if ($marker eq "\227") {
            out($cvt, $cvt->{Mdash});
            next;
        }

        # Parse the rest of the HTML tag.  This match is deliberately left
        # unanchored, as before, so that declarations and comments such as
        # "<!DOCTYPE ...>" are still consumed as (unrecognized) tags.  /c
        # keeps pos() just past the "<" when no tag can be found.
        if ($html !~ m!(/)?(\w+)([^>]*)>!gc) {
            if (html_malformed($html, $thispos - 1, 20)) {
                # Near end of input: stop here and let the code below emit
                # the remainder, starting at the "<" so that the literal
                # text already output above is not emitted twice.
                $lastpos = $thispos - 1;
                last;
            }
            next;
        }
        my($tag, $begin, $attr) = (uc $2, !$1, $3);

        # Translate the tag.
        out($cvt, &{$cvt->{Tag}}($cvt, $tag, $begin, $attr));
    }

    # Emit whatever follows the last marker.
    if ($lastpos != length($html)) {
        out($cvt, substr($html, $lastpos));
    }
}

# Convert $_[1] according to conversion object $_[0].
sub convert {
    my($cvt, $in) = @_;

    &info("Converting HTML to $cvt->{Name}");
    my $ce = $in =~ m!

]+>(\w+)

!g ? 0 : $in =~ m!

\s*]+>(\w+)\s*

!g ? 1 : &fatal("can't find page name"); my $name_begin = pos $in; my $tail; my $obsolete = 0; # Output the preamble, title, and start of the NAME section. &out($cvt, $cvt->{Preamble}); my($name_html, $synopsis_begin, $tail_begin); # Find the NAME text and SYNOPSIS start. if ($ce) { $in =~ m!(

At a Glance

)!g || &fatal("can't find name end"); $name_html = substr($in, $name_begin, pos($in) - length($1) - $name_begin); $in =~ m!

Syntax

!g || &fatal("can't find synopsis"); $synopsis_begin = pos($in); $in =~ m!(

)!g || &fatal("can't find synopsis end"); $tail_begin = pos($in) - length $1; } elsif ($in !~ m!(|class=syntax>)[.*)?!g || &fatal("can't find synopsis end"); $tail_begin = pos $in; } # RefPurpose must be a single line, so extract the first line from the # NAME section and then emit an empty RefSect1 for the rest. if ($name_html) { $name_html =~ /\A(\s*(?:

)?\s*(\S.*))/ || &fatal("can't find first line of NAME"); my $line = $2; $name_html = substr($name_html, length $1); &out($cvt, $cvt->{BeforeName1}); &html_convert($cvt, $line); &out($cvt, $cvt->{AfterName1}); if ($name_html =~ /\S/) { &out($cvt, $cvt->{BeforeName2}); &html_convert($cvt, $name_html); &out($cvt, $cvt->{AfterName2}); } } # Output the synopsis. if (!$obsolete) { &out($cvt, $cvt->{BeforeSynopsis}); my $synopsis = substr($in, $synopsis_begin, $tail_begin - $synopsis_begin); $cvt->{InSynopsis} = 1; &html_convert($cvt, $synopsis); $cvt->{InSynopsis} = 0; $tail = substr($in, $tail_begin); &out($cvt, $cvt->{AfterSynopsis}); if (!$ce && $tail !~ m!\A\s*/

/; # Delete request for feedback and CD-ROM advertisement, but keep # copyright. $tail =~ s!Copyright (c)$2!x; # Output the rest. &html_convert($cvt, $tail); &out($cvt, $cvt->{Postlude}); } # Append @_[1..] to conversion object $_[0], treating elements as follows: # (1) if scalar, then output as-is; # (2) if undefined, then ignore; # (3) if code, then call code with no args and goto (1) sub out { my($cvt, @elts) = @_; for (my $i = 0; $i < @_; $i++) { my $elt = $elts[$i]; if (!ref $elt) { if (defined $elt) { if ($cvt->{OutFilter}) { $elt = &{$cvt->{OutFilter}}($cvt, $elt); } push(@{$cvt->{Out}}, $elt); } } elsif (ref($elt) eq CODE) { $elts[--$i] = &{$elt}($cvt); } else { &fatal("unrecognized output element type: $elt"); } } } # Return commands to prepend to nroff input. sub nroff_prepend { # The man package maintained by Andries Brouwer prepends: # - a .ll command to set the line length, needed to override nroff's # assumption of 80 columns; # - a .pl command to specify a huge page length, needed to avoid page # headings interleaved with screen output. # This function does the same thing, using logic copied from man-1.5j. # Tell nroff how many columns to use. my @prepend = (); my $width = ($ENV{MANWIDTH} || (-t STDOUT && eval "use Term::ReadKey; (GetTerminalSize())[0]") || $ENV{COLUMNS} || 80); if ($width < 66 || $width > 80) { my $ll = $width * 9 / 10; push(@prepend, sprintf ".ll %d.%di", $ll / 10, $ll % 10); } # Avoid page headings. my $pl = ($ENV{MANPL} || -t STDOUT && "1100i" || "11i"); push(@prepend, sprintf ".pl $pl"); join("\n", @prepend, ''); } # Display man page $arg_name. sub display { # Pipe through pager if writing to a tty. my $pager = ''; if (-t STDOUT) { foreach ($ENV{MANPAGER}, $ENV{PAGER}, 'less', 'more') { &findbin($_) || next; $pager = " | $_"; last; } } my $roff = &nroff_prepend . 
&readfile(&outpath); my $cmd = "nroff -man$pager"; open(NROFF, "|$cmd") || die "fork nroff: $!"; print(NROFF $roff) || die "write nroff: $!"; close(NROFF) || die "close nroff: $!"; &run_postmortem($cmd); } # Return the contents of the file at path $_[0]. Exit on error. sub readfile { my $path = $_[0]; open(IN, $path) || die "open $path: $!"; local $/ = undef; my $buf = || ''; close IN; $buf; } # Write @_[1..] to path $_[0]. Exit on error. sub writefile { my($path, @out) = @_; open(OUT, ">$path") || die "open $path: $!"; print(OUT @out) || die "write $path: $!"; close(OUT) || die "close $path: $!"; } # Chdir to $tmpdir, creating it first if necessary, and return the previous # cwd. sub pushdtmp { use Cwd; my $prevwd = getcwd() || die "getcwd: $!"; mkdir($tmpdir, 0777); chdir($tmpdir) || die "chdir $tmpdir: $!"; $prevwd; } # Undo the effects of the most recent pushdtmp(), which returned $_[0]. sub popdtmp { chdir($_[0]) || die "chdir $_[0]: $!"; rmdir $tmpdir; } # If the most recent subprocess exited normally, return 1. Otherwise, if # !$_[0], return 0, else display an appropriate message with subprocess name # $_[0] and exit. sub run_postmortem { my $cmd = $_[0] || ''; use POSIX qw(uname WIFEXITED WEXITSTATUS WIFSIGNALED WTERMSIG); my $msg; if (WIFEXITED($?)) { my $status = WEXITSTATUS($?); if (!$status) { return 1; } $msg = "\"$cmd\" exited with status $status"; } elsif (WIFSIGNALED($?)) { $msg = "\"$cmd\" received signal " . WTERMSIG($?); } else { $msg = "\"$cmd\" exited with unrecognized status $?"; } $cmd || return 0; die "$msg\n"; } # Return whether program $_[0] appears to be available. sub findbin { my $prog = $_[0] || return 0; substr($prog, 0, 1) eq '/' && return -x $prog; grep { -x "$_/$prog" } split(':', $ENV{PATH}); } # Display message @_ unless running quietly. sub info { $opt_quiet || print @_, "\n"; } # Exit with error message @_. 
sub fatal { print STDERR "@_\n"; exit 1; } __END__ =head1 NAME msman - display an MSDN web page as a man page =head1 SYNOPSIS msman [B<-cehinoqs>] I =head1 DESCRIPTION B displays Microsoft Developer Network (MSDN) web page I as a UNIX-style man page. More particularly, it: =over 4 =item 1. searches for and downloads an MSDN web page with I as its title; =item 2. converts the page to POD or DocBook format; =item 3. converts the result to a man page using pod2man or nsgmls with docbook2man-spec.pl; =item 4. stores the converted man page in a cache directory; =item 5. displays the man page. =back =head1 OPTIONS =over 4 =item B<-c> Force download and conversion, even if the page already exists in the cache. =item B<-e> I Attempt conversion using I, which must be C or C. If this option is not specified, B tries DocBook conversion first and POD conversion second. =item B<-h> Display a usage message. =item B<-i> I Read the MSDN page from I instead of downloading it from the Internet. =item B<-n> Do not display the page after converting it. =item B<-o> I Write the man page to I instead of storing it in the cache. =item B<-q> Run quietly. =item B<-s> I

Use section number I<section> instead of the default C<3>.

=back

=head1 EXAMPLES

View the CreateFile man page, downloading and generating it first if
necessary:

    msman CreateFile

Make sure that the cache contains a WIN32_FIND_DATA man page, then use
man(1) to view it:

    msman -n WIN32_FIND_DATA
    MANPATH=/usr/local/share/msman/man man WIN32_FIND_DATA

Generate a new FindFirstFile man page from the current MSDN web page, store
it in F</tmp/FindFirstFile.3>, and view it:

    msman -c -o /tmp/FindFirstFile.3 FindFirstFile

=head1 ENVIRONMENT

=over 4

=item MANPAGER

Program for displaying the formatted man page when the standard output is a
tty.

=item PAGER

Same as MANPAGER, but lower precedence.  If neither variable is set,
B<msman> tries C<less> first, then C<more>.

=back

=head1 FILES

=over 4

=item F</usr/local/share/msman/man>

Cached man page directory.

=item F</usr/local/share/msman/html>

Cached HTML page directory.

=back

=head1 BUGS

When using POD conversion, B<msman> discards italic and bold formatting in
the SYNOPSIS section to avoid strange line wrapping and filling by pod2man.

B<msman> can't yet convert the MSDN web pages documenting certain
interfaces, including COM and MAPI.

Please send other bug reports to E<lt>nick@duffek.comE<gt>.

=head1 SEE ALSO

nsgmls(1), pod2man(1)

=head1 AUTHOR

Nick Duffek E<lt>nick@duffek.comE<gt>