#!/usr/bin/perl -w

# pfc_convert - convert AOL .pfc files to other formats.

# Copying
# =======
# Copyright (C) 2001, 2007 Nick Duffek
# Thanks to Rob Pemberton for AOL 9.0a/b/c patches.

# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.

# For a copy of the GNU General Public License, browse one of these URLs:
#
#   http://www.gnu.org/licenses/gpl.html
#   http://nick.duffek.com/software/gpl.html
#
# or write to the Free Software Foundation, Inc., 59 Temple Place - Suite
# 330, Boston, MA 02111-1307, USA.

# Overview
# ========
# This program translates AOL 5.0 Personal Filing Cabinet (.pfc) files into
# other formats.  Specifically, it converts:
#   - Address Book .pfc files into comma-delimited text files suitable for
#     importing into Outlook Express
#   - Favorite Places .pfc files into HTML code suitable for importing into
#     Internet Explorer and probably Netscape and Mozilla
# It may or may not work with .pfc files from other versions of AOL.  Patches
# and success/failure reports are welcome.

# Usage
# =====
# Run:
#   ./pfc_convert -{a|b|p} [<input> [<output>]]
# where
#   <input> is the .pfc file to convert
#   <output> is the output file to create
# Exactly one of the following options must be specified:
#   -a  output comma-delimited text suitable for importing as address book
#       entries into Outlook Express
#   -b  output HTML text suitable for importing as favorites into Internet
#       Explorer and probably Netscape and Mozilla
#   -p  output the unconverted results of parsing the input file

# Examples
# ========
# To transfer an address book from AOL to Outlook Express:
#
#   1. Open the AOL address book and save it to a file, say
#      "Address Book.pfc" in "My Documents".
#
#   2. In Cygwin, run:
#        cd /cygdrive/c/My\ Documents
#        ./pfc_convert -a Address\ Book.pfc addr.csv
#
#   3. In Outlook Express, choose "File -> Import -> Other Address Book",
#      choose "Text File (Comma Separated Values)", and enter
#      "C:\My Documents\addr.csv" as the file to import.

# To transfer a favorite places list from AOL to Internet Explorer:
#
#   1. Open the Favorite Places list and save it to a file, say
#      "Favorite Places.pfc" in "My Documents".
#
#   2. In Cygwin, run:
#        cd /cygdrive/c/My\ Documents
#        ./pfc_convert -b Favorite\ Places.pfc bookmark.htm
#
#   3. In Internet Explorer, choose "File -> Import and Export...", choose
#      "Import Favorites", and enter "C:\My Documents\bookmark.htm" as the
#      file to import.

# AOL 5.0 PFC file layout
# =======================
#
# I determined the following by inspecting two .pfc files.  If you have
# corrections or additions, I'd be glad to hear about them.
#
# File offsets are stored as 4-byte little-endian integers.
#
# File header:
#   offset 0: literal "AOLVM100"
#   offset 16: 4-byte file offset of record pointer list
#
# Record pointer list:
#   offset 0: literal "RS\001\0"
#   offset 16: 0-terminated list of 4-byte record pointers
#
# Record:
#   offset 0: literal "RS\0\0"
#   offset 4: 4-byte record length minus 8
#   offset 8: record contents
#
# There are at least 2 record types:
#   "attribute": collection of attributes describing a folder or entry in
#     Favorite Places or Address Book
#   "auxiliary": the Internet resource (URL or email message) described by a
#     Favorite Places entry, or the name, email address, and comments of an
#     Address Book entry
#
# Attribute record contents:
#   offset 8: 1 if the auxiliary record is in AOLH..AOLF format, 0 otherwise
#   offset 9: 1 if the following record pointer is an auxiliary record
#     containing more information about this entry, 0 otherwise
#   offset 18: 80-byte entry or folder title
#
# AOLH..AOLF format:
#   offset 0: literal "AOLH"
#   offset 4: 4-byte length <len>, including AOLH and AOLF tags
#   offset 8: AOLH..AOLF fields
#   offset <len> - 4: literal "AOLF"
#
# AOLH..AOLF field:
#   offset 0: 2-byte ordinal value
#   offset 2: 1-byte field type
#
# AOLH..AOLF field types and values:
#   2,3: 2-byte integer, purpose unknown
#   4: 4-byte integer, purpose unknown
#   5,7: 4-byte length <len> followed by <len>-byte string
#
# Attribute records are 126 bytes long, excluding the header.  They contain
# various other binary data whose meaning I haven't deciphered.
#
# The first record pointer refers to an attribute record containing the
# canonical path to the .pfc file, e.g. "C:\My Documents\Favorite Places.pfc".
#
# The second record pointer refers to an attribute record containing the
# name of the entry collection, e.g. "Favorite Places" or "Address Book".
#
# The third and subsequent record pointers refer to attribute records
# describing folders or pairs of attribute/auxiliary records describing
# entries.

# Print a usage summary to stderr and exit with a failure status.
sub usage {
    die "usage: $0 -{a|b|p} [<input> [<output>]]\n";
}

# Command-line handling.  The first argument selects the output format; the
# optional second and third arguments name the input and output files.
@ARGV || usage();

my %format_for = (
    '-a' => 'Address',
    '-b' => 'Bookmark',
    '-p' => 'Parsed',
);
$output_format = $format_for{$ARGV[0]} || usage();

@ARGV > 3 && usage();

$nl = "\r\n";   # end-of-line sequence in output file
$in = undef;    # input file contents

convert($ARGV[1], $ARGV[2]);
exit 0;

# Convert input .pfc file $_[0] to output file $_[1].
sub convert {
    my($inpath, $outpath) = @_;

    my @recs = parse($inpath);
    emit($outpath, @recs);
}

# Return the records stored in .pfc file $_[0].  A missing or empty path
# means standard input.
#
# Each returned record is a hash reference with these keys:
#   Title     title string read from the attribute record
#   IsFolder  true if the attribute record has no auxiliary record
#   AOLH      true if the auxiliary record is in AOLH..AOLF format; replaced
#             by a reference to the parsed field list by parse_AOLH()
#   Data      contents of the auxiliary record, once one has been seen
#   DataOff   file offset at which Data starts
sub parse {
    my $inpath = $_[0];

    # Only an absent or empty path selects stdin, so that a file that
    # happens to be named "0" can still be read.
    $in = readfile(defined $inpath && length $inpath ? $inpath : '-');

    # File header.
    expect_str(0, "AOLVM100" . "\0" x 8);
    my $listp = uint4(16);

    # Record pointer list.
    expect_str($listp, "RS\001\0\004\020\0\0\0\004\0\0\0\0\0");
    # NOTE(review): the file-layout comment at the top of this file places
    # the pointers at offset 16 of the list, but the code skips 24 bytes.
    $listp += 24;

    my @recs = ();

    # Scan through the list, which ends at the first zero pointer.  uint4()
    # also returns 0 when the list runs off the end of the file.
    while (my $recp = uint4($listp)) {
        $listp += 4;
        my $reclen = rechdr($recp);
        $recp += 8;

        my $prev = @recs && $recs[-1];
        if ($prev && !$prev->{IsFolder} && !defined $prev->{Data}) {
            # This is the auxiliary record belonging to the preceding
            # attribute record.
            if (!$prev->{AOLH} && $output_format eq 'Bookmark') {
                # Extract just the HTML link.
                $recp += 32;
                $reclen -= 43;
                # A record shorter than expected must not turn into a
                # negative length, which substr() would interpret as
                # "everything except the last N bytes of the file".
                $reclen = 0 if $reclen < 0;
            }
            $prev->{Data} = str($recp, $reclen);
            $prev->{DataOff} = $recp;
            if ($prev->{AOLH}) {
                parse_AOLH($prev);
            }
        }
        else {
            # This is an attribute record describing a folder or an entry.
            push(@recs, {
                Title    => str($recp + 18, 80),
                IsFolder => !uint1($recp + 9),
                AOLH     => uint1($recp + 8),
            });
        }
    }

    @recs;
}

# Issue message @_[2..] describing an error that occurred at offset $_[1] of
# AOLH..AOLF string in record $_[0].  The offset is reported relative to the
# start of the record's data.
sub err_AOLH {
    my($rec, $off, @msg) = @_;

    my $reloff = $off - $rec->{DataOff};
    err("in AOLH at $rec->{DataOff}, offset $reloff: @msg");
}

# Extract AOLH..AOLF constructs in record $_[0].
sub parse_AOLH {
    # Walks the AOLH..AOLF chunks stored in $rec->{Data}, which starts at
    # file offset $rec->{DataOff} of the global input buffer $in.
    #
    # On return, $rec->{AOLH} refers to the list of parsed fields, each a
    # hash with keys Ord, Type, Dtype ('Num' or 'Str') and Val, and
    # $rec->{Data} holds the string-typed field values joined together.
    # Parsing stops at the first malformed chunk; fields parsed up to that
    # point are kept.
    my $rec = $_[0];
    my @fields = ();
    my $pos = $rec->{DataOff};
    my $endpos = $pos + length $rec->{Data};
    my @data = ();

    while ($pos < $endpos) {
        if ($pos + 12 > $endpos) {
            err_AOLH($rec, $pos, "not enough room for AOLH + len + AOLF");
            last;
        }
        my $len = uint4($pos + 4);      # chunk length, including both tags
        my $fpos = $pos + $len - 4;     # where the closing AOLF should be
        if (substr($in, $pos, 4) ne "AOLH") {
            err_AOLH($rec, $pos, "didn't find AOLH");
            last;
        }
        if ($pos + $len > $endpos) {
            err_AOLH($rec, $pos, "AOLH length $len too big");
            last;
        }
        if (substr($in, $fpos, 4) ne "AOLF") {
            err_AOLH($rec, $pos, "didn't find AOLF");
            last;
        }
        $pos += 8;

        while ($pos < $fpos) {
            my $ord = uint2($pos);
            my $type = uint1($pos + 2);
            $pos += 3;
            my($dtype, $val);
            if ($type == 2 || $type == 3) {
                $dtype = 'Num';
                $val = uint2($pos);
                $pos += 2;
            }
            elsif ($type == 4) {
                $dtype = 'Num';
                $val = uint4($pos);
                $pos += 4;
            }
            elsif ($type == 5 || $type == 7) {
                $dtype = 'Str';
                # Kept separate from the chunk length $len above.
                my $slen = uint4($pos);
                $val = str($pos + 4, $slen);
                $pos += 4 + $slen;

                # Start a new line before this string unless it is the
                # first field or directly follows a type-2/3 field whose
                # value is zero.
                my $prev = @fields && $fields[-1];
                if ($prev
                    && ($prev->{Type} != 2 && $prev->{Type} != 3
                        || $prev->{Val})) {
                    push(@data, "\n");
                }
                push(@data, $val);
            }
            else {
                err_AOLH($rec, $pos, "unknown AOLH type $type");
                # Leaves only the field loop; the position check below then
                # reports the chunk as incomplete and stops parsing.
                last;
            }
            push(@fields, {
                Ord   => $ord,
                Type  => $type,
                Dtype => $dtype,
                Val   => $val,
            });
        }

        if ($pos != $fpos) {
            err_AOLH($rec, $pos, "AOLH field parse didn't end at AOLF");
            last;
        }
        $pos += 4;
    }

    $rec->{AOLH} = \@fields;
    $rec->{Data} = join('', @data);
}

# Emit records @[1..] to output file $_[0].  A missing, empty or "-" path
# means standard output.  The global $output_format selects the emitter.
# Dies if the output cannot be opened or closed.
sub emit {
    my($outpath, @recs) = @_;

    my $to_stdout = !defined $outpath || $outpath eq '' || $outpath eq '-';

    if ($to_stdout) {
        open(OUT, '>&STDOUT') || die "dup stdout: $!";
    }
    else {
        # Three-argument open, so that characters in the file name can
        # never be interpreted as part of the open mode.
        open(OUT, '>', $outpath) || die "open $outpath: $!";
    }

    if ($output_format eq 'Address') {
        emit_addresses(OUT, @recs);
    }
    elsif ($output_format eq 'Bookmark') {
        emit_bookmarks(OUT, @recs);
    }
    else {
        emit_parsed(OUT, @recs);
    }

    # Buffered write errors only surface at close time.
    my $label = $to_stdout ? 'stdout' : $outpath;
    close(OUT) || die "close $label: $!";
}

# Emit records @[1..] as address book entries to handle $_[0].
sub emit_addresses {
    # Writes one header row followed by one comma-separated row per record,
    # each terminated by the global $nl.  Records whose AOLH field list is
    # missing or does not have exactly 6 fields are reported via err() and
    # skipped.
    my($handle, @recs) = @_;
    local(*HANDLE) = $handle;

    # Each record contains 6 AOLH fields, the first 4 of which are:
    my @tags = (['First', 'First Name'],
                ['Last',  'Last Name'],
                ['Email', 'E-mail Address'],
                ['Notes', 'Notes']);

    print HANDLE join(",", map { $_->[1] } @tags), $nl;

    foreach my $rec (@recs) {
        my $aolh = $rec->{AOLH};
        if (!$aolh) {
            my $data = defined $rec->{Data} ? $rec->{Data} : '';
            err("unexpected non-AOLH field: $data");
            next;
        }
        # AOLH is still the raw flag byte, not a field list, when the
        # record's auxiliary data was never parsed.
        my @fields = ref $aolh eq 'ARRAY' ? @{$aolh} : ();
        if (@fields != 6) {
            err("unexpected AOLH field count " . @fields);
            next;
        }

        my %vals = ();
        for my $i (0 .. 3) {
            my $val;
            if ($fields[$i]{Dtype} ne 'Str') {
                err("unexpected numeric type $fields[$i]{Type}");
                $val = '';
            }
            else {
                $val = $fields[$i]{Val};
                # Use double quotes to handle special characters.  Embedded
                # double quotes become single quotes so that they cannot
                # end the quoted field early.
                $val =~ s/\"/'/g;
                # Fields containing a comma or a line break must be quoted,
                # or they would be split across columns or rows.
                if ($val =~ /[,\r\n]/) {
                    $val = "\"$val\"";
                }
            }
            $vals{$tags[$i][0]} = $val;
        }

        # Addresses without a domain are taken to be AOL screen names.
        if ($vals{Email} && $vals{Email} !~ /\@/) {
            $vals{Email} .= '@aol.com';
        }

        print HANDLE join(",", map { $vals{$_->[0]} } @tags), $nl;
    }
}

# Return $_[0] with the characters that are special in HTML text and
# attribute values replaced by entities.  An undefined value yields ''.
sub _html_escape {
    my $text = defined $_[0] ? $_[0] : '';

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text;
}

# Emit records @[1..] as bookmarks to handle $_[0].
#
# The output follows the Netscape bookmark file layout: a <DL> list in
# which a folder is a <DT><H3> heading followed by a nested <DL>, and an
# entry is a <DT><A HREF=...> link.  Titles and URLs are HTML-escaped.
#
# NOTE(review): the markup below was reconstructed from that file format;
# confirm the header lines against a known-good copy of this script.
sub emit_bookmarks {
    my($handle, @recs) = @_;
    local(*HANDLE) = $handle;

    print HANDLE join($nl,
                      "<!DOCTYPE NETSCAPE-Bookmark-file-1>",
                      "<!-- This is an automatically generated file. -->",
                      "<TITLE>Bookmarks</TITLE>",
                      "<H1>Bookmarks</H1>",
                      "<DL><p>"), $nl;

    my $folder = 0;     # true while a folder's nested list is open
    my $close_folder = "    </DL><p>$nl";
    my $now = time;

    foreach my $rec (@recs) {
        # Skip AOLH-style records, which in Favorite Places I've only seen
        # used for storing email messages.
        next if $rec->{AOLH};

        my $title = _html_escape($rec->{Title});

        # Internet Explorer requires ADD_DATE but not LAST_VISIT or
        # LAST_MODIFIED.
        if ($rec->{IsFolder}) {
            # Folders do not nest: a new folder closes the previous one.
            $folder && print HANDLE $close_folder;
            print(HANDLE
                  "    <DT><H3 FOLDED ADD_DATE=\"$now\">$title</H3>$nl",
                  "    <DL><p>$nl");
            $folder = 1;
        }
        else {
            my $url = _html_escape($rec->{Data});
            print(HANDLE
                  " " x ($folder ? 8 : 4),
                  "<DT><A HREF=\"$url\"",
                  " ADD_DATE=\"$now\">$title</A>$nl");
        }
    }

    $folder && print HANDLE $close_folder;
    print HANDLE "</DL><p>$nl";
}

# Emit records @[1..] as untranslated data to handle $_[0].  Records are
# separated by a blank line.  Unlike the other emitters this one ends lines
# with "\n" rather than the global $nl.
sub emit_parsed {
    my($handle, @recs) = @_;
    local(*HANDLE) = $handle;

    my $first = 1;
    foreach my $rec (@recs) {
        print HANDLE "\n" unless $first;
        $first = 0;

        print(HANDLE
              "title: $rec->{Title}\n",
              "isfolder: ", 0 + $rec->{IsFolder}, "\n");

        if (!$rec->{AOLH}) {
            # Folders have no auxiliary data at all.
            my $data = defined $rec->{Data} ? $rec->{Data} : '';
            print HANDLE "data: $data\n";
            next;
        }

        # AOLH is still the raw flag byte, not a field list, when the
        # record's auxiliary data was never parsed; print no fields then.
        my @fields = ref $rec->{AOLH} eq 'ARRAY' ? @{$rec->{AOLH}} : ();
        foreach my $field (@fields) {
            print(HANDLE
                  "AOLH: ord = $field->{Ord},",
                  " type = $field->{Type},",
                  " dtype = $field->{Dtype},",
                  " val = $field->{Val}\n");
        }
    }
}

# Return whether the input file is long enough to contain $_[1] bytes at
# offset $_[0].  Reports an error via err() and returns 0 if it is not.  A
# negative offset or length is never valid.
sub lenok {
    my($off, $len) = @_;

    if ($off >= 0 && $len >= 0 && $off + $len <= length $in) {
        return 1;
    }
    err("input file too short for $len-byte object at offset $off");
    0;
}

# Return the unsigned $_[1]-byte little-endian integer at offset $_[0] in the
# input file, or 0 if the file is too short.  Dies if $_[1] is not 1, 2 or 4.
sub uint {
    my($off, $len) = @_;

    lenok($off, $len) || return 0;

    my %fmt_for = (4 => 'V', 2 => 'v', 1 => 'C');
    my $fmt = $fmt_for{$len} || die "invalid integer length $len";
    unpack($fmt, substr($in, $off, $len));
}

# Return the 4-byte little-endian integer at offset $_[0] in the input file.
sub uint4 {
    uint($_[0], 4);
}

# Return the 2-byte little-endian integer at offset $_[0] in the input file.
sub uint2 {
    uint($_[0], 2);
}

# Return the 1-byte little-endian integer at offset $_[0] in the input file.
sub uint1 {
    uint($_[0], 1);
}

# Parse the record header at offset $_[0] and return the record's length.
sub rechdr {
    my $off = $_[0];

    expect_str($off, "RS\0\0");
    uint4($off + 4);
}

# Return the null-terminated up-to-$_[1]-byte string at offset $_[0] in the
# input file, or '' if the file is too short.  Only trailing NUL bytes are
# removed.
sub str {
    my($off, $len) = @_;

    lenok($off, $len) || return '';
    my $str = substr($in, $off, $len);
    $str =~ s/\0+$//;
    $str;
}

# Expect offset $_[0] in the input file to contain string $_[1].
sub expect_str {
    # Reports a mismatch, or an input file too short to hold the string,
    # via err().  Nothing is returned and processing continues either way.
    my($off, $str) = @_;

    lenok($off, length $str) || return;
    if ($str ne substr($in, $off, length $str)) {
        err("input doesn't match \"$str\" at offset $off");
    }
    return;
}

# Expect the $_[1] bytes at offset $_[0] in the input file to match regular
# expression $_[2].  A mismatch is reported via err().
sub expect_re {
    my($off, $len, $match) = @_;

    lenok($off, $len) || return;
    my $str = substr($in, $off, $len);
    if ($str !~ /$match/) {
        err("input doesn't match \"$match\" at offset $off");
    }
    return;
}

# Return the contents of the file at path $_[0], or of standard input if the
# path is "-".  Exit on error.
sub readfile {
    my $path = $_[0];
    my $fh;

    if ($path eq '-') {
        open($fh, '<&STDIN') || die "dup stdin: $!";
    }
    else {
        # Three-argument open, so that characters in the file name can
        # never be interpreted as an open mode or a pipe.
        open($fh, '<', $path) || die "open $path: $!";
    }

    # .pfc files are binary: prevent end-of-line translation on platforms
    # that distinguish text files from binary files.
    binmode($fh);

    local $/ = undef;   # slurp the whole file in one read
    my $contents = <$fh>;
    close $fh;

    defined $contents ? $contents : '';
}

# Issue error message @_ on standard error.
sub err {
    print STDERR "@_\n";
}