#!/usr/bin/perl
# Version 0.1.3
# April 29, 2004
#
# The latest version of this file can be found at http://backspace.com/pagify
#
# This software is copyright (C) 2004 John Emerson. It is distributed
# under the terms of the GNU General Public License (GPL). Because it is
# licensed free of charge, there is NO WARRANTY, it is provided AS IS.
# The author can not be held liable for any damage that might arise from
# the use of this software. Use it at your own risk.
#
# See http://www.gnu.org/copyleft/gpl.html for more details.
#
# This script removes the cruft from Word's "Save as Web Page..." HTML output,
# splits the file into separate HTML pages wherever a "Heading 1" appears, and converts
# the endnotes (if any) to footnotes.
#
#
# How to use it:
#
# 1. "Save as Web Page..." from Microsoft Word.
# 2. Edit the file configuration information below.
# 3. Run!
#
# There's still work to be done, but it does make a useful
# first pass when coding big documents.
#
# For more information, visit http://backspace.com/pagify
#
# Path to the HTML file produced by Word's "Save as Web Page...".
# NOTE: the script rewrites this file in place when it normalizes the
# line breaks, so work on a copy if the original must stay untouched.
$source_file = "/path/to/HTML_file_from_Word";
# Where the output goes. The script runs an ".htm"/".html"-stripping
# substitution on this value, so give a directory path as in the
# placeholder rather than the name of an HTML file.
$output_root = "/path/to/output_directory/";
# Title of the document -- presumably inserted into the generated pages;
# confirm against its use later in the script.
$title_front = qq|Document Title|;
# Date text -- presumably printed on the generated pages; confirm against
# its use later in the script.
$date_stamp = "April 2004";
# Language used for the "previous" / "index" / "next" navigation links.
$language = "en";
# Currently available: en, es, fr, pt
# (any other value leaves the link labels unset).
$verbose = "0";
# Set $verbose to 1 to print progress messages, 0 to stay quiet.
#
# The four values below are text fragments, presumably HTML wrapped around
# each generated page and its footnotes -- confirm against their use later
# in the script. Put the text between "qq|" and the closing "|". Because
# they are qq|...| strings, the text must not contain a literal "|", and
# "$" and "@" are interpolated unless escaped with a backslash.
$page_header1 = qq|
|;
$page_header2 = qq|
|;
$footnote_separator = qq|
|;
$page_footer = qq|
|;
##### End of configuration #####
# Choose the wording of the "previous" / "index" / "next" navigation links
# from $language (set in the configuration section above).
#
# Sets the globals $previous, $index and $next. An unrecognised or missing
# language code falls back to English with a warning, instead of silently
# leaving the three labels undefined.
{
    my %nav_labels_for = (
        #        previous       index     next
        en => [ "previous",   "index",  "next"      ],
        # "precédente" is not a Spanish word and "proximo" lacked its accent;
        # use the standard Spanish pagination terms instead.
        es => [ "anterior",   "índice", "siguiente" ],
        fr => [ "précédente", "index",  "suivant"   ],
        pt => [ "precedente", "índice", "seguinte"  ],
    );

    my $lang = defined $language ? $language : "";
    if (!exists $nav_labels_for{$lang}) {
        warn qq|Unsupported language "$lang"; using "en" for the navigation links\n|;
        $lang = "en";
    }

    ($previous, $index, $next) = @{ $nav_labels_for{$lang} };
}
# Change line breaks from Mac (CR) to Unix (LF), rewriting $source_file in
# place via "$source_file.tmp".
#
# This is done in Perl rather than through the shell so that a path
# containing a quote or other shell metacharacter cannot break (or inject
# into) the command, and so that the original is only replaced after the
# converted copy has been written completely.
{
    my $tmp_file = "$source_file.tmp";

    open(my $in, '<', $source_file)
        or die "can't open $source_file: $!";
    binmode $in;

    if (open(my $out, '>', $tmp_file)) {
        binmode $out;

        # Read fixed-size chunks: a Mac file may contain no LF at all, so
        # line-oriented reading could pull the whole file in as one "line".
        local $/ = \65536;
        while (defined(my $chunk = <$in>)) {
            # Same mapping as tr '\015' '\012': every CR becomes an LF.
            $chunk =~ tr/\015/\012/;
            print {$out} $chunk
                or die "can't write $tmp_file: $!";
        }

        close $out or die "can't write $tmp_file: $!";
        close $in;
        rename $tmp_file, $source_file
            or die "can't replace $source_file: $!";
    }
    else {
        # As before, an unwritable location does not stop the run; the
        # source is simply left with its original line breaks.
        warn "can't create $tmp_file ($!); line breaks left unconverted\n";
        close $in;
    }
}
# Remove the first ".htm" / ".html" from the output root.
# The optional "l" makes ".html" match in full; the earlier alternation
# (\.htm|\.html) always matched ".htm" first and left a stray "l" behind
# (e.g. "report.html" became "reportl").
$output_root =~ s/\.html?//;
# Counters, both starting at zero -- presumably page and note counters;
# confirm against their use later in the script.
$n = "0";
$p = "0";
# Report progress when verbose output is switched on in the configuration.
if ($verbose == 1) { print "Reading file\n"; }
# Open the Word HTML for reading. The three-argument form keeps characters
# in the path (leading/trailing spaces, ">", "|") from being interpreted as
# an open mode, and the error message now includes the system's reason.
# The bareword handle SOURCE is kept because the code that follows
# presumably reads from it by that name.
open(SOURCE, '<', $source_file) || die("can't open $source_file: $!");
while (