#!/bin/bash
# typo32html: A script to generate static html pages from a typo3 website
# originally created for ROCK Linux by Clifford Wolf
# adapted for general usage by Benjamin Schieder
# Copyright (C) 2005 Benjamin Schieder
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# ---------------------------------------------------------------------------
# Usage: typo32html <baseurl> <targetdir>
#
# Crawls <baseurl> starting at index.php?id=index and index.php?id=error,
# follows every relative src/href/lowsrc/background reference found in the
# fetched HTML pages, stores "index.php?..." pages under static *.html names
# and rewrites the links accordingly.  When the crawl has finished, the
# content of <targetdir> (except the log file typo32html.log) is replaced
# by the generated mirror.
#
# The crawl happens in a private temporary directory; <targetdir> is only
# touched after the crawl and the link rewriting have succeeded.
# ---------------------------------------------------------------------------

# Print an error message to stderr and abort.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

if [ -z "${2:-}" ]; then
  cat >&2 <<EOF
Usage:
$0 <baseurl> <targetdir>
Example:
$0 http://localhost/typo3 /home/www/localhost/html/
Note:
The content of the targetdir will be completely deleted!
EOF
  exit 1
fi

# pipefail: the crawl loop below is piped into tee; without it a failed
# crawl would go unnoticed and a partial mirror would be published.
set -eo pipefail

# A trailing slash on the base URL would produce "//" in every request.
baseurl=${1%/}

# Resolve the target to an absolute path now: the script changes directory
# later, so a relative path would point somewhere else by then.
cd -- "$2" || die "cannot enter target directory: $2"
target=$PWD
[[ "$target" != "/" ]] || die "refusing to use / as target directory"

logfile="$target/typo32html.log"
date >> "$logfile"

# Private, unpredictable work directory; removed on every exit path.
workdir=$(mktemp -d "${TMPDIR:-/tmp}/typo32html.XXXXXX") \
  || die "cannot create temporary directory"
trap 'rm -rf -- "$workdir"' EXIT
cd -- "$workdir"

: > urlsubst.sed
# todo.txt lines have the form "<referring page> <url relative to baseurl>".
printf '%s\n' 'init index.php?id=index' 'init index.php?id=error' > todo.txt

while [ -s todo.txt ]; do
  : > todo.new
  while read -r src x; do
    printf 'Fetching [%s] %s ... ' "$src" "$x"

    if [[ "$x" == *\?* ]]; then
      # Dynamic page: store it under a static name.  A plain
      # "index.php?id=NAME" becomes NAME.html, anything else is stored
      # under the md5 sum of the complete URL.
      page="${x##*id=}"
      page="${page%%&*}.html"
      if [[ "$x" != "index.php?id=${page%.html}" ]]; then
        page="$(printf '%s\n' "$x" | md5sum | cut -f1 -d' ').html"
      fi
      printf 'rewrite.\nFetching [%s] %s ... ' "$src" "$page"
      if [ -e "$page" ]; then
        echo "dup."
        continue
      fi

      # Backslash-escape [ and ]: needed in the sed pattern below and
      # also by curl, which would otherwise treat them as URL globs.
      escaped=$(printf '%s\n' "$x" | sed 's/[][]/\\&/g')

      # Match the URL including the surrounding quotes so that id=4 and
      # id=44 don't get screwed up.
      printf 's,"%s","%s",g\n' "$escaped" "$page" >> urlsubst.sed

      # Links inside HTML carry "&amp;" between parameters; the server
      # has to be asked with a plain "&".
      fetch=$(printf '%s\n' "$escaped" | sed 's/&amp;/\&/g')

      printf '%s\n' "curl -s -D http.txt -o $page $baseurl/$fetch"
      curl -s -D http.txt -o "$page" "$baseurl/$fetch"
      x="$page"
    elif [ -e "$x" ]; then
      echo "dup."
      continue
    else
      # Static file: mirror it under its own path.
      if [[ "$x" == */* ]]; then
        mkdir -p -- "${x%/*}"
      fi
      printf '%s\n' "curl -s -D http.txt -o $x $baseurl/$x"
      curl -s -D http.txt -o "$x" "$baseurl/$x"
    fi

    if grep -iq '^HTTP/[^ ]* 404' http.txt; then
      rm -f -- "$x"
      # Best effort: drop directories that became empty.  rmdir fails for
      # non-empty directories, which must not abort the crawl under set -e.
      if [[ "$x" == */* ]]; then
        rmdir -p -- "${x%/*}" 2> /dev/null || true
      fi
      echo "not found."
    elif grep -iq '^location: ' http.txt; then
      loc="$(grep -i '^location: ' http.txt | cut -f2- -d' ' | tr -d '\r')"
      # Replace the response body by a small page that forwards to the
      # redirect target.
      printf '%s\n' \
        "<html><head><meta http-equiv=\"refresh\" content=\"0; url=$loc\"></head>" \
        "<body><a href=\"$loc\">Click here</a> if you are not redirected automatically.</body></html>" \
        > "$x"
      echo "redirected to $loc."
    elif [ -s "$x" ]; then
      if [[ "$x" == *.html ]]; then
        # Queue every relative reference of this page that has not been
        # downloaded yet.
        subs=0
        while IFS= read -r fn; do
          case "$fn" in
            *://* | mailto:*) continue ;;
          esac
          if ! [ -e "$fn" ]; then
            printf '%s %s\n' "$x" "$fn" >> todo.new
            # Not (( subs++ )): that returns status 1 when subs is 0 and
            # would terminate the loop under set -e.
            subs=$((subs + 1))
          fi
        done < <(
          tr ' \t<>' '\n\n\n\n' < "$x" \
            | grep -E -i '^(src|href|lowsrc|background)=' \
            | tr -d '"' \
            | cut -f2- -d= \
            | cut -f1 -d'#' \
            | sort -u \
            | grep -E -v '^[^\?]*[\./]\.' \
            | grep '^[^\./&]' \
            | grep -E -v 'javascript:'
        )
        echo "exported (found $subs new items)."
      else
        echo "exported."
      fi
    fi
  done < todo.txt
  sort -u < todo.new > todo.txt
done | tee -a "$logfile"

# Without an error page the mirror is incomplete; stop before the target
# directory is touched.
[ -e error.html ] || die "error.html was not fetched; target left untouched"

printf '%s\n' \
  "s,openPic('showpic.php,openPic('$baseurl/showpic.php,g" >> urlsubst.sed

# Apply the collected link rewrites to every generated page.
for x in *.html; do
  sed -f urlsubst.sed < "$x" > "$x.new"
  mv -- "$x.new" "$x"
done

printf '%s\n' 'ErrorDocument 404 /error.html' > .htaccess

# NOTE(review): this prepends an empty line to error.html; the intended
# content of that first line is not evident from the code - confirm.
{
  echo ""
  cat error.html
} > error.new
mv -- error.new error.html

rm -f todo.txt todo.new http.txt urlsubst.sed

# Replace the content of the target directory (hidden files included) by
# the mirror.  The log file is kept so that it accumulates across runs.
shopt -s dotglob nullglob
for entry in "$target"/*; do
  if [[ "$entry" != "$logfile" ]]; then
    rm -rf -- "$entry"
  fi
done
cd -- "$target"
for entry in "$workdir"/*; do
  mv -- "$entry" "$target/"
done
shopt -u dotglob nullglob

echo "Done fetching $baseurl to $target"