#!/usr/bin/perl
# sitemap.pl - very simple sitemap generator which publishes *only* the files listed in the $whitelist file in a sitemap file

# I keep sitemap.pl in my web server's cgi-bin directory
# I have the following in my robots.txt file - sitemap: http://www.example.com/sitemap.xml
# my sitemap.xml which points search engines to sitemap.pl is included in this package


use strict;
use warnings;

# edit the following three lines to suit your site
my $website   = "http://www.example.com"; # public view of your website
my $sitepath  = "/path/to/my/website";    # where the website is stored on the server
my $whitelist = "sitelist.txt";           # file with the list of pages you want published as a sitemap

# xml_escape($text)
# Returns $text with the five XML special characters replaced by their
# predefined entities, so it can be placed safely inside an element.
sub xml_escape {
    my ($text) = @_;
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&apos;',
    );
    $text =~ s/([&<>"'])/$entity_for{$1}/g;
    return $text;
}

# w3c_datetime($epoch)
# Returns $epoch (seconds since the Unix epoch) formatted as a W3C datetime
# string.  gmtime is used so that the literal "+00:00" offset is truthful
# regardless of the server's local time zone.
sub w3c_datetime {
    my ($epoch) = @_;
    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime($epoch);
    return sprintf(
        "%04d-%02d-%02dT%02d:%02d:%02d+00:00",
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec
    );
}

# Every file is addressed below by its absolute path, so a failed chdir is
# reported (on STDERR, i.e. the server error log under CGI) but is not fatal.
chdir($sitepath) or warn "cannot chdir to $sitepath: $!\n";

# Open the whitelist before any output is produced, so that a failure does
# not leave a half-written response behind.
my $listfile = "$sitepath/$whitelist";
open(my $list_fh, '<', $listfile) or die "cannot open $listfile: $!\n";

# CGI header (terminated by the blank line) followed by the XML prologue.
print <<EOF;
Content-type: text/xml; charset=UTF-8

<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
         xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
EOF

# One <url> element per whitelist entry; each entry is a path relative to
# both $sitepath (on disk) and $website (in the published URL).
while (my $entry = <$list_fh>) {
    chomp $entry;
    $entry =~ s/\r\z//;            # tolerate whitelist files with CRLF line endings
    next unless $entry =~ /\S/;    # a blank line would otherwise publish "$website/"

    # Field 9 of stat() is the modification time; the slice is empty (and
    # $mtime undef) when the file cannot be stat'ed.
    my ($mtime) = (stat("$sitepath/$entry"))[9];

    # <lastmod> is left out when the mtime is unknown, rather than
    # publishing a bogus 1970-01-01 date.
    my $lastmod_line = defined $mtime
        ? "      <lastmod>" . w3c_datetime($mtime) . "</lastmod>\n"
        : '';

    # NOTE: only XML entity escaping is applied here; entries are assumed to
    # be already valid as URL path components.
    my $loc = xml_escape("$website/$entry");

    print "<url>\n",
          "      <loc>$loc</loc>\n",
          $lastmod_line,
          "</url>\n";
}

close($list_fh);

print <<EOF;
</urlset>
EOF

