Mailing List Archive

Sitemap.xml file and directory enumeration
The Sitemap Protocol allows you to inform search engines about URLs on your
websites that are available for crawling. In its simplest form, a Sitemap
that uses the Sitemap Protocol is an XML file that lists URLs for a site.

It has been discovered that many site owners do not build their
Sitemap.xml by spidering the site, but by running a script over their web
root directory structure. An attacker who can access such a sitemap.xml is
therefore able to enumerate all files and directories in the webserver root.

Regards
Gareth

gareth@sensepost.com
www.sensepost.com



# Written by Gareth Phillips - SensePost PTY ltd
# www.sensepost.com
#

# Plugin description text. It is passed to script_description() during
# registration and is also prepended to the report when a sitemap.xml is found.
desc["english"] = "
Synopsis :

The remote web server contains a 'sitemap.xml' file.

Description :

The Sitemap Protocol allows you to inform search engines about URLs on your
websites that are available for crawling. In its simplest form, a Sitemap
that uses the Sitemap Protocol is an XML file that lists URLs for a site.

It has been discovered that many site owners are not building their
Sitemap.xml through spidering, but by scripted runs on their web root
directory structures. An attacker now able to access sitemap.xml is now
able to enumerate all files and directories in the webserver root.

Solution :

Site owners should be wary about automated generation on the sitemap.xml
file, admins should review the contents of their sitemap.xml file for
sensitive material.

Risk factor :

None";

# Registration branch: when `description` is set, only declare the plugin's
# metadata and leave before any network activity takes place.
if (description)
{
  # NOTE(review): 123456 looks like a placeholder ID - confirm before publishing.
  script_id(123456);
  script_version("$Revision: 1.00 $");

  # Human-readable name, long description and one-line summary.
  script_name(english:"Sitemap.xml File and Directory Enumeration");
  script_description(english:desc["english"]);
  script_summary(english:"Checks for a web server's sitemap.xml");

  # Classification of the check.
  script_category(ACT_GATHER_INFO);
  script_family(english:"CGI abuses");

  script_copyright(english:"This script is Copyright...");

  # Declared dependency and the service/port the check applies to.
  script_dependencie("http_version.nasl");
  script_require_ports("Services/www", 80);

  exit(0);
}


include("http_func.inc");
include("http_keepalive.inc");
include("global_settings.inc");


# Detection phase: request sitemap.xml from each candidate directory and
# report the first one that returns an XML document, together with the
# response lines that contain <loc> entries.

port = get_http_port(default:80);
if (!get_port_state(port)) exit(0);

# Candidate directories come from the knowledge base. The key needs a '/'
# between the port number and "content/directories"; without it the lookup
# can never match and the defaults below are always used.
dirs = get_kb_list(string("www/", port, "/content/directories"));
if (isnull(dirs)) dirs = make_list("/", "/sitemap", "/map"); # Just some defaults

dirs = make_list(dirs, cgi_dirs());

foreach d (dirs)
{
  # Build the request path without doubling the slash when the directory
  # already ends in '/' (e.g. "/" must give "/sitemap.xml", not "//sitemap.xml").
  if (ereg(pattern:"/$", string:d)) url = string(d, "sitemap.xml");
  else url = string(d, "/sitemap.xml");

  # Trying to retrieve the file.
  req = http_get(item:url, port:port);
  res = http_keepalive_send_recv(port:port, data:req, bodyonly:TRUE);

  # Stop only when no response was obtained at all. isnull() is used because
  # a comparison with NULL is also true for an empty body, which would abort
  # the scan on the first empty reply.
  if (isnull(res)) exit(0);

  if (res && "?xml version" >< res)
  {
    # Keep the response lines that carry <loc> elements.
    match = egrep(string:res, pattern:"<loc>(.+)</loc>");

    # Report only the directory where the file was actually found, not every
    # directory probed on the way to it.
    report = string(
      desc["english"],
      "\n\n",
      "Plugin output : \n",
      "sitemap.xml found under the following directory: \n",
      d, "\n",
      "\n",
      match
    );

    security_note(port:port, data:report);
    exit(0);
  }
}