Bonjour,
Je cherche depuis pas mal de temps un script php qui marcherait chez free pour creer un sitemap pour des pages dynamiques et htm.
J'ai finallement trouve ce script php http://xml-sitemap-generator.com/sitemap.zip
par contre quelque chose doit bugguer, il ne me prend pas tous mes directories et souvent plante.
si quelqu'un pouvait m'aider avec ce script ou un autre (j'en ai teste tellement sans succes)...
mes pages dynamiques sont un annuaire, et le livre d'or alex guestbook
merci pour votre aide
Je cherche depuis pas mal de temps un script php qui marcherait chez free pour creer un sitemap pour des pages dynamiques et htm.
J'ai finallement trouve ce script php http://xml-sitemap-generator.com/sitemap.zip
Code:
<?php
#To ignore #404 Page Not Found error, and do not include them in sitemap
error_reporting(0);
# The script is free of charge.
# Mandatory disclaimer: THIS SCRIPT CARRIES NO WARRANTY OR GUARRANTEE WHAT SO EVER. USE IS AT YOUR OWN RISK.
# The site url to crawl. Remember to include the slash ( / ) at the end. (EX: http://www.site.com/)
$siteurl = 'http://'.$_SERVER["HTTP_HOST"].'/';
# The frequency of updating. Valid settings are: always, hourly, daily, weekly, monthly, yearly, never
# The value "always" should be used to describe documents that change each time they are accessed. The value "never" should be used to describe archived URLs.
$frequency = "weekly";
# Priority of page in relation to other parts of your site. A number from 0.1 to 1.0 is acceptable.
$priority = "0.5";
# Include last modification date. Options are: true or false
# The date of last modification of the file. This date should be in W3C Datetime format. This format allows you to omit the time portion, if desired, and use YYYY-MM-DD.
$lastmodification = true;
# File extensions to include in sitemap.
$extensions = array("htm", "html", "php", "asp", "pdf");
# Try to index dynamic web pages that have a parameter in there url (?). Valid options
# are true or false. Use this at your own risk, could capture session info
# which possibly could cause problems during the Google index process.
$index_dynamic_pages_params = true;
# First do a check that allow_url_fopen is set to on
if(ini_get("allow_url_fopen") != 1)
die("The php.ini directive 'allow_url_fopen' must be set to 'On' for this script to function.\nPlease set this to 'On' in your php.ini file.\n");
# Make url compatible with Google sitemap Specifications
# As with all XML files, any data values (including URLs) must use entity escape codes for the characters listed in the table below.
# --------------------------------------
# | Character | Simbol | Escape Code |
# |---------------|--------|-------------|
# | Ampersand | & | & |
# |---------------|--------|-------------|
# | Single Quote | ' | ' |
# |---------------|--------|-------------|
# | Double Quote | " | " |
# |---------------|--------|-------------|
# | Greater Than | > | > |
# |---------------|--------|-------------|
# | Less Than | < | < |
# |---------------|--------|-------------|
function googlesitemap_compatible($url) {
$url = str_replace("&","&",$url);
$url = str_replace("'","'",$url);
$url = str_replace('"',""",$url);
$url = str_replace(">",">",$url);
$url = str_replace("<","<",$url);
return $url;
}
# Gets a URLs path minus the actual filename + query.
function getPath($url) {
if($GLOBALS['index_dynamic_pages_params'] == true) {
$url = explode("?", $url);
$url = $url[0];
}
$temp = explode("/", $url);
$fnsize=strlen($temp[(count($temp) - 1)]);
return substr($url, 0, strlen($url) - $fnsize);
}
# Cleans up a path so that extra / slashes are gone, .. are translated, etc
function cleanPath($url) {
$new = array();
$url = explode("/", trim($url));
foreach($url as $p) {
$p = trim($p);
if($p != "" && $p != ".") {
if($p == "..") {
if(is_array($new))
$new = array_pop($new);
} else {
$new = array_merge((array) $new, array($p));
}
}
}
$url = $new[0]."/";
for($i=1; $i < count($new); $i++)
$url .= "/".$new[$i];
return $url;
}
# Checks if URL has specified extension, if so returns true
function checkExt($url, $ext) {
# Strip out parameter info from a script (?)
if($GLOBALS['index_dynamic_pages_params'] == true) {
$url = explode("?", $url);
$url = $url[0];
}
$text=substr($url, strlen($url) - (strlen($ext) + 1), strlen($url));
if($text == ".".$ext)
return true;
else
return false;
}
# Retrieve Site URLs
function getUrls($url, $string) {
$type = "href";
# Regex to chop out urls
preg_match_all("|$type\=\"?'?`?([[:alnum:]:?=&@/._-]+)\"?'?`?|i", $string, $matches);
$ret[$type] = $matches[1];
# Make all URLS literal (full path)
for($i = 0; $i < count($ret['href']); $i++) {
if(! preg_match( '/^(http|https):\/\//i' , $ret['href'][$i]))
$ret['href'][$i] = getPath($url)."/".$ret['href'][$i];
$ret['href'][$i] = cleanPath($ret['href'][$i]);
}
return $ret;
}
function addUrls($urls) {
if(is_array($urls))
for($i=0; $i < count($urls['href']); $i++) {
$skip = 0;
# Cycle through to make sure url is unique
for($x=0; $x < count($GLOBALS['urls']); $x++)
if($GLOBALS['urls'][$x] == $urls['href'][$i]) {
$skip = 1;
break;
}
# Check extension
$extgood = 0;
foreach($GLOBALS['extensions'] as $ext)
if(checkExt($urls['href'][$i], $ext))
$extgood = 1;
# And finally make sure its in the current website
if(! stristr($urls['href'][$i], $GLOBALS['siteurl']))
$skip = 1;
if($skip == 0 && $extgood == 1)
$GLOBALS['urls'][] = $urls['href'][$i];
}
}
function getNextUrl($oldurl) {
if($oldurl == "")
return $GLOBALS['urls'][0];
for($i=0; $i < count($GLOBALS['urls']); $i++)
if($GLOBALS['urls'][$i] == $oldurl)
if(isset($GLOBALS['urls'][($i+1)]))
return $GLOBALS['urls'][($i+1)];
else
return false;
return false;
}
$urls = array($siteurl);
#start to generate inline sitemap
echo '<?xml version="1.0" encoding="UTF-8"?>'."\n".'<!--Google Site Map File Generated by http://xml-sitemap-generator.com/ '.date("D, d M Y G:i:s T").' -->'."\n".'<urlset xmlns="https://www.google.com/schemas/sitemap/0.84">'."\n";
# if possible the script will writte on website a copy of generated sitemap
# start to writte generated sitemap
# Make sure you have writte permission to file sitemap_temp.xml
if($fp = fopen("sitemap_temp.xml", "w")) {
$open_file = 'sucess';
fputs($fp, '<?xml version="1.0" encoding="UTF-8"?>'."\n".'<!--Google Site Map File Generated by http://xml-sitemap-generator.com/ '.date("D, d M Y G:i:s T").' -->'."\n".'<urlset xmlns="https://www.google.com/schemas/sitemap/0.84">'."\n");
}
# Log 404 Error Page Not Found
# Make sure you have writte access to file 404error.txt
if($fp_err = fopen("404error.txt", "w")) $open_file_404 = 'sucess';
$turl = "";
# Cycle through tree and build a url list
while($turl = getNextUrl($turl)) {
# Extend script time limit
set_time_limit(3000);
# Read html file into memory
if($html = file($turl)) {
$html = stripslashes(implode($html));
echo '<url>'."\n\t".'<loc>'.googlesitemap_compatible($turl).'</loc>'."\n";
if($lastmodification == true)
echo "\t".'<lastmod>'.date("Y-m-d").'</lastmod>'."\n";
echo "\t".'<changefreq>'.$frequency.'</changefreq>'."\n\t".'<priority>'.$priority.'</priority>'."\n".'</url>'."\n";
# Get site urls from html and add new unique url to list if needed
addUrls(getUrls($turl, $html));
#writte the same thing above on website if you have permission to writte
if($open_file == 'sucess') {
fputs($fp,'<url>'."\n\t".'<loc>'.googlesitemap_compatible($turl).'</loc>'."\n");
if($lastmodification == true)
fputs($fp,"\t".'<lastmod>'.date("Y-m-d").'</lastmod>'."\n");
fputs($fp,"\t".'<changefreq>'.$frequency.'</changefreq>'."\n\t".'<priority>'.$priority.'</priority>'."\n".'</url>'."\n");
}
} else {
# check if 404error.txt was sucsefuly opened
if($open_file_404 == 'sucess') fputs($fp_err, $turl."\n");
}
}
echo '</urlset>';
if($open_file == 'sucess') {
fputs($fp, '</urlset>');
fclose($fp);
# Make sure you have writte access to file sitemap_OK.xml
# To track evolution of your google sitemap you can replace sitemap_temp.xml with "sitemap_OK_".date(d-m-y).".xml"
copy('sitemap_temp.xml','sitemap_OK.xml');
if($open_file_404 == 'sucess') fclose($fp_err);
}
?>
par contre quelque chose doit bugguer, il ne me prend pas tous mes directories et souvent plante.
si quelqu'un pouvait m'aider avec ce script ou un autre (j'en ai teste tellement sans succes)...
mes pages dynamiques sont un annuaire, et le livre d'or alex guestbook
merci pour votre aide