Blog : How can I check if a URL exists via PHP?

How can I check if a URL exists via PHP?

// Probe a remote file and decide whether it exists, based on the status line
// (first element) of the response headers.
$file = 'http://www.domain.com/somefile.jpg';
$file_headers = @get_headers($file);
// get_headers() returns false when the request itself fails (unknown host,
// refused connection, ...). Treat that as "does not exist" instead of
// indexing into false, which previously fell through to $exists = true.
// The status code is matched on its own so that "HTTP/1.0 404 Not Found" or
// a 404 with a different reason phrase is recognised as well.
if ($file_headers === false
    || !isset($file_headers[0])
    || preg_match('/^HTTP\/\S+\s+404\b/', $file_headers[0])) {
  $exists = false;
}
else {
  $exists = true;
}
From here: http://www.php.net/manual/en/function.file-exists.php#75064
...and right below the above post, there's a curl solution:
/**
 * Checks whether a URL answers with a non-error HTTP status.
 *
 * curl_init() only allocates a handle and never contacts the server, so the
 * request has to be executed before anything can be said about the URL.
 * A header-only request is sent, redirects are followed, and the final
 * status code decides the result.
 *
 * @param string $url URL to probe.
 * @return bool true when the transfer succeeded and the final status is 2xx/3xx,
 *              false on any transfer error or on a 4xx/5xx status.
 */
function url_exists($url) {
  $fp = curl_init($url);
  if ($fp === false) {
    return false;
  }
  curl_setopt($fp, CURLOPT_NOBODY, true);          // headers are enough, skip the body
  curl_setopt($fp, CURLOPT_RETURNTRANSFER, true);  // never print the response
  curl_setopt($fp, CURLOPT_FOLLOWLOCATION, true);  // judge the final target of redirects
  curl_setopt($fp, CURLOPT_MAXREDIRS, 10);         // guard against redirect loops
  curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, 5);     // seconds; do not block forever
  curl_setopt($fp, CURLOPT_TIMEOUT, 10);           // seconds; upper bound for the whole request
  $transferOk = curl_exec($fp) !== false;
  $status = (int) curl_getinfo($fp, CURLINFO_HTTP_CODE);
  curl_close($fp);
  return $transferOk && $status >= 200 && $status < 400;
}
// Fetch the response headers for the stored value; get_headers() returns
// false when the request fails, so guard before reading the status line.
$headers = @get_headers($this->_value);
// Only a status line whose code is exactly 200 counts as success; matching the
// code itself avoids a stray "200" elsewhere in the line being accepted.
if ($headers === false || !isset($headers[0]) || !preg_match('/^HTTP\/\S+\s+200\b/', $headers[0])) return false;
This way, any time you contact a website and get a response other than 200 OK, the check returns false.
there are a few things to pay attention to:
•   is the url actually valid (a string, not empty, good syntax), this is quick to check server side
•   waiting for a response might take time and block code execution
•   not all headers returned by get_headers() are well formed
•   use curl if you can
•   prevent fetching the entire body/content, but only request the headers
•   consider redirecting urls:
•   do you want the first code returned?
•   or follow all redirects and return the last code?
•   you might end up with a 200, but it could redirect using javascript... we'll never know what happens after
Keep in mind that whatever method you use, it takes time to wait for a response. All code might (and probably will) halt until you either know the result or the requests have timed out.
for example: the code below could take a LONG time to display the page if the urls are invalid or unreachable:
// Fetch the candidate links (say 10 or more external URLs).
$urls = getUrls();

// Walk the keys and drop every entry that fails validation. Each
// isValidUrl() call may block for a long time (roughly 0-30 seconds,
// depending on connection, target site and timeout settings), so the page
// is not rendered until all links have been checked.
foreach (array_keys($urls) as $k) {
  if (isValidUrl($urls[$k])) {
    continue;
  }
  unset($urls[$k]);
}

// Only now can the page be shown: print the surviving links, one per line.
echo "yay all done! now show my site";
foreach ($urls as $url) {
  echo "{$url}
";
}

?>
The functions below could be helpful; you will probably want to modify them to suit your needs:
  function isValidUrl($url){
  // first do some quick sanity checks:
  if(!$url || !is_string($url)){
  return false;
  }
  // quick check url is roughly a valid http request: ( http://blah/... )
  if( ! preg_match('/^http(s)?:\/\/[a-z0-9-]+(.[a-z0-9-]+)*(:[0-9]+)?(\/.*)?$/i', $url) ){
  return false;
  }
  // the next bit could be slow:
  if(getHttpResponseCode_using_curl($url) != 200){
//  if(getHttpResponseCode_using_getheaders($url) != 200){  // use this one if you cant use curl
  return false;
  }
  // all good!
  return true;
  }

  function getHttpResponseCode_using_curl($url, $followredirects = true){
  // returns int responsecode, or false (if url does not exist or connection timeout occurs)
  // NOTE: could potentially take up to 0-30 seconds , blocking further code execution (more or less depending on connection, target site, and local timeout settings))
  // if $followredirects == false: return the FIRST known httpcode (ignore redirects)
  // if $followredirects == true : return the LAST  known httpcode (when redirected)
  if(! $url || ! is_string($url)){
  return false;
  }
  $ch = @curl_init($url);
  if($ch === false){
  return false;
  }
  @curl_setopt($ch, CURLOPT_HEADER  ,true);  // we want headers
  @curl_setopt($ch, CURLOPT_NOBODY  ,true);  // dont need body
  @curl_setopt($ch, CURLOPT_RETURNTRANSFER ,true);  // catch output (do NOT print!)
  if($followredirects){
  @curl_setopt($ch, CURLOPT_FOLLOWLOCATION ,true);
  @curl_setopt($ch, CURLOPT_MAXREDIRS  ,10);  // fairly random number, but could prevent unwanted endless redirects with followlocation=true
  }else{
  @curl_setopt($ch, CURLOPT_FOLLOWLOCATION ,false);
  }
//  @curl_setopt($ch, CURLOPT_CONNECTTIMEOUT ,5);  // fairly random number (seconds)... but could prevent waiting forever to get a result
//  @curl_setopt($ch, CURLOPT_TIMEOUT  ,6);  // fairly random number (seconds)... but could prevent waiting forever to get a result
//  @curl_setopt($ch, CURLOPT_USERAGENT  ,"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1");  // pretend we're a regular browser
  @curl_exec($ch);
  if(@curl_errno($ch)){  // should be 0
  @curl_close($ch);
  return false;
  }
  $code = @curl_getinfo($ch, CURLINFO_HTTP_CODE); // note: php.net documentation shows this returns a string, but really it returns an int
  @curl_close($ch);
  return $code;
  }

  function getHttpResponseCode_using_getheaders($url, $followredirects = true){
  // returns string responsecode, or false if no responsecode found in headers (or url does not exist)
  // NOTE: could potentially take up to 0-30 seconds , blocking further code execution (more or less depending on connection, target site, and local timeout settings))
  // if $followredirects == false: return the FIRST known httpcode (ignore redirects)
  // if $followredirects == true : return the LAST  known httpcode (when redirected)
  if(! $url || ! is_string($url)){
  return false;
  }
  $headers = @get_headers($url);
  if($headers && is_array($headers)){
  if($followredirects){
  // we want the the last errorcode, reverse array so we start at the end:
  $headers = array_reverse($headers);
  }
  foreach($headers as $hline){
  // search for things like "HTTP/1.1 200 OK" , "HTTP/1.0 200 OK" , "HTTP/1.1 301 PERMANENTLY MOVED" , "HTTP/1.1 400 Not Found" , etc.
  // note that the exact syntax/version/output differs, so there is some string magic involved here
  if(preg_match('/^HTTP\/\S+\s+([1-9][0-9][0-9])\s+.*/', $hline, $matches) ){// "HTTP/*** ### ***"
  $code = $matches[1];
  return $code;
  }
  }
  // no HTTP/xxx found in headers:
  return false;
  }
  // no headers :
  return false;
  }

If you cannot use cURL on certain servers, you can use this code:
// Report whether the URL answers with HTTP 200, using only get_headers().
$url = 'http://www.example.com';
$array = get_headers($url);
// get_headers() returns false when the request fails; fall back to an empty
// status line instead of indexing into false.
$string = (is_array($array) && isset($array[0])) ? $array[0] : '';
// Match the status code itself rather than any "200" substring in the line.
if(preg_match('/^HTTP\/\S+\s+200\b/', $string))
  {
  echo 'url exists';
  }
  else
  {
  echo 'url does not exist';
  }
?>
/**
 * Tells whether a URL responds with a status other than 404, 403 or 500.
 *
 * Only the first status line returned by get_headers() is inspected.
 *
 * @param string $URL URL to probe.
 * @return bool false when the request fails, when no status code can be read,
 *              or when the code is 404, 403 or 500; true otherwise.
 */
function URLIsValid($URL)
{
  $file_headers = @get_headers($URL);
  // A failed request yields false; previously that case fell through the loop
  // untouched and the URL was reported as valid.
  if (!is_array($file_headers) || !isset($file_headers[0]))
  {
    return false;
  }
  // Read the three-digit status code so the check compares the code itself
  // instead of searching for "404"/"403"/"500" anywhere in the line.
  if (!preg_match('/^HTTP\/\S+\s+([0-9]{3})/', $file_headers[0], $StatusParts))
  {
    return false;
  }
  $InvalidHeaders = array('404', '403', '500');
  return !in_array($StatusParts[1], $InvalidHeaders, true);
}
// Demonstrates probing a URL by downloading it with file_get_contents().
// Note that this fetches the whole body, not just the headers.
$url = 'http://google.com';
$not_url = 'stp://google.com';

// file_get_contents() returns false on failure but may legitimately return an
// empty string (or "0") for an existing resource, so compare against false
// explicitly instead of relying on truthiness.
if (@file_get_contents($url) !== false): echo "Found '$url'!";
else: echo "Can't find '$url'.";
endif;
// The success message now closes its quote, matching the first message.
if (@file_get_contents($not_url) !== false): echo "Found '$not_url'!";
else: echo "Can't find '$not_url'.";
endif;

// Found 'http://google.com'!Can't find 'stp://google.com'.
pretty fast:
/**
 * Requests $url with curl and reports whether the status code is acceptable.
 *
 * @param string $url URL to request.
 * @return int 1 when the response code is 200, 302 or 304; 0 otherwise
 *             (including when the curl handle cannot be created).
 */
function http_response($url){
  $resURL = curl_init();
  if ($resURL === false) {
    return 0;
  }
  curl_setopt($resURL, CURLOPT_URL, $url);
  // Capture the response instead of letting curl print the body to the page.
  curl_setopt($resURL, CURLOPT_RETURNTRANSFER, true);
  // The header callback is registered only if the host application actually
  // defines it; pointing curl at an undefined function breaks the transfer.
  if (is_callable('curlHeaderCallback')) {
    curl_setopt($resURL, CURLOPT_HEADERFUNCTION, 'curlHeaderCallback');
  }
  // Make curl treat HTTP codes >= 400 as a failed transfer.
  curl_setopt($resURL, CURLOPT_FAILONERROR, 1);
  curl_exec ($resURL);
  $intReturnCode = (int) curl_getinfo($resURL, CURLINFO_HTTP_CODE);
  curl_close ($resURL);
  return in_array($intReturnCode, array(200, 302, 304), true) ? 1 : 0;
}

// Print each label followed by the 1/0 result of http_response() for its URL.
// The label is echoed before the request is made, one site after the other.
$probes = array(
  'google:'   => 'http://www.google.com',
  '/ ogogle:' => 'http://www.ogogle.com',
);
foreach ($probes as $label => $target) {
  echo $label;
  echo http_response($target);
}
/**
 * Tells whether a URL responds with an HTTP status below 400.
 *
 * Only the first status line returned by get_headers() is inspected.
 *
 * @param string $url URL to probe.
 * @return bool true for a status code below 400; false for 4xx/5xx, for a
 *              failed request, or when no status code can be read.
 */
function urlIsOk($url)
{
  $headers = @get_headers($url);
  // get_headers() returns false on failure. Previously that produced a status
  // of 0, which is "< 400", so unreachable URLs were reported as OK.
  if (!is_array($headers) || !isset($headers[0]))
  {
    return false;
  }
  // Parse the code out of the status line instead of relying on a fixed
  // offset, which only works while the protocol token is 8 characters long.
  if (!preg_match('/^HTTP\/\S+\s+([0-9]{3})/', $headers[0], $statusParts))
  {
    return false;
  }
  $httpStatus = intval($statusParts[1]);
  return $httpStatus < 400;
}
Try it: I'm using this and it works very well. It may be helpful to you too, thanks.
// Report whether the URL answers with HTTP 200, using only get_headers().
$url = 'http://www.example.com';
$array = get_headers($url);
// get_headers() returns false when the request fails; fall back to an empty
// status line instead of indexing into false.
$string = (is_array($array) && isset($array[0])) ? $array[0] : '';
// Match the status code itself rather than any "200" substring in the line.
if(preg_match('/^HTTP\/\S+\s+200\b/', $string)) {
  echo 'url exists';
} else {
  echo 'url does not exist';
}
The simple way is cURL (and it is faster too):
// Request the page with curl and print 'OK' when it is reachable, 'NO' otherwise.
$mylinks="http://site.com/page.html";
$handlerr = curl_init($mylinks);
curl_setopt($handlerr,  CURLOPT_RETURNTRANSFER, TRUE);  // capture the body instead of printing it
$resp = curl_exec($handlerr);
$ht = curl_getinfo($handlerr, CURLINFO_HTTP_CODE);
curl_close($handlerr);  // release the handle once the status code has been read

// The original test was inverted: it printed 'OK' exactly when the server
// answered 404. A URL is considered present when the transfer succeeded and
// the status is 2xx or 3xx (redirects are not followed here).
if ($resp !== false && $ht >= 200 && $ht < 400)
  { echo 'OK';}
else { echo 'NO';}

?>