<?php
	/*
	 * Useful functions for doing operations on text etc
	 * Version 0.0.07 2005-08-22
	 */

	/*
	 * Return $string as UTF-8.
	 *
	 * Input that already validates as UTF-8 (see isUTF8) is handed back
	 * untouched; anything else is run through utf8_encode(), which treats
	 * the bytes as ISO-8859-1.
	 *
	 * NOTE(review): utf8_encode() is deprecated as of PHP 8.2.
	 */
	function toUTF8($string)
	{
		return isUTF8($string) ? $string : utf8_encode($string);
	}

	/*
	 * Format a timestamp as "YYYY-MM-DD HH:MM:SS".
	 *
	 * Despite its name, $string is passed to date() as the timestamp
	 * argument. The result is rendered in the default timezone used by date().
	 */
	function toDate($string)
	{
		$format = "Y-m-d H:i:s";

		return date($format, $string);
	}


/*** Helper functions ***/
	/*
	 * Validate Unicode UTF-8 Version 4
	 * Reference: table 3.6 of http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf
	 *
	 * Returns true when $str consists solely of well-formed UTF-8 sequences
	 * (the empty string counts as valid) and false otherwise. Rejected are:
	 *  - stray continuation bytes and the lead bytes C0, C1 and F5..FF
	 *  - overlong forms (E0 followed by 80..9F, F0 followed by 80..8F)
	 *  - ED followed by A0..BF and F4 followed by 90..BF
	 *  - a sequence that is cut off by the end of the string
	 */
	function isUTF8($str)
	{
		// unpack() hands back a 1-based array holding one integer per byte
		$bytes = unpack('C*', $str);
		$total = count($bytes);
		if ($total == 0)
		{
			return true; // Empty string *is* valid UTF-8
		}

		$pos = 1;
		while ($pos <= $total)
		{
			$lead = $bytes[$pos];
			$pos++;

			if ($lead < 0x80)
			{
				continue; // single-byte (ASCII) character
			}
			if ($lead < 0xC2 || $lead > 0xF4)
			{
				return false; // byte value that may never start a sequence
			}

			/* How many continuation bytes the lead byte announces */
			if ($lead < 0xE0)
			{
				$pending = 1;
			}
			else if ($lead < 0xF0)
			{
				$pending = 2;
			}
			else
			{
				$pending = 3;
			}

			/* Some lead bytes narrow the range allowed for the byte right after them */
			$min = 0x80;
			$max = 0xBF;
			if ($lead == 0xE0)
			{
				$min = 0xA0;
			}
			else if ($lead == 0xED)
			{
				$max = 0x9F;
			}
			else if ($lead == 0xF0)
			{
				$min = 0x90;
			}
			else if ($lead == 0xF4)
			{
				$max = 0x8F;
			}

			for ($seen = 0; $pending > 0 && $pos <= $total; $seen++)
			{
				$next = $bytes[$pos];
				$pos++;

				if (($next & 0xC0) != 0x80)
				{
					return false; // not of the form 10xxxxxx
				}
				if ($seen == 0 && ($next < $min || $next > $max))
				{
					return false;
				}
				$pending--;
			}

			if ($pending > 0)
			{
				return false; // incomplete sequence at end of string
			}
		}

		return true;
	}


	/*
	 * Get the current Unix time in seconds.microseconds
	 *
	 * Returns a float: whole seconds since the Unix epoch plus the
	 * fractional microsecond part. microtime(true) yields that value
	 * directly, so the "usec sec" string no longer has to be split and
	 * re-added by hand.
	 */
	function getmicrotime()
	{
		return microtime(true);
	}


	/*
	 * Find URIs in free text.
	 *
	 * A URI starts at a word boundary with one of the schemes http, https,
	 * ftp or file followed by ":", and continues over word characters and
	 * the usual URI punctuation. It stops as early as possible: either in
	 * front of trailing punctuation (. : ? -) that is followed by a
	 * character which cannot be part of a URI, or at the end of the text.
	 *
	 * Returns the list of matched URIs, in order of appearance.
	 */
	function getURIs($text)
	{
		$schemes = '(http|file|ftp|https)';
		$trailing = '.:?\-';
		$allowed = '\w' . '/#~:.?+=&%@!\-' . $trailing;

		$found = array();
		preg_match_all("{
					\b
					$schemes   :
					[$allowed] +?


					(?=
						[$trailing] *
						[^$allowed]
					|
						$
					)
				}x", $text, $found);

		// Index 0 holds the complete matches
		return $found[0];
	}

	/*
	 * Collect the values of all href attributes found in $html.
	 *
	 * $html      markup to scan (matching of "href=" is case-insensitive)
	 * $basehref  URI of the document the markup came from, or null; when
	 *            given, relative links are made absolute against it:
	 *              '/path'   becomes 'scheme://host[:port]/path'
	 *              http..., mail..., ftp: links are left alone
	 *              anything else is appended to $basehref
	 *
	 * Returns a list of URIs in document order (duplicates included).
	 *
	 * NOTE(review): values taken from double-quoted and unquoted hrefs
	 * carry a trailing "\n", values from single-quoted hrefs do not. This
	 * long-standing quirk is kept because callers may rely on it.
	 */
	function getURIs_new($html, $basehref)
	{
		$uris = array();

		preg_match_all("|href=([^>]+)|i", $html, $matches);
		foreach ($matches[1] as $value)
		{
			// $value is never empty: the pattern requires at least one character
			if ($value[0] == "\"")
			{
				$parts = explode("\"", $value);
				$uris[] = $parts[1] . "\n";
			}
			else if ($value[0] == "'")
			{
				$parts = explode("'", $value);
				$uris[] = $parts[1];
			}
			else
			{
				// Unquoted: everything up to the closing '>' is taken
				$uris[] = $value . "\n";
			}
		}

		if ($basehref != null)
		{
			/*
			 * Build 'scheme://host[:port]' once. parse_url() may fail or
			 * return no host; root-relative links are then left untouched.
			 */
			$origin = null;
			$pieces = parse_url($basehref);
			if (is_array($pieces) && isset($pieces["host"]))
			{
				$scheme = isset($pieces["scheme"]) ? $pieces["scheme"] : "http";
				$origin = $scheme . "://" . $pieces["host"];
				if (isset($pieces["port"]))
				{
					$origin .= ":" . $pieces["port"];
				}
			}

			foreach ($uris as $i => $uri)
			{
				$prefix = substr($uri, 0, 4);

				// substr() instead of $uri[0]: an empty href must not trigger an offset error
				if (substr($uri, 0, 1) === "/")
				{
					if ($origin !== null)
					{
						$uris[$i] = $origin . $uri;
					}
				}
				else if ($prefix == "http" || $prefix == "mail" || $prefix == "ftp:")
				{
					// already absolute, nothing to do
				}
				else
				{
					$uris[$i] = $basehref . $uri;
				}
			}
		}

		return $uris;
	}

	/*
	 * Convenience wrapper: extract and filter the links in $text without
	 * resolving them against a base URI.
	 *
	 * Returns whatever getFilteredURIsWithBase($text, null) returns. The
	 * result used to be computed and then dropped because the return
	 * statement was missing, so callers always received null.
	 */
	function getFilteredURIs($text)
	{
		return getFilteredURIsWithBase($text, null);
	}

	/*
	 * Extract the links from $text (via getURIs_new), drop duplicates and
	 * remove entries that are useless or unwanted.
	 *
	 * $text      markup to scan
	 * $basehref  base URI handed through to getURIs_new, or null
	 *
	 * An entry is removed when it is empty or merely a bare scheme
	 * ("http", "https:", "http://", ...), or when it contains one of the
	 * filter substrings below.
	 *
	 * Returns the remaining URIs as a list with consecutive keys from 0.
	 *
	 * NOTE(review): getURIs_new appends "\n" to most of its values, so the
	 * exact comparisons against the bare-scheme list only hit entries that
	 * came from single-quoted hrefs; confirm whether that is intended.
	 */
	function getFilteredURIsWithBase($text, $basehref)
	{
		/* Substrings that mark a URI as unwanted */
		$filter = array(
				0 => "http://www.google.com/ads_by_google.html",
				1 => "imageads.",
				2 => "pagead",
				3 => "http://www.w3.org/1999/xhtml",
				4 => "http://www.securityfocus.com/sponsor/"
			);

		/* Values that are dropped on an exact match */
		$junk = array("", "http://", "http:", "http", "https", "https:", "https://");

		/*
		 * array_unique() keeps the original keys, so the result can have
		 * gaps. Iterate over the values rather than over 0..count-1:
		 * otherwise missing offsets are read and the entries behind a
		 * removed duplicate are never checked.
		 */
		$kept = array();
		foreach (array_unique(getURIs_new($text, $basehref)) as $uri)
		{
			if (in_array($uri, $junk, true))
			{
				continue;
			}

			$unwanted = false;
			foreach ($filter as $needle)
			{
				if (strpos($uri, $needle) !== false)
				{
					$unwanted = true;
					break;
				}
			}

			if (!$unwanted)
			{
				$kept[] = $uri;
			}
		}

		return $kept;
	}

	/*
	 * Better integer-checker
	 *
	 * True when $x is numeric (a number or a numeric string) and its
	 * integer conversion compares equal to it, e.g. 5, "42" or 4.0;
	 * false for everything else, e.g. "4.2", "abc", "" or null.
	 */
	function myIsInt($x)
	{
		return is_numeric($x) && intval($x) == $x;
	}



	/*
	 * Read a parameter from $_REQUEST.
	 *
	 * $paramname  key to look up in $_REQUEST
	 * $default    value returned when the key is not set; it also decides
	 *             the type of the result: if $default passes myIsInt(), the
	 *             request value is converted with intval(), otherwise it is
	 *             returned exactly as received
	 */
	function getRequestParam($paramname, $default)
	{
		if (!isset($_REQUEST[$paramname]))
		{
			return $default;
		}

		$value = $_REQUEST[$paramname];

		return myIsInt($default) ? intval($value) : $value;
	}


	/*
	 * Check whether $uri looks like a valid http or https URI.
	 *
	 * Beware - it only validates schemes: http and https, and it only takes
	 * into account the host and port part of the uri. It does not accept
	 * username and password. The host must end in a 2-5 letter top level
	 * domain, and a port is only accepted when a "/" path follows it.
	 * For an email validator you could look at http://gaarsmand.com/index.php/IT_l%F8sninger/Kode_eksempler/PHP_kode.
	 *
	 * Returns true when the pattern matches, false otherwise.
	 */
	function isValidURI($uri)
	{
		$pattern = '/^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}'
				. '((:[0-9]{1,5})?\/.*)?$/i';

		return preg_match($pattern, $uri) ? true : false;
	}


?>
