Source for file _functions_hitlogs.php
Documentation is available at _functions_hitlogs.php
* Logging of hits, extraction of stats
* b2evolution - {@link http://b2evolution.net/}
* Released under GNU GPL License - {@link http://b2evolution.net/about/license.html}
* @copyright (c)2003-2005 by Francois PLANQUE - {@link http://fplanque.net/}
* @author This file built upon code by N C Young (nathan@ncyoung.com) (http://ncyoung.com/entry/57)
// Direct-access guard: stop the script with a message unless the DB_USER
// constant has already been defined by the time this file is loaded.
if( ! defined( 'DB_USER' ) )
{
	die( 'Please, do not access this page directly.' );
}
//get most linked to pages on site
//select count(visitURL) as count, visitURL from b2hitlog group by visitURL order by count desc
$ar = refererList($refererList,"global");
print join("<br />",$ar);
print join("<br />",topRefererList($topRefererList,"global"));
* Log a hit on a blog page / rss feed
// Fragment of the hit-logging routine. Its function header and a number of
// statements, conditions and braces are not visible in this extract, so the
// notes below describe only the lines that are shown.
// Visible flow: gather request details, start with $ignore = 'no', run a series
// of referer checks that emit debug messages, then build an INSERT into $tablehitlog.
global $DB, $localtimenow, $blog, $tablehitlog, $blackList, $search_engines, $user_agents;
global $doubleCheckReferers, $comments_allowed_uri_scheme, $HTTP_REFERER, $page, $ReqURI, $ReqPath;
# TODO: check for already logged?
// Server name followed by the request URI; compared against $page further down.
$fullCurrentURL =
$_SERVER['SERVER_NAME'].
$ReqURI;
// debug_log( 'Hit Log: '. "full current url: ".$fullCurrentURL);
// debug_log( 'Hit Log: '. "referer: ".$ref);
// Client address as supplied in $_SERVER.
$RemoteAddr =
$_SERVER['REMOTE_ADDR'];
// debug_log( 'Hit Log: '. "Remote Addr: ".$RemoteAddr);
//$RemoteHost = $_SERVER['REMOTE_HOST'];
//debug_log( 'Hit Log: '. "Remote Host: ".$RemoteHost);
// User agent header, defaulting to an empty string when it is not set.
$UserAgent = isset
($_SERVER['HTTP_USER_AGENT']) ?
$_SERVER['HTTP_USER_AGENT'] :
'';
// debug_log( 'Hit Log: '. "User Agent: ".$UserAgent);
{ //then they have tried something funny,
//putting HTML or PHP into the HTTP_REFERER
// debug_log( 'Hit Log: '."Languages: ".$_SERVER['HTTP_ACCEPT_LANGUAGE']);
// Initial classification of this hit; the value ends up in the hit_ignore
// column of the INSERT built at the end of this fragment.
// NOTE(review): presumably the branches below assign other values to $ignore,
// but those assignments are not visible in this extract.
$ignore =
'no'; // So far so good
{ //then they have tried something funny,
//putting HTML or PHP into the HTTP_REFERER
// The return value of validate_url() is kept in $error; a truthy value selects this branch.
elseif( $error =
validate_url( $ref, $comments_allowed_uri_scheme ) )
{ //if they are trying to inject javascript or a blocked (spam) URL
// Plain substring test of every $blackList entry against the referer string in $ref.
foreach( $blackList as $site )
if( strpos( $ref, $site ) !==
false )
// $ignore = 'blacklist';
debug_log( 'Hit Log: '.
T_('referer ignored').
' ('.
T_('BlackList').
')');
// don't mess up the XML!! debug_log( 'Hit Log: referer ignored (RSS));
// Each $user_agents entry is indexed: [0] is compared with the robot type,
// [1] is a substring looked up (case-sensitively) in $UserAgent.
foreach ($user_agents as $user_agent)
if( ($user_agent[0] ==
'robot') &&
(strstr($UserAgent, $user_agent[1])) )
debug_log( 'Hit Log: '.
T_('referer ignored').
' ('.
T_('robot').
')');
{ // minimum http://az.fr/ , this will be considered direct access (although it could be https:)
debug_log( 'Hit Log: '.
T_('referer ignored').
' ('.
T_('invalid').
')' );
{ // identify search engines
// Loop over the configured search engine entries; the matching test itself is not visible here.
foreach($search_engines as $engine)
// debug_log( 'Hit Log: '."engine: ".$engine);
debug_log( 'Hit Log: '.
T_('referer ignored').
" (".
T_('search engine').
")");
// Optional second check, enabled by $doubleCheckReferers.
if( $doubleCheckReferers )
debug_log( 'Hit Log: '.
T_('loading referering page') );
// this is so that the page up until the call to
// logReferer will get shown before it tries to check
// back against the referring URL.
// NOTE(review): $ref appears to hold the request referer, so this opens a
// client-supplied URL with errors suppressed by @ - confirm this is acceptable.
$fp =
@fopen ($ref, 'r');
//timeout after 5 seconds
// Look for the current URL inside $page (presumably the content read from $fp;
// the read itself is not visible in this extract).
if (strstr($page,$fullCurrentURL))
debug_log( 'Hit Log: '.
T_('found current url in page') );
// Direct accesses are always good hits
{ // This was probably spam!
debug_log( 'Hit Log: '.
sprintf('did not find %s in %s', $fullCurrentURL, $page ) );
// Build the INSERT. String values go through $DB->escape(); $ignore and $blog
// are interpolated directly.
// NOTE(review): $blog is placed unquoted in the VALUES list - confirm it is always numeric.
$sql =
"INSERT INTO $tablehitlog( visitTime, visitURL, hit_ignore, referingURL, baseDomain,
hit_blog_ID, hit_remote_addr, hit_user_agent )
VALUES( FROM_UNIXTIME(".
$localtimenow.
"), '".
$DB->escape($ReqURI).
"', '$ignore',
'".
$DB->escape($ref).
"', '".
$DB->escape($baseDomain).
"', $blog,
'".
$DB->escape($RemoteAddr).
"', '".
$DB->escape($UserAgent).
"')";
// The call that executes $sql is not visible in this extract.
* Auto pruning of old stats
// Auto-pruning fragment: runs only when $stats_autoprune is set and greater than zero.
// It deletes rows whose visitTime is earlier than the calendar date lying
// $stats_autoprune days (86400 seconds each) before $localtimenow, then logs
// the value returned by $DB->query().
// NOTE(review): the table name here is the literal T_hitlog, whereas the other
// queries in this file use $tablehitlog - confirm this is intended.
// NOTE(review): the closing brace of this if block is not visible in this extract.
if( isset
($stats_autoprune) &&
($stats_autoprune >
0) )
{ // Autopruning is requested
$sql =
"DELETE FROM T_hitlog
WHERE visitTime < '".
date( 'Y-m-d', $localtimenow -
($stats_autoprune *
86400) ).
"'";
$rows_affected =
$DB->query( $sql );
debug_log( 'Hit Log: autopruned '.
$rows_affected.
' rows.' );
* {@internal hit_delete(-) }}
* @param int ID to delete
// Body fragment of hit_delete (function header not visible in this extract):
// builds a DELETE for the row of $tablehitlog whose visitID equals $hit_ID.
// NOTE(review): $hit_ID is interpolated unquoted and unescaped - confirm that
// callers always pass an integer.
global $DB, $tablehitlog;
$sql =
"DELETE FROM $tablehitlog WHERE visitID = $hit_ID";
// The statement that executes $sql is not visible in this extract.
* Delete all hits from a certain date
* {@internal hit_prune(-) }}
* @param int unix timestamp to delete hits for
// Body fragment of hit_prune (function header not visible in this extract):
// formats $date as Y-m-d and builds a DELETE for every row of $tablehitlog
// whose visitTime, formatted the same way, equals that date.
global $DB, $tablehitlog;
// date() formats the timestamp using the PHP default timezone.
$iso_date =
date ('Y-m-d', $date);
$sql =
"DELETE FROM $tablehitlog WHERE DATE_FORMAT(visitTime,'%Y-%m-%d') = '$iso_date'";
// The statement that executes $sql is not visible in this extract.
* {@internal hit_change_type(-) }}
* @param int ID to change
* @param string new type, must be valid ENUM for hit_ignore field
// Body fragment of hit_change_type (function header not visible in this extract):
// builds an UPDATE that sets hit_ignore to $type for the row whose visitID
// equals $hit_ID, while assigning visitTime to itself.
// NOTE(review): $type and $hit_ID are interpolated without escaping - confirm
// that callers validate both values.
global $DB, $tablehitlog;
$sql =
"UPDATE $tablehitlog ".
"SET hit_ignore = '$type', ".
" visitTime = visitTime ".
// prevent mySQL from updating timestamp
"WHERE visitID = $hit_ID";
// The statement that executes $sql is not visible in this extract.
* {@internal refererList(-) }}
// Tail of the refererList parameter list followed by its body. The start of the
// signature and several conditions and braces are not visible in this extract.
// Visible behaviour: builds a SELECT on $tablehitlog filtered by hit_ignore,
// stores the rows in the global $res_stats, and sets the global $stats_total_hits.
$type =
"'no'", // 'no' normal refer, 'invalid', 'badchar', 'blacklist', 'rss', 'robot', 'search'
$groupby =
'', // baseDomain
$get_total_hits =
false, // Get total number of hits (needed for percentages)
$get_user_agent =
false ) // Get the user agent
global $DB, $tablehitlog, $res_stats, $stats_total_hits, $ReqURI;
autoquote( $type ); // In case quotes are missing
//if no visitURL, will show links to current page.
//if url given, will show links to that page.
//if url="global" will show links to all pages
// Two alternative column lists follow: one for individual hits, one for counts.
// NOTE(review): the condition choosing between them is not visible here;
// presumably it depends on $groupby.
$sql =
"SELECT visitID, UNIX_TIMESTAMP(visitTime) AS visitTime, referingURL, baseDomain";
$sql =
"SELECT COUNT(*) AS totalHits, referingURL, baseDomain";
$sql .=
", hit_user_agent";
// The FROM/WHERE part is kept in its own variable so that the total-hits query below can reuse it.
// NOTE(review): the statement appending $sql_from_where to $sql is not visible in this extract.
$sql_from_where =
" FROM $tablehitlog WHERE hit_ignore IN ($type)";
// NOTE(review): $blog_ID, $visitURL, $groupby and $howMany are interpolated
// without escaping - confirm that callers only pass trusted values.
$sql_from_where .=
" AND hit_blog_ID = '$blog_ID'";
if ($visitURL !=
"global")
$sql_from_where .=
" AND visitURL = '$visitURL'";
$sql .=
" ORDER BY visitID DESC";
$sql .=
" GROUP BY $groupby ORDER BY totalHits DESC";
$sql .=
" LIMIT $howMany";
// Rows are requested as associative arrays (ARRAY_A).
$res_stats =
$DB->get_results( $sql, ARRAY_A );
{ // we need to get total hits
$sql =
"SELECT COUNT(*) ".
$sql_from_where;
$stats_total_hits =
$DB->get_var( $sql );
{ // we're not getting total hits
$stats_total_hits =
1; // just in case someone tries a percentage anyway (avoid div by 0)
// Bodies of several small display helpers; their function headers and braces
// are not visible in this extract. Each visible statement echoes one field of
// $row_stats.
// Echo the visitID field of the current row.
echo
$row_stats['visitID'];
* stats_hit_remote_addr(-)
// Echo the hit_remote_addr field of the current row.
echo
$row_stats['hit_remote_addr'];
// Echo the row's visitTime formatted by date_i18n() with $format.
echo
date_i18n( $format, $row_stats['visitTime'] );
* stats_total_hit_count(-)
global $stats_total_hits;
// Echo the totalHits field of the current row.
echo
$row_stats['totalHits'];
global $row_stats, $stats_total_hits;
// Share of this row in the total: totalHits * 100 / $stats_total_hits.
// NOTE(review): the refererList fragment sets $stats_total_hits to 1 when totals
// are not requested; a zero result from its COUNT query would still divide by zero here.
$percent =
$row_stats['totalHits'] *
100 /
$stats_total_hits;
// Format with $decimals decimals, $dec_point as decimal separator and no
// thousands separator, followed by a percent sign.
echo
number_format( $percent, $decimals, $dec_point, '' ).
' %';
// Echo the hit_blog_ID field of the current row.
echo
$row_stats['hit_blog_ID'];
// stats_referer: only the signature and two statements are visible in this
// extract; the braces and the code using $before, $after and $disp_ref are missing.
function stats_referer( $before=
'', $after=
'', $disp_ref =
true )
// Referring URL of the current row with surrounding whitespace removed.
$ref =
trim($row_stats['referingURL']);
// NOTE(review): this return of baseDomain presumably belongs to a separate
// helper whose header is missing from this extract - confirm against the full file.
return $row_stats['baseDomain'];
* stats_search_keywords(-)
* Displays keywords used for search leading to this page
// Body fragment of stats_search_keywords (function header, braces and several
// statements are not visible in this extract). Visible behaviour: inspect the
// query string of the referring URL for a search parameter and echo a
// bracketed message when nothing usable is found.
$ref =
$row_stats['referingURL'];
// Loose == comparison: both a missing question mark (false) and one at offset 0
// satisfy this test.
if( ($pos_question =
strpos( $ref, '?' )) ==
false )
echo
'[', T_('not a query - no params!'), ']';
// NOTE(review): the statement that fills $ref_params is not visible here.
foreach( $ref_params as $ref_param )
// Split each parameter on the equals sign; index 0 is the parameter name.
$param_parts =
explode( '=', $ref_param );
// Parameter names accepted as carrying the search terms: q, query, p, kw, qs.
if( $param_parts[0] ==
'q' or $param_parts[0] ==
'query' or $param_parts[0] ==
'p' or $param_parts[0] ==
'kw' or $param_parts[0] ==
'qs' )
{ // found "q" query parameter
// NOTE(review): the assignment of $q is not visible in this extract.
if( strpos( $q, 'Ã' ) !==
false )
{ // Probability that the string is UTF-8 encoded is very high, that'll do for now...
//echo "[UTF-8 decoding]";
// NOTE(review): the statement that fills $qwords is not visible here.
foreach( $qwords as $qw )
// strlen() and substr() count bytes, so the 30 limit is in bytes.
if( strlen( $qw ) >
30 ) $qw =
substr( $qw, 0, 30 ).
"..."; // word too long, crop it
echo
'[', T_('no query string found'), ']';
// Fragment that maps the stored user agent string of the current row to an
// entry from $user_agents (function header and braces are not visible in this extract).
global $row_stats, $user_agents;
$UserAgent =
$row_stats[ 'hit_user_agent' ];
// Case-insensitive substring match of each entry's index 1 against $UserAgent;
// on a match $UserAgent is replaced by the entry's index 2.
// NOTE(review): no break is visible, so later entries would be tested against
// the replaced value - confirm against the full file.
foreach ($user_agents as $curr_user_agent)
if (stristr($UserAgent, $curr_user_agent[1]))
$UserAgent =
$curr_user_agent[2];
* Display "Statistics" title if these have been requested
* @deprecated Stats should not be made public (SPAM!!!)
* @param string Prefix to be displayed if something is going to be displayed
* @param mixed Output format, see {@link format_to_output()} or false to
* return value instead of displaying it
function stats_title( $prefix =
' ', $display =
'htmlbody' )
/* select count(*) as nb, hit_ignore
where `hit_ignore` LIKE 'invalid' AND `hit_user_agent` LIKE 'FAST-WebCrawler/%'