# Stage 1: collect the distinct external referrer URLs recorded in the two
# access logs and download each one to /tmp/refer-N.
#
# Files produced for every kept download N:
#   /tmp/refer-N             body returned by wget
#   /tmp/refer-N-wgetstderr  diagnostics printed by wget
#   /tmp/refer-N-link        symlink whose target text is the URL (the symlink
#                            is used as a small key/value store; it is not
#                            expected to resolve to a real file)
#
# Filtering, in order:
#   1. referrers that point at this site (its host names / addresses) are dropped;
#   2. search-engine result pages and similar noise are dropped, ignoring case;
#   3. empty and "-" values are skipped before fetching;
#   4. a fetch whose wget diagnostics mention 127.0.0.x is discarded and its
#      slot number is reused (presumably the host resolved to loopback).
#
# NOTE(review): "if true" looks like a manual on/off switch for this stage.
# NOTE(review): the /tmp/refer-* names are predictable; on a multi-user host
# a private directory would be safer - left unchanged because the later
# stages hard-code the same paths.
if true; then
rm -rf /tmp/refer-*
i=0
cat /var/log/thttpd.log /var/log/woolweb.log | perl -ne '
    # Split one access-log line into its fields; only the referrer is printed.
    my ($ip, $user, $date, $request, $uri, $proto, $rc, $size, $refer, $ua, $rate, $domain) =
        (/([0-9.]+?) - ([^ ]+) [[](.+?)[]] "([A-Z]+) (.+?) (HTTP[^"]+)" ([0-9]+) ([0-9]+) "(.*?)" "([^"]*)" "([^"]+)" "([^"]*)"/);
    print "$refer\n" if $ip;
' | sort -u | grep -v 'swoolley[.]org\|positivism[.]org\|swoolley[.]homeip[.]net\|65[.]102[.]47[.]\|24[.]' |
grep -iv \
'baidu[.]com\|a9[.]com\|infoseek\|resultados[.]\|google\|answers[.]\|/search[?]\|query=\|/results\|/search\|/query\|search=\|qry=\|=seek\|[.]biz\|search[.]msn[.]\|SearchType=web\|pesquisa[.]sapo[.]pt\|[?]adsense' |
while IFS= read -r URL; do
    # The referrer header is client-controlled.  Skip the "no referrer"
    # placeholders here, and pass everything else after "--" below so that a
    # value starting with a dash can never be parsed as a wget option.
    case "$URL" in
        ''|-) continue ;;
    esac
    ((i++))
    wget -T 5 -U 'Referral Validation Bot for swoolley.org' -O - -t 1 -- "$URL" > "/tmp/refer-$i" 2> "/tmp/refer-$i-wgetstderr"
    if grep -q '127[.]0[.]0[.]' "/tmp/refer-$i-wgetstderr"; then
        # Rejected fetch: remove its files so no refer-N is left behind
        # without a matching -link, then hand the slot to the next URL.
        rm -f "/tmp/refer-$i" "/tmp/refer-$i-wgetstderr"
        ((i--))
    else
        ln -s "$URL" "/tmp/refer-$i-link"
    fi
done
fi
# Stage 2: record the HTML title of every fetched page.
#
# For each /tmp/refer-N page file (the -link, -md5sum, -title and -wgetstderr
# companions are filtered out of the listing) that does not yet have a
# refer-N-title symlink, create one whose target text is the page title.
# Pages without a usable title get "Untitled".
#
# The title is taken from the first <title>...</title> element (at most 4096
# characters), whitespace runs are collapsed to single spaces, and the result
# is cut to 512 characters.  Collapsing matters because the title is later
# written into tab-separated, one-record-per-line output, where an embedded
# tab or newline would split the record.
cd /tmp || exit 1
ls refer-* | grep -v 'link\|md5sum\|title\|wgetstderr$' |
while IFS= read -r line; do
    if [ ! -h "${line}-title" ]; then
        TITLE="$(perl -e '
            local $/ = undef;            # slurp the whole page
            my $html = <>;
            if (defined $html and $html =~ m{<title[^>]*>(.{0,4096}?)</title>}si) {
                my $title = $1;
                $title =~ s/\s+/ /g;     # keep the title on one line, no tabs
                $title =~ s/^ //;
                $title =~ s/ $//;
                print $title;
            }
        ' "$line" 2> /dev/null)"
        TITLE="${TITLE:0:512}"
        [ -z "$TITLE" ] && TITLE="Untitled"
        ln -s "$TITLE" "${line}-title"
    fi
done
# perl -e '$find="seth.positivism.org";$/=undef;$_=<>;s/]*)"?.*?>(.*?)<\/a>/ [$1]($2) /gis;s/<.*?>//gs;s/[<>]//gs;s/&[a-z\#0-9]+;?/?/gs;s/[[:^print:]]//gs;s/[\s]+/ /gs;$line=72;$chars=(($line*3-length($find))/2);$lchars=int($chars);$rchars=int($chars+0.5);$_=(" " x $line).$_.(" " x $line);$res="";s/(.{$lchars})\Q$find\E(.{$rchars})/$res.="$1$find$2"/ge;$_=$res;s/(.{$line})/$1\n/g;print' /tmp/refer-290
if true; then
# Fingerprint every fetched page: for each refer-N page file in the current
# directory that has no refer-N-md5sum symlink yet, create one whose target
# text is the MD5 digest of the page body.  The digest is read back later to
# recognise identical pages reached through different URLs.
# If md5sum produces no digest (for example the file vanished), no symlink is
# created, so the next run will try again.
ls refer-* | grep -v 'link\|md5sum\|title\|wgetstderr$' |
while IFS= read -r line; do
    if [ ! -h "${line}-md5sum" ]; then
        sum="$(md5sum "$line" 2> /dev/null | cut -d' ' -f1)"
        if [ -n "$sum" ]; then
            ln -s "$sum" "${line}-md5sum"
        fi
    fi
done
# Stage 3: build /tmp/referindex.tab from the fetched pages and the logs.
#
# The listing of page files in /tmp (companion -link/-md5sum/-title/-wgetstderr
# entries filtered out) is piped into the embedded Perl below, whose sorted
# output becomes /tmp/referindex.tab.
#
# What the visible Perl does:
#   * treats each input line as a page file name and reads the URL, MD5 digest
#     and title back from the targets of that file's -link, -md5sum and -title
#     symlinks;
#   * looks in the page body for an href pointing at one of this site's host
#     names or addresses and keeps the path part of that link;
#   * parses tab-separated "url, uri, md5, title" records into lookup hashes,
#     remembering the shortest URL seen for each MD5 digest;
#   * re-reads log lines and, when the (referrer, uri) pair is known, tries
#     shorter variants of the referrer (trailing "/" removed, "/index.ext"
#     removed, "www" host prefix removed) before printing
#     "uri<TAB>referrer<TAB>title".
#
# NOTE(review): this region is damaged - everything that looked like an HTML
# tag or a <...> read has been stripped.  Visible symptoms: "$data = ;"
# (presumably a read from FILE), the truncated patterns starting "m{" and
# "open(LOG,", and the missing "done" for the "while read line" loop.  It
# looks like two separate Perl programs joined by a pipe were fused into one.
# Restore this block from an intact copy before running it; the description
# above covers only what can still be read.
cd /tmp
ls refer-* | grep -v 'link\|md5sum\|title\|wgetstderr$' |
while read line; do
perl -ne '
chomp;
$file = $_;
open(FILE, "<", "$file") or warn "$!";
$url = readlink("${file}-link") or warn "$!";
$md5 = readlink("${file}-md5sum") or warn "$!";
$tit = readlink("${file}-title"); # or warn "$!";
{ local $/ = undef; $data = ; }
close(FILE);
$data =~ m{href=(["'"'"']?)http://(?:.*swoolley[.]org|.*positivism[.]org|swoolley[.]homeip[.]net|24[.][0-9.]+|24[.].*|65[.]102[.]47[.].*)(/\S*)\1} and
$uri = $2 and
$data !~ m{) {
chomp;
($url, $uri, $md5, $tit) = (/^([^\t]*)\t([^\t]*)\t([^\t]*)\t(.*?)$/);
$hash{$url} = 1;
$hash{$url}{$uri} = 1;
$hashmd5{$url} = $md5;
$hashtit{$url} = $tit;
if (length($url) < length($md5{$md5}) or not defined($md5{$md5})) { $md5{$md5} = $url; }
}
$hash{""} = undef;
open(LOG, ") { ($ip, $user, $date, $request, $uri, $proto, $rc, $size, $refer, $ua, $rate, $domain) =
(/([0-9.]+?) - ([^ ]+) [[](.+?)[]] "([A-Z]+) (.+?) (HTTP[^"]+)" ([0-9]+) ([0-9]+) "(.*?)" "([^"]*)" "([^"]+)" "([^"]*)"/);
if (defined($hash{$refer}{$uri})) {
$originalrefer = $refer;
$refernonwww = $refer;
if (defined($hashmd5{$refernonwww})) {
if (length($refer) > length($refernonwww)) { $refer = $refernonwww; }
}
$refernonwww =~ s!/$!!;
if (defined($hashmd5{$refernonwww})) {
if (length($refer) > length($refernonwww)) { $refer = $refernonwww; }
}
$refernonwww =~ s!/index.[a-z0-9]+$!!;
if (defined($hashmd5{$refernonwww})) {
if (length($refer) > length($refernonwww)) { $refer = $refernonwww; }
}
$refernonwww = $originalrefer;
$refernonwww =~ s!http://www[0-9]*[.]!http://!;
if (defined($hashmd5{$refernonwww})) {
if (length($refer) > length($refernonwww)) { $refer = $refernonwww; }
}
$refernonwww =~ s!/$!!;
if (defined($hashmd5{$refernonwww})) {
if (length($refer) > length($refernonwww)) { $refer = $refernonwww; }
}
$refernonwww =~ s!/index.[a-z0-9]+$!!;
if (defined($hashmd5{$refernonwww})) {
if (length($refer) > length($refernonwww)) { $refer = $refernonwww; }
}
$printrefer = $md5{$hashmd5{$refer}};
$printrefer =~ s/&/\&/g;
print "$uri\t$printrefer\t$hashtit{$printrefer}\n";
}
}
close(LOG);
' | sort > /tmp/referindex.tab
# Publish a copy of the raw index; the HTML report further down reads it
# from /var/www/referindex.tab.
cp /tmp/referindex.tab /var/www
fi
# Plain-text report: count adjacent duplicate index lines, list the most
# frequent first, and align the columns.
uniq -c < /tmp/referindex.tab | sort -rn | column -t > /var/www/referrers.txt
# HTML report: everything printed inside the { ...; } group is redirected to
# /var/www/referrers.html - a header, one row per index entry, and a footer.
#
# NOTE(review): the HTML markup in this block has been stripped (the header
# text appears twice, presumably once as <title> and once as a heading; the
# sed and perl substitutions have lost their tag text and "s/\</g" is no
# longer a complete sed command).  The commented-out variant near the end
# also spills an unbalanced quote onto a live line.  Restore this block from
# an intact copy before running it.
{ echo 'Referrers for swoolley.org
Referrers for swoolley.org
resource | count | referrer |
'
# Rows: keep index lines that start with "/", count duplicates, move the
# count behind the first tab-separated field, then stable-sort on field one
# before the sed/perl steps below format each line.
grep '^/' /var/www/referindex.tab | sort | uniq -c | sort -rn | perl -pe 's/^(.{7}) /$1\t/g;s/^(.*?)\t(.*?)\t/$2\t$1\t/' | sort -s -k1,1 |
sed -e 's/&/\&/g;s/"/\"/g;s/\</g;s![\t][\t]*!!g;s!http://[^<]*!\&!;s!^! | !;s!$! |
!' |
perl -pe 's!()(.*?)!$1 . (length($2)<60?$2:substr($2,0,60)."...") . ""!ge;'
#grep '^/' /var/www/referindex.tab | sort | uniq -c | sort -rn | perl -pe 's/^(.*?)\t(.*?)\t/$2\t$1\t/' | sort -s -k1,1 |
# sed -e 's/&/\&/g;s/"/\"/g;s/\</g;s![\t]*!!g;s!http://.*!\&!;s! | !!;s!$!
!' |
echo '
'; } > /var/www/referrers.html