# Referrer validation and reporting pipeline (bash with embedded perl).
#
# NOTE(review): this copy looks damaged. The original line breaks are gone
# (so each inline "#" comment now swallows the rest of its physical line),
# and text that was presumably inside angle brackets is missing -- see
# `$data = ;`, `m{) {`, `open(LOG, ") {` and the echo'd HTML near the end.
# TODO: restore from a pristine copy before running; do not run as-is.
#
# Stages, as far as the visible text shows:
#
#  1. Fetch (first `if true; then ... fi`):
#     - removes /tmp/refer-* and concatenates /var/log/thttpd.log and
#       /var/log/woolweb.log;
#     - a perl regex splits each log line into twelve fields and prints
#       the referer field; the list is then de-duplicated (sort | uniq);
#     - the first grep -v drops referers matching the site's own hosts and
#       addresses; the second (grep -iv) drops search-engine style URLs;
#     - every remaining URL is fetched with wget (5 s timeout, one try,
#       custom User-Agent) into /tmp/refer-$i, with stderr saved to
#       /tmp/refer-$i-wgetstderr; if that stderr mentions 127.0.0. the
#       counter is decremented again, otherwise the URL is recorded as the
#       target of the symlink /tmp/refer-$i-link.
#
#  2. Titles: for each fetched file that has no "-title" symlink yet, perl
#     captures up to 4096 characters preceding "</title>", the shell cuts
#     that to 512 characters, falls back to "Untitled" when empty, and
#     stores the string as the target of the symlink "${line}-title".
#     (Symlink targets are used throughout as small string stores and are
#     read back with readlink.)
#
#  3. A commented-out perl one-liner that prints 72-column context around
#     "seth.positivism.org" from /tmp/refer-290.
#
#  4. Checksums and index (second `if true; then ... fi`):
#     - stores the md5sum of each fetched file as the target of
#       "${line}-md5sum";
#     - perl reads the -link / -md5sum / -title symlinks and looks in the
#       page data for an href pointing at one of the site's hosts;
#     - tab-separated (url, uri, md5, title) records are loaded into
#       hashes, keeping the shortest URL seen for each md5;
#     - log lines are parsed again; for each (referer, uri) pair present in
#       the hash the referer is normalised by trying variants without a
#       trailing "/", without "/index.<ext>" and without a "www" prefix,
#       keeping the shortest variant that has a known md5;
#     - "uri<TAB>referrer<TAB>title" lines are sorted into
#       /tmp/referindex.tab, which is copied to /var/www.
#
#  5. Reports: /var/www/referrers.txt is `uniq -c | sort -rn | column -t`
#     of the index; /var/www/referrers.html is built from the lines of
#     /var/www/referindex.tab starting with "/", counted and grouped by
#     resource, with link text longer than 60 characters cut to 60 plus
#     "...".
if true; then rm -rf /tmp/refer-* i=0 cat /var/log/thttpd.log /var/log/woolweb.log | perl -e ' while(<>) { ($ip, $user, $date, $request, $uri, $proto, $rc, $size, $refer, $ua, $rate, $domain) = (/([0-9.]+?) - ([^ ]+) [[](.+?)[]] "([A-Z]+) (.+?) (HTTP[^"]+)" ([0-9]+) ([0-9]+) "(.*?)" "([^"]*)" "([^"]+)" "([^"]*)"/); if ($ip) { print "$refer\n" } } ' | sort | uniq | grep -v 'swoolley[.]org\|positivism[.]org\|swoolley[.]homeip[.]net\|65[.]102[.]47.\|24[.]' | grep -iv \ 'baidu[.]com\|a9[.]com\|infoseek\|resultados[.]\|google\|answers[.]\|/search[?]\|query=\|/results\|/search\|/query\|search=\|qry=\|=seek\|[.]biz\|search[.]msn[.]\|SearchType=web\|pesquisa[.]sapo[.]pt\|[?]adsense' | while read URL; do # ls -l | grep -q -F -- "$URL" || { ((i++)) wget -T 5 -U 'Referral Validation Bot for swoolley.org' -O - -t 1 "$URL" > /tmp/refer-$i 2> /tmp/refer-$i-wgetstderr { grep -q '127[.]0[.]0[.]' /tmp/refer-$i-wgetstderr && ((i--)); } || ln -s "$URL" /tmp/refer-$i-link # } done fi cd /tmp ls refer-* | grep -v 'link\|md5sum\|title\|wgetstderr$' | while read line; do [ ! -h "${line}-title" ] && { TITLE="$(perl -e '$/=undef;$_=<>;s/.{0,4096}(.{0,4096})<\/title>.*/$1/si;print' $line 2> /dev/null)" TITLE="${TITLE:0:512}" [ -z "$TITLE" ] && TITLE="Untitled" ln -s "$TITLE" "${line}-title" } done # perl -e '$find="seth.positivism.org";$/=undef;$_=<>;s/]*)"?.*?>(.*?)<\/a>/ [$1]($2) /gis;s/<.*?>//gs;s/[<>]//gs;s/&[a-z\#0-9]+;?/?/gs;s/[[:^print:]]//gs;s/[\s]+/ /gs;$line=72;$chars=(($line*3-length($find))/2);$lchars=int($chars);$rchars=int($chars+0.5);$_=(" " x $line).$_.(" " x $line);$res="";s/(.{$lchars})\Q$find\E(.{$rchars})/$res.="$1$find$2"/ge;$_=$res;s/(.{$line})/$1\n/g;print' /tmp/refer-290 if true; then ls refer-* | grep -v 'link\|md5sum\|title\|wgetstderr$' | while read line; do [ ! 
-h "${line}-md5sum" ] && ln -s "$(md5sum $line 2> /dev/null| cut -d' ' -f1)" "${line}-md5sum" done cd /tmp ls refer-* | grep -v 'link\|md5sum\|title\|wgetstderr$' | while read line; do perl -ne ' chomp; $file = $_; open(FILE, "<", "$file") or warn "$!"; $url = readlink("${file}-link") or warn "$!"; $md5 = readlink("${file}-md5sum") or warn "$!"; $tit = readlink("${file}-title"); # or warn "$!"; { local $/ = undef; $data = ; } close(FILE); $data =~ m{href=(["'"'"']?)http://(?:.*swoolley[.]org|.*positivism[.]org|swoolley[.]homeip[.]net|24[.][0-9.]+|24[.].*|65[.]102[.]47[.].*)(/\S*)\1} and $uri = $2 and $data !~ m{) { chomp; ($url, $uri, $md5, $tit) = (/^([^\t]*)\t([^\t]*)\t([^\t]*)\t(.*?)$/); $hash{$url} = 1; $hash{$url}{$uri} = 1; $hashmd5{$url} = $md5; $hashtit{$url} = $tit; if (length($url) < length($md5{$md5}) or not defined($md5{$md5})) { $md5{$md5} = $url; } } $hash{""} = undef; open(LOG, ") { ($ip, $user, $date, $request, $uri, $proto, $rc, $size, $refer, $ua, $rate, $domain) = (/([0-9.]+?) - ([^ ]+) [[](.+?)[]] "([A-Z]+) (.+?) 
(HTTP[^"]+)" ([0-9]+) ([0-9]+) "(.*?)" "([^"]*)" "([^"]+)" "([^"]*)"/); if (defined($hash{$refer}{$uri})) { $originalrefer = $refer; $refernonwww = $refer; if (defined($hashmd5{$refernonwww})) { if (length($refer) > length($refernonwww)) { $refer = $refernonwww; } } $refernonwww =~ s!/$!!; if (defined($hashmd5{$refernonwww})) { if (length($refer) > length($refernonwww)) { $refer = $refernonwww; } } $refernonwww =~ s!/index.[a-z0-9]+$!!; if (defined($hashmd5{$refernonwww})) { if (length($refer) > length($refernonwww)) { $refer = $refernonwww; } } $refernonwww = $originalrefer; $refernonwww =~ s!http://www[0-9]*[.]!http://!; if (defined($hashmd5{$refernonwww})) { if (length($refer) > length($refernonwww)) { $refer = $refernonwww; } } $refernonwww =~ s!/$!!; if (defined($hashmd5{$refernonwww})) { if (length($refer) > length($refernonwww)) { $refer = $refernonwww; } } $refernonwww =~ s!/index.[a-z0-9]+$!!; if (defined($hashmd5{$refernonwww})) { if (length($refer) > length($refernonwww)) { $refer = $refernonwww; } } $printrefer = $md5{$hashmd5{$refer}}; $printrefer =~ s/&/\&/g; print "$uri\t$printrefer\t$hashtit{$printrefer}\n"; } } close(LOG); ' | sort > /tmp/referindex.tab cp /tmp/referindex.tab /var/www fi cat /tmp/referindex.tab | uniq -c | sort -rn | column -t > /var/www/referrers.txt { echo 'Referrers for swoolley.org

Referrers for swoolley.org

' grep '^/' /var/www/referindex.tab | sort | uniq -c | sort -rn | perl -pe 's/^(.{7}) /$1\t/g;s/^(.*?)\t(.*?)\t/$2\t$1\t/' | sort -s -k1,1 | sed -e 's/&/\&/g;s/"/\"/g;s/!' | perl -pe 's!()(.*?)!$1 . (length($2)<60?$2:substr($2,0,60)."...") . ""!ge;' #grep '^/' /var/www/referindex.tab | sort | uniq -c | sort -rn | perl -pe 's/^(.*?)\t(.*?)\t/$2\t$1\t/' | sort -s -k1,1 | # sed -e 's/&/\&/g;s/"/\"/g;s/!!;s!$!!' | echo '
resourcecountreferrer
!g;s!http://[^<]*!\&!;s!^!
!;s!$!
!g;s!http://.*!\&!;s!
'; } > /var/www/referrers.html