script.sh
527 Bytes
#!/bin/sh
#
# Build a word-frequency table from a local mirror of the hn-daily pages.
#
# Steps:
#   1. Mirror the hn-daily index one level deep with wget and expose the
#      mirrored directory as ./hn-daily through a symlink.
#   2. Tokenise every page named NNNN-NN.html (presumably one page per
#      month) into lower-case words                       -> words
#   3. Tokenise 2010.html the same way, de-duplicated     -> words_default
#      (presumably used as a baseline of words to ignore).
#   4. Remove every word that also occurs in words_default -> words_remaining
#   5. Count the remaining words that do not start with a digit, most
#      frequent first                                      -> statistic
#
# All files are created in the current working directory.

set -u

readonly base_url='http://www.daemonology.net/hn-daily/'
readonly mirror_dir='www.daemonology.net/hn-daily'

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Filter: read text on stdin, write one lower-cased word per line.
# Punctuation is replaced by spaces, every space becomes a newline and
# empty lines are dropped.  The character classes are quoted so the shell
# cannot glob-expand them against one-letter file names in the directory.
split_words() {
  tr '[:upper:]' '[:lower:]' \
    | tr '[:punct:]' ' ' \
    | tr ' ' '\n' \
    | grep -v '^$'
}

# -c resumes partial downloads, -r -l 1 follows links one level deep.
# A recursive fetch can report failure because of a single bad link, so a
# non-zero status is only a warning; the input check below is decisive.
wget -c -r -l 1 "$base_url" \
  || printf 'warning: recursive download of %s reported errors\n' "$base_url" >&2

# Single-argument form: creates (or, with -f, replaces) ./hn-daily.
ln -s -f "$mirror_dir"

# Collect the NNNN-NN.html pages; an unmatched glob stays literal, which
# the existence test catches.  The script takes no arguments, so reusing
# the positional parameters is safe.
set -- hn-daily/[0-9][0-9][0-9][0-9]-[0-9][0-9].html
[ -e "$1" ] || die "error: no NNNN-NN.html pages found in hn-daily/"
cat "$@" | split_words | sort > words

wget -c "${base_url}2010.html" \
  || printf 'warning: download of %s2010.html reported errors\n' "$base_url" >&2
[ -r 2010.html ] || die "error: 2010.html is missing"
split_words < 2010.html | sort | uniq > words_default

# -F: fixed strings (replaces the obsolescent fgrep), -w: whole words only.
grep -F -v -w -f words_default words > words_remaining

# words_remaining is still in sorted order (grep only drops lines), so
# uniq -c sees equal words on adjacent lines.
grep -v '^[0-9]' words_remaining | uniq -c | sort -n -r > statistic