#! perl
#
# wfrq.pl   Daniel Brockman 20030828   Word Frequency Counter
# Permission granted to copy with attribution to author.
#
# Usage:  wfrq.pl < inputfile > outputfile
#
# wfrq reads the content of inputfile, counts the
# occurrences of words and shows the most frequently
# used words and the number of times that each occurred.
# wfrq separates words on any character other than a
# digit or a letter, ignoring all punctuation.  wfrq
# ignores very frequently occurring words including "a",
# "and" and "the" and some others.  Two words are considered
# the same word if their first $nl letters are the same.
# Only the first $nl characters are shown on the output
# list.
#
# $nl is a parameter currently set to six.
#
# revised 070223 to use Wds.pm.
#
# NOTE(review): word splitting, the stop-word list and the $nl
# truncation described above are not implemented in this file;
# presumably they live in the Wds.pm helpers -- confirm there.

# test: search the current directory first so a local Wds.pm is found.
BEGIN { unshift @INC, "." }

use strict;
use warnings;
use Wds;

my %tally;      # counts of occurrences by word
my %retally;    # space-separated list of words, keyed by number of occurrences
my @lines;      # lines read from standard input

# Pipeline: read the input, count the words, then regroup them by count.
# A typeglob such as *STDIN is legal under "use strict", so no "no strict"
# block is needed to pass the handle.
wfil(*STDIN, \@lines);       # append content of standard input to @lines
wfee(\@lines, \%tally);      # count the occurrences
wftl(\%retally, \%tally);    # group words by frequency

# Write the list of frequencies, most frequent first.  The keys of
# %retally are occurrence counts, so they must be compared numerically;
# swap $a and $b in the sort block to list the least frequent first.
for my $count (sort { $b <=> $a } keys %retally) {

    # Each value holds every word seen $count times, separated by single
    # spaces; print one "word count" line per word.
    for my $word (split / /, $retally{$count}) {
        print "$word $count\n";
    }
}