# This file is a part of Julia.
# License is MIT: http://julialang.org/license

# Implementation of "word-count" of a text

using Printf

"""
    wordcount(text)

Count how often each word occurs in `text`.

A word is a maximal run of alphabetic characters; every other character acts
as a separator. Words are converted to lowercase before counting.

Returns a `Dict{String,Int64}` mapping each word to its number of occurrences.
"""
function wordcount(text)
    counts = Dict{String,Int64}()
    for w in split(text, r"[^[:alpha:]]", keepempty=false)
        word = lowercase(w)
        counts[word] = get(counts, word, 0) + 1
    end
    return counts
end

"""
    wcreduce(wcs)

Combine a collection of dictionaries in the format returned by [`wordcount`](@ref).

Returns a `Dict{String,Int64}` in which words that appear in several inputs
have their totals added together.
"""
function wcreduce(wcs)
    counts = Dict{String,Int64}()
    for wc in wcs, (word, n) in wc
        counts[word] = get(counts, word, 0) + n
    end
    return counts
end

"""
    wordcount_files(inputs...)

Read every file named in `inputs` and return the word count of their combined
contents, in the format returned by [`wordcount`](@ref).
"""
function wordcount_files(inputs...)
    # Join with a newline so that the last word of one file and the first word
    # of the next are never fused into a single word.
    text = join((read(file, String) for file in inputs), '\n')
    return wordcount(text)
end

"""
    wordcount_save(result_file, wc)

Write the counts in `wc` to `result_file`, one word per line, most frequent
first. Each line holds the word, its count, and its percentage of the total.
"""
function wordcount_save(result_file, wc)
    # `init=0` keeps an empty dictionary from raising on an empty reduction.
    total = reduce(+, values(wc); init=0)
    # Ascending stable sort followed by a reversal: most frequent word first.
    entries = reverse!(sort!(collect(wc); by=last))
    open(result_file, "w") do f
        for (word, n) in entries
            @printf(f, "%20s %10d %14.4f\n", word, n, 100 * n / total)
        end
    end
    return nothing
end

"""
    wordcount_mergedata(inputs)

Read the result files named in `inputs` (as written by [`wordcount_save`](@ref))
and merge them into one `Dict{String,Int64}` in the format returned by
[`wordcount`](@ref). Only the word and count columns are used; blank lines are
skipped.
"""
function wordcount_mergedata(inputs::AbstractVector{<:AbstractString})
    wc = Dict{String,Int64}()
    for file in inputs
        # The do-block form closes the file even if a line fails to parse.
        open(file) do io
            for line in eachline(io)
                fields = split(line)
                isempty(fields) && continue
                word, n = fields
                wc[word] = get(wc, word, 0) + parse(Int64, n)
            end
        end
    end
    return wc
end

"""
    wordcount_cli(args)

Run the command-line interface with the argument vector `args`.

- `count resultfile textfile` counts the words of `textfile`.
- `merge resultfile datafile...` merges previously saved result files.

Anything else, including a mode with too few arguments, prints the usage text.
"""
function wordcount_cli(args)
    mode = isempty(args) ? "" : args[1]
    if mode == "count" && length(args) >= 3
        wordcount_save(args[2], wordcount_files(args[3]))
    elseif mode == "merge" && length(args) >= 2
        wordcount_save(args[2], wordcount_mergedata(args[3:end]))
    else
        script = basename(@__FILE__)
        println("Usage:")
        println(" julia ", script, " count datafile textfile")
        println("or")
        println(" julia ", script, " merge resultfile datafile1 datafile2 ...")
    end
    # No explicit exit(): reaching the end of the script gives the same zero
    # status without terminating a process that merely includes this file.
    return nothing
end

if !isinteractive()
    wordcount_cli(ARGS)
end