[https://en.wikipedia.org/wiki/Brian_Kernighan Brian Kernighan], in a [https://www.youtube.com/watch?v=Sg4U4r_AgJU lecture] at the University of Nottingham, described a [https://youtu.be/Sg4U4r_AgJU?t=50s problem] on which this task is based.
;Problem:
You are given a data file of thousands of lines; each line has three
whitespace-separated fields: a date, a one-word name and the magnitude of the event.
Example lines from the file would be lines like:
8/27/1883 Krakatoa 8.8
5/18/1980 MountStHelens 7.6
3/13/2009 CostaRica 5.1
Task
- Create a program or script invocation to find all the events with magnitude greater than 6
- Assuming an appropriate name e.g. "data.txt" for the file:
  1. Either: Show how your program is invoked to process a data file of that name.
  2. Or: Incorporate the file name into the program (as it is assumed that the program is single use).
ALGOL 68
# print the lines of "data.txt" whose third space-separated field, read as a real, is greater than 6 #
IF FILE input file;
STRING file name = "data.txt";
# a non-zero result from open selects the error branch below #
open( input file, file name, stand in channel ) /= 0
THEN
# failed to open the file #
print( ( "Unable to open """ + file name + """", newline ) )
ELSE
# file opened OK #
BOOL at eof := FALSE;
# set the EOF handler for the file #
on logical file end( input file, ( REF FILE f )BOOL:
BEGIN
# note that we reached EOF on the latest read #
at eof := TRUE;
# return TRUE so processing can continue #
TRUE
END
);
# return the real value of the specified field on the line #
# fields are numbered from 1 and are separated by space characters only #
# the result is 0 when the line has fewer fields than requested #
PROC real field = ( STRING line, INT field )REAL:
BEGIN
REAL result := 0;
INT c pos := LWB line;
INT max pos := UPB line;
STRING f := "";
# NOTE(review): "f ield number" below relies on spaces being insignificant inside identifiers, #
# so it should denote the same identifier as "field number" used later - confirm with the compiler in use #
FOR f ield number TO field WHILE c pos <= max pos DO
# skip leading spaces #
# the IF ... FI form stops line[ c pos ] being evaluated once c pos has passed max pos #
WHILE IF c pos > max pos THEN FALSE ELSE line[ c pos ] = " " FI DO
c pos +:= 1
OD;
IF c pos <= max pos THEN
# have a field #
INT start pos = c pos;
# advance to the first space after the field, or to the end of the line #
WHILE IF c pos > max pos THEN FALSE ELSE line[ c pos ] /= " " FI DO
c pos +:= 1
OD;
IF field number = field THEN
# have the required field #
f := line[ start pos : c pos - 1 ]
FI
FI
OD;
IF f /= "" THEN
# have the field - assume it a real value and convert it #
# the field text is associated with a file so that get can parse it #
FILE real value;
associate( real value, f );
# the handler parameter f is a REF FILE and hides the STRING f inside the handler only #
on value error( real value
, ( REF FILE f )BOOL:
BEGIN
# "handle" invalid data #
result := 0;
# return TRUE so processing can continue #
TRUE
END
);
get( real value, ( result ) )
FI;
result
END # real field # ;
# show the lines where the third field is > 6 #
# at eof is only set by the handler during a get, so the loop body runs once more for the read that hits EOF; #
# presumably that read leaves line without a third field, so real field yields 0 and nothing is printed #
WHILE NOT at eof
DO
STRING line;
get( input file, ( line, newline ) );
IF real field( line, 3 ) > 6 THEN
print( ( line, newline ) )
FI
OD;
# close the file #
close( input file )
FI
AWK
# Print every record of data.txt whose third field is greater than 6.
awk '$3 > 6 { print $0 }' data.txt
C++
// Randizo was here!
#include <iostream>
#include <fstream>
#include <string>
using namespace std;
// Reads whitespace-separated tokens from "data.txt", treating every third
// token as an earthquake magnitude. Each record whose magnitude exceeds 6.0
// is printed with its fields separated by tabs, followed by a final count.
// Returns 0 on success and 1 when the data file cannot be opened.
int main()
{
    // The task statement names the input file "data.txt"; the sibling
    // implementation below uses the same name.
    const char* path = "data.txt";
    ifstream file(path);
    if(!file)
    {
        // Without this check a missing file would silently report zero quakes.
        cerr << "Unable to open " << path << endl;
        return 1;
    }

    int count_quake = 0;
    int column = 1;      // 1-based position of the current token within its record
    string value;
    double size_quake;
    string row = "";     // tab-joined fields of the record being assembled

    while(file >> value)
    {
        if(column == 3)
        {
            // Third token of the record: the magnitude.
            size_quake = stod(value);

            if(size_quake>6.0)
            {
                count_quake++;
                row += value + "\t";
                cout << row << endl;
            }

            // Start assembling the next record.
            column = 1;
            row = "";
        }
        else
        {
            column++;
            row+=value + "\t";
        }
    }

    cout << "\nNumber of quakes greater than 6 is " << count_quake << endl;

    return 0;
}
New version:
// Jolkdarr was also here!
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
// Reads (date, name, magnitude) records from "data.txt" and prints those whose
// magnitude exceeds 6.0, followed by a count of the matching records.
// Returns 0 on success and 1 when the data file cannot be opened.
int main() {
    using namespace std;
    ifstream file("data.txt");
    if (!file) {
        // An unopened stream never reports eof(), so it must be rejected here.
        cerr << "Unable to open data.txt" << endl;
        return 1;
    }

    int count_quake = 0;
    string s1, s2;
    double rate;

    // Use the extraction itself as the loop condition: testing eof() before
    // reading lets the final failed read reuse the previous record's values,
    // which can print and count the last qualifying quake twice.
    while (file >> s1 >> s2 >> rate) {
        if (rate > 6.0) {
            cout << s1 << setw(20) << s2 << " " << rate << endl;
            count_quake++;
        }
    }

    cout << endl << "Number of quakes greater than 6 is " << count_quake << endl;
    return 0;
}
C
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Prints every line of "data.txt" whose text after the last space or tab
 * converts (via atof) to a value greater than 6.0.
 * Exits with status 1 if the file cannot be opened, otherwise returns 0.
 */
int main() {
    FILE *fp;
    char *line = NULL;   /* buffer owned by getline(); freed once at the end */
    size_t len = 0;
    ssize_t nread;       /* named nread so it does not shadow POSIX read() */
    char *lw, *lt;

    fp = fopen("data.txt", "r");
    if (fp == NULL) {
        printf("Unable to open file\n");
        exit(1);
    }
    printf("Those earthquakes with a magnitude > 6.0 are:\n\n");
    /* getline() signals end-of-file or error with -1, which is not required to equal EOF */
    while ((nread = getline(&line, &len, fp)) != -1) {
        if (nread < 2) continue;             /* ignore blank lines */
        lw = strrchr(line, ' ');             /* look for last space */
        lt = strrchr(line, '\t');            /* look for last tab */
        if (!lw && !lt) continue;            /* ignore lines with no whitespace */
        /* Ordering a pointer against NULL is undefined, so only compare the
           two positions when both separators were actually found. */
        if (!lw || (lt && lt > lw)) lw = lt; /* lw points to last space or tab */
        if (atof(lw + 1) > 6.0) printf("%s", line);
    }
    fclose(fp);
    free(line);                              /* free(NULL) is a no-op */
    return 0;
}
Using the given file:
Those earthquakes with a magnitude > 6.0 are:
8/27/1883 Krakatoa 8.8
5/18/1980 MountStHelens 7.6
C#
using System;
using System.IO;
using System.Linq;
using System.Collections.Generic;
/// <summary>
/// Prints the records of "data.txt" whose third field (the magnitude) exceeds 6.
/// </summary>
public class Program
{
    static void Main() {
        foreach (var earthquake in LargeEarthquakes("data.txt", 6))
            Console.WriteLine(string.Join(" ", earthquake));
    }

    /// <summary>
    /// Lazily yields the whitespace-separated fields of every line in
    /// <paramref name="filename"/> whose third field is greater than
    /// <paramref name="limit"/>. Lines with fewer than three fields are skipped.
    /// </summary>
    /// <exception cref="FormatException">The third field of a line is not a number.</exception>
    static IEnumerable<string[]> LargeEarthquakes(string filename, double limit) =>
        from line in File.ReadLines(filename)
        let parts = line.Split(default(char[]), StringSplitOptions.RemoveEmptyEntries)
        // Blank or truncated lines have no magnitude; indexing them would throw.
        where parts.Length >= 3
        // Parse with the invariant culture so "8.8" is read the same way on
        // machines whose locale uses ',' as the decimal separator.
        where double.Parse(parts[2], System.Globalization.CultureInfo.InvariantCulture) > limit
        select parts;
}
Cixl
/* Print the lines of data.txt that pass the magnitude test below. */
use: cx;
'data.txt' `r fopen lines {
/* Split the line and bind its three fields; presumably @@s is the space character. */
let: (time place mag) @@s split ..;
/* Split the magnitude at the decimal point and convert both halves to integers. */
let: (m1 m2) $mag @. split &int map ..;
/* NOTE(review): the test reads as (whole part >= 6) and (fraction part > 0), so a value such as 7.0 would be rejected although it is greater than 6 - confirm this is intended. */
$m1 6 >= $m2 0 > and {[$time @@s $place @@s $mag] say} if
} for
8/27/1883 Krakatoa 8.8
5/18/1980 MountStHelens 7.6
D
import std.conv : to;
import std.regex : ctRegex, split;
import std.stdio : File, writeln;
/// Prints a heading and then every line of "data.txt" whose third
/// whitespace-separated field converts to a double greater than 6.0.
/// Lines with fewer than three fields are skipped.
void main() {
    auto ctr = ctRegex!"\\s+";

    writeln("Those earthquakes with a magnitude > 6.0 are:");
    foreach (line; File("data.txt").byLineCopy) {
        auto parts = split(line, ctr);
        // A blank or truncated line has no third field; indexing it would
        // abort the program with a RangeError.
        if (parts.length < 3) {
            continue;
        }
        if (parts[2].to!double > 6.0) {
            writeln(line);
        }
    }
}
Those earthquakes with a magnitude > 6.0 are:
8/27/1883 Krakatoa 8.8
5/18/1980 MountStHelens 7.6
Emacs Lisp
#!/usr/bin/env emacs --script
;; For every file named on the command line, visit it and report each line
;; whose third whitespace-separated field is a number greater than 6.
(dolist (arg command-line-args-left)
  (find-file arg)
  (while (not (eobp))
    (let* ((line (buffer-substring (line-beginning-position)
                                   (line-end-position)))
           ;; OMIT-NULLS (the final t) drops the empty string that leading
           ;; whitespace would otherwise put in front of the real fields.
           (magn (nth 2 (split-string line "\\s-+" t))))
      ;; MAGN is nil for blank or short lines; `string-to-number' would
      ;; signal an error on nil, so test it first.
      (when (and magn (> (string-to-number magn) 6.0))
        ;; Pass LINE as data, not as the format string, so a literal "%"
        ;; in the file cannot be interpreted as a format directive.
        (message "%s" line)))
    (forward-line 1)))
Factor
lines
is a convenience word that reads lines from standard input. If you don't want to type them all in yourself, it is suggested that you give the program a file to read. For example, on the Windows command line: factor kernighan.factor < earthquakes.txt
USING: io math math.parser prettyprint sequences splitting ;
IN: rosetta-code.kernighan
! Read all input lines, keep those whose last space-separated token
! converts to a number greater than 6, and pretty-print the resulting sequence.
lines [ "\s" split last string>number 6 > ] filter .
Go
package main
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
// main prints every line of data.txt whose third whitespace-separated field
// parses as a float greater than 6.0. It stops with a message if the file
// cannot be opened, a magnitude cannot be parsed, or reading fails.
func main() {
	f, err := os.Open("data.txt")
	if err != nil {
		fmt.Println("Unable to open the file")
		return
	}
	defer f.Close()
	// Print (not Println) with an explicit blank line: same output, and it
	// avoids the "redundant newline" diagnostic from go vet.
	fmt.Print("Those earthquakes with a magnitude > 6.0 are:\n\n")
	input := bufio.NewScanner(f)
	for input.Scan() {
		line := input.Text()
		fields := strings.Fields(line)
		// Blank or truncated lines have no magnitude; indexing fields[2]
		// on them would panic.
		if len(fields) < 3 {
			continue
		}
		mag, err := strconv.ParseFloat(fields[2], 64)
		if err != nil {
			fmt.Println("Unable to parse magnitude of an earthquake")
			return
		}
		if mag > 6.0 {
			fmt.Println(line)
		}
	}
	// Scan returns false on both EOF and I/O errors; Err tells them apart.
	if err := input.Err(); err != nil {
		fmt.Println("Unable to read the file:", err)
	}
}
Those earthquakes with a magnitude > 6.0 are:
8/27/1883 Krakatoa 8.8
5/18/1980 MountStHelens 7.6
Groovy
import java.util.regex.Pattern
/**
 * Prints a heading followed by every line of data.txt whose third
 * whitespace-separated field is a number greater than 6.0.
 */
class LargeEarthquake {
    static void main(String[] args) {
        def whitespace = Pattern.compile("\\s+")
        println("Those earthquakes with a magnitude > 6.0 are:\n")

        new File("data.txt").eachLine { record ->
            // The magnitude is the third field of the record.
            def magnitude = whitespace.split(record)[2].toDouble()
            if (magnitude > 6.0) {
                println(record)
            }
        }
    }
}
Those earthquakes with a magnitude > 6.0 are:
8/27/1883 Krakatoa 8.8
5/18/1980 MountStHelens 7.6
Kotlin
// Version 1.2.40
import java.io.File
/**
 * Prints a heading followed by every line of data.txt whose third
 * whitespace-separated field is a number greater than 6.0.
 */
fun main(args: Array<String>) {
    val whitespace = Regex("""\s+""")
    println("Those earthquakes with a magnitude > 6.0 are:\n")
    File("data.txt").forEachLine { record ->
        // The magnitude is the third field of the record.
        val magnitude = record.split(whitespace)[2].toDouble()
        if (magnitude > 6.0) {
            println(record)
        }
    }
}
Using the given file:
Those earthquakes with a magnitude > 6.0 are:
8/27/1883 Krakatoa 8.8
5/18/1980 MountStHelens 7.6
Haskell
import qualified Data.ByteString.Lazy.Char8 as C
-- | Show every line of data.txt whose final word reads as a 'Float'
-- greater than 6.
main :: IO ()
main = C.readFile "data.txt" >>= mapM_ print . filter isLarge . C.lines
  where
    -- The magnitude is the last whitespace-separated word of the row.
    isLarge row = (read (C.unpack (last (C.words row))) :: Float) > 6
"8/27/1883 Krakatoa 8.8"
"5/18/1980 MountStHelens 7.6"
J
NB. this program is designed for systems where the line ending is either LF or CRLF
NB. filename select_magnitude minimum
NB. default file is /tmp/famous.quakers
NB. result: the lines whose last blank-separated field is strictly greater than minimum
select_magnitude=: '/tmp/famous.quakers'&$: : (4 :0)
 data =. 1!:1 boxopen x NB. read the file
 data =. data -. CR NB. remove nasty carriage returns
 data =. ,&LF^:(LF~:{:) data NB. append new line if none found
 lines =. [;._2 data NB. split the literal based on the final character
 magnitudes =. ". _1&{::@(<;._2)@(,&' ')@deb"1 lines
 (y < magnitudes) # lines NB. strictly greater: the task asks for magnitude > minimum
)
select_magnitude 6
8/27/1883 Krakatoa 8.8
5/18/1980 MountStHelens 7.6
Julia
Using the example data as a small text file.
using DataFrames, CSV
# Load the space-delimited file into a DataFrame with named, typed columns.
quakes = DataFrame(CSV.File("kernighansproblem.txt", delim=" ", ignorerepeated=true,
    header=["Date", "Location", "Magnitude"], types=[DateTime, String, Float64],
    dateformat="mm/dd/yyyy"))
# Keep only the rows whose magnitude exceeds 6 and display them.
large = filter(row -> row[:Magnitude] > 6, quakes)
println(large)
{{output}}
2×3 DataFrame
│ Row │ Date │ Location │ Magnitude │
│ │ DateTime │ String │ Float64 │
├─────┼─────────────────────┼───────────────┼───────────┤
│ 1 │ 1883-08-27T00:00:00 │ Krakatoa │ 8.8 │
│ 2 │ 1980-05-18T00:00:00 │ MountStHelens │ 7.6 │
Lua
For each line, the Lua pattern "%S+$" is used to capture between the final space character and the end of the line.
-- Print every line of the data file whose final non-space token is a number
-- greater than 6.
-- arg[1] is the first argument provided at the command line
for line in io.lines(arg[1] or "data.txt") do -- use data.txt if arg[1] is nil
    -- "%S+$" yields nil when the line is empty or ends in whitespace
    local field = line:match("%S+$")
    -- tonumber returns nil for non-numeric text; comparing nil with a number
    -- raises an error, so both cases are filtered out before the comparison
    local magnitude = field and tonumber(field)
    if magnitude and magnitude > 6 then print(line) end
end
Perl
perl -ne 'print if /(\S+)\s*$/ && $1 > 6' data.txt
Perl 6
Pass in a file name, or use default for demonstration purposes.
# Read from the file named on the command line, or fall back to the demo data.
my $source = @*ARGS[0] ?? @*ARGS[0].IO !! q:to/END/;
8/27/1883 Krakatoa 8.8
5/18/1980 MountStHelens 7.6
3/13/2009 CostaRica 5.1
END
# Print each line whose third word is greater than 6.
.say for $source.lines.grep({ .words[2] > 6 });
PHP
Parse using PHP's fscanf().
<?php
// Lists the records of the data file named on the command line whose third
// field (the magnitude) is greater than 6.

// The data file must be named on the command line.
if ( ! isset( $argv[1] ) ) {
    die( 'Data file name required' );
}

// Open the file for reading and stop if that fails.
$handle = fopen( $argv[1], 'r' );
if ( ! $handle ) {
    die ( 'Cannot open file: ' . $argv[1] );
}

// Each fscanf() call parses one line into date, location and magnitude.
while ( $fields = fscanf( $handle, "%s %s %f" ) ) {
    list( $when, $where, $magnitude ) = $fields;
    if ( $magnitude > 6 ) {
        printf( "% -12s % -19s %.1f\n", $when, $where, $magnitude );
    }
}

fclose( $handle );
Usage: Specify file name on command line. Ex:
php eq.php data.txt
8/27/1883 Krakatoa 8.8
5/18/1980 MountStHelens 7.6
```

## Phix

```Phix
-- Show the lines of the data file (third command-line word, else "e02.txt")
-- for which scanf finds exactly one "%s %f" parse whose number is above 6.
sequence cl = command_line()
string filename = iff(length(cl)>=3?cl[3]:"e02.txt")
integer fn = open(filename,"r")
if fn=-1 then crash("cannot open filename") end if
while 1 do
    object line = gets(fn)
    if line=-1 then exit end if
    line = substitute(trim(line),"\t"," ")
    sequence r = scanf(line,"%s %f")
    if length(r)=1 and r[1][2]>6 then ?line end if
end while
close(fn)
```

```txt
"8/27/1883 Krakatoa 8.8"
"5/18/1980 MountStHelens 7.6"
```

## Python

Typed into a bash shell or similar:

```python
python -c '
with open("data.txt") as f:
    for ln in f:
        if float(ln.strip().split()[2]) > 6:
            print(ln.strip())'
```

Or, if scale permits a file slurp and a parse retained for further processing, we can combine the parse and filter with a concatMap abstraction:

```python
from os.path import expanduser
from functools import (reduce)
from itertools import (chain)


# largeQuakes :: Int -> [String] -> [(String, String, String)]
def largeQuakes(n):
    def quake(threshold):
        def go(x):
            ws = x.split()
            return [tuple(ws)] if threshold < float(ws[2]) else []
        return lambda x: go(x)
    return concatMap(quake(n))


# main :: IO ()
def main():
    # Read inside a with-block so the file handle is closed promptly.
    with open(expanduser('~/data.txt')) as f:
        rows = f.read().splitlines()
    print (
        largeQuakes(6)(rows)
    )


# GENERIC ABSTRACTION -------------------------------------

# concatMap :: (a -> [b]) -> [a] -> [b]
def concatMap(f):
    return lambda xs: list(
        chain.from_iterable(
            map(f, xs)
        )
    )


# MAIN ---
if __name__ == '__main__':
    main()
```

```txt
[('8/27/1883', 'Krakatoa', '8.8'), ('5/18/1980', 'MountStHelens', '7.6')]
```

## Racket

The file specified contains the three lines from the task description. This is just a file filter, matching lines are printed out.
```racket
#lang racket

(with-input-from-file "data/large-earthquake.txt"
  (λ ()
    (for ((s (in-port read-line))
          #:when (> (string->number (third (string-split s))) 6))
      (displayln s))))
```

Or, defining a list -> list function in terms of **filter**:

```scheme
#lang racket

; largeQuakes :: Int -> [String] -> [String]
(define (largeQuakes n xs)
  (filter
   (λ (x)
     (< n (string->number (last (string-split x)))))
   xs))

; main :: IO ()
(module* main #f
  (display
   (unlines
    (largeQuakes 6
                 (lines (readFile "~/quakes.txt"))))))

; GENERIC ---------------------------------------------

; lines :: String -> [String]
(define (lines s)
  (string-split s "\n"))

; readFile :: FilePath -> IO String
(define (readFile fp)
  (file->string
   (expand-user-path fp)))

; unlines :: [String] -> String
(define (unlines xs)
  (string-join xs "\n"))
```

```txt
8/27/1883 Krakatoa 8.8
5/18/1980 MountStHelens 7.6
```

To combine filtering with more pre-processing, we can use **concatMap** in place of **filter**:

```scheme
#lang racket

(require gregor) ; Date parsing

; test :: IO ()
(module* main #f
  (for
      ([q ((quakesAbove 6)
           (lines (readFile "~/quakes.txt")))])
    (writeln q)))

; quakesAbove :: Int -> [String] -> [(Date, String, Float)]
(define (quakesAbove n)
  (λ (xs)
    ((concatMap
      (λ (x)
        (local [(define-values (dte k mgn)
                  (apply values (string-split x)))
                (define m (string->number mgn))]
          (if (< n m)
              (list (list (parse-date dte "M/d/y") k m))
              '()))))
     xs)))

; GENERIC ---------------------------------------------

; concatMap :: (a -> [b]) -> [a] -> [b]
(define (concatMap f)
  (λ (xs)
    (foldr (λ (x a) (append (f x) a)) '() xs)))

; lines :: String -> [String]
(define (lines s)
  (string-split s "\n"))

; readFile :: FilePath -> IO String
(define (readFile fp)
  (file->string
   (expand-user-path fp)))
```

```txt
(#<date 1883-08-27> "Krakatoa" 8.8)
(#<date 1980-05-18> "MountStHelens" 7.6)
```

## REXX

A little extra coding was added to provide:
:::* an output title (with centering and better alignment)
:::* an error message for when the input file wasn't found (or is 
empty)
:::* the number of records read
:::* the number of records that met the qualifying magnitude
:::* the qualifying magnitude

```rexx
/*REXX program to read a file containing a list of earthquakes: date, site, magnitude.*/
parse arg iFID mMag . /*obtain optional arguments from the CL*/
if iFID=='' | iFID=="," then iFID= 'earthquakes.dat' /*Not specified? Then use default*/
if mMag=='' | mMag=="," then mMag= 6 /* " " " " " */
#=0 /*# of earthquakes that meet criteria. */
   do j=0 while lines(iFID)\==0 /*read all lines in the input file. */
   if j==0 then say 'Reading from file: ' iFID /*show the name of the file being read.*/
   parse value linein(iFID) with date site mag . /*parse three words from an input line.*/
   if mag<=mMag then iterate /*Is the quake too small? Then skip it*/
   #= # + 1; if j==0 then say /*bump the number of qualifying quakes.*/
   if #==1 then say center('date', 20, "═") '=magnitude=' center("site", 20, '═')
   say center(date, 20) center(mag/1, 11) ' ' site
   end /*j*/ /*stick a fork in it, we're all done. */
say
say
if j\==0 then say j 'records read from file: ' iFID
say
if j==0 then say er 'file ' iFID " is empty or not found."
else say # ' earthquakes listed whose magnitude is ≥ ' mMag ``` ```txt Reading from file: earthquakes.dat ════════date════════ =magnitude= ════════site════════ 08/27/1883 8.8 Krakatoa 05/18/1980 7.6 MountStHelens 3 records read from file: earthquakes.dat 2 earthquakes listed whose magnitude is ≥ 6 ``` ## Ring ```ring # Project : Kernighans large earthquake problem load "stdlib.ring" nr = 0 equake = list(3) fn = "equake.txt" fp = fopen(fn,"r") while not feof(fp) nr = nr + 1 equake[nr] = readline(fp) end fclose(fp) for n = 1 to len(equake) for m = 1 to len(equake[n]) if equake[n][m] = " " sp = m ok next sptemp = right(equake[n],len(equake[n])-sp) sptemo = number(sptemp) if sptemp > 6 see equake[n] + nl ok next ``` Output: ```txt 8/27/1883 Krakatoa 8.8 5/18/1980 MountStHelens 7.6 ``` ## Ruby ```txt ruby -nae "$F[2].to_f > 6 && print" data.txt ``` A more interesting problem. Print only the events whose magnitude is above average. Contents of the file: ```txt 8/27/1883 Krakatoa 8.8 5/18/1980 MountStHelens 7.6 3/13/2009 CostaRica 5.1 2000-02-02 Foo 7.7 1959-08-08 Bar 6.2 1849-09-09 Pym 9.0 ``` The command: ```txt ruby -e"m=$<.to_a;f=->s{s.split[2].to_f};a=m.reduce(0){|t,s|t+f[s]}/m.size;puts m.select{|s|f[s]>a}" e.txt ``` Output: ```txt 8/27/1883 Krakatoa 8.8 5/18/1980 MountStHelens 7.6 2000-02-02 Foo 7.7 1849-09-09 Pym 9.0 ``` ## Scala ```Scala scala.io.Source.fromFile("data.txt").getLines .map("\\s+".r.split(_)) .filter(_(2).toDouble > 6.0) .map(_.mkString("\t")) .foreach(println) ``` ## Swift Expects the program to be started with the path to the data file. ```Swift import Foundation guard let path = Array(CommandLine.arguments.dropFirst()).first else { fatalError() } let fileData = FileManager.default.contents(atPath: path)! let eventData = String(data: fileData, encoding: .utf8)! 
for line in eventData.components(separatedBy: "\n") {
  guard let lastSpace = line.lastIndex(of: " "), // Get index of last space
        line.index(after: lastSpace) != line.endIndex, // make sure the last space isn't the end of the line
        // Take everything after the last space; a single-index subscript would
        // yield only one character, so "6.5" would be read as 6 and missed.
        let magnitude = Double(String(line[line.index(after: lastSpace)...])),
        magnitude > 6 else { // Finally check the magnitude
    continue
  }

  print(line)
}
```

## Tcl

Inspired by awk.

```tcl
catch {console show} ;## show console when running from tclwish
catch {wm withdraw .}

set filename "data.txt"
set fh [open $filename]
set NR 0 ;# number-of-record, means linenumber

while {[gets $fh line]>=0} { ;# gets returns length of line, -1 means eof
    incr NR
    set line2 [regexp -all -inline {\S+} $line] ;# reduce multiple whitespace
    set fld [split $line2] ;# split line into fields, at whitespace
    set f3 [lindex $fld 2] ;# zero-based
    #set NF [llength $fld] ;# number-of-fields

    if {$f3 > 6} { puts "$line" }
}
close $fh
```

## Yabasic

```Yabasic
if peek("argument") then
    filename$ = peek$("argument")
else
    filename$ = "data.txt"
end if

dim tok$(1)
a = open(filename$)
if not a error "Could not open '" + filename$ + "' for reading"
while(not eof(a))
    line input #a a$
    void = token(a$, tok$())
    if val(tok$(3)) > 6 print a$
wend
close a
```

## zkl

While lexical comparisons [of numeric data] are fine for this problem, it is bad practice so I don't do it (written so text is automatically converted to float).

```zkl
fcn equake(data,out=Console){
   data.pump(out,fcn(line){ 6.0<line.split()[-1] },Void.Filter)
}
```

```zkl
equake(Data(Void,
#<<<
"8/27/1883 Krakatoa 8.8\n"
"5/18/1980 MountStHelens 7.6\n"
"3/13/2009 CostaRica 5.1\n"
#<<<
));
```

or

```zkl
equake(File("equake.txt"));
```

or

```zkl
$ zkl --eval 'File.stdin.pump(Console,fcn(line){ 6.0