⚠️ Warning: This is a draft ⚠️
This means it might contain formatting issues, incorrect code, conceptual problems, or other severe issues.
If you want to help to improve and eventually enable this page, please fork RosettaGit's repository and open a merge request on GitHub.
{{draft task}} ; Task:Given an input string and a dictionary of words, segment the input string into a space-separated sequence of dictionary words if possible.
Aime
integer
wordbreak(record dict, text phrase, integer p, list words)
{
integer complete, n;
text s;
complete = 0;
if (rsk_lower(dict, phrase, s)) {
if (s == phrase) {
words.append(s);
complete = 1;
} else {
do {
n = 0;
while (phrase[n] == s[n]) {
n += 1;
}
if (!n) {
break;
}
words.append(cut(s, 0, n));
complete = wordbreak(dict, project(phrase, n), p + 1, words);
if (complete) {
break;
}
words.delete(-1);
} while (rsk_less(dict, s, s));
}
}
if (!p) {
o_(phrase, ":");
if (complete) {
words.ucall(o_, 1, " ");
} else {
o_(" can't break");
}
o_newline();
words.clear;
}
complete;
}
integer
main(void)
{
record dict;
dict.fit("a", 0, "bc", 0, "abc", 0, "cd", 0, "b", 0);
list("abcd", "abbc", "abcbcd", "acdbc", "abcdd").ucall(wordbreak, 1, dict, 0, list());
return 0;
}
{{Out}}
abcd: a b cd
abbc: a b bc
abcbcd: abc b cd
acdbc: a cd bc
abcdd: can't break
Go
package main
import (
"fmt"
"strings"
)
type dict map[string]bool
func newDict(words ...string) dict {
d := dict{}
for _, w := range words {
d[w] = true
}
return d
}
func (d dict) wordBreak(s string) (broken []string, ok bool) {
if s == "" {
return nil, true
}
type prefix struct {
length int
broken []string
}
bp := []prefix{{0, nil}}
for end := 1; end <= len(s); end++ {
for i := len(bp) - 1; i >= 0; i-- {
w := s[bp[i].length:end]
if d[w] {
b := append(bp[i].broken, w)
if end == len(s) {
return b, true
}
bp = append(bp, prefix{end, b})
break
}
}
}
return nil, false
}
func main() {
d := newDict("a", "bc", "abc", "cd", "b")
for _, s := range []string{"abcd", "abbc", "abcbcd", "acdbc", "abcdd"} {
if b, ok := d.wordBreak(s); ok {
fmt.Printf("%s: %s\n", s, strings.Join(b, " "))
} else {
fmt.Println("can't break")
}
}
}
{{out}}
abcd: a b cd
abbc: a b bc
abcbcd: a bc b cd
acdbc: a cd bc
can't break
Haskell
{{Trans|Javascript}}
import Data.Tree
import Data.List (isPrefixOf, intercalate)
wordBreaks :: [String] -> String -> String
wordBreaks ws s =
let parses = go <$> tokenTrees ws s
go t
| null (subForest t) = [rootLabel t]
| otherwise = subForest t >>= ((:) (rootLabel t) . go)
report xs
| null xs = "\tNot parseable with these words"
| otherwise = unlines $ (('\t' :) . intercalate " -> ") <$> xs
in s ++ (':' : '\n' : report parses)
tokenTrees :: [String] -> String -> [Tree String]
tokenTrees ws = go
where
go s
| s `elem` ws = [Node s []]
| otherwise = ws >>= next s
next s w
| w `isPrefixOf` s = parse w (go (drop (length w) s))
| otherwise = []
parse w xs
| null xs = []
| otherwise = [Node w xs]
-- TEST ------------------------------------------------------------
ws, texts :: [String]
ws = words "a bc abc cd b"
texts = words "abcd abbc abcbcd acdbc abcdd"
main :: IO ()
main = (putStrLn . unlines) $ wordBreaks ws <$> texts
{{Out}}
abcd:
a -> b -> cd
abbc:
a -> b -> bc
abcbcd:
a -> bc -> b -> cd
abc -> b -> cd
acdbc:
a -> cd -> bc
abcdd:
Not parseable with these words
J
With such short sentences we can find the partition sets, then check that all are words.
all_partitions=: <@(<;.1)"1 _~ (1,.[:#:[:i.2^<:@:#) NB. all_partitions 'abcd'
word_break=: ([ #~ 0 = [: #&>@:] -.L:_1 _)~ all_partitions
main=: (] , (;:inv L:_1@:word_break >))"_ 0 boxopen
NB. demonstrate partitions of four integers
all_partitions i. 4
┌─────────┬─────────┬─────────┬─────────┬─────────┬─────────┬─────────┬─────────┐
│┌───────┐│┌─────┬─┐│┌───┬───┐│┌───┬─┬─┐│┌─┬─────┐│┌─┬───┬─┐│┌─┬─┬───┐│┌─┬─┬─┬─┐│
││0 1 2 3│││0 1 2│3│││0 1│2 3│││0 1│2│3│││0│1 2 3│││0│1 2│3│││0│1│2 3│││0│1│2│3││
│└───────┘│└─────┴─┘│└───┴───┘│└───┴─┴─┘│└─┴─────┘│└─┴───┴─┘│└─┴─┴───┘│└─┴─┴─┴─┘│
└─────────┴─────────┴─────────┴─────────┴─────────┴─────────┴─────────┴─────────┘
NB. demonstrate word_break
NB. (,L:_1]0;1 2;0 1 2;2 3;1)
NB. ┌─┬───┬─────┬───┬─┐
NB. │0│1 2│0 1 2│2 3│1│
NB. └─┴───┴─────┴───┴─┘
(,L:_1]0;1 2;0 1 2;2 3;1) word_break i. 4
┌─────────┐
│┌─┬─┬───┐│
││0│1│2 3││
│└─┴─┴───┘│
└─────────┘
NB. save and display the dictionary
[dictionary=: ;: 'a bc abc cd b'
┌─┬──┬───┬──┬─┐
│a│bc│abc│cd│b│
└─┴──┴───┴──┴─┘
NB. demonstrate main
dictionary main 'abc'
┌───┬───┬────┐
│abc│abc│a bc│
└───┴───┴────┘
NB. solution
dictionary main ;: 'abcd abbc abcbcd acdbc abcdd'
┌──────┬────────┬─────────┐
│abcd │a b cd │ │
├──────┼────────┼─────────┤
│abbc │a b bc │ │
├──────┼────────┼─────────┤
│abcbcd│abc b cd│a bc b cd│
├──────┼────────┼─────────┤
│acdbc │a cd bc │ │
├──────┼────────┼─────────┤
│abcdd │ │ │
└──────┴────────┴─────────┘
Javascript
Composing a solution from generic functions. {{Trans|Python}}
(() => {
'use strict';
const main = () => {
const
wds = words('a bc abc cd b'),
texts = words('abcd abbc abcbcd acdbc abcdd');
return unlines(
map(wordBreaks(wds),
texts
)
);
};
// WORD BREAKS ----------------------------------------
// tokenTrees :: [String] -> String -> [Tree String]
const tokenTrees = (wds, s) => {
const go = s =>
wds.includes(s) ? (
[Node(s, [])]
) : bindList(wds, next(s));
const next = s => w =>
s.startsWith(w) ? (
parse(w, go(s.slice(w.length)))
) : [];
const parse = (w, xs) =>
0 < xs.length ? [Node(w, xs)] : xs;
return go(s);
};
// wordBreaks :: [String] -> String -> String
const wordBreaks = wds => s => {
const
// go :: Tree a -> [a]
go = t => isNull(t.nest) ? [
t.root
] : bindList(
t.nest,
compose(cons(t.root), go),
),
parses = map(go, tokenTrees(wds, s));
return `${s}:\n` + (
0 < parses.length ? unlines(
map(x => '\t' + intercalateS(' -> ', x),
parses
)
) : '\t(Not parseable with these words)'
);
};
// GENERIC FUNCTIONS ----------------------------------
// Node :: a -> [Tree a] -> Tree a
const Node = (v, xs) => ({
type: 'Node',
root: v,
nest: xs || []
});
// bindList (>>=) :: [a] -> (a -> [b]) -> [b]
const bindList = (xs, mf) => [].concat.apply([], xs.map(mf));
// compose (<<<) :: (b -> c) -> (a -> b) -> a -> c
const compose = (f, g) => x => f(g(x));
// cons :: a -> [a] -> [a]
const cons = x => xs => [x].concat(xs);
// intercalateS :: String -> [String] -> String
const intercalateS = (s, xs) =>
xs.join(s);
// isNull :: [a] -> Bool
// isNull :: String -> Bool
const isNull = xs =>
Array.isArray(xs) || ('string' === typeof xs) ? (
1 > xs.length
) : undefined;
// isPrefixOf takes two lists or strings and returns
// true iff the first is a prefix of the second.
// isPrefixOf :: [a] -> [a] -> Bool
// isPrefixOf :: String -> String -> Bool
const isPrefixOf = (xs, ys) => {
const pfx = (xs, ys) => {
const intX = xs.length;
return 0 < intX ? (
ys.length >= intX ? xs[0] === ys[0] && pfx(
xs.slice(1), ys.slice(1)
) : false
) : true;
};
return 'string' !== typeof xs ? (
pfx(xs, ys)
) : ys.startsWith(xs);
};
// map :: (a -> b) -> [a] -> [b]
const map = (f, xs) => xs.map(f);
// unlines :: [String] -> String
const unlines = xs => xs.join('\n');
// words :: String -> [String]
const words = s => s.split(/\s+/);
// MAIN ---
return main();
})();
{{Out}}
abcd:
a -> b -> cd
abbc:
a -> b -> bc
abcbcd:
a -> bc -> b -> cd
abc -> b -> cd
acdbc:
a -> cd -> bc
abcdd:
(Not parseable with these words)
Julia
Some extra loops to record and print all solutions.
words = ["a", "bc", "abc", "cd", "b"]
strings = ["abcd", "abbc", "abcbcd", "acdbc", "abcdd"]
subregex = join(words, ")|(")
regexes = ["\^\(\($subregex\)\)\{$i}\$" for i in 6:-1:1]
function wordbreak()
for s in strings
solutions = []
for regex in regexes
rmat = match(Regex(regex), s)
if rmat != nothing
push!(solutions, ["$w" for w in Set(rmat.captures) if w != nothing])
end
end
if length(solutions) > 0
println("$(length(solutions)) Solution(s) for $s:")
for sol in solutions
println(" Solution: $(sol)")
end
else
println("No solutions for $s : No fitting matches found.")
end
end
end
wordbreak()
{{output}}
1 Solution(s) for abcd:
Solution: SubString{String}["cd", "b", "a"]
1 Solution(s) for abbc:
Solution: SubString{String}["b", "a", "bc"]
2 Solution(s) for abcbcd:
Solution: SubString{String}["cd", "b", "a", "bc"]
Solution: SubString{String}["cd", "abc", "b"]
1 Solution(s) for acdbc:
Solution: SubString{String}["cd", "a", "bc"]
No solutions for abcdd : No fitting matches found.
Kotlin
I've downloaded the free dictionary at http://www.puzzlers.org/pub/wordlists/unixdict.txt for this task. All single letters from 'a' to 'z' are considered to be words by this dictionary but 'bc' and 'cd' which I'd have expected to be present are not.
// version 1.1.3
import java.io.File
val partitions = mutableListOf<List<String>>()
fun partitionString(s: String, ml: MutableList<String>, level: Int) {
for (i in s.length - 1 downTo 1) {
val part1 = s.substring(0, i)
val part2 = s.substring(i)
ml.add(part1)
ml.add(part2)
partitions.add(ml.toList())
if (part2.length > 1) {
ml.removeAt(ml.lastIndex)
partitionString(part2, ml, level + 1)
}
while (ml.size > level) ml.removeAt(ml.lastIndex)
}
}
fun main(args: Array<String>) {
val words = File("unixdict.txt").readLines()
val strings = listOf("abcd", "abbc", "abcbcd", "acdbc", "abcdd")
for (s in strings) {
partitions.clear()
partitions.add(listOf(s))
val ml = mutableListOf<String>()
partitionString(s, ml, 0)
val solutions = mutableListOf<List<String>>()
for (partition in partitions) {
var allInDict = true
for (item in partition) {
if (words.indexOf(item) == -1) {
allInDict = false
break
}
}
if (allInDict) solutions.add(partition)
}
val plural = if (solutions.size == 1) "" else "s"
println("$s: ${solutions.size} solution$plural")
for (solution in solutions) {
println(" ${solution.joinToString(" ")}")
}
println()
}
}
{{out}}
abcd: 2 solutions
abc d
a b c d
abbc: 1 solution
a b b c
abcbcd: 3 solutions
abc b c d
a b cb c d
a b c b c d
acdbc: 2 solutions
ac d b c
a c d b c
abcdd: 2 solutions
abc d d
a b c d d
Lua
-- a specialized dict format is used to minimize the
-- possible candidates for this particalur problem
function genDict(ws)
local d,dup,head,rest = {},{}
for w in ws:gmatch"%w+" do
local lw = w:lower()
if not dup[lw] then
dup[lw], head,rest = true, lw:match"^(%w)(.-)$"
d[head] = d[head] or {n=-1}
local len = #rest
d[head][len] = d[head][len] or {}
d[head][len][rest] = true
if len>d[head].n then
d[head].n = len
end
end
end
return d
end
-- sample default dict
local defWords = "a;bc;abc;cd;b"
local defDict = genDict(defWords)
function wordbreak(w, dict)
if type(w)~='string' or w:len()==0 then
return nil,'emprty or not a string'
end
dict = type(dict)=='string' and genDict(dict) or dict or defDict
local r, len = {}, #w
-- backtracking
local function try(i)
if i>len then return true end
local head = w:sub(i,i):lower()
local d = dict[head]
if not d then return end
for j=math.min(d.n, len-i),0,-1 do -- prefer longer first
if d[j] then
local rest = w:sub(i+1,i+j):lower()
if d[j][rest] then
r[1+#r] = w:sub(i,i+j)
if try(i+j+1) then
return true
else
r[#r]=nil
end
end
end
end
end
if try(1) then
return table.unpack(r)
else
return nil,'-no solution-'
end
end
-- test
local test = {'abcd','abbc','abcbcd','acdbc','abcdd' }
for i=1,#test do
print(test[i],wordbreak(test[i]))
end
{{out}}
abcd a b cd
abbc a b bc
abcbcd abc b cd
acdbc a cd bc
abcdd nil -no solution-
Perl
use strict;
use warnings;
my @words = <a o is pi ion par per sip miss able>;
print "$_: " . word_break($_,@words) . "\n" for <a aa amiss parable opera operable inoperable permission mississippi>;
sub word_break {
my($word,@dictionary) = @_;
my @matches;
my $one_of = join '|', @dictionary;
@matches = $word =~ /^ ($one_of) ($one_of)? ($one_of)? ($one_of)? $/x; # sub-optimal: limited number of matches
return join(' ', grep {$_} @matches) || "(not possible)";
}
{{out}}
a: a
aa: a a
ado: ad o
amiss: a miss
admission: ad miss ion
parable: par able
opera: o per a
operable: o per able
inoperable: in o per able
permission: per miss ion
permissible: Not possible
mississippi: miss is sip pi
Perl 6
{{works with|Rakudo|2017.04}} This implementation does not necessarily find ''every'' combination, it returns the one with the longest matching tokens.
;
my $regex = @words.join('|');
put "$_: ", word-break($_) for <abcd abbc abcbcd acdbc abcdd>;
sub word-break (Str $word) { ($word ~~ / ^ (<$regex>)+ $ /)[0] // "Not possible" }
{{out}}
abcd: a b cd
abbc: a b bc
abcbcd: abc b cd
acdbc: a cd bc
abcdd: Not possible
Phix
See talk page
procedure populate_dict(sequence s)
?s
for i=1 to length(s) do setd(s[i],0) end for
end procedure
--/*
function valid_word(string word)
if length(word)=1 then return find(word,{"a","i"})!=0 end if
if find(word,{"sis","sst","se"}) then return false end if -- hack
for i=1 to length(word) do
integer ch = word[i]
if ch<'a'
or ch>'z' then
return false
end if
end for
return true
end function
--*/
populate_dict(split("a bc abc cd b"))
--/*
integer fn = open("demo\\unixdict.txt","r")
sequence words = get_text(fn,GT_LF_STRIPPED)
close(fn)
for i=length(words) to 1 by -1 do
if not valid_word(words[i]) then
words[i] = words[$]
words = words[1..$-1]
end if
end for
populate_dict(words)
--*/
function prrec(sequence wordstarts, integer idx, sequence sofar, bool show)
if idx>length(wordstarts) then
if show then
?sofar
end if
return 1
end if
integer res = 0
for i=1 to length(wordstarts[idx]) do
string w = wordstarts[idx][i]
res += prrec(wordstarts,idx+length(w),append(sofar,w),show)
end for
return res
end function
function flattens(sequence s)
-- remove all nesting and empty sequences from a nested sequence of strings
sequence res = {}, si
for i=1 to length(s) do
si = s[i]
if string(si) then
res = append(res,si)
else
res &= flattens(si)
end if
end for
return res
end function
procedure test(string s)
integer l = length(s)
sequence wordstarts = repeat({},l), wordends = repeat(0,l)
integer wordend = 1 -- (pretend a word just ended at start)
for i=1 to l do
if wordend then
for j=i to l do
object pkey = getd_partial_key(s[i..j])
if string(pkey) and length(pkey)>j-i and s[i..j]=pkey[1..j-i+1] then
if length(pkey)=j-i+1 then
-- exact match
wordstarts[i] = append(wordstarts[i],pkey)
wordends[j] += 1
end if
else
exit
end if
end for
end if
wordend = wordends[i]
end for
bool worthwhile = true
while worthwhile do
worthwhile = false
wordend = 1 -- (pretend a word just ended at start)
for i=1 to l do
if wordend then
-- eliminate any words that end before a wordstarts of {}.
for j=length(wordstarts[i]) to 1 by -1 do
integer wl = length(wordstarts[i][j])
if i+wl<=l and wordstarts[i+wl]={} then
wordends[i+wl-1] -= 1
wordstarts[i][j..j] = {}
worthwhile = true
end if
end for
else
-- elimitate all words that start here.
for j=1 to length(wordstarts[i]) do
integer wl = length(wordstarts[i][j])
if i+wl<=l then
wordends[i+wl-1] -= 1
worthwhile = true
end if
end for
wordstarts[i] = {}
end if
wordend = wordends[i]
end for
end while
--?{wordstarts,wordends}
if sum(wordends)=0 then
printf(1,"%s: not possible",{s})
else
integer count = prrec(wordstarts,1,{},false)
if count=1 then
printf(1,"%s: 1 solution: %s\n",{s,join(flattens(wordstarts))})
elsif count>20 then
printf(1,"%s: %d solution(s): (too many to show)\n",{s,count})
pp({wordstarts,wordends})
else
printf(1,"%s: %d solution(s):\n",{s,count})
count = prrec(wordstarts,1,{},true)
end if
end if
end procedure
constant tests = {"abcd","abbc","abcbcd","acdbc","abcdd"}
--constant tests = {"wordsisstringofspaceseparatedwords"}
for i=1 to length(tests) do test(tests[i]) end for
{{out}}
{"a","bc","abc","cd","b"}
abcd: 1 solution: a b cd
abbc: 1 solution: a b bc
abcbcd: 2 solution(s):
{"a","bc","b","cd"}
{"abc","b","cd"}
acdbc: 1 solution: a cd bc
abcdd: not possible
PicoLisp
(setq *Dict (quote "a" "bc" "abc" "cd" "b"))
(setq *Dict2
(quote
"mobile" "samsung" "sam" "sung" "man" "mango"
"icecream" "and" "go" "i" "like" "ice" "cream" ) )
(de word (Str D)
(let
(Str (chop Str)
Len (length Str)
DP (need (inc Len))
Res (need (inc Len))
B 1 )
(set DP 0)
(map
'((L)
(and
(get DP B)
(for N (length L)
(let Str (pack (head N L))
(when (member Str D)
(set (nth Res (+ B N))
(copy (get Res B)) )
(queue (nth Res (+ B N)) Str)
(set (nth DP (+ B N))
(inc (get DP B)) ) ) ) ) )
(inc 'B) )
Str )
(last Res) ) )
(println (word "abcd" *Dict))
(println (word "abbc" *Dict))
(println (word "abcbcd" *Dict))
(println (word "acdbc" *Dict))
(println (word "abcdd" *Dict))
(println (word "ilikesamsung" *Dict2))
(println (word "iii" *Dict2))
(println (word "ilikelikeimangoiii" *Dict2))
(println (word "samsungandmango" *Dict2))
(println (word "samsungandmangok" *Dict2))
(println (word "ksamsungandmango" *Dict2))
{{out}}
("a" "b" "cd")
("a" "b" "bc")
("a" "bc" "b" "cd")
("a" "cd" "bc")
NIL
("i" "like" "sam" "sung")
("i" "i" "i")
("i" "like" "like" "i" "man" "go" "i" "i" "i")
("sam" "sung" "and" "man" "go")
NIL
NIL
Python
Functional
The '''tokenTrees''' function recursively builds a tree of possible token sequences, using a list monad ('''concatMap''' with a function which returns its result wrapped in a list – an empty list where a parse has failed) to discard all branches which lead to dead ends. This allows us to return more than one possible word-break parse for a given lexicon and input string. (Searches for 'monadic parsing in Python' will yield references to more sophisticated uses of this general approach).
{{Works with|Python|3.7}}
'''Parsing a string for word breaks'''
from itertools import (chain)
# stringParse :: [String] -> String -> Tree String
def stringParse(lexicon):
'''A tree of strings representing a parse of s
in terms of the tokens in lexicon.
'''
return lambda s: Node(s)(
tokenTrees(lexicon)(s)
)
# tokenTrees :: [String] -> String -> [Tree String]
def tokenTrees(wds):
'''A list of possible parse trees for s,
based on the lexicon supplied in wds.
'''
def go(s):
return [Node(s)([])] if s in wds else (
concatMap(nxt(s))(wds)
)
def nxt(s):
return lambda w: parse(
w, go(s[len(w):])
) if s.startswith(w) else []
def parse(w, xs):
return [Node(w)(xs)] if xs else xs
return lambda s: go(s)
# showParse :: Tree String -> String
def showParse(tree):
'''Multi line display of a string followed by any
possible parses of it, or an explanatory
message, if no parse was possible.
'''
def showTokens(x):
xs = x['nest']
return ' ' + x['root'] + (showTokens(xs[0]) if xs else '')
parses = tree['nest']
return tree['root'] + ':\n' + (
'\n'.join(
map(showTokens, parses)
) if parses else ' ( Not parseable in terms of these words )'
)
# TEST -------------------------------------------------
# main :: IO ()
def main():
'''Parse test and display of results.'''
lexicon = 'a bc abc cd b'.split()
testSamples = 'abcd abbc abcbcd acdbc abcdd'.split()
print(unlines(
map(
showParse,
map(
stringParse(lexicon),
testSamples
)
)
))
# GENERIC FUNCTIONS ---------------------------------------
# Node :: a -> [Tree a] -> Tree a
def Node(v):
'''Contructor for a Tree node which connects a
value of some kind to a list of zero or
more child trees.'''
return lambda xs: {'type': 'Node', 'root': v, 'nest': xs}
# concatMap :: (a -> [b]) -> [a] -> [b]
def concatMap(f):
'''A concatenated list over which a function has been mapped.
The list monad can be derived by using a function f which
wraps its output in a list,
(using an empty list to represent computational failure).'''
return lambda xs: list(
chain.from_iterable(map(f, xs))
)
# unlines :: [String] -> String
def unlines(xs):
'''A single string derived by the intercalation
of a list of strings with the newline character.'''
return '\n'.join(xs)
# MAIN ---
if __name__ == '__main__':
main()
{{Out}}
abcd:
a b cd
abbc:
a b bc
abcbcd:
a bc b cd
abc b cd
acdbc:
a cd bc
abcdd:
( Not parseable in terms of these words )
Racket
This returns all the possible splits (and null list if none is possible). Who's to say which is the best?
#lang racket
(define render-phrases pretty-print)
(define dict-1 (list "a" "bc" "abc" "cd" "b"))
(define dict-2 (list "mobile" "samsung" "sam" "sung" "man" "mango"
"icecream" "and" "go" "i" "like" "ice" "cream"))
(define (word-splits str d)
(let ((memo (make-hash)))
(let inr ((s str))
(hash-ref! memo s
(λ () (append* (filter-map (λ (w)
(and (string-prefix? s w)
(if (string=? w s)
(list s)
(map (λ (tl) (string-append w " " tl))
(inr (substring s (string-length w)))))))
d)))))))
(module+ main
(render-phrases (word-splits "abcd" dict-1))
(render-phrases (word-splits "abbc" dict-1))
(render-phrases (word-splits "abcbcd" dict-1))
(render-phrases (word-splits "acdbc" dict-1))
(render-phrases (word-splits "abcdd" dict-1))
(render-phrases (word-splits "ilikesamsung" dict-2))
(render-phrases (word-splits "iii" dict-2))
(render-phrases (word-splits "ilikelikeimangoiii" dict-2))
(render-phrases (word-splits "samsungandmango" dict-2))
(render-phrases (word-splits "samsungandmangok" dict-2))
(render-phrases (word-splits "ksamsungandmango" dict-2)))
{{out}}
'("a b cd")
'("a b bc")
'("a bc b cd" "abc b cd")
'("a cd bc")
'()
'("i like samsung" "i like sam sung")
'("i i i")
'("i like like i man go i i i" "i like like i mango i i i")
'("samsung and man go"
"samsung and mango"
"sam sung and man go"
"sam sung and mango")
'()
'()
REXX
This REXX version allows the words to be tested (and the dictionary words) to be specified on the command line.
/*REXX program breaks up a word (or string) into a list of words from a dictionary. */
parse arg a '/' x; a=space(a); x=space(x) /*get optional args; elide extra blanks*/
if a=='' | a=="," then a= 'abcd abbc abcbcd acdbc abcdd' /*maybe use the defaults ? */
if x=='' | x=="," then x= 'a bc abc cd b' /* " " " " */
na=words(a) /*the number of words to be tested. */
nx=words(x) /* " " " " " the dictionary*/
say nx ' dictionary words: ' x /*display the words in the dictionary. */
say /*display a blank line to the terminal.*/
aw=0; do i=1 for na; _=word(a, i) /*obtain a word that will be tested. */
aw=max(aw, length(_) ) /*find widest width word being tested. */
end /*i*/ /* [↑] AW is used to align the output*/
@.=0 /*initialize the dictionary to "null". */
xw=0; do i=1 for nx; _=word(x, i) /*obtain a word from the dictionary. */
xw=max(xw, length(_) ); @._=1 /*find widest width dictionary word. */
end /*i*/ /* [↑] define a dictionary word. */
p=0 /* [↓] process a word in the A list.*/
do j=1 for na; yy=word(a, j) /*YY: test a word from the A list.*/
do t=(nx+1)**(xw+1) by -1 to 1 until y==''; y=yy /*try word possibility.*/
$= /*nullify the (possible) result list. */
do try=t while y\='' /*keep testing until Y is exhausted. */
p=(try + p) // xw + 1 /*use a possible width for this attempt*/
p=fw(y, p); if p==0 then iterate t /*is this part of the word not found ? */
$=$ ? /*It was found. Add partial to the list*/
y=substr(y, p + 1) /*now, use and test the rest of word. */
end /*try*/
end /*t*/
if t==0 then $= '(not possible)' /*indicate that the word isn't possible*/
say right(yy, aw) '───►' strip($) /*display the result to the terminal. */
end /*j*/
exit /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
fw: parse arg z,L; do k=L by -1 for L; ?=left(z,k); if @.? then leave; end; return k
{{out|output|text= when using the default inputs:}}
5 dictionary words: a bc abc cd b
abcd ───► a b cd
abbc ───► a b bc
abcbcd ───► abc b cd
acdbc ───► a cd bc
abcdd ───► (not possible)
Ring
# Project : Word break problem
load "stdlib.ring"
list = ["a", "bc", "abc", "cd", "b"]
inslist = list
for n = 1 to len(inslist) - 1
for m = len(inslist) to 1 step -1
insert(list,0,inslist[m])
next
next
strings = ["abcd", "abbc", "abcbcd", "acdbc", "abcdd"]
ind = len(list)
items = newlist(pow(2,len(list))-1,ind)
powerset(list,ind)
for p = 1 to len(strings)
showarray(items,strings[p])
next
func powerset(list,ind)
num = 0
num2 = 0
items = newlist(pow(2,len(list))-1,2*ind)
for i = 2 to (2 << len(list)) - 1 step 2
num2 = 0
num = num + 1
for j = 1 to len(list)
if i & (1 << j)
num2 = num2 + 1
if list[j] != 0
items[num][num2] = list[j]
ok
ok
next
next
return items
func showarray(items,par)
ready = []
for n = 1 to len(items)
for m = n + 1 to len(items) - 1
flag = 0
str = ""
for x = 1 to len(items[n])
if items[n][x] != 0
str = str + items[n][x] + " "
ok
next
str = left(str, len(str) - 1)
strsave = str
str = substr(str, " ", "")
if str = par
pos = find(ready,strsave)
if pos = 0
add(ready,strsave)
flag = 1
see par + " = " + strsave + nl
ok
if flag != 1
del(items,m)
ok
ok
next
next
Output:
abcd = a b cd
abbc = a b bc
abcbcd = abc b cd
acdbc = a cd bc
Rust
Dynamic programming
use std::collections::HashSet;
fn create_string(s: &str, v: Vec<Option<usize>>) -> String {
let mut idx = s.len();
let mut slice_vec = vec![];
while let Some(prev) = v[idx] {
slice_vec.push(&s[prev..idx]);
idx = prev;
}
slice_vec.reverse();
slice_vec.join(" ")
}
fn word_break(s: &str, dict: HashSet<&str>) -> Option<String> {
let size = s.len() + 1;
let mut possible = vec![None; size];
let check_word = |i,j| dict.get(&s[i..j]).map(|_| i);
for i in 1..size {
possible[i] = possible[i].or_else(|| check_word(0,i));
if possible[i].is_some() {
for j in i+1..size {
possible[j] = possible[j].or_else(|| check_word(i,j));
}
if possible[s.len()].is_some() {
return Some(create_string(s, possible));
}
};
}
None
}
fn main() {
let mut set = HashSet::new();
set.insert("a");
set.insert("bc");
set.insert("abc");
set.insert("cd");
set.insert("b");
println!("{:?}", word_break("abcd", set).unwrap());
}
{{output}}
"a b cd"
Scala
First solution
Finds all possible solutions recursively, using a trie representation of the dictionary:
case class TrieNode(isWord: Boolean, children: Map[Char, TrieNode]) {
def add(s: String): TrieNode = s match {
case "" => copy(isWord = true)
case _ => {
val child = children.getOrElse(s.head, TrieNode(false, Map.empty))
copy(children = children + (s.head -> child.add(s.tail)))
}
}
}
def buildTrie(xs: String*): TrieNode = {
xs.foldLeft(TrieNode(false, Map.empty))(_.add(_))
}
def wordBreakRec(s: String, root: TrieNode, currentPos: TrieNode, soFar: String): List[List[String]] = {
val usingCurrentWord = if (currentPos.isWord) {
if (s.isEmpty) {
List(List(soFar))
} else {
wordBreakRec(s, root, root, "").map(soFar :: _)
}
} else {
List.empty[List[String]]
}
val usingCurrentPrefix = (for {
ch <- s.headOption
child <- currentPos.children.get(ch)
} yield wordBreakRec(s.tail, root, child, soFar + ch)).getOrElse(List.empty)
usingCurrentWord ++ usingCurrentPrefix
}
def wordBreak(s: String, dict: TrieNode): List[List[String]] = {
wordBreakRec(s, dict, dict, "")
}
Calling it with some example strings:
val dict = buildTrie("a", "bc", "abc", "cd", "b")
val testCases = List("abcd", "abbc", "abcbcd", "acdbc", "abcdd")
for (s <- testCases) {
val solutions = wordBreak(s, dict)
println(s"$s has ${solutions.size} solution(s):")
for (words <- solutions) {
println("\t" + words.mkString(" "))
}
}
{{out}}
abcd has 1 solution(s):
a b cd
abbc has 1 solution(s):
a b bc
abcbcd has 2 solution(s):
a bc b cd
abc b cd
acdbc has 1 solution(s):
a cd bc
abcdd has 0 solution(s):
Combined solution
{{Out}}Best seen running in your browser either by [https://scalafiddle.io/sf/49YwsD5/1 ScalaFiddle (ES aka JavaScript, non JVM)] or [https://scastie.scala-lang.org/L47OzgmjSkOnQ5wfrZ5snQ Scastie (remote JVM)].
object WordBreak extends App {
val dict = buildTrie("a", "bc", "abc", "cd", "b")
lazy val empty = TrieNode(isWord = false, Map.empty) // lazy or in a companion object
case class TrieNode(isWord: Boolean, children: Map[Char, TrieNode]) {
def add(s: String): TrieNode = {
def child = children.withDefaultValue(empty)(s.head)
if (s.isEmpty) copy(isWord = true)
else copy(children = children.updated(s.head, child.add(s.tail)))
}
}
def buildTrie(xs: String*): TrieNode = xs.foldLeft(empty)(_.add(_))
def wordBreak(s: String, dict: TrieNode): List[List[String]] = {
def wordBreakRec(s: String,
root: TrieNode,
currentPos: TrieNode,
soFar: String): List[List[String]] = {
def usingCurrentWord =
if (currentPos.isWord)
if (s.isEmpty) List(List(soFar))
else wordBreakRec(s, root, root, "").map(soFar :: _)
else Nil
def usingCurrentPrefix =
(for {ch <- s.headOption
child <- currentPos.children.get(ch)
} yield wordBreakRec(s.tail, root, child, soFar + ch))
.getOrElse(Nil)
usingCurrentWord ++ usingCurrentPrefix
}
wordBreakRec(s, dict, dict, "")
}
// Calling it with some example strings:
List("abcd", "abbc", "abcbcd", "acdbc", "abcdd").foreach(s => {
val solutions = wordBreak(s, dict)
println(s"$s has ${solutions.size} solution(s):")
solutions.foreach(words => println(words.mkString("\t", " ", "")))
})
}
Seed7
$ include "seed7_05.s7i";
const func boolean: wordBreak (in string: stri, in array string: words, in string: resultList) is func
result
var boolean: found is FALSE;
local
var string: word is "";
begin
if stri = "" then
writeln(resultList);
found := TRUE;
else
for word range words do
if startsWith(stri, word) and
wordBreak(stri[succ(length(word)) ..], words, resultList & " " & word) then
found := TRUE;
end if;
end for;
end if;
end func;
const proc: main is func
local
const array string: words is [] ("a", "bc", "abc", "cd", "b");
var string: stri is "";
var string: resultList is "";
begin
for stri range [] ("abcd", "abbc", "abcbcd", "acdbc", "abcdd") do
write(stri <& ": ");
if not wordBreak(stri, words, resultList) then
writeln("can't break");
end if;
end for;
end func;
{{out}}
abcd: a b cd
abbc: a b bc
abcbcd: a bc b cd
abc b cd
acdbc: a cd bc
abcdd: can't break
Sidef
{{trans|zkl}}
func word_break (str, words) {
var r = ->(str, arr=[]) {
return true if str.is_empty
for word in (words) {
str.begins_with(word) || next
if (__FUNC__(str.substr(word.len), arr)) {
arr << word
return arr
}
}
return false
}(str)
r.kind_of(Array) ? r.reverse : nil
}
var words = %w(a o is pi ion par per sip miss able)
var strs = %w(a amiss parable opera operable inoperable permission mississippi)
for str in (strs) {
printf("%11s: %s\n", str, word_break(str, words) \\ '(not possible)')
}
{{out}}
a: ["a"]
amiss: ["a", "miss"]
parable: ["par", "able"]
opera: ["o", "per", "a"]
operable: ["o", "per", "able"]
inoperable: (not possible)
permission: ["per", "miss", "ion"]
mississippi: ["miss", "is", "sip", "pi"]
zkl
fcn wordBreak(str,words){ // words is string of space seperated words
words=words.split(" "); // to list of words
r:=fcn(str,words,sink){ // recursive search, easy to collect answer
foreach word in (words){
if(not str) return(True); // consumed string ie matched everything
if(str.find(word)==0){ // word starts str, 0 so answer is ordered
z:=word.len();
if(self.fcn(str.del(0,z),words,sink)) return(sink.write(word));
}
}
False // can't make forward progress, back out & retry
}(str,words,List()); // run the lambda
if(False==r) return("not possible");
r.reverse().concat(" ")
}
foreach text in (T("abcd","abbc","abcbcd","acdbc","abcdd")){
println(text,": ",wordBreak(text,"a bc abc cd b"))
}
{{out}}
abcd: a b cd
abbc: a b bc
abcbcd: a bc b cd
acdbc: a cd bc
abcdd: not possible