Science
Copyright © 2024 Jiri Kriz, www.nosco.ch

14    Natural Languages I

Solution of Exercise 14

14.1    Tokenizer

/* tokenize( +String, -Words)
   Converts the text String into a list of character codes and lets
   parse_chars/2 split that list into Words. Only the first
   tokenization found by parse_chars/2 is kept. */
tokenize( String, Words) :-
    string_codes( String, Codes),
    once( parse_chars( Codes, Words)).
    
/* parse_chars( +Chars, -Words)
   Splits the list of character codes Chars into the list Words.

   - A run of word characters (see word_char/1) becomes one word. Every
     character of the run is passed through preprocess_char/2 before the
     word is built, so a redefinition of that hook (e.g. lower-casing in
     Ex. 14.2) applies to the whole word and not only to its first
     character.
   - The word is built with name/2, so a word consisting only of digits
     becomes a number, any other word an atom.
   - Fill characters (fill_char/1) are skipped.
   - Parsing stops at the first end character (end_char/1); whatever
     follows it is ignored.
   - The predicate fails if it meets a character that is neither a word,
     a fill nor an end character. */
parse_chars( [ Char| Chars], [ Word| Words]) :-
    word_char( Char),
    /* collect the raw characters of the word first ... */
    get_word_chars( [ Char| Chars], RestChars, RawChars),
    /* ... then preprocess each of them (maplist/3 is autoloaded
       from library(apply) in SWI-Prolog) */
    maplist( preprocess_char, RawChars, WordChars),
    name( Word, WordChars),
    parse_chars( RestChars, Words).
parse_chars( [ Char| Chars], Words) :-
    fill_char( Char),
    parse_chars( Chars, Words).
parse_chars( [ Char| _], []) :-
    end_char( Char).
parse_chars( [], []).

/* preprocess_char( +Char, -Char1)
   Hook that parse_chars/2 uses to transform a word character.
   This default leaves the character unchanged; it is redefined
   in Ex. 14.2. */
preprocess_char( Char, Char1) :-
    Char1 = Char.
    
/* get_word_chars( +Chars, -RestChars, -WordChars)
   WordChars is the longest prefix of Chars that consists of word
   characters only (see word_char/1); RestChars is the remainder of
   Chars after that prefix. WordChars is [] if Chars is empty or does
   not start with a word character. */
get_word_chars( Chars, RestChars, WordChars) :-
    (   Chars = [ First| Tail],
        WordChars = [ First| MoreWordChars],
        word_char( First)
    ->  /* First belongs to the word: keep collecting */
        get_word_chars( Tail, RestChars, MoreWordChars)
    ;   /* the word ends here */
        RestChars = Chars,
        WordChars = []
    ).
        
/* word_char( +Char)
   True if the character code Char is an ASCII letter or an ASCII
   digit. Char must be bound to a number. */
word_char( Char) :-
    (   Char >= 0'a, Char =< 0'z        /* a b ... z */
    ;   Char >= 0'A, Char =< 0'Z        /* A B ... Z */
    ;   Char >= 0'0, Char =< 0'9        /* 0 1 ... 9 */
    ).

/* end_char( ?Char)
   Character codes at which parse_chars/2 stops parsing the input. */
end_char( 0',).         /* 44: ',' */
end_char( 0'.).         /* 46: '.' */
end_char( 0'?).         /* 63: '?' */
end_char( 0'!).         /* 33: '!' */
end_char( 0'\n).        /* 10: LF */

/* fill_char( ?Char)
   Character codes that parse_chars/2 skips between words. */
fill_char( 0'\s).       /* 32: blank */
fill_char( 0'\t).       /* 9: tab */

/* talk
   Interactive read-process loop.

   Prints a welcome message, then repeatedly prompts with "> ", reads
   one line from the current input, passes it through preprocess/2 and
   tokenize/2 and hands the resulting word list to process_words/1.

   The loop ends when
   - the first word of a line is 'bye', or
   - the end of the input is reached (read_string/5 reports End = -1
     and nothing was read). Without this check the loop would never
     terminate on end of file, because an empty line never yields the
     word 'bye'.

   If tokenize/2 or process_words/1 fails for a line, the loop simply
   backtracks into repeat/0 and prompts again. */
talk :-
    write( "Welcome! Type 'bye' to quit."), nl,
    repeat,
        nl, write( "> "),
        read_string( current_input, "\n", "", End, String),
        (   End == -1, string_length( String, 0)
        ->  true                    /* end of input: leave the loop */
        ;   preprocess( String, String2),
            tokenize( String2, Words),
            process_words( Words), nl,
            Words = [ bye| _]       /* fails, i.e. loops, unless 'bye' */
        ),
    !.

/* preprocess( +String, -String2)
   Hook applied by talk/0 to each input line before it is tokenized.
   This version returns the line unchanged. */
preprocess( String, String).

/* process_words( +Words)
   Prints the words of the list Words, each followed by a blank, then
   '/ ', the number of words and a newline. */
process_words( Words) :-
    write_list( Words),
    length( Words, Count),
    write( '/ '),
    write( Count),
    nl.

/* write_list( +List)
   Writes every element of List followed by one blank.
   Nothing is written for the empty list. */
write_list( []).
write_list( [ Head| Tail]) :-
    write( Head),
    write( ' '),
    write_list( Tail).


/* Remark:
There is a simple solution in SWI Prolog: 
   
tokenize( String, Atoms) :-
	split_string(String, " ", " ", Strings),
	strings_to_atoms(Strings, Atoms).
	
strings_to_atoms( [], []).
strings_to_atoms( [ String| Strings], [ Atom| Atoms]) :-
	atom_codes( Atom, String),
	strings_to_atoms( Strings, Atoms).
	
However, this solution is not quite satisfactory, because punctuation
stays attached to the neighbouring word:
?- tokenize("ab cd?", A). => A = [ab, 'cd?'].
*/


/* Testing:

?- talk.
Welcome! Type 'bye' to quit.

> The man arrived.
The man arrived / 3

> Who arrived?
Who arrived / 2

> bye
bye / 1

true.
*/

Back to example 14.1

14.2    Natural Language Database Queries

/* Solution 14.1 has to be consulted first */

/* preprocess_char( +Char, -Lower)
   Redefinition of the tokenizer hook from Solution 14.1:
   the character code is converted with to_lower/2. */
preprocess_char( Char, Lower) :-
    to_lower( Char, Lower).
	
/* process_words( +Words)
   Treats the tokenized input line as a question and answers it
   with query/2. Only the first answer is used. */
process_words( Words) :-
    once( query( Words, _)).
	
/* query( +Question, -Answer)
   Answers a question given as a list of words.

   Clause 1: the question contains three consecutive words
             "Property of Object". The relation that describes Object
             is looked up with object/2, the argument position of
             Property with property/3 and the arity of the relation
             with relation/2. A goal for that relation is built with
             Object as first argument and called; the argument at the
             position of Property is the Answer, which is printed.
   Clause 2: a question starting with 'bye' is accepted silently.
   Clause 3: anything else is reported as not answerable. */
query( Question, Answer) :-
    sublist3( [ Property, of, Object], Question),
    object( Object, Relation),
    property( Relation, Property, Position),
    relation( Relation, Arity),
    functor( Fact, Relation, Arity),    /* Fact = Relation( _, ..., _) */
    arg( 1, Fact, Object),
    arg( Position, Fact, Answer),
    call( Fact),
    write( Property), write( ' of '), write( Object),
    write( ' is '), write( Answer), nl,
    !.

query( [ bye| _], _) :-
    !.

query( Question, _) :-
    write( "Cannot answer "),
    write( Question),
    nl.
	
/* sublist3( ?[X1, X2, X3], +List)
   True if X1, X2, X3 are three consecutive elements of List.
   On backtracking the occurrences are found from left to right. */
sublist3( [ X1, X2, X3], List) :-
    append( _, [ X1, X2, X3| _], List).

	
/* Database */

/* relation( Name, Arity): the relations known to query/2 */
relation( state, 4).

/* property( Relation, Property, Position):
   Property is stored at argument Position of Relation */
property( state, population, 2).
property( state, area, 3).
property( state, capital, 4).

/* object( Object, Relation): Object is described by a fact of Relation */
object( switzerland, state).
object( germany, state).
object( italy, state).

/* state( Name, Population, Area, Capital)
   NOTE(review): the units are not stated here; presumably population
   in millions and area in 1000 km^2 - confirm.
   The capital of Italy is spelled 'rome', in English like all other
   names (it was the German 'rom'). */
state( switzerland, 6, 41, bern).
state( germany,  81, 357, berlin).
state( italy,  61, 301, rome).

/* Testing:
?- talk.
Welcome! Type 'bye' to quit.

> Capital of Switzerland?
capital of switzerland is bern

> what is the area of Germany ?
area of germany is 357

> Population of Italy is what ?
population of italy is 61

> bye
true.
*/

Back to example 14.2

14.3    ELIZA

/* Solution 14.1 has to be consulted first */

/* process_words( +Input)
   Passes the tokenized input line on to eliza/1. */
process_words( Input) :-
    eliza( Input).

/* eliza( +Input)
   Prints a response to the list of words Input.

   Clause 1: an input starting with 'bye' is answered with a farewell.
             Any input whose first word is 'bye' is accepted, which is
             the same condition that makes talk/0 end the dialogue.
   Clause 2: the first pattern/2 whose stimulus matches Input is taken.
             Matching the stimulus fills Table with the words found
             for the numbered slots; matching the response against the
             same Table then produces the Output words.

   The words are compared as they were tokenized; the patterns are
   written in lower case. */
eliza( [ bye| _]) :-
    write( 'Goodby. I hope I have helped you.'), nl,
    !.
eliza( Input) :-
    pattern( Stimulus, Response),
    match( Stimulus, Table, Input),
    match( Response, Table, Output),
    write_list( Output),
    !.
	
/* match( +Pattern, ?Table, ?Words)
   Relates a pattern to a list of words.

   An integer in Pattern is a slot: it stands for the sequence of words
   stored under that integer in Table (see lookup/3). An atom in
   Pattern stands for itself.

   eliza/1 calls it in two ways: with Words bound, to find the words
   that fill the slots of a stimulus, and with Table filled in, to
   build the words of a response. */
match( [ Slot| Pattern], Table, Words) :-
    integer( Slot),
    lookup( Slot, Table, Filler),
    append( Filler, Rest, Words),       /* Words = Filler followed by Rest */
    match( Pattern, Table, Rest).
match( [ Literal| Pattern], Table, [ Literal| Words]) :-
    atom( Literal),
    match( Pattern, Table, Words).
match( [], _, []).

/* lookup( +Key, ?Table, ?Value)
   Table is a list of (Key, Value) pairs. Value is the value of the
   first pair whose key unifies with Key. If Table ends in an unbound
   tail and no pair matches, a new pair (Key, Value) is added there. */
lookup( Key, [ ( Key1, Value1)| Rest], Value) :-
    (   Key = Key1
    ->  Value = Value1
    ;   lookup( Key, Rest, Value)
    ).

/* pattern( ?Stimulus, ?Response)
   Stimulus/response pairs used by eliza/1, tried in the order given.
   An integer is a slot for any sequence of words (see match/3); an
   atom is a word that must occur literally. In the fifth pattern X is
   one of the words listed by important/1.

   The punctuation atoms '?' and '.' are quoted: '.' is also the clause
   terminator, so an unquoted '.' inside a list depends on how the
   reader treats it. Quoting denotes the same atom without relying on
   that. */
pattern( [i, am, 1], ['How long have you been', 1, '?']).
pattern( [1, you, 2, me], ['What makes you think I', 1, 2, you, '?']).
pattern( [i, like, 1], ['Does anyone else in your family like', 1, '?']).
pattern( [i, feel, 1], ['Do you often feel that way?']).
pattern( [1, X, 2], ['Please tell me more about', X, '.']) :- important( X).
pattern( [1], ['Please go on.']).

/* important( ?Word)
   Words that trigger the 'Please tell me more about ...' response
   of pattern/2. */
important( Word) :-
    member( Word, [ father, mother, son, daughter, sister, brother]).

/* Testing
talk.
*/

Back to example 14.3