Hiroyuki Komatsu
komat****@users*****
2004年 12月 20日 (月) 00:03:48 JST
Index: prime/lib/prime2.rb diff -u prime/lib/prime2.rb:1.1.2.3 prime/lib/prime2.rb:1.1.2.4 --- prime/lib/prime2.rb:1.1.2.3 Sat Dec 18 17:07:10 2004 +++ prime/lib/prime2.rb Mon Dec 20 00:03:48 2004 @@ -1,5 +1,5 @@ # prime2.rb: Module for PRIME2 protocol. -# $Id: prime2.rb,v 1.1.2.3 2004/12/18 08:07:10 komatsu Exp $ +# $Id: prime2.rb,v 1.1.2.4 2004/12/19 15:03:48 komatsu Exp $ # # Copyright (C) 2004 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -139,23 +139,19 @@ composer = session_get_composer(session) context = session_get_context(session) - words_compact = convert_compact(composer, context) - words_overall = convert_overall(composer, context) - words_japanese = convert_japanese(composer, context) - results_compact = PrimeWordList::merge_with_label(@context, - words_compact) - results_conversion = PrimeWordList::merge_with_label(@context, - words_overall, - words_japanese) - candidates = PrimeWordList::concat(results_compact | results_conversion) + conversions_compact = convert_compact(composer, context) + conversions_japanese = convert_japanese(composer, context) + conversions_overall = convert_overall(composer, context) + + ## FIXME: Shink verbose candidates. + ## FIXME: (2004-12-19) <Hiro> + conversions = PrimeConversionList.new( conversions_compact | + conversions_japanese | + conversions_overall ) end - conversions = _adhoc_wordlist_to_conversionlist(candidates) - session_set_conversions(session, conversions) return conversions -# session_set_candidates(session, candidates) -# return candidates end def _adhoc_wordlist_to_conversionlist (wordlist) @@ -246,13 +242,14 @@ end ## - ## convertion methods + ## conversion methods ## ## This is a wrapper for convert_*. This converts query to - ## a PrimeConvertionList insted of PrimeWordList and returns it. + ## a PrimeConversionList insted of PrimeWordList and returns it. def convert (query) wordlist = search(query) + PrimeWordList::attach_prefix(@context, wordlist) return _adhoc_wordlist_to_conversionlist( wordlist ) end private :convert @@ -261,16 +258,14 @@ # 「よ→予測」 expansion = composer.edit_get_expansion() query = PrimeQuery.new(expansion, nil, :prefix, context) - words = search(query) - return words + return convert(query) end def convert_exact (composer, context) # 「よそく→予測」 conversion = composer.edit_get_conversion() query = PrimeQuery.new(conversion, nil, :exact, context) - words = search(query) - return words + return convert(query) end def convert_raw (composer, context) @@ -278,42 +273,153 @@ ## FIXME: <komat****@taiya*****> (2004-02-28) raw_input = composer.edit_get_raw_input() query = PrimeQuery.new(raw_input, nil, :exact, @context) - words = search(query) - return words + return convert(query) end def convert_overall (composer, context) # 「1+1=→2」, 「aiueo→アイウエオ」 raw_input = composer.edit_get_raw_input() query = PrimeQuery.new(raw_input, nil, :overall) - words = search(query) - return words + return convert(query) end def convert_compact (composer, context) - words_prefix = convert_prefix(composer, context) + conversion_prefix = convert_prefix(composer, context).first() ## If the result of search_prefix is empty, this method stops the following ## search_japanese_uniclause for the quickness. - if words_prefix.empty? then - ## Ruby 1.6 does not keep the class PrimeWordList word[0,1] if the - ## value of word is [], and the class of the result of word[0,1] - ## becomes Array which is a super class of PrimewordList. - return PrimeWordList.new() + if conversion_prefix.nil?() then + return PrimeConversionList.new() + end + + ## If the result of convert_japanese_uniclause exists and the score of it + ## is greater than the result of convert_prefix, the return conversion + ## becomes the convert_japanese_uniclause's one. + conversion_japanese = convert_japanese_uniclause(composer, context).first() + if conversion_japanese.nil?() then + conversion = conversion_prefix + elsif conversion_japanese.score < conversion_prefix.score then + conversion = conversion_prefix + else ## conversion_japanese.score >= conversion_prefix.score + conversion = conversion_japanese + end + + return PrimeConversionList.new( [conversion] ) + + ## Predict a next segment of the conversion. + next_segment = predict_next_segment( conversion ) + if next_segment.nil? then + return PrimeConversionList.new( [conversion] ) + end + + conversion2 = conversion.dup() + conversion2.segment_insert(next_segment) + return PrimeConversionList.new( [conversion, conversion2] ) + end + + + ## This predicts candidate words as a next word for the specified conversion + ## data. For example when the specified conversion means "どうもありがとう", + ## one of the results would contain "ございます". + def predict_next_segment (conversion) + if conversion.nil? then + return nil end - words_japanese = convert_japanese_uniclause(composer, context) - words_compact = PrimeWordList::merge(words_prefix, words_japanese)[0,1] + ## The current context is just the previous word. + context = conversion.get_literal() - if words_compact.length > 0 then - predict_with_multi_clauses!(words_compact) + ## If the last character of the specified conversion is one of stop_words, + ## This method stops its prediction. (EXPERIMENTAL) + stop_words = \ + [ PRIME_ENV['style_japanese_period'], + PRIME_ENV['style_japanese_comma'] ] + if context =~ /(#{stop_words.join('|')})$/ then + return nil + end - words_compact[1..-1].each {|word| - word.score = words_compact[0].score - } + query = PrimeQuery.new([""], nil, :context, context) + next_words = search(query) + if next_words.empty? then + return nil end - return words_compact + + ## Create a PrimeSegment from the first value of a PrimeWord. + next_words = next_words[0,1] + next_word = next_words.first + next_word.prefix = Prime::get_prefix(context, word.literal) + + reading = next_word.to_text_pron() + base = "" + pos = nil + adjunct = "" + pos_adjunct = nil + + next_segment = + PrimeSegment.new(reading, base, pos, adjunct, pos_adjunct, context) + next_segment.set_candidates(next_words, 0) + return next_segment + end + private :predict_next_segment + + ## This returns a PrimeConversionList. + def convert_japanese (composer, context) + segments_list = convert_japanese_process_segments_list(composer) + conversions = [] + + segments_list.each { | segments | + if segments.length == 1 then + conversions += convert_from_segment( segments.first() ) + else + segments.each { | segment | + query = PrimeQuery.new( [segment.base], segment.pos ) + words = search(query) + words.each { | word | + word.conjugation = segment.adjunct + word.conjugation_pos = segment.pos_adjunct + } + index = (segment.pos == nil) ? -1 : 0 + segment.set_candidates(words, index) + } + score = convert_japanese_get_score(segments) + conversions.push( PrimeConversion.new(segments, score) ) + end + } + return PrimeConversionList.new( conversions ) + end + + ## This returns a PrimeWordList. + ## FIXME: Change the method name. + def convert_japanese_uniclause (composer, context) + segments_list = convert_japanese_process_segments_list(composer, 1) + conversions = PrimeConversionList.new() + + segments_list.each { | segments | # The lengh of segments must be 1. + conversions += convert_from_segment( segments.first() ) + } + return conversions + end + + ## This converts from the specified segment to convertions. + ## FIXME: Chage the function name. + ## FIXME: (2004-12-19) <Hiro>. + def convert_from_segment (segment) + conversions = PrimeConversionList.new() + query = PrimeQuery.new( [segment.base], segment.pos ) + words = search(query) + + words.length.times { | index | + word = words[index] + word.conjugation = segment.adjunct + word.conjugation_pos = segment.pos_adjunct + + new_segment = segment.dup() + new_segment.set_candidates(words, index) + conversions.push( PrimeConversion.new( [new_segment], word.score ) ) + } + return conversions end + private :convert_from_segment def convert_japanese_process_segments_list (composer, threshold = 4) string = composer.edit_get_surface_string() @@ -338,6 +444,7 @@ } return conversions end + private :convert_japanese_process_segments_list def convert_japanese_get_score (segments) segment = segments[0] @@ -361,61 +468,7 @@ } return score end - - ## This returns a PrimeConversionList. - def convert_japanese (composer, context) - segments_list = convert_japanese_process_segments_list(composer) - conversions = PrimeConversionList.new() - - segments_list.each { | segments | - if segments.length == 1 then - segment = segments[0] - query = PrimeQuery.new( [segment.base], segment.pos ) - words = search(query) - - words.length.times { | index | - word = words[index] - word.conjugation = segment.adjunct - word.conjugation_pos = segment.pos_adjunct - - new_segment = segment.dup() - new_segment.set_candidates(words, index) - conversions.push( PrimeConversion.new( [new_segment], word.score ) ) - } - else - segments.each { | segment | - query = PrimeQuery.new( [segment.reading], segment.pos ) - words = search(query) - words.each { | word | - word.conjugation = segment.adjunct - word.conjugation_pos = segment.pos_adjunct - } - index = (segment.pos == nil) ? -1 : 0 - segment.set_candidates(words, index) - } - score = convert_japanese_get_score(segments) - conversions.push( PrimeConversion.new(segments, score) ) - end - } - return conversions - end - - ## This returns a PrimeWordList. - def convert_japanese_uniclause (composer, context) - segments_list = convert_japanese_process_segments_list(composer, 1) - words = PrimeWordList.new() - segments_list.each { | segments | - segment = segments[0] # The lengh of segments must be 1. - query = PrimeQuery.new( [segment.base], segment.pos ) - words = search(query) - - words.each { | word | - word.conjugation = segment.adjunct - word.conjugation_pos = segment.pos_adjunct - } - } - return words - end + private :convert_japanese_get_score class PrimeSession def initialize () @@ -524,6 +577,18 @@ @score = score end + def get_literal() + literal = "" + @segments.each { | segment | literal += segment.get_literal() } + return literal + end + + def segment_insert (segment) + ## FIXME: Condiser @position. + ## FIXME: (2004-12-19) <Hiro> + @segments.push(segments) + end + def to_text_debug () if****@segme***** == 1 then return "(#{score})\t" + @segments[0].to_text_data() # with annotations @@ -546,9 +611,10 @@ class PrimeSegment - attr_reader :reading, :base, :pos, :adjunct, :pos_adjunct + attr_reader :reading, :base, :pos, :adjunct, :pos_adjunct, :context def initialize (reading, - base = nil, pos = nil, adjunct = "", pos_adjunct = nil) + base = nil, pos = nil, adjunct = "", pos_adjunct = nil, + context = nil) @reading = reading ## The following data is a query guideline. @@ -556,13 +622,15 @@ @pos = pos @adjunct = adjunct @pos_adjunct = pos_adjunct + @context = context @candidates = PrimeWordList.new() @candidate_index = -1 end def dup () - segment = PrimeSegment.new(@reading, @base, @pos, @adjunct, @pos_adjunct) + segment = + PrimeSegment.new(@reading, @base, @pos, @adjunct, @pos_adjunct, @context) segment.set_candidates(@candidates, @candidate_index) return segment end @@ -602,7 +670,7 @@ end def to_text () - return ( get_literal() + @adjunct ) + return get_literal() end end Index: prime/lib/prime.rb diff -u prime/lib/prime.rb:1.7.4.11 prime/lib/prime.rb:1.7.4.12 --- prime/lib/prime.rb:1.7.4.11 Sat Dec 18 17:07:10 2004 +++ prime/lib/prime.rb Mon Dec 20 00:03:48 2004 @@ -1,5 +1,5 @@ # prime/prime.rb -# $Id: prime.rb,v 1.7.4.11 2004/12/18 08:07:10 komatsu Exp $ +# $Id: prime.rb,v 1.7.4.12 2004/12/19 15:03:48 komatsu Exp $ # # Copyright (C) 2002, 2003, 2004 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -549,6 +549,14 @@ return merged end + ## This attaches a prefix characters depend on the context, + ## and returns the given words. + def PrimeWordList::attach_prefix (context, words) + words.each { | word | + word.prefix = Prime::get_prefix(context, word.literal) + } + return words + end def to_text texts = self.map {|word|