[Prime-cvs] CVS update: prime/lib

Back to archive index

Hiroyuki Komatsu komat****@users*****
2004年 12月 20日 (月) 00:03:48 JST


Index: prime/lib/prime2.rb
diff -u prime/lib/prime2.rb:1.1.2.3 prime/lib/prime2.rb:1.1.2.4
--- prime/lib/prime2.rb:1.1.2.3	Sat Dec 18 17:07:10 2004
+++ prime/lib/prime2.rb	Mon Dec 20 00:03:48 2004
@@ -1,5 +1,5 @@
 # prime2.rb: Module for PRIME2 protocol.
-# $Id: prime2.rb,v 1.1.2.3 2004/12/18 08:07:10 komatsu Exp $
+# $Id: prime2.rb,v 1.1.2.4 2004/12/19 15:03:48 komatsu Exp $
 #
 # Copyright (C) 2004 Hiroyuki Komatsu <komat****@taiya*****>
 #     All rights reserved.
@@ -139,23 +139,19 @@
       composer = session_get_composer(session)
       context  = session_get_context(session)
 
-      words_compact  = convert_compact(composer, context)
-      words_overall  = convert_overall(composer, context)
-      words_japanese = convert_japanese(composer, context)
-      results_compact    = PrimeWordList::merge_with_label(@context,
-                                                           words_compact)
-      results_conversion = PrimeWordList::merge_with_label(@context,
-                                                           words_overall,
-                                                           words_japanese)
-      candidates = PrimeWordList::concat(results_compact | results_conversion)
+      conversions_compact  = convert_compact(composer, context)
+      conversions_japanese = convert_japanese(composer, context)
+      conversions_overall  = convert_overall(composer, context)
+
+      ## FIXME: Shrink verbose candidates.
+      ## FIXME: (2004-12-19) <Hiro>
+      conversions = PrimeConversionList.new( conversions_compact |
+                                               conversions_japanese |
+                                               conversions_overall )
     end
 
-    conversions = _adhoc_wordlist_to_conversionlist(candidates)
-
     session_set_conversions(session, conversions)
     return conversions
-#     session_set_candidates(session, candidates)
-#     return candidates
   end
 
   def _adhoc_wordlist_to_conversionlist (wordlist)
@@ -246,13 +242,14 @@
   end
 
   ##
-  ## convertion methods
+  ## conversion methods
   ##  
 
   ## This is a wrapper for convert_*.  This converts query to
-  ## a PrimeConvertionList insted of PrimeWordList and returns it.
+  ## a PrimeConversionList instead of a PrimeWordList and returns it.
   def convert (query)
     wordlist = search(query)
+    PrimeWordList::attach_prefix(@context, wordlist)
     return _adhoc_wordlist_to_conversionlist( wordlist )
   end
   private :convert
@@ -261,16 +258,14 @@
     # 「よ→予測」
     expansion = composer.edit_get_expansion()
     query = PrimeQuery.new(expansion, nil, :prefix, context)
-    words = search(query)
-    return words
+    return convert(query)
   end
 
   def convert_exact (composer, context)
     # 「よそく→予測」
     conversion = composer.edit_get_conversion()
     query = PrimeQuery.new(conversion, nil, :exact, context)
-    words = search(query)
-    return words
+    return convert(query)
   end
 
   def convert_raw (composer, context)
@@ -278,42 +273,153 @@
     ## FIXME: <komat****@taiya*****> (2004-02-28)
     raw_input = composer.edit_get_raw_input()
     query = PrimeQuery.new(raw_input, nil, :exact, @context)
-    words = search(query)
-    return words
+    return convert(query)
   end
 
   def convert_overall (composer, context)
     # 「1+1=→2」, 「aiueo→アイウエオ」
     raw_input = composer.edit_get_raw_input()
     query = PrimeQuery.new(raw_input, nil, :overall)
-    words = search(query)
-    return words
+    return convert(query)
   end
 
   def convert_compact (composer, context)
-    words_prefix   = convert_prefix(composer, context)
+    conversion_prefix = convert_prefix(composer, context).first()
 
     ## If the result of search_prefix is empty, this method stops the following
     ## search_japanese_uniclause for the quickness.
-    if words_prefix.empty? then
-      ## Ruby 1.6 does not keep the class PrimeWordList word[0,1] if the
-      ## value of word is [], and the class of the result of word[0,1]
-      ## becomes Array which is a super class of PrimewordList.
-      return PrimeWordList.new()
+    if conversion_prefix.nil?() then
+      return PrimeConversionList.new()
+    end
+
+    ## If convert_japanese_uniclause returns a result and its score is greater
+    ## than or equal to the score of the convert_prefix result, the returned
+    ## conversion is the convert_japanese_uniclause one.
+    conversion_japanese = convert_japanese_uniclause(composer, context).first()
+    if conversion_japanese.nil?() then
+      conversion = conversion_prefix
+    elsif conversion_japanese.score < conversion_prefix.score then
+      conversion = conversion_prefix
+    else  ## conversion_japanese.score >= conversion_prefix.score
+      conversion = conversion_japanese
+    end
+
+    return PrimeConversionList.new( [conversion] )
+
+    ## Predict a next segment of the conversion.
+    next_segment = predict_next_segment( conversion )
+    if next_segment.nil? then
+      return PrimeConversionList.new( [conversion] )
+    end
+
+    conversion2 = conversion.dup()
+    conversion2.segment_insert(next_segment)
+    return PrimeConversionList.new( [conversion, conversion2] )
+  end
+
+
+  ## This predicts candidate words as a next word for the specified conversion
+  ## data.  For example when the specified conversion means "どうもありがとう",
+  ## one of the results would contain "ございます".
+  def predict_next_segment (conversion)
+    if conversion.nil? then
+      return nil
     end
 
-    words_japanese = convert_japanese_uniclause(composer, context)
-    words_compact  = PrimeWordList::merge(words_prefix, words_japanese)[0,1]
+    ## The current context is just the previous word.
+    context = conversion.get_literal()
 
-    if words_compact.length > 0 then
-      predict_with_multi_clauses!(words_compact)
+    ## If the last character of the specified conversion is one of stop_words,
+    ## this method stops its prediction.  (EXPERIMENTAL)
+    stop_words = \
+    [ PRIME_ENV['style_japanese_period'],
+      PRIME_ENV['style_japanese_comma'] ]
+    if context =~ /(#{stop_words.join('|')})$/ then
+      return nil
+    end
 
-      words_compact[1..-1].each {|word|
-        word.score = words_compact[0].score
-      }
+    query = PrimeQuery.new([""], nil, :context, context)
+    next_words = search(query)
+    if next_words.empty? then
+      return nil
     end
-    return words_compact
+
+    ## Create a PrimeSegment from the first value of a PrimeWord.
+    next_words = next_words[0,1]
+    next_word  = next_words.first
+    next_word.prefix = Prime::get_prefix(context, word.literal)
+
+    reading     = next_word.to_text_pron()
+    base        = ""
+    pos         = nil
+    adjunct     = ""
+    pos_adjunct = nil
+
+    next_segment =
+      PrimeSegment.new(reading, base, pos, adjunct, pos_adjunct, context)
+    next_segment.set_candidates(next_words, 0)
+    return next_segment
+  end
+  private :predict_next_segment
+
+  ## This returns a PrimeConversionList.
+  def convert_japanese (composer, context)
+    segments_list = convert_japanese_process_segments_list(composer)
+    conversions = []
+
+    segments_list.each { | segments |
+      if segments.length == 1 then
+        conversions += convert_from_segment( segments.first() )
+      else
+        segments.each { | segment |
+          query = PrimeQuery.new( [segment.base], segment.pos )
+          words = search(query)
+          words.each { | word |
+            word.conjugation     = segment.adjunct
+            word.conjugation_pos = segment.pos_adjunct
+          }
+          index = (segment.pos == nil) ? -1 : 0
+          segment.set_candidates(words, index)
+        }
+        score = convert_japanese_get_score(segments)
+        conversions.push( PrimeConversion.new(segments, score) )
+      end
+    }
+    return PrimeConversionList.new( conversions )
+  end
+
+  ## This returns the single-clause conversions (no longer a PrimeWordList).
+  ## FIXME: Change the method name.
+  def convert_japanese_uniclause (composer, context)
+    segments_list = convert_japanese_process_segments_list(composer, 1)
+    conversions = PrimeConversionList.new()
+
+    segments_list.each { | segments | # The length of segments must be 1.
+      conversions += convert_from_segment( segments.first() )
+    }
+    return conversions
+  end
+
+  ## This converts the specified segment to conversions.
+  ## FIXME: Change the function name.
+  ## FIXME: (2004-12-19) <Hiro>.
+  def convert_from_segment (segment)
+    conversions = PrimeConversionList.new()
+    query = PrimeQuery.new( [segment.base], segment.pos )
+    words = search(query)
+
+    words.length.times { | index |
+      word = words[index]
+      word.conjugation     = segment.adjunct
+      word.conjugation_pos = segment.pos_adjunct
+
+      new_segment = segment.dup()
+      new_segment.set_candidates(words, index)
+      conversions.push( PrimeConversion.new( [new_segment], word.score ) )
+    }
+    return conversions
   end
+  private :convert_from_segment
 
   def convert_japanese_process_segments_list (composer, threshold = 4)
     string = composer.edit_get_surface_string()
@@ -338,6 +444,7 @@
     }
     return conversions
   end
+  private :convert_japanese_process_segments_list
 
   def convert_japanese_get_score (segments)
     segment = segments[0]
@@ -361,61 +468,7 @@
     }
     return score
   end
-
-  ## This returns a PrimeConversionList.
-  def convert_japanese (composer, context)
-    segments_list = convert_japanese_process_segments_list(composer)
-    conversions = PrimeConversionList.new()
-
-    segments_list.each { | segments |
-      if segments.length == 1 then
-        segment = segments[0]
-        query = PrimeQuery.new( [segment.base], segment.pos )
-        words = search(query)
-
-        words.length.times { | index |
-          word = words[index]
-          word.conjugation     = segment.adjunct
-          word.conjugation_pos = segment.pos_adjunct
-
-          new_segment = segment.dup()
-          new_segment.set_candidates(words, index)
-          conversions.push( PrimeConversion.new( [new_segment], word.score ) )
-        }
-      else
-        segments.each { | segment |
-          query = PrimeQuery.new( [segment.reading], segment.pos )
-          words = search(query)
-          words.each { | word |
-            word.conjugation     = segment.adjunct
-            word.conjugation_pos = segment.pos_adjunct
-          }
-          index = (segment.pos == nil) ? -1 : 0
-          segment.set_candidates(words, index)
-        }
-        score = convert_japanese_get_score(segments)
-        conversions.push( PrimeConversion.new(segments, score) )
-      end
-    }
-    return conversions
-  end
-
-  ## This returns a PrimeWordList.
-  def convert_japanese_uniclause (composer, context)
-    segments_list = convert_japanese_process_segments_list(composer, 1)
-    words = PrimeWordList.new()
-    segments_list.each { | segments |
-      segment = segments[0]   # The lengh of segments must be 1.
-      query = PrimeQuery.new( [segment.base], segment.pos )
-      words = search(query)
-
-      words.each { | word |
-        word.conjugation     = segment.adjunct
-        word.conjugation_pos = segment.pos_adjunct
-      }
-    }
-    return words
-  end
+  private :convert_japanese_get_score
 
   class PrimeSession
     def initialize ()
@@ -524,6 +577,18 @@
     @score    = score
   end
 
+  def get_literal()
+    literal = ""
+    @segments.each { | segment | literal += segment.get_literal() }
+    return literal
+  end
+
+  def segment_insert (segment)
+    ## FIXME: Consider @position.
+    ## FIXME: (2004-12-19) <Hiro>
+    @segments.push(segments)
+  end
+
   def to_text_debug ()
     if****@segme***** == 1 then
       return "(#{score})\t" + @segments[0].to_text_data()  # with annotations
@@ -546,9 +611,10 @@
 
 
 class PrimeSegment
-  attr_reader :reading, :base, :pos, :adjunct, :pos_adjunct
+  attr_reader :reading, :base, :pos, :adjunct, :pos_adjunct, :context
   def initialize (reading,
-                  base = nil, pos = nil, adjunct = "", pos_adjunct = nil)
+                  base = nil, pos = nil, adjunct = "", pos_adjunct = nil,
+                  context = nil)
     @reading = reading
 
     ## The following data is a query guideline.
@@ -556,13 +622,15 @@
     @pos         = pos
     @adjunct     = adjunct
     @pos_adjunct = pos_adjunct
+    @context     = context
 
     @candidates      = PrimeWordList.new()
     @candidate_index = -1
   end
 
   def dup ()
-    segment = PrimeSegment.new(@reading, @base, @pos, @adjunct, @pos_adjunct)
+    segment = 
+      PrimeSegment.new(@reading, @base, @pos, @adjunct, @pos_adjunct, @context)
     segment.set_candidates(@candidates, @candidate_index)
     return segment
   end
@@ -602,7 +670,7 @@
   end
 
   def to_text ()
-    return ( get_literal() + @adjunct )
+    return get_literal()
   end
 end
 
Index: prime/lib/prime.rb
diff -u prime/lib/prime.rb:1.7.4.11 prime/lib/prime.rb:1.7.4.12
--- prime/lib/prime.rb:1.7.4.11	Sat Dec 18 17:07:10 2004
+++ prime/lib/prime.rb	Mon Dec 20 00:03:48 2004
@@ -1,5 +1,5 @@
 # prime/prime.rb
-# $Id: prime.rb,v 1.7.4.11 2004/12/18 08:07:10 komatsu Exp $
+# $Id: prime.rb,v 1.7.4.12 2004/12/19 15:03:48 komatsu Exp $
 #
 # Copyright (C) 2002, 2003, 2004 Hiroyuki Komatsu <komat****@taiya*****>
 #     All rights reserved.
@@ -549,6 +549,14 @@
     return merged
   end
 
+  ## This attaches prefix characters, which depend on the context, to each
+  ## word and returns the given words.
+  def PrimeWordList::attach_prefix (context, words)
+    words.each { | word |
+      word.prefix = Prime::get_prefix(context, word.literal)
+    }
+    return words
+  end
 
   def to_text
     texts = self.map {|word|


Prime-cvs メーリングリストの案内
Back to archive index