A dialog project for the development version.
Révision | 629a0da3a79bc1e12bd1b9c7c7e6a1f360867541 (tree) |
---|---|
Date | 2010-11-11 17:42:26 |
Auteur | Takuya Nishimoto <nishimoto@m.ie...> |
Committer | Takuya Nishimoto |
phrase_dur
@@ -3,6 +3,10 @@ | ||
3 | 3 | # $KCODE = 'u' |
4 | 4 | require 'nkf' |
5 | 5 | |
6 | +gtalk_bin = "/usr/local/galatea-dialog/bin/gtalk" # for utf8 | |
7 | +# gtalk_bin = "/usr/local/galatea-engine/SSM/gtalk/gtalk" # for euc-jp | |
8 | +gdm_jar = "/lab/common/src/nishi/sfjp/dialog-studio-java/bin/gdm.jar" | |
9 | + | |
6 | 10 | if ARGV.length == 1 |
7 | 11 | outputname = ARGV[0] |
8 | 12 | text = STDIN.read |
@@ -17,10 +21,7 @@ | ||
17 | 21 | |
18 | 22 | outputdir = File.join(Dir.pwd, outputname) |
19 | 23 | speaker = "female01" |
20 | - | |
21 | 24 | text = NKF.nkf('-w', text) |
22 | -gtalk_bin = "/usr/local/galatea-dialog/bin/gtalk" # for utf8 | |
23 | -# gtalk_bin = "/usr/local/galatea-engine/SSM/gtalk/gtalk" # for euc-jp | |
24 | 25 | |
25 | 26 | #################### |
26 | 27 | # based on script/runner |
@@ -80,6 +81,7 @@ | ||
80 | 81 | # puts text |
81 | 82 | # puts cmd |
82 | 83 | FileUtils.mkdir_p outputname |
84 | +system "mv #{outputdir}/#{outputname}.log #{outputdir}/#{outputname}.log.bak" | |
83 | 85 | Open3.popen3("#{cmd} >&2") do |stdin, stdout, stderr| |
84 | 86 | stdin.puts "set Log = #{outputdir}/#{outputname}.log.eucjp" |
85 | 87 | stdin.puts "set Log.conf = YES" |
@@ -116,3 +118,105 @@ | ||
116 | 118 | system "mv #{outputdir}/#{outputname}.wav.info #{outputdir}/#{outputname}.wav.info.eucjp" |
117 | 119 | system "nkf -Ew #{outputdir}/#{outputname}.wav.info.eucjp > #{outputdir}/#{outputname}.wav.info" |
118 | 120 | |
121 | +####################################### | |
122 | +# get phonemes in phrases | |
123 | +####################################### | |
124 | + | |
125 | +# find "* aphrase data" | |
126 | + | |
127 | +lines = [] | |
128 | +flag = false | |
129 | +File.open("#{outputdir}/#{outputname}.log", "r").each_line do |line| | |
130 | + s = line.chomp | |
131 | + if s.match(/^\* aphrase data/) | |
132 | + if lines.length == 0 | |
133 | + flag = true | |
134 | + end | |
135 | + elsif s.match(/^\- n_aphrase:/) | |
136 | + flag = false | |
137 | + end | |
138 | + if flag | |
139 | + if (not s.match(/^\* aphrase data/)) and (not s.match(/^\(orth/)) | |
140 | + lines << s | |
141 | + end | |
142 | + end | |
143 | +end | |
144 | +# (orth pron [accent] mora position DEC/INT | |
145 | + | |
146 | +lines2 = [] | |
147 | +lines.each do |s| | |
148 | + s2 = s.split(/\t/)[1] | |
149 | + s3 = NKF.nkf('-w --hiragana', s2) | |
150 | + lines2 << s3 | |
151 | + # puts s3 | |
152 | +end | |
153 | + | |
154 | +cmd = "LANG=ja_JP.utf8; java -cp #{gdm_jar} galatea.io.julius.GrammarUtil -t" | |
155 | +lines3 = [] | |
156 | +Open3.popen3(cmd) do |stdin, stdout, stderr| | |
157 | + lines2.each do |s| | |
158 | + stdin.puts s | |
159 | + end | |
160 | + stdin.close_write | |
161 | + s = stdout.gets | |
162 | + while s | |
163 | + lines3 << [s.chomp] # make array | |
164 | + s = stdout.gets | |
165 | + end | |
166 | +end | |
167 | +phrases = lines3 | |
168 | + | |
169 | +phrases.each_with_index do |item, idx| | |
170 | + item << lines2[idx] | |
171 | +end | |
172 | + | |
173 | +# p phrases | |
174 | +# [["silB;", "silB"], ["w;a;t;a;k;U;sh;i;w;a;", "わたくしわ"], | |
175 | +# ["o;N;s;e;e;g;o;o;s;e;e;k;i;d;e;s;U;", "おんせーごーせーきです"], | |
176 | +# ["silE;", "silE"]] | |
177 | + | |
178 | +####################################### | |
179 | +# parse .pros file | |
180 | +####################################### | |
181 | + | |
182 | +separator_count = 0 | |
183 | +durations = [] | |
184 | +File.open("#{outputdir}/#{outputname}.pros", "r").each_line do |line| | |
185 | + s = line.chomp | |
186 | + if s == "-----" | |
187 | + separator_count += 1 | |
188 | + end | |
189 | + if separator_count == 1 and (s != "-----") | |
190 | + # w [40] | |
191 | + m = s.match(/^(\S+) \[(\d+)\]$/) | |
192 | + durations << [ m[1], m[2] ] | |
193 | + end | |
194 | +end | |
195 | + | |
196 | +# p durations | |
197 | + | |
198 | +####################################### | |
199 | +# output | |
200 | +####################################### | |
201 | + | |
202 | +lines = [] | |
203 | +count = 0 | |
204 | +clock = 0 | |
205 | +phrases.each do |phrase| | |
206 | + lines << "----- " + phrase[1] | |
207 | + phrase[0].split(/;/).each do |i| | |
208 | + d = durations[count] | |
209 | + dur = d[1].to_i | |
210 | + starttime = clock | |
211 | + endtime = clock + dur | |
212 | + lines << starttime.to_s + " " + endtime.to_s + " " + d[0] + " " + i + " " + dur.to_s | |
213 | + clock += dur | |
214 | + count += 1 | |
215 | + end | |
216 | +end | |
217 | +lines << "-----" | |
218 | +File.open("#{outputdir}/#{outputname}.phrase_dur", "w") do |file| | |
219 | + lines.each do |s| | |
220 | + file.puts s | |
221 | + end | |
222 | +end |