diff options
Diffstat (limited to 'util')
-rwxr-xr-x | util/bin/mic.stt | 12 | ||||
-rw-r--r-- | util/lib/stt/README.md | 4 | ||||
-rw-r--r-- | util/lib/stt/google.sh | 35 | ||||
-rwxr-xr-x | util/t/stt/stt-works-with-espeak | 16 |
4 files changed, 67 insertions, 0 deletions
diff --git a/util/bin/mic.stt b/util/bin/mic.stt new file mode 100755 index 00000000..9236f85c --- /dev/null +++ b/util/bin/mic.stt @@ -0,0 +1,12 @@ +#!/bin/sh +set -efux +cd $(dirname $(readlink -f $0)) +. ../lib/stt/google.sh +duration=${1?please provide duration via \$1} +lang=${lang:-de-DE} +export lang +echo "language is set to $lang" +echo "will record for '$duration' seconds" +f=$(record_audio ${duration}) +trap 'rm $f' TERM EXIT HUP +stt "$f" diff --git a/util/lib/stt/README.md b/util/lib/stt/README.md new file mode 100644 index 00000000..be905770 --- /dev/null +++ b/util/lib/stt/README.md @@ -0,0 +1,4 @@ +# Speech to Text api wrapper + +Because Speech to text is hard™ with FOSS, these libraries utilize the magic of +the internets to solve this problem. diff --git a/util/lib/stt/google.sh b/util/lib/stt/google.sh new file mode 100644 index 00000000..a78579d5 --- /dev/null +++ b/util/lib/stt/google.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +_get_content_type(){ + file -b --mime-type "$1" +} +_get_audio_rate(){ + file "$1" | sed -n -e 's/.* \([.0-9]\+\) kHz.*/\1/p' \ + | awk '{print int($1 *1000)}' +} + +record_audio(){ + # usage : _record_audio num_seconds + # echoes the output file + tmpfile=$(mktemp) + : ${1?please provide number of seconds to record} + arecord -d "$1" -r 16000 -t wav -q -f cd | flac -s -f - -o "$tmpfile" && echo "$tmpfile" +} +stt(){ + # usage: (lang=de-de stty recorded_file) + : ${1? please provide recorded file} + infile="$1" + lang=${lang:-en-us} + _get_content_type "$1" | (! grep -q "x-flac" ) \ + && echo "infile needs to be in flac format" \ + && return 1 + # only flac seems to be working... 
+ wget -q -O - \ + -U 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7' \ + --post-file "$infile" \ + --header "Content-Type: `_get_content_type $infile`; rate=`_get_audio_rate $infile`;" \ + "http://www.google.com/speech-api/v1/recognize?lang=${lang}&client=chromium&maxresults=1" \ + | sed -n 's/.*utterance":"\([^"]*\)".*/\1/p' + + # returns {"status":0,"id":"d9269e6f741997161e41a4d441b34ba1-1","hypotheses":[{"utterance":"hallo Welt","confidence":0.7008959}]} +} diff --git a/util/t/stt/stt-works-with-espeak b/util/t/stt/stt-works-with-espeak new file mode 100755 index 00000000..ff39f567 --- /dev/null +++ b/util/t/stt/stt-works-with-espeak @@ -0,0 +1,16 @@ +#!/bin/sh + +cd $(dirname $(readlink -f $0)) +. ../../lib/stt/google.sh +tmp=$(mktemp) +test_str="hello" +trap "rm $tmp" TERM INT EXIT HUP +espeak --stdout "$test_str" | flac --totally-silent -f -o "$tmp" - + +if stt "$tmp" | egrep "^$test_str\$" >/dev/null ;then + echo "ok" + exit 0 +else + echo "not ok" + exit 1 +fi |