Contents

GSoC Weekly Update: Week 4, 5

This post will briefly cover:

  • Learnings
  • Tasks done, and those in progress
  • Helpful resources

For the project proposal, visit here.


Creating the Recipes for a successful build of Vosk Library:

In order to integrate Vosk into AGL we need to build it from scratch. The instructions are mentioned on the Vosk website under “Compilation from source”. As mentioned there, the compilation is not straightforward and includes several nuances.

As listed in the Dockerfile for vosk-api, and the Dockerfile for vosk-server, the below steps outline the libraries that were required to be built for vosk-api, and the corresponding recipes:

1. Openfst

GitHub Repo: https://github.com/alphacep/openfst

Recipe created: vosk-openfst_1.8.0.bb

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Recipe for the alphacep OpenFst fork, built with autotools.
DESCRIPTION = "OpenFst is a library for constructing, combining, optimizing, and searching weighted finite-state transducers (FSTs)."
SUMMARY = "Openfst mirror with some fixes"
HOMEPAGE = "https://github.com/alphacep/openfst"
# NOTE(review): "Unknown" is a placeholder - confirm the licence stated in
# COPYING and replace this with the matching SPDX identifier.
LICENSE = "Unknown"
LIC_FILES_CHKSUM = "file://COPYING;md5=973381090441f4eb420224655e05e064"

# Upstream git sources plus a local patch that was required to fix openfst
# build issues under bitbake.
SRC_URI = "git://github.com/alphacep/openfst.git;protocol=https;branch=master \
           file://0001-build-fixes-for-bitbake.patch \
           "

SRCREV = "7dfd808194105162f20084bb4d8e4ee4b65266d5"
S = "${WORKDIR}/git"

inherit autotools python3native

# Static and shared libraries with the far / ngram / lookahead extensions
# enabled; the command-line binaries are disabled.
EXTRA_OECONF = "--enable-static --enable-shared --enable-far --enable-ngram-fsts --enable-lookahead-fsts --with-pic --disable-bin"

# Ship the FST extension modules from the "fst" sub-directory of the library
# directory in the main package. ${libdir} is used instead of a literal
# /usr/lib so the paths stay correct when the library directory differs
# (e.g. lib64 / multilib configurations).
FILES:${PN} += " ${libdir}/fst   ${libdir}/fst/arc_lookahead-fst.so \
  ${libdir}/fst/ilabel_lookahead-fst.so \
  ${libdir}/fst/ngram-fst.so \
  ${libdir}/fst/olabel_lookahead-fst.so \
"

2. OpenBLAS

GitHub Repo: https://github.com/xianyi/OpenBLAS

Recipe created: openblas_git.bb

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Recipe metadata for OpenBLAS, fetched from git.
DESCRIPTION = "OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version."
SUMMARY = "OpenBLAS : An optimized BLAS library"
HOMEPAGE = "http://www.openblas.net/"
LICENSE = "BSD-3-Clause"
LIC_FILES_CHKSUM = "file://LICENSE;md5=5adf4792c949a00013ce25d476a2abc0"

# Sources come from the upstream "develop" branch over https.
SRC_URI = "\
    git://github.com/xianyi/OpenBLAS;protocol=https;branch=develop \
"

# Pinned commit; corresponds to upstream tag 0.3.20.
SRCREV = "0b678b19dc03f2a999d6e038814c4c50b9640a4e"
S = "${WORKDIR}/git"

# Fortran runtime is needed at build time (LAPACK is not disabled below).
DEPENDS += "libgfortran"

def map_arch(d):
    """Return the CPU name used for the OpenBLAS TARGET make variable.

    Reads TARGET_ARCH from the datastore ``d`` and returns the name paired
    with the first matching pattern; 'CORTEXA15' is the fallback when no
    pattern matches.
    """
    import re
    target_arch = d.getVar('TARGET_ARCH', True)
    # Ordered (pattern, CPU name) pairs; patterns are anchored at the start
    # by re.match and at the end by '$'.
    cpu_by_pattern = (
        ('i.86$', 'ATOM'),
        ('x86_64$', 'ATOM'),
        ('aarch32$', 'CORTEXA9'),
        ('aarch64$', 'ARMV8'),
    )
    for pattern, cpu_name in cpu_by_pattern:
        if re.match(pattern, target_arch):
            return cpu_name
    return 'CORTEXA15'

def map_bits(d):
    """Return the word size (32 or 64) used for the OpenBLAS BINARY make variable.

    Reads TARGET_ARCH from the datastore ``d``. 'x86_64' and 'aarch64' give
    64; every other architecture, recognised or not, gives 32.
    """
    import re
    target_arch = d.getVar('TARGET_ARCH', True)
    wide_patterns = ('x86_64$', 'aarch64$')
    if any(re.match(pattern, target_arch) for pattern in wide_patterns):
        return 64
    return 32

# Variables passed on the make command line for the OpenBLAS build:
#   HOSTCC / CROSS / CROSS_SUFFIX - cross-build setup: the build-host compiler
#                                   and the target toolchain prefix
#   BINARY / TARGET               - word size and CPU name computed from
#                                   TARGET_ARCH by map_bits() and map_arch()
#   OPENBLAS_LIBRARY_DIR          - library directory inside the image
#                                   staging area
#   USE_THREAD=0                  - presumably selects a single-threaded
#                                   build; confirm against OpenBLAS docs
# NOTE(review): the ON/OFF values (BUILD_WITHOUT_LAPACK, DYNAMIC_ARCH,
# BUILD_STATIC_LIBS) look like CMake-style switches - confirm that the
# Makefile-based build actually honours them.
EXTRA_OEMAKE = "\
    BUILD_WITHOUT_LAPACK=OFF \
    HOSTCC=${BUILD_CC} \
    CROSS=1 \
    CROSS_SUFFIX=${TARGET_PREFIX} \
    BINARY=${@map_bits(d)} \
    TARGET=${@map_arch(d)} \
    OPENBLAS_LIBRARY_DIR=${D}${libdir} \
    DYNAMIC_ARCH=ON \
    BUILD_STATIC_LIBS=ON \
    USE_LOCKING=1 \
    USE_THREAD=0 \
"

do_install() {
    # Let the upstream install target populate the image staging directory.
    oe_runmake PREFIX=${D}${prefix} install

    # Drop the installed binary directory and the CMake package files.
    rm -rf ${D}${bindir}
    rm -rf ${D}${libdir}/cmake

    # Point the libdir/includedir entries of the pkg-config file at the
    # target paths, then print the result so it shows up in the task log.
    pc_file=${D}${libdir}/pkgconfig/openblas.pc
    sed -i \
        -e "s#libdir=/.*#libdir=${libdir}#" \
        -e "s#includedir=/.*#includedir=${includedir}#" \
        $pc_file
    cat $pc_file
}

# Package split: the -dev package gets the headers and the lib${PN}.so
# library file; the main package claims everything else under ${libdir}.
FILES:${PN}-dev = "${includedir} ${libdir}/lib${PN}.so"
FILES:${PN}     = "${libdir}/*"

3. The Kaldi ASR library

GitHub Repo: https://github.com/kaldi-asr/kaldi

Recipe created: vosk-kaldi_git.bb

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Recipe metadata for the alphacep Kaldi fork ("vosk" branch).
SUMMARY = "Kaldi Speech Recognition Toolkit"
HOMEPAGE = "http://kaldi-asr.org/"
# NOTE(review): "Unknown" is a placeholder - confirm the licence stated in
# COPYING / doc/legal.dox and replace this with the matching SPDX identifier.
LICENSE = "Unknown"
LIC_FILES_CHKSUM = "file://../COPYING;md5=a10e448a64dbd3723ff3fb2f397fba2e \
                    file://doc/legal.dox;md5=3cba845003f27e67da70faa5da924c1e"

# Upstream git sources plus a local patch that was required to fix kaldi
# build issues (shared library compilation).
SRC_URI = "git://github.com/alphacep/kaldi.git;protocol=https;branch=vosk \
           file://0001-Fixes-for-shared-library-compilation.patch \
           "

PV = "1.0+git${SRCPV}"
SRCREV = "76cd51d44c0a61e3905c35cadb2ec5f54f3e42d1"

# The build is run from the src/ sub-directory of the checkout.
S = "${WORKDIR}/git/src"

DEPENDS += "openblas vosk-openfst"
inherit python3native

# Uses the ':' override syntax, consistent with every other override in this
# recipe; the legacy '_${PN}' form is not recognised by current bitbake.
ALLOW_EMPTY:${PN} = "1"

# Options for Kaldi's configure script: OpenBLAS as math library, static and
# shared libraries, no CUDA, and OpenFst / OpenBLAS located via the parent
# directory of the staged include directory.
MYCONF = "--mathlib=OPENBLAS --static --shared --use-cuda=no --fst-root=${STAGING_INCDIR}/../ --fst-version=1.8.0 --openblas-root=${STAGING_INCDIR}/../ "

do_configure() {
  # Run the configure script found in the current directory with the
  # option set collected in MYCONF.
  ./configure ${MYCONF}
}

do_compile() {

  make ${PARALLEL_MAKE}

}

do_install() {
  # Libraries: every shared object found in lib/ and every static archive
  # one directory level down are copied into the library directory.
  install -d ${D}${libdir}
  install -m 0644 lib/*.so ${D}${libdir}/
  install -m 0644 */*.a ${D}${libdir}/

  # Headers: each listed component directory gets a directory of the same
  # name under <includedir>/kaldi holding its *.h files.
  kaldi_components="base chain decoder feat fstext gmm gst-plugin hmm itf ivector kws lat lm matrix nnet nnet2 nnet3 online online2 rnnlm sgmm2 tfrnnlm transform tree util cudadecoder  cudadecoderbin  cudafeat  cudamatrix"
  for component in $kaldi_components ; do
    install -d ${D}${includedir}/kaldi/$component
    install -m 0644 $component/*.h ${D}${includedir}/kaldi/$component/
  done

  # Marker file so that the main vosk-kaldi package has some content.
  install -d ${D}/usr/share/kaldi
  echo "This is vosk-kaldi" > ${D}/usr/share/kaldi/README
}

# Ship the marker README written by do_install in the main package.
FILES:${PN} += " /usr/share/kaldi  /usr/share/kaldi/README"
# Stop the "rpaths" and "dev-elf" QA checks from being treated as errors.
# NOTE(review): this removes them from ERROR_QA for the whole recipe; a
# per-package INSANE_SKIP may be the narrower option - confirm.
ERROR_QA:remove = "rpaths"
ERROR_QA:remove = "dev-elf"
# Allow the main package to be created even if it ends up with no files.
ALLOW_EMPTY:${PN} = "1"

4. The Vosk library (required by vosk-server)

Recipe created: vosk_0.3.42.bb

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Recipe metadata for the Vosk speech recognition library (vosk-api).
SUMMARY = "Offline open source speech recognition API based on Kaldi and Vosk"
HOMEPAGE = "https://github.com/alphacep/vosk-api"
LICENSE = "Apache-2.0"
# COPYING sits one level above ${S}, which points at the src/ sub-directory.
LIC_FILES_CHKSUM = "file://../COPYING;md5=d09bbd7a3746b6052fbd78b26a87396b"

# Upstream git sources plus a local patch that was required to fix vosk
# build issues (shared library build under bitbake).
SRC_URI = "git://github.com/alphacep/vosk-api;protocol=https;branch=master \
           file://0001-Build-fixes-for-shared-library-under-bitbake.patch \
           "

PV = "0.3.42+git${SRCPV}"
SRCREV = "b1b216d4c87d708935f1601287fe502aa11ee4a9"

S = "${WORKDIR}/git/src"

# Build-time dependencies: the Kaldi, OpenFst and OpenBLAS recipes.
DEPENDS += " vosk-kaldi vosk-openfst openblas"

# No extra runtime dependencies are listed yet (the value is empty).
RDEPENDS:${PN} += " \
"

# Add the staged Kaldi headers to the include path and build with debug info.
CFLAGS:append = " -I${STAGING_INCDIR}/kaldi -g "
# Link without --as-needed.
LDFLAGS:remove = "-Wl,--as-needed"

do_configure() {
    # Deliberately empty: this recipe has no configure step.
    :
}

do_compile() {
    # Call make with the Kaldi / OpenFst / OpenBLAS roots pointing into the
    # staged include directory, shared-library mode switched on, and the
    # recipe's compiler and linker flags forwarded as EXTRA_* variables.
    make \
        KALDI_ROOT=${STAGING_INCDIR}/kaldi/ \
        OPENFST_ROOT=${STAGING_INCDIR} \
        OPENBLAS_ROOT=${STAGING_INCDIR} \
        USE_SHARED=1 \
        EXTRA_CFLAGS="${CFLAGS}" \
        EXTRA_LDFLAGS="${LDFLAGS}" \
        ${PARALLEL_MAKE}
}

do_install() {
    # Versioned shared library plus the unversioned libvosk.so symlink,
    # both in the library directory.
    install -d ${D}${libdir}
    install -m 0644 libvosk.so.0.3.42 ${D}${libdir}
    ln -sf libvosk.so.0.3.42 ${D}${libdir}/libvosk.so

    # Headers from the source directory go under <includedir>/vosk.
    install -d ${D}${includedir}/vosk
    cd ${S}
    install -m 0644 *.h ${D}${includedir}/vosk/
}

# Disabled: would stop the "already-stripped" QA check from being an error.
#ERROR_QA:remove = "already-stripped"
# Stop the "dev-deps" QA check from being treated as an error.
ERROR_QA:remove = "dev-deps"

Recipe for the python module sounddevice required by vosk-api:

GitHub Repo: https://github.com/spatialaudio/python-sounddevice/

Recipe created: python3-sounddevice_0.4.4.bb

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
# Recipe for the python-sounddevice module, built with setuptools.
SUMMARY = "Play and Record Sound with Python"
HOMEPAGE = "http://python-sounddevice.readthedocs.io/"
LICENSE = "MIT"
LIC_FILES_CHKSUM = "file://LICENSE;md5=bd8634ff9bb1159041c3d4328659d00f"

# gitsm:// fetches the repository together with its git submodules.
SRC_URI = "gitsm://github.com/spatialaudio/python-sounddevice;protocol=https;branch=master"

PV = "0.4.4+git${SRCPV}"
SRCREV = "a56cdb96c9c8e3d23b877bbcc7d26bd0cda231e0"

S = "${WORKDIR}/git"

inherit setuptools3

# optional features
# PACKAGECONFIG ?= "numpy"
# PACKAGECONFIG flag fields are: enable-args, disable-args, build-deps,
# runtime-deps, ... - python3-numpy is a runtime dependency, so it belongs in
# the fourth field, not in the configure-argument field.
PACKAGECONFIG[numpy] = ",,,python3-numpy"

# cffi is needed natively at build time.
DEPENDS += "python3-cffi-native"

# NOTE(review): python3-numpy is also listed unconditionally here, which makes
# the "numpy" PACKAGECONFIG redundant - decide which of the two should stay.
RDEPENDS:${PN} += "portaudio-v19 python3-cffi python3-core python3-ctypes python3-numpy"

The sounddevice module (runtime) depends on the PortAudio library. The following recipe: portaudio-v19_19.7.0.bb was added.

vosk-server

GitHub Repo: https://github.com/alphacep/vosk-server

Recipe created: vosk-server_git.bb

This recipe compiles the Websocket server

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Recipe metadata for vosk-server.
DESCRIPTION = "WebSocket, gRPC and WebRTC speech recognition server based on Vosk and Kaldi libraries"
SUMMARY = "This is a server for highly accurate offline speech recognition using Kaldi and Vosk-API."
HOMEPAGE = "https://github.com/alphacep/vosk-server"
# The COPYING checksum below is identical to the one used by the vosk recipe,
# which declares that file as Apache-2.0, so the same identifier applies here
# instead of the "Unknown" placeholder.
LICENSE = "Apache-2.0"
LIC_FILES_CHKSUM = "file://COPYING;md5=d09bbd7a3746b6052fbd78b26a87396b"

SRC_URI = "git://github.com/alphacep/vosk-server;protocol=https;branch=master"

# Version string and pinned upstream commit.
PV = "1.0+git${SRCPV}"
SRCREV = "70f3d5321a40f2f5dffe9c833bc1fac4b3b451e7"

S = "${WORKDIR}/git"

# Build-time dependencies; note this assigns (=) rather than appends (+=).
DEPENDS = "python3-vosk-api openblas vosk boost"

do_configure () {
	# Deliberately empty: nothing is configured before compilation.
	:
}

do_compile () {
	# Build the C++ WebSocket server from the websocket-cpp directory into
	# the binary vosk-websocket-cpp, using the staged Vosk headers.
	cd websocket-cpp
	# -lvosk is placed after the source file: the linker resolves libraries
	# in command-line order, and a library named before the object that
	# uses it can be discarded when the linker flags contain --as-needed.
	${CXX} -I${STAGING_INCDIR}/vosk ${LDFLAGS} -o vosk-websocket-cpp asr_server.cpp -lvosk
}

do_install () {
	# Install the WebSocket server binary produced by do_compile into the
	# binary directory. install -m 0755 is used instead of cp so the file
	# is executable regardless of the mode it had in the build tree.
	install -d ${D}${bindir}
	install -m 0755 websocket-cpp/vosk-websocket-cpp ${D}${bindir}/
}

Build

All the recipes build successfully.

Created feature templates for the meta-offline-voice-agent layer:

Directory structure:

1
2
3
4
5
6
7
8
9
$ cd $AGL_TOP/master/meta-agl-devel/templates/
$ tree -L 3
.
└── feature
    |...
    └── agl-offline-voice-agent
        ├── 50_bblayers.conf.inc
        ├── 50_local.conf.inc
        └── README_feature_agl-offline-voice-agent.md
agl-demo-platform build complete:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
$ source meta-agl/scripts/aglsetup.sh -m qemux86-64 -b build5-voice-qemux86-64 agl-demo agl-offline-voice-agent ${AGL_META_PYTHON}

------------ aglsetup.sh: Starting                                                                                                                            
Generating configuration files:                                                                                                                               
   Build dir: /home/aman/AGL/master/build5-voice-qemux86-64                                                                                                   
   Machine: qemux86-64                                                                                                                                        
   Features: agl-app-framework agl-demo agl-offline-voice-agent agl-pipewire                                                                                  
   Running /home/aman/AGL/master/external/poky/oe-init-build-env                                                                                              
   Templates dir: /home/aman/AGL/master/meta-agl/templates/base                                                                                               
   Config: /home/aman/AGL/master/build5-voice-qemux86-64/conf/bblayers.conf                                                                                   
   Config: /home/aman/AGL/master/build5-voice-qemux86-64/conf/local.conf                                                                                      
   Setup script: /home/aman/AGL/master/build5-voice-qemux86-64/conf/setup.sh                                                                                  
   Executing setup script ... --- beginning of setup script                                                                                                   
--- fragment /home/aman/AGL/master/meta-agl/templates/base/01_setup_EULAfunc.sh                                                                               
--- fragment /home/aman/AGL/master/meta-agl/templates/base/01_setup_pkg_revision.sh                                                                          
--- fragment /home/aman/AGL/master/meta-agl/templates/base/99_setup_EULAconf.sh
--- end of setup script
OK
Generating setup manifest: /home/aman/AGL/master/build5-voice-qemux86-64/aglsetup.manifest ... OK                                                            
Generating setup file: /home/aman/AGL/master/build5-voice-qemux86-64/agl-init-build-env ... OK                                                               
------------ aglsetup.sh: Done
Common targets are:
- meta-agl layer:
  - included by default
    * agl-image-boot                (just enough to boot)
    * agl-image-minimal             (minimal filesystem with APIs)
    * agl-image-minimal-crosssdk    (crosssdk for ^^)

    * agl-image-weston              (minimal filesystem with weston)

- meta-agl-demo:                    (IVI demo with UI)
  - with 'agl-demo'
    * agl-image-ivi                 (base for IVI targets)
    * agl-image-ivi-crosssdk        (sdk for ^^)

    * agl-image-graphical-qt5       (weston plus qt5 framework libs)
    * agl-image-graphical-qt5-crosssdk  (sdk for ^^)

    * agl-image-graphical-html5     (weston plus chromium for html5)

    * agl-image-cluster             (minimal image with APIs for cluster)
    * agl-image-cluster-qt5         (image with QT5 and APIs for cluster)

    * agl-image-telematics          (image with APIs for telematics)

    * agl-demo-platform             (* default IVI demo target *)
    * agl-demo-platform-crosssdk    (sdk for ^^)

    * agl-cluster-demo-platform     (cluster demo image)
    * agl-cluster-demo-platform-crosssdk  (sdk for ^^)
    * agl-cluster-demo-qtcompositor (cluster demo using own compositor)

    * agl-telematics-demo-platform  (telematics demo image)
    * agl-telematics-demo-platform-crosssdk  (sdk for ^^)

$ bitbake agl-demo-platform
...(output omitted)
NOTE: Tasks Summary: Attempted 8083 tasks of which 8062 didn't need to be rerun and all succeeded.

I want to thank my GSoC Mentor Jan-Simon Moeller for all his help and guidance in creating the recipes for the vosk and kaldi libraries.

meta-offline-voice-agent layer
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
.
├── conf
│   └── layer.conf
├── COPYING.MIT
├── dynamic-layers
│   └── meta-agl-demo
│       └── recipes-platform
├── README
├── recipes-blas
│   └── openblas
│       ├── files
│       └── openblas_git.bb
├── recipes-python
│   ├── python3-sounddevice
│   │   └── python3-sounddevice_0.4.4.bb
│   ├── python3-srt
│   │   └── python3-srt_3.5.2.bb
│   └── python3-vosk
│       └── python3-vosk-api_0.3.42.bb
└── recipes-vosk
    ├── vosk
    │   ├── vosk
    │   └── vosk_0.3.42.bb
    ├── vosk-kaldi
    │   ├── vosk-kaldi
    │   └── vosk-kaldi_git.bb
    ├── vosk-openfst
    │   ├── files
    │   └── vosk-openfst_1.8.0.bb
    └── vosk-server
        └── vosk-server_git.bb

WIP and to-do’s:

  • Testing the installed libraries in the image, and looking for errors or any other required runtime dependencies
  • To prepare recipe / class for the different models provided by Vosk (https://alphacephei.com/vosk/models)
  • Test offline voice recognition on a machine with AGL image installed

Helpful Resources: