Raspberry Pi, Matrix Voice, DeepSpeech & Jasper Project

Objective

Get all of this working together. None of it had been tested together beforehand, but with a bit of configuration there is a way to get there.

I use ALSA instead of PyAudio, so Jasper has to be modified.

Jasper does not support DeepSpeech, so we have to implement the STT plugin ourselves.

Required hardware

  • Raspberry Pi 3
  • Matrix Voice
  • A DeepSpeech / TensorFlow & Node.js server

Part 1: Configure the Matrix Voice

Connect the Matrix Voice to the Pi.

1. Edit the /etc/asound.conf file

This file defines one ALSA PCM per microphone channel, each backed by the named pipe that the MATRIX driver exposes under /tmp, and makes channel 0 the default capture device while playback stays on the Pi's onboard card.

sudo nano /etc/asound.conf

pcm.sc {
    type hw
    card 1
}

pcm.mic_channel0 {
  type file
  file "/tmp/matrix_micarray_channel_0"
  infile "/tmp/matrix_micarray_channel_0"
  format "raw"
  slave {
    pcm sc
  }
}


pcm.mic_channel1 {
  type file
  file "/tmp/matrix_micarray_channel_1"
  infile "/tmp/matrix_micarray_channel_1"
  format "raw"
  slave {
    pcm sc
  }
}


pcm.mic_channel2 {
  type file
  file "/tmp/matrix_micarray_channel_2"
  infile "/tmp/matrix_micarray_channel_2"
  format "raw"
  slave {
    pcm sc
  }
}


pcm.mic_channel3 {
  type file
  file "/tmp/matrix_micarray_channel_3"
  infile "/tmp/matrix_micarray_channel_3"
  format "raw"
  slave {
    pcm sc
  }
}

pcm.mic_channel4 {
  type file
  file "/tmp/matrix_micarray_channel_4"
  infile "/tmp/matrix_micarray_channel_4"
  format "raw"
  slave {
    pcm sc
  }
}


pcm.mic_channel5 {
  type file
  file "/tmp/matrix_micarray_channel_5"
  infile "/tmp/matrix_micarray_channel_5"
  format "raw"
  slave {
    pcm sc
  }
}

pcm.mic_channel6 {
  type file
  file "/tmp/matrix_micarray_channel_6"
  infile "/tmp/matrix_micarray_channel_6"
  format "raw"
  slave {
    pcm sc
  }
}

pcm.mic_channel7 {
  type file
  file "/tmp/matrix_micarray_channel_7"
  infile "/tmp/matrix_micarray_channel_7"
  format "raw"
  slave {
    pcm sc
  }
}

pcm.mic_channel8 {
  type file
  file "/tmp/matrix_micarray_channel_8"
  infile "/tmp/matrix_micarray_channel_8"
  format "raw"
  slave {
    pcm sc
  }
}

pcm.!default
{
  type asym
  playback.pcm {
    type hw
    card 0
    device 0
  }
  capture.pcm {
    type file
    file "/tmp/matrix_micarray_channel_0"
    infile "/tmp/matrix_micarray_channel_0"
    format "raw"
    slave {
        pcm sc
    }
  }
}
2. Install the drivers for the Matrix Voice
curl https://apt.matrix.one/doc/apt-key.gpg | sudo apt-key add -
echo "deb https://apt.matrix.one/raspbian $(lsb_release -sc) main" | sudo tee /etc/apt/sources.list.d/matrixlabs.list

sudo apt-get update
sudo apt-get upgrade
sudo apt dist-upgrade
sudo apt install matrixio-malos

### REBOOT ###

ls /tmp
#### MUST SHOW THESE DEVICES

matrix_micarray_channel_0
matrix_micarray_channel_1
matrix_micarray_channel_2
matrix_micarray_channel_3
matrix_micarray_channel_4
matrix_micarray_channel_5
matrix_micarray_channel_6
matrix_micarray_channel_7
matrix_micarray_channel_8
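
To verify that samples are actually flowing, you can read one of the FIFOs directly. A minimal Python sketch, assuming each channel carries raw signed 16-bit little-endian samples at 16 kHz (the format the arecord commands later in this guide use):

import array
import math

# Each /tmp/matrix_micarray_channel_N entry is a named pipe that streams
# raw signed 16-bit little-endian samples.
with open('/tmp/matrix_micarray_channel_0', 'rb') as fifo:
    for _ in range(50):
        data = fifo.read(3200)  # ~100 ms of audio at 16 kHz, 2 bytes/sample
        if not data:
            break
        data = data[:len(data) // 2 * 2]  # keep whole samples only
        samples = array.array('h', data)
        rms = math.sqrt(float(sum(s * s for s in samples)) / max(len(samples), 1))
        print('level: %.0f' % rms)

If the level moves when you speak near the board, the driver is working.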

Part 2: Create a service for the Matrix Voice

We need to create a service so that the LEDs light up as the Pi hears something.

Grab the code available on GitHub: matrix-creator-hal

git clone https://github.com/matrix-io/matrix-creator-hal.git
cd matrix-creator-hal/demos
nano mic_demo.cpp

## replace the entire contents with the following:

/*
 * Copyright 2016 <Admobilize>
 * All rights reserved.
 */
#include <gflags/gflags.h>
#include <wiringPi.h>

#include <cmath>
#include <iostream>
#include <string>
#include <valarray>

#include "../cpp/driver/everloop.h"
#include "../cpp/driver/everloop_image.h"
#include "../cpp/driver/microphone_array.h"
#include "../cpp/driver/wishbone_bus.h"
#include "./fir.h"

DEFINE_bool(big_menu, true, "Include 'advanced' options in the menu listing");
DEFINE_int32(sampling_frequency, 16000, "Sampling Frequency");

namespace hal = matrix_hal;

int main(int argc, char* argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);

  hal::WishboneBus bus;
  bus.SpiInit();

  hal::Everloop everloop;
  everloop.Setup(&bus);

  hal::MicrophoneArray mics;
  mics.Setup(&bus);

  hal::EverloopImage image1d;

  int sampling_rate = FLAGS_sampling_frequency;
  mics.SetSamplingRate(sampling_rate);
  mics.SetGain(2);
  mics.ShowConfiguration();

  // Everloop LED index to light up for each of the 8 microphone channels.
  std::valarray<int> lookup = {23, 27, 32, 1, 6, 10, 14, 19};

  std::valarray<float> magnitude(mics.Channels());

  std::valarray<float> coeff_hp = {
      0.0599851024734,  -1.300381417101e-17, -0.1549721713331, -0.1626987043005,
      0.1053874898562,  0.2920599418361,     0.1053874898562,  -0.1626987043005,
      -0.1549721713331, -1.300381417101e-17, 0.0599851024734};
  std::valarray<FIR> filter_bank_hp(mics.Channels());
  for (auto& fir : filter_bank_hp) fir.Setup(coeff_hp);

  std::valarray<float> coeff_lp = {
      1.5414364192065861E-003, 2.3293447964816889E-003, 4.5521160193679406E-003,
      7.8514472469795749E-003, 1.1667089756749511E-002, 1.5336748122750850E-002,
      1.8215960726324591E-002, 1.9795701289425597E-002, 1.9795701289425597E-002,
      1.8215960726324591E-002, 1.5336748122750850E-002, 1.1667089756749511E-002,
      7.8514472469795749E-003, 4.5521160193679406E-003, 2.3293447964816889E-003,
      1.5414364192065861E-003};
  std::valarray<FIR> filter_bank_lp(mics.Channels());
  for (auto& fir : filter_bank_lp) fir.Setup(coeff_lp);

  // Main loop: read a block of samples, high-pass filter each channel, then
  // low-pass filter the squared signal to estimate its energy; the resulting
  // RMS level drives the green channel of that mic's LED.
  while (true) {
    mics.Read();
    magnitude = 0.0;
    for (unsigned int s = 0; s < mics.NumberOfSamples(); s++) {
      for (unsigned int c = 0; c < mics.Channels(); c++) {
        float x = filter_bank_hp[c].Filter(mics.At(s, c));
        magnitude[c] += filter_bank_lp[c].Filter(x * x);
      }
    }

    for (auto& m : magnitude) {
      m = std::sqrt(1.0 / (float)mics.NumberOfSamples() * m);
    }

    for (unsigned int c = 0; c < mics.Channels(); c++) {
      image1d.leds[lookup[c]].green = magnitude[c] / 8;
      //std::cout << image1d.leds[lookup[c]].green << "\t";
    }
    //std::cout << std::endl;

    everloop.Write(&image1d);
  }

  return 0;
}

After editing mic_demo.cpp, we can recompile it from the matrix-creator-hal directory.

mkdir build
cd build
sudo apt-get install cmake
cmake ..
make
cp demos/mic_demo /home/pi/

We can now create the service:

sudo nano /etc/systemd/system/mic_detection.service


[Unit]
Description=Microphone Voice Detection

[Service]
ExecStart=/home/pi/mic_demo
StandardOutput=null

[Install]
WantedBy=multi-user.target
Alias=mic_detection.service

Adjust the absolute path after ExecStart= to reflect your setup.

Enable the service:

sudo systemctl daemon-reload

sudo systemctl enable mic_detection
sudo systemctl start mic_detection
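
To confirm the unit actually started, a quick check from Python (this simply shells out to systemctl, the same as running systemctl is-active mic_detection yourself):

import subprocess

# 'systemctl is-active' exits with 0 and prints 'active' when the unit runs.
rc = subprocess.call(['systemctl', 'is-active', 'mic_detection'])
print('mic_detection is running' if rc == 0 else 'mic_detection is NOT running')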

Part 3: Install Jasper

For this part, you can follow the official documentation; however, there are a few things you should not do.

Jasper Project Installation

sudo apt-get update
sudo apt-get upgrade --yes
sudo apt-get install nano git-core python-dev bison libasound2-dev libportaudio-dev python-pyaudio --yes
sudo apt-get remove python-pip
sudo easy_install pip

reboot

arecord -L
aplay -L

arecord test.wav
OR
arecord --fatal-error -r 16000 -f S16_LE -c 1 --device=mic_channel8 -t wav test.wav
OR
arecord -r 16000 -f S16_LE -c 1  -t wav test.wav

aplay -D hw:0,1 test.wav
OR
aplay -D hw:1,0 test.wav
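
To double-check that the recording really is in the format DeepSpeech expects (16 kHz, mono, 16-bit), you can inspect test.wav with Python's built-in wave module:

import wave

w = wave.open('test.wav', 'rb')
print('rate: %d Hz' % w.getframerate())             # expect 16000
print('channels: %d' % w.getnchannels())            # expect 1
print('sample width: %d bytes' % w.getsampwidth())  # expect 2 (S16_LE)
w.close()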

Add these two lines at the end of the ~/.bash_profile file

export LD_LIBRARY_PATH="/usr/local/lib"
source .bashrc

Add this to the ~/.bashrc file

LD_LIBRARY_PATH="/usr/local/lib"
export LD_LIBRARY_PATH
PATH=$PATH:/usr/local/lib/
export PATH

Clone the Jasper repo:

I use the development branch. Then the dependencies need to be installed.

git clone https://github.com/jasperproject/jasper-client.git jasper
cd jasper 
git checkout jasper-dev
sudo pip install --upgrade setuptools
sudo pip install -r client/requirements.txt
sudo chmod +x Jasper.py

Install eSpeak TTS

sudo apt-get update
sudo apt-get install espeak

NOTE

I am not installing an STT engine here, because I need to create a custom one.

Creating the custom STT plugin

cd plugins/
tree -L 2 
├── stt
│   ├── deepspeech-stt
│   │   ├── deepspeech.py
│   │   ├── deepspeech.pyc
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   └── plugin.info

cd stt/
mkdir deepspeech-stt
cp google-stt/* deepspeech-stt/
## Rename the files to match the tree shown above.

cd deepspeech-stt
nano deepspeech.py

Here is the content of deepspeech.py:

import logging

import requests

from jasper import plugin


class DeepspeechSTTPlugin(plugin.STTPlugin):
    """
    Speech-To-Text implementation which relies on the Tensorflow DeepSpeech API.

    Customized by Tommy Gingras.

    """

    def __init__(self, *args, **kwargs):
        plugin.STTPlugin.__init__(self, *args, **kwargs)

        self._logger = logging.getLogger(__name__)
        self._request_url = None
        self._http = requests.Session()


    @property
    def request_url(self):
        return self._request_url

    @classmethod
    def get_config(cls):
        # No need
        config = {}

        return config


    def transcribe(self, fp):
        """
        Performs STT via the Google Speech API, transcribing an audio file and
        returning an English string.

        Arguments:
        audio_file_path -- the path to the .wav file to be transcribed
        """
        dataWav = fp.read()
        # Change this line to take the paramters from the config file.
        r = requests.post("http://super.server.local:3000/recognize",
                          data=dataWav,
                          headers={'Content-Type': 'audio/wav'})
        try:
            r.raise_for_status()
        except requests.exceptions.HTTPError:
            self._logger.critical('Request failed with http status %d',
                                  r.status_code)
            if r.status_code == requests.codes['forbidden']:
                self._logger.warning('Status 403 is probably caused by an ' +
                                     'invalid request.')
            return []
        r.encoding = 'utf-8'
        try:
            recognition = r.json()
            if not recognition or 'response' not in recognition:
                # Response result is empty
                raise ValueError('Nothing has been transcribed.')
        except ValueError as e:
            self._logger.warning('Empty response: %s', e.args[0])
            results = []
        else:
            # Return the transcription in uppercase
            results = recognition['response'].upper()
            self._logger.info('Transcribed: %r', results)
        return results

    @classmethod
    def is_available(cls):
        # Only depends on the requests library imported above.
        return True

Content of __init__.py:

# -*- coding: utf-8 -*-
from .deepspeech import DeepspeechSTTPlugin

Content of plugin.info:

[Plugin]
Name = deepspeech
Version = 1.0.0
License = MIT
URL = https://studiowebux.com/
Description = Speech-To-Text implementation which relies on the Deepspeech API.

[Author]
Name = Tommy Gingras
URL = https://studiowebux.com/
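
Before wiring the plugin into Jasper, it is worth hitting the DeepSpeech server directly. A minimal sketch, assuming the same http://super.server.local:3000/recognize endpoint and the {'response': ...} JSON shape used by transcribe() above (replace the host with your own server):

import requests

# 'test.wav' is the 16 kHz mono recording made with arecord earlier.
with open('test.wav', 'rb') as f:
    data = f.read()

r = requests.post('http://super.server.local:3000/recognize',
                  data=data,
                  headers={'Content-Type': 'audio/wav'})
r.raise_for_status()
print(r.json().get('response'))  # the transcription returned by the server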

Part 4: Configure Jasper

Create the profile.yml file.

By default, the file lives at ~/.jasper/profile.yml.

cd jasper
pwd 
## jasper/jasper
python populate.py

Answer the questions according to your environment.

Here is my file:

carrier: [hidden]
first_name: Tommy
gmail_password: ''
last_name: Gingras
location: mirabel
phone_number: '[hidden]'
prefers_email: true
stt_engine: deepspeech
timezone: America/Toronto
tts_engine: espeak-tts
espeak-tts:
  voice: 'default'   # optional
  pitch_adjustment: 60  # optional
  words_per_minute: 100 # optional
audio_engine: alsa
input_device: 'mic_channel8'
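
Note that stt_engine matches the Name field declared in plugin.info, and input_device must be one of the ALSA PCM names defined in /etc/asound.conf (mic_channel8, with an underscore). A quick way to catch YAML mistakes before launching Jasper is to parse the profile with PyYAML, which Jasper itself uses to read it; a minimal sketch:

import os
import yaml  # PyYAML

path = os.path.expanduser('~/.jasper/profile.yml')
with open(path) as f:
    profile = yaml.safe_load(f)

# Spot-check the two settings this guide depends on.
print(profile.get('stt_engine'))    # expect: deepspeech
print(profile.get('input_device'))  # expect: mic_channel8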

Part 5: Connect all the pieces together

Now, from the Jasper directory, we can launch Jasper.py and everything should work.

./Jasper.py --debug

Troubleshooting

To check that everything is wired up correctly:

Source: Matrix Voice

pip install pyalsaaudio

python   # start the interactive shell

import alsaaudio
alsaaudio.__file__ # <-- check that the correct module is used
alsaaudio.pcms()   # <-- list devices

This should return:

'/home/pi/.virtualenvs/matrix/local/lib/python2.7/site-packages/alsaaudio.so' <-- my virtualenv install path

[u'null', u'sc', u'mic_channel0', u'mic_channel1', u'mic_channel2', u'mic_channel3', u'mic_channel4', u'mic_channel5', u'mic_channel6', u'mic_channel7', u'mic_channel8', u'default', u'sysdefault:CARD=ALSA', u'dmix:CARD=ALSA,DEV=0', u'dmix:CARD=ALSA,DEV=1', u'dsnoop:CARD=ALSA,DEV=0', u'dsnoop:CARD=ALSA,DEV=1', u'hw:CARD=ALSA,DEV=0', u'hw:CARD=ALSA,DEV=1', u'plughw:CARD=ALSA,DEV=0', u'plughw:CARD=ALSA,DEV=1', u'sysdefault:CARD=Dummy', u'dmix:CARD=Dummy,DEV=0', u'dsnoop:CARD=Dummy,DEV=0', u'hw:CARD=Dummy,DEV=0', u'plughw:CARD=Dummy,DEV=0']
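
If the Matrix devices show up in that list, you can capture a couple of seconds directly from mic_channel8 with pyalsaaudio; a minimal sketch, assuming pyalsaaudio 0.8+ and the 16 kHz / S16_LE format configured earlier:

import wave
import alsaaudio

# Open the mic_channel8 PCM defined in /etc/asound.conf.
pcm = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL,
                    device='mic_channel8')
pcm.setchannels(1)
pcm.setrate(16000)
pcm.setformat(alsaaudio.PCM_FORMAT_S16_LE)
pcm.setperiodsize(160)  # 10 ms per read at 16 kHz

frames = []
for _ in range(200):  # roughly 2 seconds of audio
    length, data = pcm.read()
    if length > 0:
        frames.append(data)

# Write the capture to a WAV file you can play back with aplay.
out = wave.open('alsa_test.wav', 'wb')
out.setnchannels(1)
out.setsampwidth(2)
out.setframerate(16000)
out.writeframes(b''.join(frames))
out.close()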

 
