Voice Detection (Beta)¶
This service takes an audio stream as input and tries to discriminate between what is human voice and what is not. If you want to detect noise in general, and not specifically human voice, use Sound Detection instead.
Getting Started¶
Using the Angus python SDK:
# -*- coding: utf-8 -*-
import angus.client.cloud

# Connect to the service and select version 1 of voice_detection.
conn = angus.client.connect()
service = conn.services.get_service('voice_detection', version=1)

# Open the wav file in a context manager so the handle is always closed,
# even if the request fails. The result is read while the file is still open.
with open("./sound.wav", 'rb') as sound:
    job = service.process({'sound': sound, 'sensitivity': 0.7})
    print(job.result)
Input¶
{'sound' : file,
'sensitivity' : 0.3}
sound
: a Python File Object, as returned for example by open(), or a StringIO buffer, describing a wav file with the following format: PCM 16bit, Mono, without constraints on sample rate.
sensitivity
: modifies the ability of the algorithms to detect quiet voices. Value in [0, 1]. The higher the value is, the better the algorithm will detect quiet voices, but the more it will be sensitive to background noise.
Output¶
Events will be pushed to your client in the following format:
{
"voice_activity" : "SILENCE"
}
voice_activity
: this field takes 4 different values: SILENCE when no voice is detected, VOICE when voice is detected, ON when a transition occurs between SILENCE and VOICE, and OFF when a transition occurs between VOICE and SILENCE.
Code Sample¶
requirements: PyAudio
This code sample retrieves the audio stream of a web cam and displays the result of the voice_detection service.
# -*- coding: utf-8 -*-
import Queue
import StringIO
import wave
import time
import angus.client
import pyaudio
import sys
import numpy as np
# Number of frames requested from PyAudio per buffer (passed as
# frames_per_buffer when the input stream is opened).
CHUNK = 8192
# Sample format used to open the PyAudio stream (16-bit integers).
PYAUDIO_FORMAT = pyaudio.paInt16
# NumPy dtype used to decode the raw audio bytes; 16-bit to match
# PYAUDIO_FORMAT.
NUMPY_FORMAT = np.int16
def list_inputs():
    """Print the index and name of every audio device with input channels."""
    p = pyaudio.PyAudio()
    try:
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            # Devices without input channels cannot be recorded from.
            if info['maxInputChannels'] > 0:
                print("Device index={} name={}".format(info['index'], info['name']))
    finally:
        # Release the PyAudio instance once the listing is done.
        p.terminate()
def prepare(in_data, channels, rate):
    """Convert a raw audio buffer to a mono signal resampled to 16 kHz.

    Args:
        in_data: raw bytes of NUMPY_FORMAT samples; the channels are assumed
            to be interleaved frame by frame.
        channels: number of channels present in ``in_data``.
        rate: sample rate of ``in_data`` in Hz.

    Returns:
        A 1-D NumPy array of NUMPY_FORMAT samples holding the first channel,
        linearly interpolated onto a 16000 Hz time grid.
    """
    # Extract first channel. np.frombuffer replaces the deprecated binary
    # mode of np.fromstring, and -1 lets NumPy infer the frame count so the
    # buffer does not have to contain exactly CHUNK frames.
    samples = np.frombuffer(in_data, dtype=NUMPY_FORMAT)
    samples = samples.reshape(-1, channels)[:, 0]

    # Down sample if needed: pick source positions spaced rate/16000 apart
    # (a step of 1.0 when the rate is already 16000) and interpolate.
    srcx = np.arange(0, samples.size, 1)
    tgtx = np.arange(0, samples.size, float(rate) / float(16000))
    return np.interp(tgtx, srcx, samples).astype(NUMPY_FORMAT)
def main(stream_index):
    """Record from an input device and print a banner when voice is detected.

    Audio buffers captured by PyAudio are reduced to mono 16 kHz by
    ``prepare``, wrapped into an in-memory wav file and sent to the
    ``voice_detection`` service. The loop runs until an exception (for
    example a keyboard interrupt) stops it.

    Args:
        stream_index: PyAudio index of the input device to record from.

    Raises:
        RuntimeError: if the device has no input channel or its default
            sample rate is below 16000 Hz.
    """
    p = pyaudio.PyAudio()
    stream = None
    try:
        # Device configuration
        conf = p.get_device_info_by_index(stream_index)
        channels = int(conf['maxInputChannels'])
        if channels < 1:
            raise RuntimeError("Bad device, no input channel")
        rate = int(conf['defaultSampleRate'])
        if rate < 16000:
            raise RuntimeError("Bad device, sample rate is too low")

        # Angus
        conn = angus.client.connect()
        service = conn.services.get_service('voice_detection', version=1)
        service.enable_session()

        # Record Process
        stream_queue = Queue.Queue()

        def chunk_callback(in_data, frame_count, time_info, status):
            # Called by PyAudio for each captured buffer: hand the prepared
            # samples to the main loop through the queue.
            in_data = prepare(in_data, channels, rate)
            # tobytes() is the non-deprecated spelling of tostring().
            stream_queue.put(in_data.tobytes())
            return (in_data, pyaudio.paContinue)

        stream = p.open(format=PYAUDIO_FORMAT,
                        channels=channels,
                        rate=rate,
                        input=True,
                        frames_per_buffer=CHUNK,
                        input_device_index=stream_index,
                        stream_callback=chunk_callback)
        stream.start_stream()

        # Get data and send to Angus.ai
        while True:
            # Poll with a short sleep rather than blocking on the queue.
            if stream_queue.empty():
                time.sleep(0.01)
                continue

            data = stream_queue.get()

            # Wrap the raw samples in a mono 16 kHz wav container in memory.
            buff = StringIO.StringIO()
            wf = wave.open(buff, 'wb')
            wf.setnchannels(1)
            wf.setsampwidth(p.get_sample_size(PYAUDIO_FORMAT))
            wf.setframerate(16000)
            wf.writeframes(data)
            wf.close()

            job = service.process(
                {'sound': StringIO.StringIO(buff.getvalue()), 'sensitivity': 0.2})

            res = job.result["voice_activity"]
            if res == "VOICE":
                print("\033[A \033[A")
                print("***************************")
                print("***** VOICE !!!! ******")
                print("***************************")
    finally:
        # The loop above only ends through an exception, so cleanup has to
        # live in a finally clause to be reachable.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
if __name__ == "__main__":
    # Take the device index from the command line, or list the input
    # devices and ask for one interactively.
    if len(sys.argv) < 2:
        list_inputs()
        index = raw_input("Please select a device number:")
    else:
        index = sys.argv[1]

    # Guard only the conversion: a ValueError raised later inside main()
    # must not be reported as an invalid device index.
    try:
        index = int(index)
    except ValueError:
        print("Not a valid index")
        sys.exit(1)

    main(index)