Mon Mar 31 07:38:01 2008

Asterisk developer's documentation


codec_speex.c

Go to the documentation of this file.
00001 /*
00002  * Asterisk -- An open source telephony toolkit.
00003  *
00004  * Copyright (C) 1999 - 2005, Digium, Inc.
00005  *
00006  * Mark Spencer <markster@digium.com>
00007  *
00008  *
00009  * See http://www.asterisk.org for more information about
00010  * the Asterisk project. Please do not directly contact
00011  * any of the maintainers of this project for assistance;
00012  * the project provides a web site, mailing lists and IRC
00013  * channels for your use.
00014  *
00015  * This program is free software, distributed under the terms of
00016  * the GNU General Public License Version 2. See the LICENSE file
00017  * at the top of the source tree.
00018  */
00019 
00020 /*! \file
00021  *
00022  * \brief Translate between signed linear and Speex (Open Codec)
00023  *
00024  * http://www.speex.org
00025  * \note This work was motivated by Jeremy McNamara 
00026  * hacked to be configurable by anthm and bkw 9/28/2004
00027  * \ingroup codecs
00028  */
00029 
00030 /*** MODULEINFO
00031    <depend>speex</depend>
00032    <use>speexdsp</use>
00033  ***/
00034 
00035 #include "asterisk.h"
00036 
00037 ASTERISK_FILE_VERSION(__FILE__, "$Revision$")
00038 
00039 #include <fcntl.h>
00040 #include <stdlib.h>
00041 #include <unistd.h>
00042 #include <netinet/in.h>
00043 #include <string.h>
00044 #include <stdio.h>
00045 #include <speex/speex.h>
00046 
00047 /* We require a post 1.1.8 version of Speex to enable preprocessing
00048    and better type handling */   
00049 #ifdef _SPEEX_TYPES_H
00050 #include <speex/speex_preprocess.h>
00051 #endif
00052 
00053 #include "asterisk/lock.h"
00054 #include "asterisk/translate.h"
00055 #include "asterisk/module.h"
00056 #include "asterisk/config.h"
00057 #include "asterisk/options.h"
00058 #include "asterisk/logger.h"
00059 #include "asterisk/channel.h"
00060 #include "asterisk/utils.h"
00061 
00062 /* Sample frame data */
00063 #include "slin_speex_ex.h"
00064 #include "speex_slin_ex.h"
00065 
/*
 * Codec tunables.
 *
 * The values below are the built-in defaults; parse_config() replaces them
 * with whatever the [speex] section of codecs.conf provides.
 */

/* Encoder / decoder settings */
static int quality = 3;        /* fixed-rate encoder quality, 0-10 */
static int complexity = 2;     /* encoder complexity, 0-10 */
static int enhancement = 0;    /* decoder perceptual enhancement on/off */
static int vad = 0;            /* encoder voice activity detection on/off */
static int vbr = 0;            /* variable bit-rate mode on/off */
static float vbr_quality = 4;  /* quality used in VBR mode, 0-10 */
static int abr = 0;            /* average bit-rate target; 0 disables ABR */
static int dtx = 0;            /* set to 1 to enable silence detection */

/* Preprocessor settings (only applied when built with _SPEEX_TYPES_H) */
static int preproc = 0;        /* master switch for the preprocessor */
static int pp_vad = 0;         /* preprocessor VAD on/off */
static int pp_agc = 0;         /* preprocessor automatic gain control on/off */
/* Value handed to SPEEX_PREPROCESS_SET_AGC_LEVEL.
   NOTE(review): 8000 looks like the Speex library default - confirm. */
static float pp_agc_level = 8000;
static int pp_denoise = 0;     /* preprocessor denoiser on/off */
static int pp_dereverb = 0;    /* preprocessor dereverb on/off */
static float pp_dereverb_decay = 0.4;
static float pp_dereverb_level = 0.3;

/* Frame type tags; not referenced by the code visible in this file. */
#define TYPE_SILENCE  0x2
#define TYPE_HIGH  0x0
#define TYPE_LOW   0x1
#define TYPE_MASK  0x3

/* Capacity, in samples, of the work/output buffers */
#define  BUFFER_SAMPLES 8000
/* Samples reported for the decoder's example frame (20 ms) */
#define  SPEEX_SAMPLES  160
00092 
00093 struct speex_coder_pvt {
00094    void *speex;
00095    SpeexBits bits;
00096    int framesize;
00097    int silent_state;
00098 #ifdef _SPEEX_TYPES_H
00099    SpeexPreprocessState *pp;
00100    spx_int16_t buf[BUFFER_SAMPLES];
00101 #else
00102    int16_t buf[BUFFER_SAMPLES];  /* input, waiting to be compressed */
00103 #endif
00104 };
00105 
00106 
00107 static int lintospeex_new(struct ast_trans_pvt *pvt)
00108 {
00109    struct speex_coder_pvt *tmp = pvt->pvt;
00110 
00111    if (!(tmp->speex = speex_encoder_init(&speex_nb_mode)))
00112       return -1;
00113 
00114    speex_bits_init(&tmp->bits);
00115    speex_bits_reset(&tmp->bits);
00116    speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00117    speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
00118 #ifdef _SPEEX_TYPES_H
00119    if (preproc) {
00120       tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */
00121       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
00122       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
00123       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
00124       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
00125       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
00126       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
00127       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
00128    }
00129 #endif
00130    if (!abr && !vbr) {
00131       speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
00132       if (vad)
00133          speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
00134    }
00135    if (vbr) {
00136       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
00137       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
00138    }
00139    if (abr)
00140       speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
00141    if (dtx)
00142       speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx); 
00143    tmp->silent_state = 0;
00144 
00145    return 0;
00146 }
00147 
00148 static int speextolin_new(struct ast_trans_pvt *pvt)
00149 {
00150    struct speex_coder_pvt *tmp = pvt->pvt;
00151    
00152    if (!(tmp->speex = speex_decoder_init(&speex_nb_mode)))
00153       return -1;
00154 
00155    speex_bits_init(&tmp->bits);
00156    speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00157    if (enhancement)
00158       speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
00159 
00160    return 0;
00161 }
00162 
00163 static struct ast_frame *lintospeex_sample(void)
00164 {
00165    static struct ast_frame f;
00166    f.frametype = AST_FRAME_VOICE;
00167    f.subclass = AST_FORMAT_SLINEAR;
00168    f.datalen = sizeof(slin_speex_ex);
00169    /* Assume 8000 Hz */
00170    f.samples = sizeof(slin_speex_ex)/2;
00171    f.mallocd = 0;
00172    f.offset = 0;
00173    f.src = __PRETTY_FUNCTION__;
00174    f.data = slin_speex_ex;
00175    return &f;
00176 }
00177 
00178 static struct ast_frame *speextolin_sample(void)
00179 {
00180    static struct ast_frame f;
00181    f.frametype = AST_FRAME_VOICE;
00182    f.subclass = AST_FORMAT_SPEEX;
00183    f.datalen = sizeof(speex_slin_ex);
00184    /* All frames are 20 ms long */
00185    f.samples = SPEEX_SAMPLES;
00186    f.mallocd = 0;
00187    f.offset = 0;
00188    f.src = __PRETTY_FUNCTION__;
00189    f.data = speex_slin_ex;
00190    return &f;
00191 }
00192 
00193 /*! \brief convert and store into outbuf */
00194 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00195 {
00196    struct speex_coder_pvt *tmp = pvt->pvt;
00197 
00198    /* Assuming there's space left, decode into the current buffer at
00199       the tail location.  Read in as many frames as there are */
00200    int x;
00201    int res;
00202    int16_t *dst = (int16_t *)pvt->outbuf;
00203    /* XXX fout is a temporary buffer, may have different types */
00204 #ifdef _SPEEX_TYPES_H
00205    spx_int16_t fout[1024];
00206 #else
00207    float fout[1024];
00208 #endif
00209 
00210    if (f->datalen == 0) {  /* Native PLC interpolation */
00211       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00212          ast_log(LOG_WARNING, "Out of buffer space\n");
00213          return -1;
00214       }
00215 #ifdef _SPEEX_TYPES_H
00216       speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
00217 #else
00218       speex_decode(tmp->speex, NULL, fout);
00219       for (x=0;x<tmp->framesize;x++) {
00220          dst[pvt->samples + x] = (int16_t)fout[x];
00221       }
00222 #endif
00223       pvt->samples += tmp->framesize;
00224       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00225       return 0;
00226    }
00227 
00228    /* Read in bits */
00229    speex_bits_read_from(&tmp->bits, f->data, f->datalen);
00230    for (;;) {
00231 #ifdef _SPEEX_TYPES_H
00232       res = speex_decode_int(tmp->speex, &tmp->bits, fout);
00233 #else
00234       res = speex_decode(tmp->speex, &tmp->bits, fout);
00235 #endif
00236       if (res < 0)
00237          break;
00238       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00239          ast_log(LOG_WARNING, "Out of buffer space\n");
00240          return -1;
00241       }
00242       for (x = 0 ; x < tmp->framesize; x++)
00243          dst[pvt->samples + x] = (int16_t)fout[x];
00244       pvt->samples += tmp->framesize;
00245       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00246    }
00247    return 0;
00248 }
00249 
00250 /*! \brief store input frame in work buffer */
00251 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00252 {
00253    struct speex_coder_pvt *tmp = pvt->pvt;
00254 
00255    /* XXX We should look at how old the rest of our stream is, and if it
00256       is too old, then we should overwrite it entirely, otherwise we can
00257       get artifacts of earlier talk that do not belong */
00258    memcpy(tmp->buf + pvt->samples, f->data, f->datalen);
00259    pvt->samples += f->samples;
00260    return 0;
00261 }
00262 
00263 /*! \brief convert work buffer and produce output frame */
00264 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
00265 {
00266    struct speex_coder_pvt *tmp = pvt->pvt;
00267    int is_speech=1;
00268    int datalen = 0;  /* output bytes */
00269    int samples = 0;  /* output samples */
00270 
00271    /* We can't work on anything less than a frame in size */
00272    if (pvt->samples < tmp->framesize)
00273       return NULL;
00274    speex_bits_reset(&tmp->bits);
00275    while (pvt->samples >= tmp->framesize) {
00276 #ifdef _SPEEX_TYPES_H
00277       /* Preprocess audio */
00278       if (preproc)
00279          is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
00280       /* Encode a frame of data */
00281       if (is_speech) {
00282          /* If DTX enabled speex_encode returns 0 during silence */
00283          is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
00284       } else {
00285          /* 5 zeros interpreted by Speex as silence (submode 0) */
00286          speex_bits_pack(&tmp->bits, 0, 5);
00287       }
00288 #else
00289       {
00290          float fbuf[1024];
00291          int x;
00292          /* Convert to floating point */
00293          for (x = 0; x < tmp->framesize; x++)
00294             fbuf[x] = tmp->buf[samples + x];
00295          /* Encode a frame of data */
00296          is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
00297       }
00298 #endif
00299       samples += tmp->framesize;
00300       pvt->samples -= tmp->framesize;
00301    }
00302 
00303    /* Move the data at the end of the buffer to the front */
00304    if (pvt->samples)
00305       memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
00306 
00307    /* Use AST_FRAME_CNG to signify the start of any silence period */
00308    if (is_speech) {
00309       tmp->silent_state = 0;
00310    } else {
00311       if (tmp->silent_state) {
00312          return NULL;
00313       } else {
00314          tmp->silent_state = 1;
00315          speex_bits_reset(&tmp->bits);
00316          memset(&pvt->f, 0, sizeof(pvt->f));
00317          pvt->f.frametype = AST_FRAME_CNG;
00318          pvt->f.samples = samples;
00319          /* XXX what now ? format etc... */
00320       }
00321    }
00322 
00323    /* Terminate bit stream */
00324    speex_bits_pack(&tmp->bits, 15, 5);
00325    datalen = speex_bits_write(&tmp->bits, pvt->outbuf, pvt->t->buf_size);
00326    return ast_trans_frameout(pvt, datalen, samples);
00327 }
00328 
00329 static void speextolin_destroy(struct ast_trans_pvt *arg)
00330 {
00331    struct speex_coder_pvt *pvt = arg->pvt;
00332 
00333    speex_decoder_destroy(pvt->speex);
00334    speex_bits_destroy(&pvt->bits);
00335 }
00336 
00337 static void lintospeex_destroy(struct ast_trans_pvt *arg)
00338 {
00339    struct speex_coder_pvt *pvt = arg->pvt;
00340 #ifdef _SPEEX_TYPES_H
00341    if (preproc)
00342       speex_preprocess_state_destroy(pvt->pp);
00343 #endif
00344    speex_encoder_destroy(pvt->speex);
00345    speex_bits_destroy(&pvt->bits);
00346 }
00347 
00348 static struct ast_translator speextolin = {
00349    .name = "speextolin", 
00350    .srcfmt = AST_FORMAT_SPEEX,
00351    .dstfmt =  AST_FORMAT_SLINEAR,
00352    .newpvt = speextolin_new,
00353    .framein = speextolin_framein,
00354    .destroy = speextolin_destroy,
00355    .sample = speextolin_sample,
00356    .desc_size = sizeof(struct speex_coder_pvt),
00357    .buffer_samples = BUFFER_SAMPLES,
00358    .buf_size = BUFFER_SAMPLES * 2,
00359    .native_plc = 1,
00360 };
00361 
00362 static struct ast_translator lintospeex = {
00363    .name = "lintospeex", 
00364    .srcfmt = AST_FORMAT_SLINEAR,
00365    .dstfmt = AST_FORMAT_SPEEX,
00366    .newpvt = lintospeex_new,
00367    .framein = lintospeex_framein,
00368    .frameout = lintospeex_frameout,
00369    .destroy = lintospeex_destroy,
00370    .sample = lintospeex_sample,
00371    .desc_size = sizeof(struct speex_coder_pvt),
00372    .buffer_samples = BUFFER_SAMPLES,
00373    .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
00374 };
00375 
00376 static void parse_config(void) 
00377 {
00378    struct ast_config *cfg = ast_config_load("codecs.conf");
00379    struct ast_variable *var;
00380    int res;
00381    float res_f;
00382 
00383    if (cfg == NULL)
00384       return;
00385 
00386    for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
00387       if (!strcasecmp(var->name, "quality")) {
00388          res = abs(atoi(var->value));
00389          if (res > -1 && res < 11) {
00390             if (option_verbose > 2)
00391                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Quality to %d\n",res);
00392             quality = res;
00393          } else 
00394             ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
00395       } else if (!strcasecmp(var->name, "complexity")) {
00396          res = abs(atoi(var->value));
00397          if (res > -1 && res < 11) {
00398             if (option_verbose > 2)
00399                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Complexity to %d\n",res);
00400             complexity = res;
00401          } else 
00402             ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
00403       } else if (!strcasecmp(var->name, "vbr_quality")) {
00404          if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
00405             if (option_verbose > 2)
00406                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
00407             vbr_quality = res_f;
00408          } else
00409             ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
00410       } else if (!strcasecmp(var->name, "abr_quality")) {
00411          ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
00412       } else if (!strcasecmp(var->name, "enhancement")) {
00413          enhancement = ast_true(var->value) ? 1 : 0;
00414          if (option_verbose > 2)
00415             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
00416       } else if (!strcasecmp(var->name, "vbr")) {
00417          vbr = ast_true(var->value) ? 1 : 0;
00418          if (option_verbose > 2)
00419             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
00420       } else if (!strcasecmp(var->name, "abr")) {
00421          res = abs(atoi(var->value));
00422          if (res >= 0) {
00423             if (option_verbose > 2) {
00424                if (res > 0)
00425                   ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
00426                else
00427                   ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Disabling ABR\n");
00428             }
00429             abr = res;
00430          } else 
00431             ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
00432       } else if (!strcasecmp(var->name, "vad")) {
00433          vad = ast_true(var->value) ? 1 : 0;
00434          if (option_verbose > 2)
00435             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
00436       } else if (!strcasecmp(var->name, "dtx")) {
00437          dtx = ast_true(var->value) ? 1 : 0;
00438          if (option_verbose > 2)
00439             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
00440       } else if (!strcasecmp(var->name, "preprocess")) {
00441          preproc = ast_true(var->value) ? 1 : 0;
00442          if (option_verbose > 2)
00443             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
00444       } else if (!strcasecmp(var->name, "pp_vad")) {
00445          pp_vad = ast_true(var->value) ? 1 : 0;
00446          if (option_verbose > 2)
00447             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
00448       } else if (!strcasecmp(var->name, "pp_agc")) {
00449          pp_agc = ast_true(var->value) ? 1 : 0;
00450          if (option_verbose > 2)
00451             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
00452       } else if (!strcasecmp(var->name, "pp_agc_level")) {
00453          if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
00454             if (option_verbose > 2)
00455                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
00456             pp_agc_level = res_f;
00457          } else
00458             ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
00459       } else if (!strcasecmp(var->name, "pp_denoise")) {
00460          pp_denoise = ast_true(var->value) ? 1 : 0;
00461          if (option_verbose > 2)
00462             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
00463       } else if (!strcasecmp(var->name, "pp_dereverb")) {
00464          pp_dereverb = ast_true(var->value) ? 1 : 0;
00465          if (option_verbose > 2)
00466             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
00467       } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
00468          if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
00469             if (option_verbose > 2)
00470                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
00471             pp_dereverb_decay = res_f;
00472          } else
00473             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
00474       } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
00475          if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
00476             if (option_verbose > 2)
00477                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
00478             pp_dereverb_level = res_f;
00479          } else
00480             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
00481       }
00482    }
00483    ast_config_destroy(cfg);
00484 }
00485 
/*! \brief Module reload hook: re-read the Speex settings from codecs.conf.
 *
 * \retval 0 always
 */
static int reload(void)
{
   parse_config();
   return 0;
}
00492 
00493 static int unload_module(void)
00494 {
00495    int res;
00496 
00497    res = ast_unregister_translator(&lintospeex);
00498    res |= ast_unregister_translator(&speextolin);
00499 
00500    return res;
00501 }
00502 
00503 static int load_module(void)
00504 {
00505    int res;
00506 
00507    parse_config();
00508    res=ast_register_translator(&speextolin);
00509    if (!res) 
00510       res=ast_register_translator(&lintospeex);
00511    else
00512       ast_unregister_translator(&speextolin);
00513 
00514    return res;
00515 }
00516 
00517 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
00518       .load = load_module,
00519       .unload = unload_module,
00520       .reload = reload,
00521           );

Generated on Mon Mar 31 07:38:01 2008 for Asterisk - the Open Source PBX by  doxygen 1.5.1