Mon May 14 04:42:58 2007

Asterisk developer's documentation


codec_speex.c

Go to the documentation of this file.
00001 /*
00002  * Asterisk -- An open source telephony toolkit.
00003  *
00004  * Copyright (C) 1999 - 2005, Digium, Inc.
00005  *
00006  * Mark Spencer <markster@digium.com>
00007  *
00008  *
00009  * See http://www.asterisk.org for more information about
00010  * the Asterisk project. Please do not directly contact
00011  * any of the maintainers of this project for assistance;
00012  * the project provides a web site, mailing lists and IRC
00013  * channels for your use.
00014  *
00015  * This program is free software, distributed under the terms of
00016  * the GNU General Public License Version 2. See the LICENSE file
00017  * at the top of the source tree.
00018  */
00019 
00020 /*! \file
00021  *
00022  * \brief Translate between signed linear and Speex (Open Codec)
00023  *
00024  * http://www.speex.org
00025  * \note This work was motivated by Jeremy McNamara 
00026  * hacked to be configurable by anthm and bkw 9/28/2004
00027  * \ingroup codecs
00028  */
00029 
00030 /*** MODULEINFO
00031    <depend>speex</depend>
00032  ***/
00033 
00034 #include "asterisk.h"
00035 
00036 ASTERISK_FILE_VERSION(__FILE__, "$Revision$")
00037 
00038 #include <fcntl.h>
00039 #include <stdlib.h>
00040 #include <unistd.h>
00041 #include <netinet/in.h>
00042 #include <string.h>
00043 #include <stdio.h>
00044 #include <speex/speex.h>
00045 
00046 /* We require a post 1.1.8 version of Speex to enable preprocessing
00047    and better type handling */   
00048 #ifdef _SPEEX_TYPES_H
00049 #include <speex/speex_preprocess.h>
00050 #endif
00051 
00052 #include "asterisk/lock.h"
00053 #include "asterisk/translate.h"
00054 #include "asterisk/module.h"
00055 #include "asterisk/config.h"
00056 #include "asterisk/options.h"
00057 #include "asterisk/logger.h"
00058 #include "asterisk/channel.h"
00059 #include "asterisk/utils.h"
00060 
00061 /* Sample frame data */
00062 #include "slin_speex_ex.h"
00063 #include "speex_slin_ex.h"
00064 
/*
 * Codec tunables.  These are the built-in defaults; parse_config() replaces
 * them with whatever the [speex] section of codecs.conf provides.
 */

/* Encoder settings, applied in lintospeex_new() */
static int complexity = 2;        /* passed to SPEEX_SET_COMPLEXITY */
static int quality = 3;           /* passed to SPEEX_SET_QUALITY when neither VBR nor ABR is on */
static int vbr = 0;               /* non-zero enables SPEEX_SET_VBR */
static float vbr_quality = 4;     /* passed to SPEEX_SET_VBR_QUALITY when VBR is on */
static int abr = 0;               /* ABR target bitrate, 0 disables ABR */
static int vad = 0;               /* non-zero enables SPEEX_SET_VAD (fixed-rate mode only) */
static int dtx = 0;               /* set to 1 to enable silence detection */

/* Decoder setting, applied in speextolin_new() */
static int enhancement = 0;       /* non-zero enables SPEEX_SET_ENH */

/* Preprocessor settings, only used when built against a Speex with speex_types.h */
static int preproc = 0;           /* master switch for the preprocessor */
static int pp_vad = 0;
static int pp_agc = 0;
static float pp_agc_level = 8000; /* XXX origin of this 8000 default is undocumented */
static int pp_denoise = 0;
static int pp_dereverb = 0;
static float pp_dereverb_decay = 0.4;
static float pp_dereverb_level = 0.3;
00083 
/* Frame type tags; TYPE_MASK covers the two low bits.
   NOTE(review): none of these is referenced in the visible part of this file. */
#define TYPE_HIGH     0x0
#define TYPE_LOW      0x1
#define TYPE_SILENCE  0x2
#define TYPE_MASK     0x3

/* Capacity, in samples, of the per-instance work buffer and of the
   translator output buffers declared below. */
#define BUFFER_SAMPLES  8000
/* Sample count reported for the canned Speex sample frame (one 20 ms frame). */
#define SPEEX_SAMPLES   160
00091 
00092 struct speex_coder_pvt {
00093    void *speex;
00094    SpeexBits bits;
00095    int framesize;
00096    int silent_state;
00097 #ifdef _SPEEX_TYPES_H
00098    SpeexPreprocessState *pp;
00099    spx_int16_t buf[BUFFER_SAMPLES];
00100 #else
00101    int16_t buf[BUFFER_SAMPLES];  /* input, waiting to be compressed */
00102 #endif
00103 };
00104 
00105 
00106 static int lintospeex_new(struct ast_trans_pvt *pvt)
00107 {
00108    struct speex_coder_pvt *tmp = pvt->pvt;
00109 
00110    if (!(tmp->speex = speex_encoder_init(&speex_nb_mode)))
00111       return -1;
00112 
00113    speex_bits_init(&tmp->bits);
00114    speex_bits_reset(&tmp->bits);
00115    speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00116    speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
00117 #ifdef _SPEEX_TYPES_H
00118    if (preproc) {
00119       tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */
00120       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
00121       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
00122       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
00123       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
00124       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
00125       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
00126       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
00127    }
00128 #endif
00129    if (!abr && !vbr) {
00130       speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
00131       if (vad)
00132          speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
00133    }
00134    if (vbr) {
00135       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
00136       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
00137    }
00138    if (abr)
00139       speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
00140    if (dtx)
00141       speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx); 
00142    tmp->silent_state = 0;
00143 
00144    return 0;
00145 }
00146 
00147 static int speextolin_new(struct ast_trans_pvt *pvt)
00148 {
00149    struct speex_coder_pvt *tmp = pvt->pvt;
00150    
00151    if (!(tmp->speex = speex_decoder_init(&speex_nb_mode)))
00152       return -1;
00153 
00154    speex_bits_init(&tmp->bits);
00155    speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00156    if (enhancement)
00157       speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
00158 
00159    return 0;
00160 }
00161 
00162 static struct ast_frame *lintospeex_sample(void)
00163 {
00164    static struct ast_frame f;
00165    f.frametype = AST_FRAME_VOICE;
00166    f.subclass = AST_FORMAT_SLINEAR;
00167    f.datalen = sizeof(slin_speex_ex);
00168    /* Assume 8000 Hz */
00169    f.samples = sizeof(slin_speex_ex)/2;
00170    f.mallocd = 0;
00171    f.offset = 0;
00172    f.src = __PRETTY_FUNCTION__;
00173    f.data = slin_speex_ex;
00174    return &f;
00175 }
00176 
00177 static struct ast_frame *speextolin_sample(void)
00178 {
00179    static struct ast_frame f;
00180    f.frametype = AST_FRAME_VOICE;
00181    f.subclass = AST_FORMAT_SPEEX;
00182    f.datalen = sizeof(speex_slin_ex);
00183    /* All frames are 20 ms long */
00184    f.samples = SPEEX_SAMPLES;
00185    f.mallocd = 0;
00186    f.offset = 0;
00187    f.src = __PRETTY_FUNCTION__;
00188    f.data = speex_slin_ex;
00189    return &f;
00190 }
00191 
00192 /*! \brief convert and store into outbuf */
00193 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00194 {
00195    struct speex_coder_pvt *tmp = pvt->pvt;
00196 
00197    /* Assuming there's space left, decode into the current buffer at
00198       the tail location.  Read in as many frames as there are */
00199    int x;
00200    int res;
00201    int16_t *dst = (int16_t *)pvt->outbuf;
00202    /* XXX fout is a temporary buffer, may have different types */
00203 #ifdef _SPEEX_TYPES_H
00204    spx_int16_t fout[1024];
00205 #else
00206    float fout[1024];
00207 #endif
00208 
00209    if (f->datalen == 0) {  /* Native PLC interpolation */
00210       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00211          ast_log(LOG_WARNING, "Out of buffer space\n");
00212          return -1;
00213       }
00214 #ifdef _SPEEX_TYPES_H
00215       speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
00216 #else
00217       speex_decode(tmp->speex, NULL, fout);
00218       for (x=0;x<tmp->framesize;x++) {
00219          dst[pvt->samples + x] = (int16_t)fout[x];
00220       }
00221 #endif
00222       pvt->samples += tmp->framesize;
00223       return 0;
00224    }
00225 
00226    /* Read in bits */
00227    speex_bits_read_from(&tmp->bits, f->data, f->datalen);
00228    for (;;) {
00229 #ifdef _SPEEX_TYPES_H
00230       res = speex_decode_int(tmp->speex, &tmp->bits, fout);
00231 #else
00232       res = speex_decode(tmp->speex, &tmp->bits, fout);
00233 #endif
00234       if (res < 0)
00235          break;
00236       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00237          ast_log(LOG_WARNING, "Out of buffer space\n");
00238          return -1;
00239       }
00240       for (x = 0 ; x < tmp->framesize; x++)
00241          dst[pvt->samples + x] = (int16_t)fout[x];
00242       pvt->samples += tmp->framesize;
00243       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00244    }
00245    return 0;
00246 }
00247 
00248 /*! \brief store input frame in work buffer */
00249 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00250 {
00251    struct speex_coder_pvt *tmp = pvt->pvt;
00252 
00253    /* XXX We should look at how old the rest of our stream is, and if it
00254       is too old, then we should overwrite it entirely, otherwise we can
00255       get artifacts of earlier talk that do not belong */
00256    memcpy(tmp->buf + pvt->samples, f->data, f->datalen);
00257    pvt->samples += f->samples;
00258    return 0;
00259 }
00260 
00261 /*! \brief convert work buffer and produce output frame */
00262 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
00263 {
00264    struct speex_coder_pvt *tmp = pvt->pvt;
00265    int is_speech=1;
00266    int datalen = 0;  /* output bytes */
00267    int samples = 0;  /* output samples */
00268 
00269    /* We can't work on anything less than a frame in size */
00270    if (pvt->samples < tmp->framesize)
00271       return NULL;
00272    speex_bits_reset(&tmp->bits);
00273    while (pvt->samples >= tmp->framesize) {
00274 #ifdef _SPEEX_TYPES_H
00275       /* Preprocess audio */
00276       if (preproc)
00277          is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
00278       /* Encode a frame of data */
00279       if (is_speech) {
00280          /* If DTX enabled speex_encode returns 0 during silence */
00281          is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
00282       } else {
00283          /* 5 zeros interpreted by Speex as silence (submode 0) */
00284          speex_bits_pack(&tmp->bits, 0, 5);
00285       }
00286 #else
00287       {
00288          float fbuf[1024];
00289          int x;
00290          /* Convert to floating point */
00291          for (x = 0; x < tmp->framesize; x++)
00292             fbuf[x] = tmp->buf[samples + x];
00293          /* Encode a frame of data */
00294          is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
00295       }
00296 #endif
00297       samples += tmp->framesize;
00298       pvt->samples -= tmp->framesize;
00299    }
00300 
00301    /* Move the data at the end of the buffer to the front */
00302    if (pvt->samples)
00303       memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
00304 
00305    /* Use AST_FRAME_CNG to signify the start of any silence period */
00306    if (is_speech) {
00307       tmp->silent_state = 0;
00308    } else {
00309       if (tmp->silent_state) {
00310          return NULL;
00311       } else {
00312          tmp->silent_state = 1;
00313          speex_bits_reset(&tmp->bits);
00314          memset(&pvt->f, 0, sizeof(pvt->f));
00315          pvt->f.frametype = AST_FRAME_CNG;
00316          pvt->f.samples = samples;
00317          /* XXX what now ? format etc... */
00318       }
00319    }
00320 
00321    /* Terminate bit stream */
00322    speex_bits_pack(&tmp->bits, 15, 5);
00323    datalen = speex_bits_write(&tmp->bits, pvt->outbuf, pvt->t->buf_size);
00324    return ast_trans_frameout(pvt, datalen, samples);
00325 }
00326 
00327 static void speextolin_destroy(struct ast_trans_pvt *arg)
00328 {
00329    struct speex_coder_pvt *pvt = arg->pvt;
00330 
00331    speex_decoder_destroy(pvt->speex);
00332    speex_bits_destroy(&pvt->bits);
00333 }
00334 
00335 static void lintospeex_destroy(struct ast_trans_pvt *arg)
00336 {
00337    struct speex_coder_pvt *pvt = arg->pvt;
00338 #ifdef _SPEEX_TYPES_H
00339    if (preproc)
00340       speex_preprocess_state_destroy(pvt->pp);
00341 #endif
00342    speex_encoder_destroy(pvt->speex);
00343    speex_bits_destroy(&pvt->bits);
00344 }
00345 
00346 static struct ast_translator speextolin = {
00347    .name = "speextolin", 
00348    .srcfmt = AST_FORMAT_SPEEX,
00349    .dstfmt =  AST_FORMAT_SLINEAR,
00350    .newpvt = speextolin_new,
00351    .framein = speextolin_framein,
00352    .destroy = speextolin_destroy,
00353    .sample = speextolin_sample,
00354    .desc_size = sizeof(struct speex_coder_pvt),
00355    .buffer_samples = BUFFER_SAMPLES,
00356    .buf_size = BUFFER_SAMPLES * 2,
00357 };
00358 
00359 static struct ast_translator lintospeex = {
00360    .name = "lintospeex", 
00361    .srcfmt = AST_FORMAT_SLINEAR,
00362    .dstfmt = AST_FORMAT_SPEEX,
00363    .newpvt = lintospeex_new,
00364    .framein = lintospeex_framein,
00365    .frameout = lintospeex_frameout,
00366    .destroy = lintospeex_destroy,
00367    .sample = lintospeex_sample,
00368    .desc_size = sizeof(struct speex_coder_pvt),
00369    .buffer_samples = BUFFER_SAMPLES,
00370    .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
00371 };
00372 
00373 static void parse_config(void) 
00374 {
00375    struct ast_config *cfg = ast_config_load("codecs.conf");
00376    struct ast_variable *var;
00377    int res;
00378    float res_f;
00379 
00380    if (cfg == NULL)
00381       return;
00382 
00383    for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
00384       if (!strcasecmp(var->name, "quality")) {
00385          res = abs(atoi(var->value));
00386          if (res > -1 && res < 11) {
00387             if (option_verbose > 2)
00388                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Quality to %d\n",res);
00389             quality = res;
00390          } else 
00391             ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
00392       } else if (!strcasecmp(var->name, "complexity")) {
00393          res = abs(atoi(var->value));
00394          if (res > -1 && res < 11) {
00395             if (option_verbose > 2)
00396                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Complexity to %d\n",res);
00397             complexity = res;
00398          } else 
00399             ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
00400       } else if (!strcasecmp(var->name, "vbr_quality")) {
00401          if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
00402             if (option_verbose > 2)
00403                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
00404             vbr_quality = res_f;
00405          } else
00406             ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
00407       } else if (!strcasecmp(var->name, "abr_quality")) {
00408          ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
00409       } else if (!strcasecmp(var->name, "enhancement")) {
00410          enhancement = ast_true(var->value) ? 1 : 0;
00411          if (option_verbose > 2)
00412             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
00413       } else if (!strcasecmp(var->name, "vbr")) {
00414          vbr = ast_true(var->value) ? 1 : 0;
00415          if (option_verbose > 2)
00416             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
00417       } else if (!strcasecmp(var->name, "abr")) {
00418          res = abs(atoi(var->value));
00419          if (res >= 0) {
00420             if (option_verbose > 2) {
00421                if (res > 0)
00422                   ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
00423                else
00424                   ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Disabling ABR\n");
00425             }
00426             abr = res;
00427          } else 
00428             ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
00429       } else if (!strcasecmp(var->name, "vad")) {
00430          vad = ast_true(var->value) ? 1 : 0;
00431          if (option_verbose > 2)
00432             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
00433       } else if (!strcasecmp(var->name, "dtx")) {
00434          dtx = ast_true(var->value) ? 1 : 0;
00435          if (option_verbose > 2)
00436             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
00437       } else if (!strcasecmp(var->name, "preprocess")) {
00438          preproc = ast_true(var->value) ? 1 : 0;
00439          if (option_verbose > 2)
00440             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
00441       } else if (!strcasecmp(var->name, "pp_vad")) {
00442          pp_vad = ast_true(var->value) ? 1 : 0;
00443          if (option_verbose > 2)
00444             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
00445       } else if (!strcasecmp(var->name, "pp_agc")) {
00446          pp_agc = ast_true(var->value) ? 1 : 0;
00447          if (option_verbose > 2)
00448             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
00449       } else if (!strcasecmp(var->name, "pp_agc_level")) {
00450          if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
00451             if (option_verbose > 2)
00452                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
00453             pp_agc_level = res_f;
00454          } else
00455             ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
00456       } else if (!strcasecmp(var->name, "pp_denoise")) {
00457          pp_denoise = ast_true(var->value) ? 1 : 0;
00458          if (option_verbose > 2)
00459             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
00460       } else if (!strcasecmp(var->name, "pp_dereverb")) {
00461          pp_dereverb = ast_true(var->value) ? 1 : 0;
00462          if (option_verbose > 2)
00463             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
00464       } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
00465          if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
00466             if (option_verbose > 2)
00467                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
00468             pp_dereverb_decay = res_f;
00469          } else
00470             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
00471       } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
00472          if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
00473             if (option_verbose > 2)
00474                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
00475             pp_dereverb_level = res_f;
00476          } else
00477             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
00478       }
00479    }
00480    ast_config_destroy(cfg);
00481 }
00482 
/*!
 * \brief Module reload hook: re-read the Speex settings from codecs.conf
 *
 * \retval 0 always
 */
static int reload(void)
{
   parse_config();
   return 0;
}
00489 
00490 static int unload_module(void)
00491 {
00492    int res;
00493 
00494    res = ast_unregister_translator(&lintospeex);
00495    res |= ast_unregister_translator(&speextolin);
00496 
00497    return res;
00498 }
00499 
00500 static int load_module(void)
00501 {
00502    int res;
00503 
00504    parse_config();
00505    res=ast_register_translator(&speextolin);
00506    if (!res) 
00507       res=ast_register_translator(&lintospeex);
00508    else
00509       ast_unregister_translator(&speextolin);
00510 
00511    return res;
00512 }
00513 
00514 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
00515       .load = load_module,
00516       .unload = unload_module,
00517       .reload = reload,
00518           );

Generated on Mon May 14 04:42:58 2007 for Asterisk - the Open Source PBX by  doxygen 1.5.1