SphinxBase 0.6

include/sphinxbase/ad.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * ad.h -- generic live audio interface for recording and playback
00039  * 
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1996 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * 
00049  * $Log: ad.h,v $
00050  * Revision 1.8  2005/06/22 08:00:06  arthchan2003
00051  * Completed all doxygen documentation on file description for libs3decoder/libutil/libs3audio and programs.
00052  *
00053  * Revision 1.7  2004/12/14 00:39:49  arthchan2003
00054  * add <s3types.h> to the code, change some comments to doxygen style
00055  *
00056  * Revision 1.6  2004/12/06 11:17:55  arthchan2003
00057  * Update the copyright information of ad.h, *sigh* start to feel tired of updating documentation system.  Anyone who has time, please take up libs3audio. That is the last place which is undocumented
00058  *
00059  * Revision 1.5  2004/07/23 23:44:46  egouvea
00060  * Changed the cygwin code to use the same audio files as the MS Visual code, removed unused variables from fe_interface.c
00061  *
00062  * Revision 1.4  2004/02/29 23:48:31  egouvea
00063  * Updated configure.in to the recent automake/autoconf, fixed win32
00064  * references in audio files.
00065  *
00066  * Revision 1.3  2002/11/10 19:27:38  egouvea
00067  * Fixed references to sun's implementation of audio interface,
00068  * referring to the correct .h file, and replacing sun4 with sunos.
00069  *
00070  * Revision 1.2  2001/12/11 04:40:55  lenzo
00071  * License cleanup.
00072  *
00073  * Revision 1.1.1.1  2001/12/03 16:01:45  egouvea
00074  * Initial import of sphinx3
00075  *
00076  * Revision 1.1.1.1  2001/01/17 05:17:14  ricky
00077  * Initial Import of the s3.3 decoder, has working decodeaudiofile, s3.3_live
00078  *
00079  * 
00080  * 19-Jan-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00081  *              Added AD_ return codes.  Added ad_open_sps_bufsize(), and
00082  *              ad_rec_t.n_buf.
00083  * 
00084  * 17-Apr-98    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00085  *              Added ad_open_play_sps().
00086  * 
00087  * 07-Mar-98    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00088  *              Added ad_open_sps().
00089  * 
00090  * 10-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00091  *              Added ad_wbuf_t, ad_rec_t, and ad_play_t types, and augmented all
00092  *              recording functions with ad_rec_t, and playback functions with
00093  *              ad_play_t.
00094  * 
00095  * 06-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00096  *              Created.
00097  */
00098 
00103 #ifndef _AD_H_
00104 #define _AD_H_
00105 
00106 #include <sphinx_config.h>
00107 
00108 #if defined (__CYGWIN__)
00109 #include <w32api/windows.h>
00110 #include <w32api/mmsystem.h>
00111 #elif (defined(WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
00112 #include <windows.h>
00113 #include <mmsystem.h>
00114 #elif defined(AD_BACKEND_ALSA)
00115 #include <alsa/asoundlib.h>
00116 #endif
00117 
00118 /* Win32/WinCE DLL gunk */
00119 #include <sphinxbase/sphinxbase_export.h>
00120 
00121 #include <sphinxbase/prim_type.h>
00122 
00123 #ifdef __cplusplus
00124 extern "C" {
00125 #endif
00126 #if 0
00127 /* Fool Emacs. */
00128 }
00129 #endif
00130 
/* Size in bytes of one audio sample; samples are int16 values. */
00131 #define AD_SAMPLE_SIZE          (sizeof(int16))
/* Sampling rate, in samples per second, that this header names as the default. */
00132 #define DEFAULT_SAMPLES_PER_SEC 16000
00133 
/*
 * Return codes.
 *
 * AD_OK is zero and every other code is negative.  Each negative value is
 * parenthesized so that it expands as a single operand wherever it is used.
 *
 * Note that AD_EOF and AD_ERR_GEN share the value -1, so a caller cannot
 * tell them apart by value alone.
 */
#define AD_OK           0
#define AD_EOF          (-1)
#define AD_ERR_GEN      (-1)
#define AD_ERR_NOT_OPEN (-2)
#define AD_ERR_WAVE     (-3)
00140 
00141 
/*
 * ad_wbuf_t: one wave buffer for the Win32 backend, referenced by
 * ad_rec_t.wi_buf and ad_play_t.wo_buf.  Compiled only when WIN32 or
 * AD_BACKEND_WIN32 is defined and GNUWINCE is not.
 * NOTE(review): each h_* / p_* pair looks like a memory handle plus the
 * pointer obtained from it -- confirm against the Win32 backend sources.
 */
00142 #if  (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE)
00143 typedef struct {
00144     HGLOBAL h_whdr;     /* Handle paired with p_whdr */
00145     LPWAVEHDR p_whdr;   /* Pointer to the wave header (LPWAVEHDR) */
00146     HGLOBAL h_buf;      /* Handle paired with p_buf */
00147     LPSTR p_buf;        /* Pointer to the buffer contents (LPSTR) */
00148 } ad_wbuf_t;
00149 #endif
00149 #endif
00150 
00151 
00152 /* ------------ RECORDING -------------- */
00153 
00154 /*
00155  * NOTE: ad_rec_t and ad_play_t are READ-ONLY structures for the user.
00156  */
00157 
00158 #if (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE)
00159 
/*
 * Default device for the Win32 backend.  DEV_MAPPER is not defined in this
 * header.  NOTE(review): confirm which header supplies it.
 */
00160 #define DEFAULT_DEVICE (char*)DEV_MAPPER
00161 
/*
 * ad_rec_t, Win32 variant: recording state built around a HWAVEIN handle
 * and an array of ad_wbuf_t buffers.  Read-only for users of this API.
 */
00165 typedef struct ad_rec_s {
00166     HWAVEIN h_wavein;   /* "HANDLE" to the audio input device */
00167     ad_wbuf_t *wi_buf;  /* Recording buffers provided to system */
00168     int32 n_buf;        /* #Recording buffers provided to system */
00169     int32 opened;       /* Flag; A/D opened for recording */
00170     int32 recording;    /* NOTE(review): presumably a flag set while recording is active -- confirm */
00171     int32 curbuf;       /* Current buffer with data for application */
00172     int32 curoff;       /* Start of data for application in curbuf */
00173     int32 curlen;       /* #samples of data from curoff in curbuf */
00174     int32 lastbuf;      /* Last buffer containing data after recording stopped */
00175     int32 sps;          /* Samples/sec */
00176     int32 bps;          /* Bytes/sample */
00177 } ad_rec_t;
00178 
00179 #elif defined(AD_BACKEND_OSS)
00180 
/* Default device for the OSS backend. */
00181 #define DEFAULT_DEVICE "/dev/dsp"
00182 
/*
 * ad_rec_t, OSS variant (selected by AD_BACKEND_OSS): recording state kept
 * as an audio device descriptor plus the sampling parameters.
 */
00187 /* Added by jd5q+@andrew.cmu.edu, 10/3/1997: */
00188 typedef struct {
00189     int32 dspFD;        /* Audio device descriptor */
00190     int32 recording;    /* NOTE(review): presumably a flag set while recording is active -- confirm */
00191     int32 sps;          /* Samples/sec */
00192     int32 bps;          /* Bytes/sample */
00193 } ad_rec_t;
00194 
00195 #elif defined(AD_BACKEND_ESD)
00196 
/* The ESD backend names no default device. */
00197 #define DEFAULT_DEVICE NULL
/* ad_rec_t, ESD variant (selected by AD_BACKEND_ESD). */
00198 typedef struct {
00199     int32 fd;           /* NOTE(review): presumably the descriptor used to talk to ESD -- confirm */
00200     int32 recording;    /* NOTE(review): presumably a flag set while recording is active -- confirm */
00201     int32 sps;          /* Samples/sec, as in the other ad_rec_t variants */
00202     int32 bps;          /* Bytes/sample, as in the other ad_rec_t variants */
00203 } ad_rec_t;
00204 
00205 #elif defined(AD_BACKEND_ALSA)
00206 
/* Default device name for the ALSA backend. */
00207 #define DEFAULT_DEVICE "default"
/*
 * ad_rec_t, ALSA variant (selected by AD_BACKEND_ALSA); snd_pcm_t comes from
 * <alsa/asoundlib.h>, which this header includes for that backend.
 */
00208 typedef struct {
00209     snd_pcm_t *dspH;    /* Pointer to the ALSA snd_pcm_t used by this recorder */
00210     int32 recording;    /* NOTE(review): presumably a flag set while recording is active -- confirm */
00211     int32 sps;          /* Samples/sec, as in the other ad_rec_t variants */
00212     int32 bps;          /* Bytes/sample, as in the other ad_rec_t variants */
00213 } ad_rec_t;
00214 
00215 #elif defined(AD_BACKEND_S60)
00216 
/*
 * ad_rec_t, S60 variant (selected by AD_BACKEND_S60).
 * NOTE(review): unlike every other branch of this conditional, this one
 * defines no DEFAULT_DEVICE -- confirm that no S60 code relies on it.
 */
00217 typedef struct ad_rec_s {
00218     void* recorder;     /* Opaque pointer; NOTE(review): presumably the platform recorder object -- confirm */
00219     int32 recording;    /* NOTE(review): presumably a flag set while recording is active -- confirm */
00220     int32 sps;          /* Samples/sec, as in the other ad_rec_t variants */
00221     int32 bps;          /* Bytes/sample, as in the other ad_rec_t variants */
00222 } ad_rec_t;
00223 
/*
 * The S60 backend also exposes ad_open_sps_bufsize: takes a sampling rate
 * and a buffer size in msec and returns an ad_rec_t pointer.
 */
00224 SPHINXBASE_EXPORT
00225 ad_rec_t *ad_open_sps_bufsize (int32 samples_per_sec, int32 bufsize_msec);
00226 
00227 #else
00228 
/* Fallback branch, used when none of the backend conditions above matched. */
00229 #define DEFAULT_DEVICE NULL
/* ad_rec_t, fallback variant: carries only the sampling parameters. */
00230 typedef struct {
00231     int32 sps;          /* Samples/sec, as in the other ad_rec_t variants */
00232     int32 bps;          /* Bytes/sample, as in the other ad_rec_t variants */
00233 } ad_rec_t;     
00234 
00235 
00236 #endif
00237 
00238 
/*
 * ad_open_dev: takes a device name and a sampling rate and returns an
 * ad_rec_t pointer.
 * NOTE(review): presumably opens `dev` for recording and returns NULL on
 * failure; the implementation is backend-specific and not part of this
 * header -- confirm.
 */
00248 SPHINXBASE_EXPORT
00249 ad_rec_t *ad_open_dev (
00250         const char *dev,        /* Device name (cf. the per-backend DEFAULT_DEVICE) */
00251         int32 samples_per_sec   /* Requested sampling rate, samples/sec */
00252         );
00253 
/*
 * ad_open_sps: takes only a sampling rate and returns an ad_rec_t pointer.
 * NOTE(review): presumably equivalent to ad_open_dev with DEFAULT_DEVICE;
 * confirm against the backend implementation.
 */
00257 SPHINXBASE_EXPORT
00258 ad_rec_t *ad_open_sps (
00259                        int32 samples_per_sec    /* Requested sampling rate, samples/sec */
00260                        );
00261 
00262 
/*
 * ad_open: takes no arguments and returns an ad_rec_t pointer.
 * NOTE(review): presumably opens the default device at
 * DEFAULT_SAMPLES_PER_SEC -- confirm against the backend implementation.
 */
00266 SPHINXBASE_EXPORT
00267 ad_rec_t *ad_open ( void );
00268 
00269 
/*
 * The guard below is the same condition that selects the Win32 ad_rec_t
 * definition earlier in this header, so this prototype is visible whenever
 * that struct variant is in use (including builds that define only
 * AD_BACKEND_WIN32).
 */
00270 #if (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE)
00271 /*
00272  * Like ad_open_sps but specifies buffering required within driver.  This function is
00273  * useful if the default (5000 msec worth) is too small and results in loss of data.
 *
 * samples_per_sec: requested sampling rate, samples/sec.
 * bufsize_msec:    amount of driver buffering, in milliseconds of audio.
 * Returns an ad_rec_t pointer.
 * NOTE(review): presumably NULL on failure -- confirm against the backend.
00274  */
00275 SPHINXBASE_EXPORT
00276 ad_rec_t *ad_open_sps_bufsize (int32 samples_per_sec, int32 bufsize_msec);
00277 #endif
00278 
00279 
00280 /*
 * Start audio recording on the given ad_rec_t.
 * Return value: 0 if successful, <0 otherwise (cf. AD_OK and the negative
 * AD_ERR_* codes defined in this header).
 */
00281 SPHINXBASE_EXPORT
00282 int32 ad_start_rec (ad_rec_t *);
00283 
00284 
00285 /*
 * Stop audio recording on the given ad_rec_t.
 * Return value: 0 if successful, <0 otherwise (cf. AD_OK and the negative
 * AD_ERR_* codes defined in this header).
 */
00286 SPHINXBASE_EXPORT
00287 int32 ad_stop_rec (ad_rec_t *);
00288 
00289 
00290 /*
 * Close the recording device behind the given ad_rec_t.
 * Return value: 0 if successful, <0 otherwise.
 * NOTE(review): whether the ad_rec_t itself is released is not visible from
 * this header -- confirm before reusing the pointer after this call.
 */
00291 SPHINXBASE_EXPORT
00292 int32 ad_close (ad_rec_t *);
00293 
00294 
00295 /*
00296  * Read the next block of audio samples while recording; read up to max samples
 * into buf.
00297  * Return value: # samples actually read (could be 0 since the call is
 * non-blocking); -1 if not recording and no more samples remain to be read
00298  * from the most recent recording.  (-1 is the value of both AD_EOF and
 * AD_ERR_GEN in this header.)
 *
 * buf: destination for the samples (int16 each).
 * max: upper bound on the number of samples stored into buf.
00299  */
00300 SPHINXBASE_EXPORT
00301 int32 ad_read (ad_rec_t *, int16 *buf, int32 max);
00302 
00303 
00304 /* ------ PLAYBACK; SIMILAR TO RECORDING ------- */
00305 
/*
 * ad_play_t: playback state.  Like ad_rec_t, it is read-only for users of
 * this API.
 *
 * The guard is the same condition under which ad_wbuf_t is defined earlier
 * in this header, because the Win32 variant below stores ad_wbuf_t pointers;
 * it therefore also covers builds that define only AD_BACKEND_WIN32.
 */
00306 #if (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE)
00307 
00308 typedef struct {
00309     HWAVEOUT h_waveout; /* "HANDLE" to the audio output device */
00310     ad_wbuf_t *wo_buf;  /* Playback buffers given to the system */
00311     int32 opened;       /* Flag; A/D opened for playback */
00312     int32 playing;      /* NOTE(review): presumably a flag set while playback is active -- confirm */
00313     char *busy;         /* flags [N_WO_BUF] indicating whether given to system */
00314     int32 nxtbuf;       /* Next buffer [0..N_WO_BUF-1] to be used for playback data */
00315     int32 sps;          /* Samples/sec */
00316     int32 bps;          /* Bytes/sample */
00317 } ad_play_t;
00318 
00319 #else
00320 
00321 typedef struct {
00322     int32 sps;          /* Samples/sec */
00323     int32 bps;          /* Bytes/sample */
00324 } ad_play_t;    /* Dummy definition for systems without A/D stuff */
00325 
00326 #endif
00327 
00328 
/*
 * ad_open_play_sps: takes a sampling rate and returns an ad_play_t pointer.
 * NOTE(review): presumably the playback counterpart of ad_open_sps, with
 * NULL on failure -- confirm against the backend implementation.
 */
00329 SPHINXBASE_EXPORT
00330 ad_play_t *ad_open_play_sps (int32 samples_per_sec);
00331 
/*
 * ad_open_play: takes no arguments and returns an ad_play_t pointer.
 * NOTE(review): presumably the playback counterpart of ad_open -- confirm
 * against the backend implementation.
 */
00332 SPHINXBASE_EXPORT
00333 ad_play_t *ad_open_play ( void );
00334 
/*
 * ad_start_play: takes an ad_play_t pointer and returns an int32 status.
 * NOTE(review): presumably 0 on success and <0 on failure, like
 * ad_start_rec -- confirm.
 */
00335 SPHINXBASE_EXPORT
00336 int32 ad_start_play (ad_play_t *);
00337 
/*
 * ad_stop_play: takes an ad_play_t pointer and returns an int32 status.
 * NOTE(review): presumably 0 on success and <0 on failure, like
 * ad_stop_rec -- confirm.
 */
00338 SPHINXBASE_EXPORT
00339 int32 ad_stop_play (ad_play_t *);
00340 
/*
 * ad_close_play: takes an ad_play_t pointer and returns an int32 status.
 * NOTE(review): presumably 0 on success and <0 on failure, like ad_close;
 * whether the ad_play_t is released is not visible here -- confirm.
 */
00341 SPHINXBASE_EXPORT
00342 int32 ad_close_play (ad_play_t *);
00343 
00344 
/*
 * ad_write: takes an ad_play_t pointer, a buffer of int16 samples and a
 * length, and returns an int32.
 * NOTE(review): presumably hands up to len samples from buf to the playback
 * device and returns how many were accepted (negative on error) -- confirm
 * against the backend implementation.
 */
00353 SPHINXBASE_EXPORT
00354 int32 ad_write (ad_play_t *, int16 *buf, int32 len);
00355 
00356 
00357 /* ------ MISCELLANEOUS ------- */
00358 
/*
 * ad_mu2li: per the parameter notes below, reads n_samp mu-law samples from
 * inbuf and places the corresponding PCM data in outbuf.  The caller
 * allocates outbuf; nothing is returned.
 */
00362 SPHINXBASE_EXPORT
00363 void ad_mu2li (int16 *outbuf,           /* Out: PCM data placed here (allocated by user) */
00364                unsigned char *inbuf,    /* In: Input buffer with mulaw data */
00365                int32 n_samp);           /* In: #Samples in inbuf */
00366 
00367 #ifdef __cplusplus
00368 }
00369 #endif
00370 
00371 
00372 #endif