eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

sonic.h 6.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. /* Sonic library
  2. Copyright 2010
  3. Bill Cox
  4. This file is part of the Sonic Library.
  5. The Sonic Library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public
  7. License as published by the Free Software Foundation; either
  8. version 2.1 of the License, or (at your option) any later version.
  9. The GNU C Library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public
  14. License along with the GNU C Library; if not, write to the Free
  15. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  16. 02111-1307 USA. */
  17. /*
  18. The Sonic Library implements Pitch Based Resampling, which is a new algorithm
  19. invented by Bill Cox for the specific purpose of speeding up speech by high
  20. factors at high quality. It generates smooth speech at speed up factors as high
  21. as 6X, possibly more. It is also capable of slowing down speech, and generates
  22. high quality results regardless of the speed up or slow down factor. For
  23. speeding up speech by 2X or more, the following equation is used:
  24. newSamples = period/(speed - 1.0)
  25. scale = 1.0/newSamples;
  26. where period is the current pitch period, determined using AMDF or any other
  27. pitch estimator, and speed is the speedup factor. If the current position in
  28. the input stream is pointed to by "samples", and the current output stream
  29. position is pointed to by "out", then newSamples number of samples can be
  30. generated with:
  31. out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples;
  32. where t = 0 to newSamples - 1.
  33. For speed factors < 2X, an algorithm similar to PICOLA is used. The above
  34. algorithm is first used to double the speed of one pitch period. Then, enough
  35. input is directly copied from the input to the output to achieve the desired
  36. speed up factor, where 1.0 < speed < 2.0. The amount of data copied is derived:
  37. speed = (2*period + length)/(period + length)
  38. speed*length + speed*period = 2*period + length
  39. length(speed - 1) = 2*period - speed*period
  40. length = period*(2 - speed)/(speed - 1)
  41. For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into
  42. the output twice, and length of input is copied from the input to the output
  43. until the output desired speed is reached. The length of data copied is:
  44. length = period*(speed - 0.5)/(1 - speed)
  45. For slow down factors below 0.5, no data is copied, and an algorithm
  46. similar to high speed factors is used.
  47. */
  48. #ifdef __cplusplus
  49. extern "C" {
  50. #endif
  51. /* This specifies the range of voice pitches we try to match.
  52. Note that if we go lower than 65, we could overflow in findPitchInRange */
  53. #define SONIC_MIN_PITCH 65
  54. #define SONIC_MAX_PITCH 400
  55. /* These are used to down-sample some inputs to improve speed */
  56. #define SONIC_AMDF_FREQ 4000
  57. struct sonicStreamStruct;
  58. typedef struct sonicStreamStruct *sonicStream;
  59. /* For all of the following functions, numChannels is multiplied by numSamples
  60. to determine the actual number of values read or returned. */
  61. /* Create a sonic stream. Return NULL only if we are out of memory and cannot
  62. allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */
  63. sonicStream sonicCreateStream(int sampleRate, int numChannels);
  64. /* Destroy the sonic stream. */
  65. void sonicDestroyStream(sonicStream stream);
  66. /* Use this to write floating point data to be sped up or slowed down into the stream.
  67. Values must be between -1 and 1. Return 0 if memory realloc failed, otherwise 1 */
  68. int sonicWriteFloatToStream(sonicStream stream, float *samples, int numSamples);
  69. /* Use this to write 16-bit data to be sped up or slowed down into the stream.
  70. Return 0 if memory realloc failed, otherwise 1 */
  71. int sonicWriteShortToStream(sonicStream stream, short *samples, int numSamples);
  72. /* Use this to write 8-bit unsigned data to be sped up or slowed down into the stream.
  73. Return 0 if memory realloc failed, otherwise 1 */
  74. int sonicWriteUnsignedCharToStream(sonicStream stream, unsigned char *samples, int numSamples);
  75. /* Use this to read floating point data out of the stream. Sometimes no data
  76. will be available, and zero is returned, which is not an error condition. */
  77. int sonicReadFloatFromStream(sonicStream stream, float *samples, int maxSamples);
  78. /* Use this to read 16-bit data out of the stream. Sometimes no data will
  79. be available, and zero is returned, which is not an error condition. */
  80. int sonicReadShortFromStream(sonicStream stream, short *samples, int maxSamples);
  81. /* Use this to read 8-bit unsigned data out of the stream. Sometimes no data will
  82. be available, and zero is returned, which is not an error condition. */
  83. int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char *samples, int maxSamples);
  84. /* Force the sonic stream to generate output using whatever data it currently
  85. has. No extra delay will be added to the output, but flushing in the middle of
  86. words could introduce distortion. */
  87. int sonicFlushStream(sonicStream stream);
  88. /* Return the number of samples in the output buffer */
  89. int sonicSamplesAvailable(sonicStream stream);
  90. /* Get the speed of the stream. */
  91. float sonicGetSpeed(sonicStream stream);
  92. /* Set the speed of the stream. */
  93. void sonicSetSpeed(sonicStream stream, float speed);
  94. /* Get the pitch of the stream. */
  95. float sonicGetPitch(sonicStream stream);
  96. /* Set the pitch of the stream. */
  97. void sonicSetPitch(sonicStream stream, float pitch);
  98. /* Get the scaling factor of the stream. */
  99. float sonicGetVolume(sonicStream stream);
  100. /* Set the scaling factor of the stream. */
  101. void sonicSetVolume(sonicStream stream, float volume);
  102. /* Get the sample rate of the stream. */
  103. int sonicGetSampleRate(sonicStream stream);
  104. /* Get the number of channels. */
  105. int sonicGetNumChannels(sonicStream stream);
  106. /* This is a non-stream oriented interface to just change the speed of a sound
  107. sample. It works in-place on the sample array, so the array must have room for
  108. the output, roughly numSamples/speed samples (more than the input when speed < 1). Returns the new number of samples. */
  109. int sonicChangeFloatSpeed(float *samples, int numSamples, float speed, float pitch,
  110. float volume, int sampleRate, int numChannels);
  111. /* This is a non-stream oriented interface to just change the speed of a sound
  112. sample. It works in-place on the sample array, so the array must have room for
  113. the output, roughly numSamples/speed samples (more than the input when speed < 1). Returns the new number of samples. */
  114. int sonicChangeShortSpeed(short *samples, int numSamples, float speed, float pitch,
  115. float volume, int sampleRate, int numChannels);
  116. #ifdef __cplusplus
  117. }
  118. #endif