eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

spect.h 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "wx/spinctrl.h"
  20. #include "wx/notebook.h"
  // Application title string, wrapped in the _T() text macro.
  21. #define PROGRAM_NAME _T("Voice Editor")
  // Display geometry constants.
  22. #define FRAME_WIDTH 1000 // max width for 8000 Hz frame (comment originally read "8000kHz"; presumably Hz -- confirm)
  23. #define MAX_DISPLAY_FREQ 9500
  24. #define FRAME_HEIGHT 240
  // T_* identifiers: consecutive integers 301-321.
  // NOTE(review): presumably wxWidgets control/command IDs for the dialog
  // widgets declared further down -- confirm against the event tables.
  25. #define T_ZOOMOUT 301
  26. #define T_ZOOMIN 302
  27. #define T_USEPITCHENV 303
  28. #define T_SAMPRATE 304
  29. #define T_PITCH1 305
  30. #define T_PITCH2 306
  31. #define T_DURATION 307
  32. #define T_AMPLITUDE 308
  33. #define T_AMPFRAME 309
  34. #define T_TIMEFRAME 310
  35. #define T_TIMESEQ 311
  36. #define T_AV 312
  37. #define T_AVP 313
  38. #define T_FRIC 314
  39. #define T_FRICBP 315
  40. #define T_ASPR 316
  41. #define T_TURB 317
  42. #define T_SKEW 318
  43. #define T_TILT 319
  44. #define T_KOPEN 320
  45. #define T_FNZ 321
  // File signature words. Read least-significant byte first, each 32-bit value
  // spells a 4-character ASCII tag; a FILEID1/FILEID2 pair spells 8 characters.
  46. #define FILEID1_SPECTSEQ 0x43455053 // "SPEC"
  47. #define FILEID2_SPECTSEQ 0x51455354 // "TSEQ": for eSpeak sequence
  48. #define FILEID2_SPECTSEK 0x4b455354 // "TSEK": for Klatt sequence
  49. #define FILEID2_SPECTSQ2 0x32515354 // "TSQ2": with Klatt data
  50. #define FILEID1_SPC2 0x32435053 // "SPC2": an old format for spectrum files
  51. #define FILEID1_PITCHENV 0x43544950 // "PITC"
  52. #define FILEID2_PITCHENV 0x564e4548 // "HENV"
  53. #define FILEID1_PRAATSEQ 0x41415250 // "PRAA"
  54. #define FILEID2_PRAATSEQ 0x51455354 // "TSEQ": same value as FILEID2_SPECTSEQ
  // Forward declaration; MyFrame is not otherwise referenced in this header.
  55. class MyFrame;
  // PitchEnvelope: two unsigned 16-bit pitch values plus a 128-byte table.
  // NOTE(review): presumably env[] is a contour applied between pitch1 and
  // pitch2 (compare the SetPitch() prototype in this header) -- confirm the
  // units and the exact meaning with the implementation.
  56. typedef struct {
  57. unsigned short pitch1;
  58. unsigned short pitch2;
  59. unsigned char env[128]; // fixed-size table of 128 bytes
  60. } PitchEnvelope;
  // formant_t: a frequency/bandwidth pair (both signed 16-bit).
  // Used as the element type of SpectFrame::formants[].
  // NOTE(review): units are presumably Hz -- confirm.
  61. typedef struct {
  62. short freq;
  63. short bandw;
  64. } formant_t;
  // peak_t: seven signed 16-bit parameters describing one spectral peak.
  // Used as the element type of SpectFrame::peaks[] and passed to
  // SpectSeq::ApplyAmp_adjust().
  // NOTE(review): the klt_* fields are presumably Klatt-synthesis parameters
  // (bandwidth / parallel amplitude / parallel bandwidth) -- confirm.
  65. typedef struct {
  66. short pkfreq;
  67. short pkheight;
  68. short pkwidth;
  69. short pkright;
  70. short klt_bw;
  71. short klt_ap;
  72. short klt_bp;
  73. } peak_t;
  74. //===============================================================================================
  75. // Old "SPC2" format
  // PEAKS7: one 4-byte peak record; CYCLE stores seven of these in peak_data[].
  // All fields are single bytes, scaled as described in the per-field comments.
  76. typedef struct {
  77. unsigned char freq; /* *factor for f0-f7 of [4,5,11,20,20,25,32,32] */
  78. unsigned char height;
  79. unsigned char width_l; /* * 8 Hz */
  80. unsigned char width_r; /* * 8 Hz */
  81. } PEAKS7;
  82. /* flags: bit 0 label is present in data[]
  83. bit 1 mark cycle - blue background
  84. bit 2 mark cycle - green square
  85. bit 3 mark cycle - 'lengthen' indicator
  86. bit 4 mark cycle - 'standard'
  87. bit 7 hide peaks
  88. */
  // CYCLE: header of one variable-length record. data[] is declared with a
  // single element, but per the field comment the harmonic data and then the
  // f3,f4,f5 data follow it, so sizeof(CYCLE) is not the size of a record.
  // The bit assignments of 'flags' are listed in the comment preceding this struct.
  89. typedef struct {
  90. short pitch; /* pitch of fundamental, 1/16 Hz */
  91. short length; /* length in 1/15625 sec */
  92. unsigned char n_harm; /* number of harmonic data, h1 ... hn */
  93. unsigned char flags; /* bits 0 */
  94. unsigned char aspiration;
  95. unsigned char hf_voicing;
  96. unsigned char spare1;
  97. unsigned char spare2;
  98. unsigned char spare3;
  99. unsigned char spare4;
  100. PEAKS7 peak_data[7];
  101. unsigned char data[1]; /* firstly harmonic data, then f3,f4,f5 data */
  102. } CYCLE; /* VARIABLE LENGTH Structure */
  // Defined in another translation unit.
  // NOTE(review): presumably returns the total size in bytes of the
  // variable-length CYCLE record pointed to by cy -- confirm with the definition.
  103. extern int SPC2_size_cycle(CYCLE *cy);
  104. //==============================================================================================
  // SpectFrame: one spectrum frame. Holds the harmonic data (spect), peak
  // and formant tables, Klatt parameters, and per-frame editing state.
  // All data members are public. USHORT, N_KLATTP2 and N_PEAKS are not
  // declared in this header and must be visible before it is included.
  105. class SpectFrame
  106. {//=============
  107. public:
  // 'copy' defaults to NULL. NOTE(review): presumably a non-NULL argument
  // makes the new frame a copy of *copy -- confirm with the definition.
  108. SpectFrame(SpectFrame *copy=NULL);
  109. ~SpectFrame();
  // Stream I/O. The int return values are not documented here; check the
  // definitions for the success/failure convention.
  110. int Import(wxInputStream &stream);
  111. int ImportSPC2(wxInputStream &stream, float &time_acc);
  112. int Load(wxInputStream &stream, int file_format_type);
  113. int Save(wxOutputStream &stream, int file_format_type);
  // Drawing and peak editing.
  114. void Draw(wxDC &dc, int offy, int frame_width, double scalex, double scaley);
  115. void ZeroPeaks();
  116. void CopyPeaks(SpectFrame *sf);
  117. void ToggleMarker(int n);
  118. void ApplyVoiceMods();
  // Synthesis helpers.
  119. void MakeWaveF(int peaks, PitchEnvelope &pitch, int amplitude, int duration);
  120. void MakeHtab(int numh, int *htab, int pitch);
  121. double GetRms(int amp);
  122. void KlattDefaults();
  // Per-frame state (public).
  123. int selected;
  124. int keyframe;
  125. int amp_adjust;
  126. float length_adjust;
  127. double rms;
  128. float time;
  129. float pitch;
  130. float length;
  131. float dx;
  132. int nx;
  133. int markers;
  134. int max_y;
  135. USHORT *spect; // sqrt of harmonic amplitudes, 1-nx at 'pitch'
  136. short klatt_param[N_KLATTP2];
  137. formant_t formants[N_PEAKS]; // this is just the estimate given by Praat
  138. peak_t peaks[N_PEAKS];
  139. private:
  // Takes the DC by pointer, unlike the public Draw() which takes a reference.
  140. void DrawPeaks(wxDC *dc, int offy, int frame_width, int seq_amplitude, double scalex);
  141. };
  // SpectSeq: a sequence of SpectFrame objects (frames / numframes) together
  // with sequence-wide settings such as amplitude, pitch envelope and duration.
  // All data members except scalex/scaley are public.
  142. class SpectSeq
  143. {//===========
  144. public:
  // nframes defaults to 0.
  145. SpectSeq(int nframes=0);
  146. ~SpectSeq();
  // Stream I/O. The int return values are not documented here; check the
  // definitions for the success/failure convention.
  147. int Import(wxInputStream& stream);
  148. int ImportSPC2(wxInputStream& stream);
  149. int Save(wxOutputStream& stream, int selection);
  150. int Load(wxInputStream& stream);
  // Drawing and synthesis.
  151. void Draw(wxDC &dc, int start_y, int end_y);
  152. void MakePitchenv(PitchEnvelope &pitch, int start_frame, int end_frame);
  153. void MakeWave(int start, int end, PitchEnvelope &pitch);
  // Peak interpolation and frame editing.
  154. void InterpolatePeaks(int on);
  155. void InterpolateAdjacent(void);
  156. void CopyDown(int frame, int direction);
  // Selection and clipboard operations.
  157. void SelectAll(int yes);
  158. int CountSelected();
  159. void DeleteSelected();
  160. void ClipboardCopy();
  161. int ClipboardInsert(int insert_at);
  // Frame timing.
  162. float GetFrameLength(int frame, int plus, int *original_mS);
  163. float GetKeyedLength();
  164. void SetFrameLengths();
  165. void ConstructVowel(void);
  // Sequence state (public).
  166. int numframes;
  167. int amplitude;
  168. int spare;
  169. wxString name;
  170. SpectFrame **frames; // array of pointers to frames; NOTE(review): presumably numframes entries -- confirm
  171. PitchEnvelope pitchenv;
  172. int pitch1;
  173. int pitch2;
  174. int duration;
  175. int grid;
  176. int bass_reduction;
  177. int max_x;
  178. int max_y;
  179. int file_format;
  180. private:
  181. void Load2(wxInputStream& stream, int import, int n);
  182. void InterpolatePeak(int peak);
  183. void ApplyAmp_adjust(SpectFrame *sp, peak_t *peaks);
  184. double scalex;
  185. double scaley;
  186. };
  // SpectDisplay: a wxScrolledWindow subclass that displays and edits one
  // SpectSeq (held in the public 'spectseq' pointer). Declares its own
  // wxWidgets event table via DECLARE_EVENT_TABLE().
  187. class SpectDisplay: public wxScrolledWindow
  188. {//========================================
  189. public:
  190. SpectDisplay(wxWindow *parent, const wxPoint& pos, const wxSize& size, SpectSeq *spect);
  191. ~SpectDisplay();
  // Virtual drawing hook taking the device context to paint on.
  192. virtual void OnDraw(wxDC& dc);
  // Event-style handlers.
  193. void OnMouse(wxMouseEvent& event);
  194. void OnKey(wxKeyEvent& event);
  195. void OnActivate(int active);
  196. void OnMenu(wxCommandEvent& event);
  // Both arguments are optional: path defaults to an empty string and
  // selection to 0.
  197. void Save(const wxString &path=_T(""),int selection=0);
  198. void SavePitchenv(PitchEnvelope &pitch);
  199. void OnZoom(int command);
  // Public state.
  200. SpectSeq *spectseq;
  201. float zoomx;
  202. float zoomy;
  203. int zoom;
  204. wxString savepath;
  // Transfer of values between this display and the dialog controls.
  205. void ReadDialogValues();
  206. void WriteDialogValues();
  207. void WriteDialogLength();
  208. void RefreshDialogValues(int type);
  209. int ScrollToFrame(int frame, int centre);
  210. void SelectFrame(int frame);
  211. private:
  212. void RefreshFrame(int frame);
  213. void SetKeyframe(SpectFrame *sf, int yes);
  // Note: pitchenv is passed by value, so the whole structure is copied.
  214. void PlayChild(int number, PitchEnvelope pitchenv);
  215. void SetExtent();
  216. int sframe;
  217. int pk_num;
  218. DECLARE_EVENT_TABLE()
  219. };
  // ByteGraph: a wxScrolledWindow subclass that graphs an array of bytes
  // supplied through SetData(). Declares its own wxWidgets event table.
  220. class ByteGraph: public wxScrolledWindow
  221. {//=====================================
  222. public:
  223. ByteGraph(wxWindow *parent, const wxPoint& pos, const wxSize &size);
  224. virtual void OnDraw(wxDC &dc);
  // NOTE(review): whether 'data' is copied or only referenced is not visible
  // here -- confirm ownership and lifetime with the definition.
  225. void SetData(int nx, unsigned char *data);
  226. void ShowSpectrum(int yes);
  227. private:
  228. int npoints;
  229. unsigned char *graph;
  230. int show_spectrum;
  231. double spectrum_scale;
  232. void DrawSpectrum(wxDC &dc);
  233. void OnMouse(wxMouseEvent& event);
  234. DECLARE_EVENT_TABLE()
  235. };
  // FormantDlg: a wxPanel subclass holding the controls used to show and
  // read back values for a frame of a SpectSeq. N_KLATTP and N_PEAKS are not
  // declared in this header and must be visible before it is included.
  236. class FormantDlg : public wxPanel
  237. {//==============================
  238. public:
  239. FormantDlg(wxWindow *parent);
  240. void ShowFrame(SpectSeq *spectseq, int frame, int pk, int field);
  241. void GetValues(SpectSeq *spectseq, int frame);
  242. void OnCommand(wxCommandEvent& event);
  243. void OnSpin(wxSpinEvent& event);
  244. void HideFields(int synth_type);
  // Controls exposed publicly.
  245. wxCheckBox *usepitchenv;
  246. wxSpinCtrl *t_amplitude;
  247. wxSpinCtrl *t_ampframe;
  248. wxSpinCtrl *t_timeframe;
  249. wxTextCtrl *tt_timeframe;
  250. wxSpinCtrl *t_timeseq;
  251. wxStaticText *t_pitch;
  252. wxStaticText *t_orig_frame;
  253. wxStaticText *t_orig_seq;
  // One spin control and one label per Klatt parameter (N_KLATTP of each).
  254. wxSpinCtrl *s_klatt[N_KLATTP];
  255. wxStaticText *t_klatt[N_KLATTP];
  256. private:
  257. wxStaticText *t_lab[10];
  // Per-peak controls: each array has N_PEAKS entries.
  258. wxStaticText *t_labpk[N_PEAKS];
  259. wxTextCtrl *t_pkfreq[N_PEAKS];
  260. wxTextCtrl *t_pkheight[N_PEAKS];
  261. wxTextCtrl *t_pkwidth[N_PEAKS];
  262. wxTextCtrl *t_klt_bw[N_PEAKS];
  263. wxTextCtrl *t_klt_ap[N_PEAKS];
  264. wxTextCtrl *t_klt_bp[N_PEAKS];
  265. wxRadioButton *t_select_peak[N_PEAKS];
  266. wxButton *t_zoomout;
  267. wxButton *t_zoomin;
  268. DECLARE_EVENT_TABLE()
  269. };
  // VoiceDlg: a wxPanel subclass holding the voice-parameter controls and a
  // pointer to a voice_t. voice_t and N_PEAKS are not declared in this header
  // and must be visible before it is included.
  270. class VoiceDlg : public wxPanel
  271. {//=============================
  272. public:
  273. VoiceDlg(wxWindow *parent);
  274. void ReadParams();
  275. void WriteParams();
  276. void Save();
  277. void Load();
  278. void SetFromSpect(SpectSeq *spect);
  // Public state and controls.
  279. voice_t *voice_p;
  280. PitchEnvelope pitchenv;
  281. wxSpinCtrl* vd_pitch1;
  282. wxSpinCtrl* vd_pitch2;
  283. wxTextCtrl* vd_duration;
  284. private:
  285. wxButton *vd_defaults;
  286. wxButton *vd_load;
  287. wxButton *vd_save;
  // N_PEAKS+1 rows of label plus three spin controls.
  288. wxStaticText* vd_labpk[N_PEAKS+1];
  289. wxSpinCtrl* vd[N_PEAKS+1][3];
  290. wxCheckBox* vd_usepitch;
  291. wxStaticText* vd_lab[3];
  292. ByteGraph *vd_pitchgraph;
  293. wxComboBox *vd_pitchchoice;
  294. int pitch1;
  295. int pitch2;
  // Event handlers are private in this class (they are public in FormantDlg).
  296. void OnCommand(wxCommandEvent& event);
  297. void OnSpin(wxSpinEvent& event);
  298. DECLARE_EVENT_TABLE()
  299. };
  // Global objects defined in other translation units.
  300. extern SpectSeq *clipboard_spect;
  301. extern int pk_select;
  302. extern int samplerate;
  303. extern unsigned char env_fall[]; // array of unspecified length; size is known only at the definition
  304. extern FormantDlg* formantdlg;
  305. extern VoiceDlg* voicedlg;
  306. extern wxNotebook* notebook;
  307. extern ByteGraph* pitchgraph;
  308. extern SpectDisplay *currentcanvas;
  // Functions defined in other translation units. USHORT and UCHAR are not
  // declared in this header and must be visible before it is included.
  // NOTE(review): polint is presumably polynomial interpolation over the n
  // points (xa[i], ya[i]) evaluated at x -- confirm with the definition.
  309. extern float polint(float xa[],float ya[],int n,float x);
  310. extern void WavegenInit(int samplerate, int wavemult_fact);
  311. extern void WavegenInitPkData(int); // initialise envelope data
  312. extern void SetPitch(int length, unsigned char *env, int pitch1, int pitch2);
  313. extern void SetSynthHtab(int length_mS, USHORT *ht1, int nh1, float pitch1, USHORT *ht2, int nh2, float pitch2);
  314. extern void MakeWaveFile(int synthesis_method);
  315. extern void MakeWaveBuf(int length, UCHAR *buf);
  // Wave file helpers; the int return values are not documented here.
  316. extern int OpenWaveFile(const char *path, int samplerate);
  317. extern int OpenWaveFile2(const char *path);
  318. extern void CloseWaveFile(int rate);
  319. extern void PlayWavFile(const char *fname);
  // Helpers for reading and writing integer values in wx controls.
  320. extern void SetSpinCtrl(wxSpinCtrl *t, int value);
  321. extern int GetNumeric(wxTextCtrl *t);
  322. extern void SetNumeric(wxTextCtrl *t, int value);
  323. extern int use_spin_controls;