eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

spect.h 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
/***************************************************************************
 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
 *   email: [email protected]                                              *
 *   Copyright (C) 2013 by Reece H. Dunn                                   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, see:                                 *
 *   <http://www.gnu.org/licenses/>.                                       *
 ***************************************************************************/
  20. #include "wx/spinctrl.h"
  21. #include "wx/notebook.h"
  22. #define PROGRAM_NAME _T("Voice Editor")
  23. #define FRAME_WIDTH 1000 // max width for 8000kHz frame
  24. #define MAX_DISPLAY_FREQ 9500
  25. #define FRAME_HEIGHT 240
  26. #define T_ZOOMOUT 301
  27. #define T_ZOOMIN 302
  28. #define T_USEPITCHENV 303
  29. #define T_SAMPRATE 304
  30. #define T_PITCH1 305
  31. #define T_PITCH2 306
  32. #define T_DURATION 307
  33. #define T_AMPLITUDE 308
  34. #define T_AMPFRAME 309
  35. #define T_TIMEFRAME 310
  36. #define T_TIMESEQ 311
  37. #define T_AV 312
  38. #define T_AVP 313
  39. #define T_FRIC 314
  40. #define T_FRICBP 315
  41. #define T_ASPR 316
  42. #define T_TURB 317
  43. #define T_SKEW 318
  44. #define T_TILT 319
  45. #define T_KOPEN 320
  46. #define T_FNZ 321
  47. #define FILEID1_SPECTSEQ 0x43455053
  48. #define FILEID2_SPECTSEQ 0x51455354 // for eSpeak sequence
  49. #define FILEID2_SPECTSEK 0x4b455354 // for Klatt sequence
  50. #define FILEID2_SPECTSQ2 0x32515354 // with Klatt data
  51. #define FILEID1_SPC2 0x32435053 // an old format for spectrum files
  52. #define FILEID1_PITCHENV 0x43544950
  53. #define FILEID2_PITCHENV 0x564e4548
  54. #define FILEID1_PRAATSEQ 0x41415250
  55. #define FILEID2_PRAATSEQ 0x51455354
  56. class MyFrame;
  57. typedef struct {
  58. unsigned short pitch1;
  59. unsigned short pitch2;
  60. unsigned char env[128];
  61. } PitchEnvelope;
  62. typedef struct {
  63. short freq;
  64. short bandw;
  65. } formant_t;
  66. typedef struct {
  67. short pkfreq;
  68. short pkheight;
  69. short pkwidth;
  70. short pkright;
  71. short klt_bw;
  72. short klt_ap;
  73. short klt_bp;
  74. } peak_t;
  75. //===============================================================================================
  76. // Old "SPC2" format
  77. typedef struct {
  78. unsigned char freq; /* *factor for f0-f7 of [4,5,11,20,20,25,32,32] */
  79. unsigned char height;
  80. unsigned char width_l; /* * 8 Hz */
  81. unsigned char width_r; /* * 8 Hz */
  82. } PEAKS7;
  83. /* flags: bit 0 label is present in data[]
  84. bit 1 mark cycle - blue background
  85. bit 2 mark cycle - green square
  86. bit 3 mark cycle - 'lengthen' indicator
  87. bit 4 mark cycle - 'standard'
  88. bit 7 hide peaks
  89. */
  90. typedef struct {
  91. short pitch; /* pitch of fundamental, 1/16 Hz */
  92. short length; /* length in 1/15625 sec */
  93. unsigned char n_harm; /* number of harmonic data, h1 ... hn */
  94. unsigned char flags; /* bits 0 */
  95. unsigned char aspiration;
  96. unsigned char hf_voicing;
  97. unsigned char spare1;
  98. unsigned char spare2;
  99. unsigned char spare3;
  100. unsigned char spare4;
  101. PEAKS7 peak_data[7];
  102. unsigned char data[1]; /* firstly harmonic data, then f3,f4,f5 data */
  103. } CYCLE; /* VARIABLE LENGTH Structure */
  104. extern int SPC2_size_cycle(CYCLE *cy);
  105. //==============================================================================================
  106. class SpectFrame
  107. {//=============
  108. public:
  109. SpectFrame(SpectFrame *copy=NULL);
  110. ~SpectFrame();
  111. int Import(wxInputStream &stream);
  112. int ImportSPC2(wxInputStream &stream, float &time_acc);
  113. int Load(wxInputStream &stream, int file_format_type);
  114. int Save(wxOutputStream &stream, int file_format_type);
  115. void Draw(wxDC &dc, int offy, int frame_width, double scalex, double scaley);
  116. void ZeroPeaks();
  117. void CopyPeaks(SpectFrame *sf);
  118. void ToggleMarker(int n);
  119. void ApplyVoiceMods();
  120. void MakeWaveF(int peaks, PitchEnvelope &pitch, int amplitude, int duration);
  121. void MakeHtab(int numh, int *htab, int pitch);
  122. double GetRms(int amp);
  123. void KlattDefaults();
  124. int selected;
  125. int keyframe;
  126. int amp_adjust;
  127. float length_adjust;
  128. double rms;
  129. float time;
  130. float pitch;
  131. float length;
  132. float dx;
  133. int nx;
  134. int markers;
  135. int max_y;
  136. USHORT *spect; // sqrt of harmonic amplitudes, 1-nx at 'pitch'
  137. short klatt_param[N_KLATTP2];
  138. formant_t formants[N_PEAKS]; // this is just the estimate given by Praat
  139. peak_t peaks[N_PEAKS];
  140. private:
  141. void DrawPeaks(wxDC *dc, int offy, int frame_width, int seq_amplitude, double scalex);
  142. wxFont FONT_SMALL;
  143. wxFont FONT_MEDIUM;
  144. };
  145. class SpectSeq
  146. {//===========
  147. public:
  148. SpectSeq(int nframes=0);
  149. ~SpectSeq();
  150. int Import(wxInputStream& stream);
  151. int ImportSPC2(wxInputStream& stream);
  152. int Save(wxOutputStream& stream, int selection);
  153. int Load(wxInputStream& stream);
  154. void Draw(wxDC &dc, int start_y, int end_y);
  155. void MakePitchenv(PitchEnvelope &pitch, int start_frame, int end_frame);
  156. void MakeWave(int start, int end, PitchEnvelope &pitch);
  157. void InterpolatePeaks(int on);
  158. void InterpolateAdjacent(void);
  159. void CopyDown(int frame, int direction);
  160. void SelectAll(int yes);
  161. int CountSelected();
  162. void DeleteSelected();
  163. void ClipboardCopy();
  164. int ClipboardInsert(int insert_at);
  165. float GetFrameLength(int frame, int plus, int *original_mS);
  166. float GetKeyedLength();
  167. void SetFrameLengths();
  168. void ConstructVowel(void);
  169. int numframes;
  170. int amplitude;
  171. int spare;
  172. wxString name;
  173. SpectFrame **frames;
  174. PitchEnvelope pitchenv;
  175. int pitch1;
  176. int pitch2;
  177. int duration;
  178. int grid;
  179. int bass_reduction;
  180. int max_x;
  181. int max_y;
  182. int file_format;
  183. private:
  184. void Load2(wxInputStream& stream, int import, int n);
  185. void InterpolatePeak(int peak);
  186. void ApplyAmp_adjust(SpectFrame *sp, peak_t *peaks);
  187. double scalex;
  188. double scaley;
  189. };
  190. class SpectDisplay: public wxScrolledWindow
  191. {//========================================
  192. public:
  193. SpectDisplay(wxWindow *parent, const wxPoint& pos, const wxSize& size, SpectSeq *spect);
  194. ~SpectDisplay();
  195. virtual void OnDraw(wxDC& dc);
  196. void OnMouse(wxMouseEvent& event);
  197. void OnKey(wxKeyEvent& event);
  198. void OnActivate(int active);
  199. void OnMenu(wxCommandEvent& event);
  200. void Save(const wxString &path=_T(""),int selection=0);
  201. void SavePitchenv(PitchEnvelope &pitch);
  202. void OnZoom(int command);
  203. SpectSeq *spectseq;
  204. float zoomx;
  205. float zoomy;
  206. int zoom;
  207. wxString savepath;
  208. void ReadDialogValues();
  209. void WriteDialogValues();
  210. void WriteDialogLength();
  211. void RefreshDialogValues(int type);
  212. int ScrollToFrame(int frame, int centre);
  213. void SelectFrame(int frame);
  214. private:
  215. void RefreshFrame(int frame);
  216. void SetKeyframe(SpectFrame *sf, int yes);
  217. void PlayChild(int number, PitchEnvelope pitchenv);
  218. void SetExtent();
  219. int sframe;
  220. int pk_num;
  221. DECLARE_EVENT_TABLE()
  222. };
  223. class ByteGraph: public wxScrolledWindow
  224. {//=====================================
  225. public:
  226. ByteGraph(wxWindow *parent, const wxPoint& pos, const wxSize &size);
  227. virtual void OnDraw(wxDC &dc);
  228. void SetData(int nx, unsigned char *data);
  229. void ShowSpectrum(int yes);
  230. private:
  231. int npoints;
  232. unsigned char *graph;
  233. int show_spectrum;
  234. double spectrum_scale;
  235. void DrawSpectrum(wxDC &dc);
  236. void OnMouse(wxMouseEvent& event);
  237. DECLARE_EVENT_TABLE()
  238. };
  239. class FormantDlg : public wxPanel
  240. {//==============================
  241. public:
  242. FormantDlg(wxWindow *parent);
  243. void ShowFrame(SpectSeq *spectseq, int frame, int pk, int field);
  244. void GetValues(SpectSeq *spectseq, int frame);
  245. void OnCommand(wxCommandEvent& event);
  246. void OnSpin(wxSpinEvent& event);
  247. void HideFields(int synth_type);
  248. wxCheckBox *usepitchenv;
  249. wxSpinCtrl *t_amplitude;
  250. wxSpinCtrl *t_ampframe;
  251. wxSpinCtrl *t_timeframe;
  252. wxTextCtrl *tt_timeframe;
  253. wxSpinCtrl *t_timeseq;
  254. wxStaticText *t_pitch;
  255. wxStaticText *t_orig_frame;
  256. wxStaticText *t_orig_seq;
  257. wxSpinCtrl *s_klatt[N_KLATTP];
  258. wxStaticText *t_klatt[N_KLATTP];
  259. private:
  260. wxStaticText *t_lab[10];
  261. wxStaticText *t_labpk[N_PEAKS];
  262. wxTextCtrl *t_pkfreq[N_PEAKS];
  263. wxTextCtrl *t_pkheight[N_PEAKS];
  264. wxTextCtrl *t_pkwidth[N_PEAKS];
  265. wxTextCtrl *t_klt_bw[N_PEAKS];
  266. wxTextCtrl *t_klt_ap[N_PEAKS];
  267. wxTextCtrl *t_klt_bp[N_PEAKS];
  268. wxRadioButton *t_select_peak[N_PEAKS];
  269. wxButton *t_zoomout;
  270. wxButton *t_zoomin;
  271. DECLARE_EVENT_TABLE()
  272. };
  273. class VoiceDlg : public wxPanel
  274. {//=============================
  275. public:
  276. VoiceDlg(wxWindow *parent);
  277. void ReadParams();
  278. void WriteParams();
  279. void Save();
  280. void Load();
  281. void SetFromSpect(SpectSeq *spect);
  282. voice_t *voice_p;
  283. PitchEnvelope pitchenv;
  284. wxSpinCtrl* vd_pitch1;
  285. wxSpinCtrl* vd_pitch2;
  286. wxTextCtrl* vd_duration;
  287. private:
  288. wxButton *vd_defaults;
  289. wxButton *vd_load;
  290. wxButton *vd_save;
  291. wxStaticText* vd_labpk[N_PEAKS+1];
  292. wxSpinCtrl* vd[N_PEAKS+1][3];
  293. wxCheckBox* vd_usepitch;
  294. wxStaticText* vd_lab[3];
  295. ByteGraph *vd_pitchgraph;
  296. wxComboBox *vd_pitchchoice;
  297. int pitch1;
  298. int pitch2;
  299. void OnCommand(wxCommandEvent& event);
  300. void OnSpin(wxSpinEvent& event);
  301. DECLARE_EVENT_TABLE()
  302. };
  303. extern SpectSeq *clipboard_spect;
  304. extern int pk_select;
  305. extern int samplerate;
  306. extern unsigned char env_fall[];
  307. extern FormantDlg* formantdlg;
  308. extern VoiceDlg* voicedlg;
  309. extern wxNotebook* notebook;
  310. extern ByteGraph* pitchgraph;
  311. extern SpectDisplay *currentcanvas;
  312. extern float polint(float xa[],float ya[],int n,float x);
  313. extern void WavegenInit(int samplerate, int wavemult_fact);
  314. extern void WavegenInitPkData(int); // initialise envelope data
  315. extern void SetPitch(int length, unsigned char *env, int pitch1, int pitch2);
  316. extern void SetSynthHtab(int length_mS, USHORT *ht1, int nh1, float pitch1, USHORT *ht2, int nh2, float pitch2);
  317. extern void MakeWaveFile(int synthesis_method);
  318. extern void MakeWaveBuf(int length, UCHAR *buf);
  319. extern int OpenWaveFile(const char *path, int samplerate);
  320. extern int OpenWaveFile2(const char *path);
  321. extern void CloseWaveFile(int rate);
  322. extern void PlayWavFile(const char *fname);
  323. extern void SetSpinCtrl(wxSpinCtrl *t, int value);
  324. extern int GetNumeric(wxTextCtrl *t);
  325. extern void SetNumeric(wxTextCtrl *t, int value);
  326. extern int use_spin_controls;