mahta.fetrat
/
HomoFast-eSpeak-Persian


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
							/*
 * Copyright (C) 2012-2017 Reece H. Dunn
 *
 * This file is part of ucd-tools.
 *
 * ucd-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * ucd-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ucd-tools.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "ucd/ucd.h"

#include <locale.h>
#include <string.h>
#include <stdio.h>
#include <wchar.h>
#include <wctype.h>

void fput_utf8c(FILE *out, codepoint_t c)
{
	if (c < 0x80)
		fputc((uint8_t)c, out);
	else if (c < 0x800)
	{
		fputc(0xC0 | (c >> 6), out);
		fputc(0x80 + (c & 0x3F), out);
	}
	else if (c < 0x10000)
	{
		fputc(0xE0 | (c >> 12), out);
		fputc(0x80 + ((c >> 6) & 0x3F), out);
		fputc(0x80 + (c & 0x3F), out);
	}
	else if (c < 0x200000)
	{
		fputc(0xF0 | (c >> 18), out);
		fputc(0x80 + ((c >> 12) & 0x3F), out);
		fputc(0x80 + ((c >>  6) & 0x3F), out);
		fputc(0x80 + (c & 0x3F), out);
	}
}

int fget_utf8c(FILE *in, codepoint_t *c)
{
	int ch = EOF;
	if ((ch = fgetc(in)) == EOF) return 0;
	if ((uint8_t)ch < 0x80)
		*c = (uint8_t)ch;
	else switch ((uint8_t)ch & 0xF0)
	{
	default:
		*c = (uint8_t)ch & 0x1F;
		if ((ch = fgetc(in)) == EOF) return 0;
		*c = (*c << 6) + ((uint8_t)ch & 0x3F);
		break;
	case 0xE0:
		*c = (uint8_t)ch & 0x0F;
		if ((ch = fgetc(in)) == EOF) return 0;
		*c = (*c << 6) + ((uint8_t)ch & 0x3F);
		if ((ch = fgetc(in)) == EOF) return 0;
		*c = (*c << 6) + ((uint8_t)ch & 0x3F);
		break;
	case 0xF0:
		*c = (uint8_t)ch & 0x07;
		if ((ch = fgetc(in)) == EOF) return 0;
		*c = (*c << 6) + ((uint8_t)ch & 0x3F);
		if ((ch = fgetc(in)) == EOF) return 0;
		*c = (*c << 6) + ((uint8_t)ch & 0x3F);
		if ((ch = fgetc(in)) == EOF) return 0;
		*c = (*c << 6) + ((uint8_t)ch & 0x3F);
		break;
	}
	return 1;
}

void uprintf_codepoint(FILE *out, codepoint_t c, char mode)
{
	switch (mode)
	{
	case 'c': // character
		switch (c)
		{
		case '\t': fputs("\\t", out);  break;
		case '\r': fputs("\\r", out);  break;
		case '\n': fputs("\\n", out);  break;
		default:   fput_utf8c(out, c); break;
		}
		break;
	case 'h': // hexadecimal (lower)
		fprintf(out, "%06x", c);
		break;
	case 'H': // hexadecimal (upper)
		fprintf(out, "%06X", c);
		break;
	}
}

void uprintf_is(FILE *out, codepoint_t c, char mode)
{
	switch (mode)
	{
	case 'A': // alpha-numeric
		fputc(iswalnum(c) ? '1' : '0', out);
		break;
	case 'a': // alpha
		fputc(iswalpha(c) ? '1' : '0', out);
		break;
	case 'c': // control
		fputc(iswcntrl(c) ? '1' : '0', out);
		break;
	case 'd': // numeric
		fputc(iswdigit(c) ? '1' : '0', out);
		break;
	case 'g': // glyph
		fputc(iswgraph(c) ? '1' : '0', out);
		break;
	case 'l': // lower case
		fputc(iswlower(c) ? '1' : '0', out);
		break;
	case 'P': // printable
		fputc(iswprint(c) ? '1' : '0', out);
		break;
	case 'p': // punctuation
		fputc(iswpunct(c) ? '1' : '0', out);
		break;
	case 's': // whitespace
		fputc(iswspace(c) ? '1' : '0', out);
		break;
	case 'u': // upper case
		fputc(iswupper(c) ? '1' : '0', out);
		break;
	}
}

void uprintf(FILE *out, codepoint_t c, const char *format)
{
	while (*format) switch (*format)
	{
	case '%':
		switch (*++format)
		{
		case 'c': // category
			fputs(ucd_get_category_string(ucd_lookup_category(c)), out);
			break;
		case 'C': // category group
			fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out);
			break;
		case 'p': // codepoint
			uprintf_codepoint(out, c, *++format);
			break;
		case 'i': // is*
			uprintf_is(out, c, *++format);
			break;
		case 'L': // lowercase
			uprintf_codepoint(out, towlower(c), *++format);
			break;
		case 's': // script
			fputs(ucd_get_script_string(ucd_lookup_script(c)), out);
			break;
		case 'T': // titlecase
			uprintf_codepoint(out, ucd_totitle(c), *++format);
			break;
		case 'U': // uppercase
			uprintf_codepoint(out, towupper(c), *++format);
			break;
		case 'W': // whitespace
			if (iswspace(c))
				fputs("White_Space", out);
			break;
		}
		++format;
		break;
	case '\\':
		switch (*++format) {
		case 0:
			break;
		case 't':
			fputc('\t', out);
			++format;
			break;
		case 'r':
			fputc('\r', out);
			++format;
			break;
		case 'n':
			fputc('\n', out);
			++format;
			break;
		default:
			fputc(*format, out);
			++format;
			break;
		}
		break;
	default:
		fputc(*format, out);
		++format;
		break;
	}
}

void print_file(FILE *in, const char *format)
{
	codepoint_t c = 0;
	while (fget_utf8c(in, &c))
		uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%W\n");
}

int main(int argc, char **argv)
{
	FILE *in = NULL;
	const char *format = NULL;
	for (int argn = 1; argn != argc; ++argn)
	{
		const char *arg = argv[argn];
		if (!strcmp(arg, "--stdin") || !strcmp(arg, "-"))
			in = stdin;
		else if (!strncmp(arg, "--format=", 9))
			format = arg + 9;
		else if (!strncmp(arg, "--locale=", 9))
			setlocale(LC_CTYPE, arg + 9);
		else if (in == NULL)
		{
			in = fopen(arg, "r");
			if (!in)
				fprintf(stdout, "cannot open `%s`\n", argv[1]);
		}
	}

	if (in == stdin)
		print_file(stdin, format);
	else if (in != NULL)
	{
		print_file(in, format);
		fclose(in);
	}
	else
	{
		for (codepoint_t c = 0; c <= 0x10FFFF; ++c)
			uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %W\n");
	}
	return 0;
}