mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-29 11:40:38 +00:00
713f74f4f9
Import the code from gst-plugins-rs (origin is https://github.com/szatmary/libcaption) Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/4929>
261 lines
7 KiB
C
261 lines
7 KiB
C
/**********************************************************************************************/
|
|
/* The MIT License */
|
|
/* */
|
|
/* Copyright 2016-2017 Twitch Interactive, Inc. or its affiliates. All Rights Reserved. */
|
|
/* */
|
|
/* Permission is hereby granted, free of charge, to any person obtaining a copy */
|
|
/* of this software and associated documentation files (the "Software"), to deal */
|
|
/* in the Software without restriction, including without limitation the rights */
|
|
/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */
|
|
/* copies of the Software, and to permit persons to whom the Software is */
|
|
/* furnished to do so, subject to the following conditions: */
|
|
/* */
|
|
/* The above copyright notice and this permission notice shall be included in */
|
|
/* all copies or substantial portions of the Software. */
|
|
/* */
|
|
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */
|
|
/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */
|
|
/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */
|
|
/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */
|
|
/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */
|
|
/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN */
|
|
/* THE SOFTWARE. */
|
|
/**********************************************************************************************/
|
|
|
|
#include "utf8.h"
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
// Returns a pointer to the character that follows c.
// Returns 0 (a null pointer) when utf8_char_length () reports a length of
// zero for c, i.e. when there is nothing to step over.
const utf8_char_t *
utf8_char_next (const utf8_char_t * c)
{
  const size_t step = utf8_char_length (c);

  if (0 == step) {
    return 0;
  }

  return c + step;
}
|
|
|
|
// returnes the length of the char in bytes
|
|
size_t
|
|
utf8_char_length (const utf8_char_t * c)
|
|
{
|
|
// count null term as zero size
|
|
if (!c || 0x00 == c[0]) {
|
|
return 0;
|
|
}
|
|
|
|
static const size_t _utf8_char_length[] = {
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2,
|
|
2, 2, 2, 3, 3, 4, 0
|
|
};
|
|
|
|
return _utf8_char_length[(c[0] >> 3) & 0x1F];
|
|
}
|
|
|
|
int
|
|
utf8_char_whitespace (const utf8_char_t * c)
|
|
{
|
|
// 0x7F is DEL
|
|
if (!c || (c[0] >= 0 && c[0] <= ' ') || c[0] == 0x7F) {
|
|
return 1;
|
|
}
|
|
// EIA608_CHAR_NO_BREAK_SPACE TODO other utf8 spaces
|
|
if (0xC2 == (unsigned char) c[0] && 0xA0 == (unsigned char) c[1]) {
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// returns length of the string in bytes
|
|
// size is number of charcter to count (0 to count until NULL term)
|
|
size_t
|
|
utf8_string_length (const utf8_char_t * data, utf8_size_t size)
|
|
{
|
|
size_t char_length, byts = 0;
|
|
|
|
if (0 == size) {
|
|
size = utf8_char_count (data, 0);
|
|
}
|
|
|
|
for (; 0 < size; --size) {
|
|
if (0 == (char_length = utf8_char_length (data))) {
|
|
break;
|
|
}
|
|
|
|
data += char_length;
|
|
byts += char_length;
|
|
}
|
|
|
|
return byts;
|
|
}
|
|
|
|
size_t
|
|
utf8_char_copy (utf8_char_t * dst, const utf8_char_t * src)
|
|
{
|
|
size_t bytes = utf8_char_length (src);
|
|
|
|
if (bytes && dst) {
|
|
memcpy (dst, src, bytes);
|
|
dst[bytes] = '\0';
|
|
}
|
|
|
|
return bytes;
|
|
}
|
|
|
|
// returnes the number of utf8 charcters in a string given the number of bytes
|
|
// to count until the a null terminator, pass 0 for size
|
|
utf8_size_t
|
|
utf8_char_count (const char *data, size_t size)
|
|
{
|
|
size_t i, bytes = 0;
|
|
utf8_size_t count = 0;
|
|
|
|
if (0 == size) {
|
|
size = strlen (data);
|
|
}
|
|
|
|
for (i = 0; i < size; ++count, i += bytes) {
|
|
if (0 == (bytes = utf8_char_length (&data[i]))) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
return count;
|
|
}
|
|
|
|
// returns the length of the line in bytes triming not printable charcters at the end
|
|
size_t
|
|
utf8_trimmed_length (const utf8_char_t * data, utf8_size_t charcters)
|
|
{
|
|
size_t l, t = 0, split_at = 0;
|
|
for (size_t c = 0; (*data) && c < charcters; ++c) {
|
|
l = utf8_char_length (data);
|
|
if (!utf8_char_whitespace (data)) {
|
|
split_at = t + l;
|
|
}
|
|
t += l, data += l;
|
|
}
|
|
|
|
return split_at;
|
|
}
|
|
|
|
size_t
|
|
_utf8_newline (const utf8_char_t * data)
|
|
{
|
|
if ('\r' == data[0]) {
|
|
return '\n' == data[1] ? 2 : 1; // windows/unix
|
|
} else if ('\n' == data[0]) {
|
|
return '\r' == data[1] ? 2 : 1; // riscos/macos
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
// returns the length in bytes of the line including the new line charcter(s)
|
|
// auto detects between windows(CRLF), unix(LF), mac(CR) and riscos (LFCR) line endings
|
|
size_t
|
|
utf8_line_length (const utf8_char_t * data)
|
|
{
|
|
size_t n, len = 0;
|
|
|
|
for (len = 0; 0 != data[len]; ++len) {
|
|
if (0 < (n = _utf8_newline (data))) {
|
|
return len + n;
|
|
}
|
|
|
|
data += utf8_char_length (data);
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
// returns number of chars to include before split
|
|
utf8_size_t
|
|
utf8_wrap_length (const utf8_char_t * data, utf8_size_t size)
|
|
{
|
|
// Set split_at to size, so if a split point cna not be found, retuns the size passed in
|
|
size_t char_length, char_count, split_at = size;
|
|
|
|
for (char_count = 0; char_count <= size; ++char_count) {
|
|
if (_utf8_newline (data)) {
|
|
return char_count;
|
|
} else if (utf8_char_whitespace (data)) {
|
|
split_at = char_count;
|
|
}
|
|
|
|
char_length = utf8_char_length (data);
|
|
data += char_length;
|
|
}
|
|
|
|
return split_at;
|
|
}
|
|
|
|
int
|
|
utf8_line_count (const utf8_char_t * data)
|
|
{
|
|
size_t len = 0;
|
|
int count = 0;
|
|
|
|
do {
|
|
len = utf8_line_length (data);
|
|
data += len;
|
|
++count;
|
|
} while (0 < len);
|
|
|
|
return count - 1;
|
|
}
|
|
|
|
utf8_char_t *
|
|
utf8_load_text_file (const char *path, size_t * size)
|
|
{
|
|
utf8_char_t *data = NULL;
|
|
FILE *file = fopen (path, "r");
|
|
|
|
if (file) {
|
|
fseek (file, 0, SEEK_END);
|
|
size_t file_size = ftell (file);
|
|
fseek (file, 0, SEEK_SET);
|
|
|
|
if (0 == (*size) || file_size <= (*size)) {
|
|
(*size) = 0;
|
|
data = (utf8_char_t *) malloc (1 + file_size);
|
|
memset (data, '\0', file_size);
|
|
|
|
if (data) {
|
|
utf8_char_t *pos = data;
|
|
size_t bytes_read = 0;
|
|
|
|
while (0 < (bytes_read = fread (pos, 1, file_size - (*size), file))) {
|
|
pos += bytes_read;
|
|
(*size) += bytes_read;
|
|
}
|
|
}
|
|
|
|
fclose (file);
|
|
}
|
|
}
|
|
|
|
data[*size] = 0;
|
|
return data;
|
|
}
|
|
|
|
#ifndef strnstr
// Fallback for platforms without strnstr ().
// Locates the first occurrence of the NUL terminated string2 in string1,
// searching no more than len bytes of string1. Bytes that follow a NUL
// terminator in string1 are not searched.
// Returns a pointer to the match, string1 itself when string2 is empty, or
// NULL when there is no match.
char *
strnstr (const char *string1, const char *string2, size_t len)
{
  size_t length2, avail;

  length2 = strlen (string2);
  if (!length2) {
    return (char *) string1;
  }

  // Clamp the search window to the terminator of string1, so that memcmp
  // below never reads past the end of a string shorter than len.
  for (avail = 0; avail < len && '\0' != string1[avail]; ++avail) {
  }

  for (; avail >= length2; --avail, ++string1) {
    if (!memcmp (string1, string2, length2)) {
      return (char *) string1;
    }
  }

  return NULL;
}
#endif
|