Blender  V3.3
string_utf8.c
Go to the documentation of this file.
1 /* SPDX-License-Identifier: GPL-2.0-or-later
2  * Copyright 2011 Blender Foundation.
3  * Code from gutf8.c Copyright 1999 Tom Tromey
4  * Copyright 2000 Red Hat, Inc.
5  * All rights reserved. */
6 
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <wchar.h>
15 #include <wctype.h>
16 #include <wcwidth.h>
17 
18 #include "BLI_utildefines.h"
19 
20 #include "BLI_string_utf8.h" /* own include */
21 #ifdef WIN32
22 # include "utfconv.h"
23 #endif
24 #ifdef __GNUC__
25 # pragma GCC diagnostic error "-Wsign-conversion"
26 #endif
27 
28 // #define DEBUG_STRSIZE
29 
/**
 * Sequence length in bytes, looked up by the value of a sequence's first byte.
 *
 * Continuation bytes (0x80..0xBF) and the bytes 0xFE / 0xFF map to 1.
 */
static const size_t utf8_skip_data[256] = {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1,
};
45 
46 ptrdiff_t BLI_str_utf8_invalid_byte(const char *str, size_t length)
47 {
48  /* NOTE(@campbellbarton): from libswish3, originally called u8_isvalid(),
49  * modified to return the index of the bad character (byte index not UTF).
50  * http://svn.swish-e.org/libswish3/trunk/src/libswish3/utf8.c r3044.
51  *
52  * Comment from code in: `libswish3`.
53  * Based on the `valid_utf8` routine from the PCRE library by Philip Hazel
54  *
55  * length is in bytes, since without knowing whether the string is valid
56  * it's hard to know how many characters there are! */
57 
58  const unsigned char *p, *perr, *pend = (const unsigned char *)str + length;
59  unsigned char c;
60  int ab;
61 
62  for (p = (const unsigned char *)str; p < pend; p++, length--) {
63  c = *p;
64  perr = p; /* Erroneous char is always the first of an invalid utf8 sequence... */
65  if (ELEM(c, 0xfe, 0xff, 0x00)) {
66  /* Those three values are not allowed in utf8 string. */
67  goto utf8_error;
68  }
69  if (c < 128) {
70  continue;
71  }
72  if ((c & 0xc0) != 0xc0) {
73  goto utf8_error;
74  }
75 
76  /* Note that since we always increase p (and decrease length) by one byte in main loop,
77  * we only add/subtract extra utf8 bytes in code below
78  * (ab number, aka number of bytes remaining in the utf8 sequence after the initial one). */
79  ab = (int)utf8_skip_data[c] - 1;
80  if (length <= ab) {
81  goto utf8_error;
82  }
83 
84  /* Check top bits in the second byte */
85  p++;
86  length--;
87  if ((*p & 0xc0) != 0x80) {
88  goto utf8_error;
89  }
90 
91  /* Check for overlong sequences for each different length */
92  switch (ab) {
93  case 1:
94  /* Check for xx00 000x */
95  if ((c & 0x3e) == 0) {
96  goto utf8_error;
97  }
98  continue; /* We know there aren't any more bytes to check */
99 
100  case 2:
101  /* Check for 1110 0000, xx0x xxxx */
102  if (c == 0xe0 && (*p & 0x20) == 0) {
103  goto utf8_error;
104  }
105  /* Some special cases, see section 5 of utf-8 decoder stress-test by Markus Kuhn
106  * (https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt). */
107  /* From section 5.1 (and 5.2) */
108  if (c == 0xed) {
109  if (*p == 0xa0 && *(p + 1) == 0x80) {
110  goto utf8_error;
111  }
112  if (*p == 0xad && *(p + 1) == 0xbf) {
113  goto utf8_error;
114  }
115  if (*p == 0xae && *(p + 1) == 0x80) {
116  goto utf8_error;
117  }
118  if (*p == 0xaf && *(p + 1) == 0xbf) {
119  goto utf8_error;
120  }
121  if (*p == 0xb0 && *(p + 1) == 0x80) {
122  goto utf8_error;
123  }
124  if (*p == 0xbe && *(p + 1) == 0x80) {
125  goto utf8_error;
126  }
127  if (*p == 0xbf && *(p + 1) == 0xbf) {
128  goto utf8_error;
129  }
130  }
131  /* From section 5.3 */
132  if (c == 0xef) {
133  if (*p == 0xbf && *(p + 1) == 0xbe) {
134  goto utf8_error;
135  }
136  if (*p == 0xbf && *(p + 1) == 0xbf) {
137  goto utf8_error;
138  }
139  }
140  break;
141 
142  case 3:
143  /* Check for 1111 0000, xx00 xxxx */
144  if (c == 0xf0 && (*p & 0x30) == 0) {
145  goto utf8_error;
146  }
147  break;
148 
149  case 4:
150  /* Check for 1111 1000, xx00 0xxx */
151  if (c == 0xf8 && (*p & 0x38) == 0) {
152  goto utf8_error;
153  }
154  break;
155 
156  case 5:
157  /* Check for 1111 1100, xx00 00xx */
158  if (c == 0xfc && (*p & 0x3c) == 0) {
159  goto utf8_error;
160  }
161  break;
162  }
163 
164  /* Check for valid bytes after the 2nd, if any; all must start 10 */
165  while (--ab > 0) {
166  p++;
167  length--;
168  if ((*p & 0xc0) != 0x80) {
169  goto utf8_error;
170  }
171  }
172  }
173 
174  return -1;
175 
176 utf8_error:
177 
178  return ((const char *)perr - (const char *)str);
179 }
180 
/**
 * Remove invalid UTF-8 bytes from \a str in place.
 *
 * Each pass asks #BLI_str_utf8_invalid_byte for the next offending byte, removes that
 * single byte by shifting the tail of the string down, then continues scanning from the
 * same position.
 *
 * \param str: String to clean, must be NUL terminated at `str[length]`.
 * \param length: Length of \a str in bytes, excluding the terminator.
 * \return The number of bytes that were removed.
 */
int BLI_str_utf8_invalid_strip(char *str, size_t length)
{
  ptrdiff_t bad_char;
  int tot = 0;

  BLI_assert(str[length] == '\0');

  while ((bad_char = BLI_str_utf8_invalid_byte(str, length)) != -1) {
    /* Continue from the bad byte, `length` becomes the byte count that follows it. */
    str += bad_char;
    length -= (size_t)(bad_char + 1);

    if (length == 0) {
      /* The bad byte is the last one: truncate here. */
      *str = '\0';
      tot++;
      break;
    }
    /* Strip the byte and keep looking, +1 to also move the NUL terminator. */
    memmove(str, str + 1, length + 1);
    tot++;
  }

  return tot;
}
205 
/**
 * Copy whole UTF-8 sequences from `src` to `dst`, then NUL terminate `dst`.
 *
 * A sequence is only copied while its byte count (looked up from its first byte) is
 * strictly less than the remaining `maxncpy`, so there is always room for the terminator.
 *
 * The lead byte is converted to `unsigned char` before indexing #utf8_skip_data:
 * where `char` is signed, bytes >= 0x80 would otherwise be negative table indices.
 *
 * NOTE: all three arguments are modified in the caller's scope
 * (`dst` & `src` are advanced, `maxncpy` is reduced by the bytes copied).
 * NOTE: the byte count comes from the lead byte only, continuation bytes are not checked.
 */
#define BLI_STR_UTF8_CPY(dst, src, maxncpy) \
  { \
    size_t utf8_size; \
    while (*src != '\0' && (utf8_size = utf8_skip_data[(unsigned char)*src]) < maxncpy) { \
      maxncpy -= utf8_size; \
      switch (utf8_size) { \
        case 6: \
          *dst++ = *src++; \
          ATTR_FALLTHROUGH; \
        case 5: \
          *dst++ = *src++; \
          ATTR_FALLTHROUGH; \
        case 4: \
          *dst++ = *src++; \
          ATTR_FALLTHROUGH; \
        case 3: \
          *dst++ = *src++; \
          ATTR_FALLTHROUGH; \
        case 2: \
          *dst++ = *src++; \
          ATTR_FALLTHROUGH; \
        case 1: \
          *dst++ = *src++; \
      } \
    } \
    *dst = '\0'; \
  } \
  (void)0
235 
/**
 * Copy \a src into \a dst without splitting a multi-byte sequence, always NUL terminating.
 *
 * \param dst: Destination buffer.
 * \param src: NUL terminated source string.
 * \param maxncpy: Size of \a dst in bytes, must not be zero.
 * \return \a dst (the start of the destination buffer).
 */
char *BLI_strncpy_utf8(char *__restrict dst, const char *__restrict src, size_t maxncpy)
{
  BLI_assert(maxncpy != 0);

  /* The copy macro advances `dst`, keep the start for the return value. */
  char *const dst_start = dst;

#ifdef DEBUG_STRSIZE
  memset(dst, 0xff, sizeof(*dst) * maxncpy);
#endif

  /* NOTE: invalid UTF-8 in `src` is currently not handled in any special way. */
  BLI_STR_UTF8_CPY(dst, src, maxncpy);

  return dst_start;
}
251 
/**
 * Same copy as #BLI_strncpy_utf8, returning the number of bytes written instead.
 *
 * \param dst: Destination buffer.
 * \param src: NUL terminated source string.
 * \param maxncpy: Size of \a dst in bytes, must not be zero.
 * \return The number of bytes copied into \a dst, excluding the NUL terminator.
 */
size_t BLI_strncpy_utf8_rlen(char *__restrict dst, const char *__restrict src, size_t maxncpy)
{
  BLI_assert(maxncpy != 0);

  /* The copy macro advances `dst`, the distance it moved is the copied length. */
  const char *const dst_start = dst;

#ifdef DEBUG_STRSIZE
  memset(dst, 0xff, sizeof(*dst) * maxncpy);
#endif

  /* NOTE: invalid UTF-8 in `src` is currently not handled in any special way. */
  BLI_STR_UTF8_CPY(dst, src, maxncpy);

  const size_t copied = (size_t)(dst - dst_start);
  return copied;
}
267 
268 #undef BLI_STR_UTF8_CPY
269 
270 /* --------------------------------------------------------------------------*/
271 /* wchar_t / utf8 functions */
272 
273 size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst,
274  const wchar_t *__restrict src,
275  const size_t maxncpy)
276 {
277  BLI_assert(maxncpy != 0);
278  size_t len = 0;
279 #ifdef DEBUG_STRSIZE
280  memset(dst, 0xff, sizeof(*dst) * maxncpy);
281 #endif
282  while (*src && len < maxncpy) {
283  len += BLI_str_utf8_from_unicode((uint)*src++, dst + len, maxncpy - len);
284  }
285  dst[len] = '\0';
286  /* Return the correct length when part of the final byte did not fit into the string. */
287  while ((len > 0) && UNLIKELY(dst[len - 1] == '\0')) {
288  len--;
289  }
290  return len;
291 }
292 
293 size_t BLI_wstrlen_utf8(const wchar_t *src)
294 {
295  size_t len = 0;
296 
297  while (*src) {
299  }
300 
301  return len;
302 }
303 
/**
 * Count the characters in the NUL terminated string \a strc.
 *
 * Each step advances by #BLI_str_utf8_size_safe of the current byte.
 *
 * \param strc: NUL terminated string.
 * \param r_len_bytes: Receives the number of bytes stepped over.
 * \return The number of steps taken (the character count).
 */
size_t BLI_strlen_utf8_ex(const char *strc, size_t *r_len_bytes)
{
  const char *cursor = strc;
  size_t count = 0;

  while (*cursor) {
    cursor += BLI_str_utf8_size_safe(cursor);
    count++;
  }

  *r_len_bytes = (size_t)(cursor - strc);
  return count;
}
316 
/**
 * Count the characters in the NUL terminated string \a strc.
 *
 * Convenience wrapper for #BLI_strlen_utf8_ex that discards the byte length.
 *
 * \return The character count.
 */
size_t BLI_strlen_utf8(const char *strc)
{
  size_t len_bytes_unused;
  const size_t len = BLI_strlen_utf8_ex(strc, &len_bytes_unused);
  return len;
}
322 
/**
 * Count the characters in \a strc, looking at no more than \a maxlen bytes.
 *
 * Counting stops at the NUL terminator, or before the first character that would extend
 * beyond \a maxlen bytes.
 *
 * \param strc: String to measure.
 * \param maxlen: Maximum number of bytes to consider.
 * \param r_len_bytes: Receives the number of bytes covered by the counted characters.
 * \return The number of whole characters within the limit.
 */
size_t BLI_strnlen_utf8_ex(const char *strc, const size_t maxlen, size_t *r_len_bytes)
{
  size_t len = 0;
  size_t len_bytes = 0;

  /* Test the limit before reading so the byte at `strc[maxlen]` is never accessed,
   * and track an offset instead of forming pointers beyond the limit. */
  while ((len_bytes < maxlen) && (strc[len_bytes] != '\0')) {
    const size_t step = (size_t)BLI_str_utf8_size_safe(strc + len_bytes);
    if (step > maxlen - len_bytes) {
      /* This character does not fit in the remaining bytes. */
      break;
    }
    len_bytes += step;
    len++;
  }

  *r_len_bytes = len_bytes;
  return len;
}
341 
/**
 * Count the characters in \a strc, looking at no more than \a maxlen bytes.
 *
 * Convenience wrapper for #BLI_strnlen_utf8_ex that discards the byte length.
 *
 * \return The character count.
 */
size_t BLI_strnlen_utf8(const char *strc, const size_t maxlen)
{
  size_t len_bytes_unused;
  const size_t len = BLI_strnlen_utf8_ex(strc, maxlen, &len_bytes_unused);
  return len;
}
347 
/**
 * Convert the UTF-8 string \a src_c into the wide string \a dst_w.
 *
 * On WIN32 the conversion is done by `conv_utf_8_to_16`,
 * elsewhere the destination is filled by #BLI_str_utf8_as_utf32.
 *
 * \param dst_w: Destination wide string buffer.
 * \param src_c: NUL terminated UTF-8 string.
 * \param maxncpy: Size of \a dst_w, passed through to the converter.
 * \return The length of the resulting wide string.
 */
size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w,
                                   const char *__restrict src_c,
                                   const size_t maxncpy)
{
#ifndef WIN32
  return BLI_str_utf8_as_utf32((char32_t *)dst_w, src_c, maxncpy);
#else
  conv_utf_8_to_16(src_c, dst_w, maxncpy);
  /* NOTE: calculating the length as part of #conv_utf_8_to_16 would be more efficient. */
  const size_t len_w = wcslen(dst_w);
  return len_w;
#endif
}
360 
361 /* end wchar_t / utf8 functions */
362 /* --------------------------------------------------------------------------*/
363 
/**
 * Column width of the code-point \a ucs.
 *
 * Private use areas (icon fonts), symbols and emoticons are treated as double-width,
 * everything else is measured by `mk_wcwidth`.
 *
 * NOTE(review): the upper bounds below are exclusive, so 0xf8ff and 0x1fbff themselves
 * are passed on to `mk_wcwidth` - confirm this is intended.
 */
int BLI_wcwidth(char32_t ucs)
{
  if (ucs >= 0xf0000) {
    return 2;
  }
  if (ucs >= 0xe000 && ucs < 0xf8ff) {
    return 2;
  }
  if (ucs >= 0x1f300 && ucs < 0x1fbff) {
    return 2;
  }
  return mk_wcwidth(ucs);
}
372 
/**
 * Column width of the string \a pwcs, forwarded unchanged to `mk_wcswidth`.
 *
 * \param pwcs: UTF-32 string to measure.
 * \param n: Passed through to `mk_wcswidth` as its length argument.
 * \return The value returned by `mk_wcswidth`.
 */
int BLI_wcswidth(const char32_t *pwcs, size_t n)
{
  const int columns = mk_wcswidth(pwcs, n);
  return columns;
}
377 
378 int BLI_str_utf8_char_width(const char *p)
379 {
380  uint unicode = BLI_str_utf8_as_unicode(p);
381  if (unicode == BLI_UTF8_ERR) {
382  return -1;
383  }
384 
385  return BLI_wcwidth((char32_t)unicode);
386 }
387 
389 {
390  int columns;
391 
392  uint unicode = BLI_str_utf8_as_unicode(p);
393  if (unicode == BLI_UTF8_ERR) {
394  return 1;
395  }
396 
397  columns = BLI_wcwidth((char32_t)unicode);
398 
399  return (columns < 0) ? 1 : columns;
400 }
401 
402 /* --------------------------------------------------------------------------*/
403 
404 /* copied from glib's gutf8.c, added 'Err' arg */
405 
406 /* NOTE(campbell): glib uses uint for unicode, best we do the same,
407  * though we don't typedef it. */
408 
/**
 * Classify the lead byte `Char` of a UTF-8 sequence.
 *
 * Sets `Len` to the sequence length in bytes (1..6) and `Mask` to the bit-mask that
 * extracts the payload bits from the lead byte. When `Char` can not start a sequence,
 * `Len` is set to `Err` and `Mask` is left untouched.
 *
 * Expands to a single statement. `Char` is evaluated more than once,
 * so it must not have side effects.
 */
#define UTF8_COMPUTE(Char, Mask, Len, Err) \
  do { \
    if ((Char) < 128) { \
      (Len) = 1; \
      (Mask) = 0x7f; \
    } \
    else if (((Char)&0xe0) == 0xc0) { \
      (Len) = 2; \
      (Mask) = 0x1f; \
    } \
    else if (((Char)&0xf0) == 0xe0) { \
      (Len) = 3; \
      (Mask) = 0x0f; \
    } \
    else if (((Char)&0xf8) == 0xf0) { \
      (Len) = 4; \
      (Mask) = 0x07; \
    } \
    else if (((Char)&0xfc) == 0xf8) { \
      (Len) = 5; \
      (Mask) = 0x03; \
    } \
    else if (((Char)&0xfe) == 0xfc) { \
      (Len) = 6; \
      (Mask) = 0x01; \
    } \
    else { \
      (Len) = (Err); /* -1 is the typical error value or 1 to skip */ \
    } \
  } while (0)
438 
439 /* same as glib define but added an 'Err' arg */
/**
 * Decode the sequence of `Len` bytes at `Chars` into `Result`.
 *
 * The lead byte is masked with `Mask`, then 6 payload bits are appended for every
 * following byte. When a following byte is not a continuation byte (`10xxxxxx`),
 * `Result` is set to `Err` and decoding stops. `Count` is used as the loop counter.
 *
 * Expands to a single statement. Arguments are evaluated more than once,
 * so they must not have side effects.
 */
#define UTF8_GET(Result, Chars, Count, Mask, Len, Err) \
  do { \
    (Result) = (Chars)[0] & (Mask); \
    for ((Count) = 1; (Count) < (Len); ++(Count)) { \
      if (((Chars)[(Count)] & 0xc0) != 0x80) { \
        (Result) = (Err); \
        break; \
      } \
      (Result) <<= 6; \
      (Result) |= ((Chars)[(Count)] & 0x3f); \
    } \
  } while (0)
451 
/**
 * Byte length of the UTF-8 sequence that starts at \a p, judged from its first byte only.
 *
 * NOTE: follows the GLIB logic but does not use GLIB.
 *
 * \return A length from 1 to 6, or -1 when the byte can not start a sequence.
 */
int BLI_str_utf8_size(const char *p)
{
  const unsigned char lead = (unsigned char)*p;

  if (lead < 128) {
    return 1;
  }
  if ((lead & 0xe0) == 0xc0) {
    return 2;
  }
  if ((lead & 0xf0) == 0xe0) {
    return 3;
  }
  if ((lead & 0xf8) == 0xf0) {
    return 4;
  }
  if ((lead & 0xfc) == 0xf8) {
    return 5;
  }
  if ((lead & 0xfe) == 0xfc) {
    return 6;
  }
  return -1;
}
465 
/**
 * Byte length of the UTF-8 sequence that starts at \a p, judged from its first byte only.
 *
 * Unlike #BLI_str_utf8_size this returns 1 for a byte that can not start a sequence,
 * so the result can always be used to step forward.
 *
 * \return A length from 1 to 6.
 */
int BLI_str_utf8_size_safe(const char *p)
{
  const unsigned char lead = (unsigned char)*p;

  if (lead < 128) {
    return 1;
  }
  if ((lead & 0xe0) == 0xc0) {
    return 2;
  }
  if ((lead & 0xf0) == 0xe0) {
    return 3;
  }
  if ((lead & 0xf8) == 0xf0) {
    return 4;
  }
  if ((lead & 0xfc) == 0xf8) {
    return 5;
  }
  if ((lead & 0xfe) == 0xfc) {
    return 6;
  }
  /* Not a lead byte: skip over it. */
  return 1;
}
477 
479 {
480  /* Originally `g_utf8_get_char` in GLIB. */
481 
482  int i, len;
483  uint mask = 0;
484  uint result;
485  const unsigned char c = (unsigned char)*p;
486 
487  UTF8_COMPUTE(c, mask, len, -1);
488  if (UNLIKELY(len == -1)) {
489  return BLI_UTF8_ERR;
490  }
491  UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
492 
493  return result;
494 }
495 
497  const size_t p_len,
498  size_t *__restrict index)
499 {
500  int i, len;
501  uint mask = 0;
502  uint result;
503  const unsigned char c = (unsigned char)*(p += *index);
504 
505  BLI_assert(*index < p_len);
506  BLI_assert(c != '\0');
507 
508  UTF8_COMPUTE(c, mask, len, -1);
509  if (UNLIKELY(len == -1) || (*index + (size_t)len > p_len)) {
510  return BLI_UTF8_ERR;
511  }
512  UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
513  if (UNLIKELY(result == BLI_UTF8_ERR)) {
514  return BLI_UTF8_ERR;
515  }
516  *index += (size_t)len;
517  BLI_assert(*index <= p_len);
518  return result;
519 }
520 
521 uint BLI_str_utf8_as_unicode_step(const char *__restrict p,
522  const size_t p_len,
523  size_t *__restrict index)
524 {
526  if (UNLIKELY(result == BLI_UTF8_ERR)) {
527  result = (uint)p[*index];
528  *index += 1;
529  }
530  BLI_assert(*index <= p_len);
531  return result;
532 }
533 
534 /* was g_unichar_to_utf8 */
535 
/**
 * Work out how the code-point `Char` is stored as UTF-8.
 *
 * Sets `Len` to the number of bytes needed (1..6) and `First` to the marker bits that
 * are OR'ed into the first byte of the sequence.
 *
 * Expands to a single statement. `Char` is evaluated more than once,
 * so it must not have side effects.
 */
#define UTF8_VARS_FROM_CHAR32(Char, First, Len) \
  do { \
    if ((Char) < 0x80) { \
      (First) = 0; \
      (Len) = 1; \
    } \
    else if ((Char) < 0x800) { \
      (First) = 0xc0; \
      (Len) = 2; \
    } \
    else if ((Char) < 0x10000) { \
      (First) = 0xe0; \
      (Len) = 3; \
    } \
    else if ((Char) < 0x200000) { \
      (First) = 0xf0; \
      (Len) = 4; \
    } \
    else if ((Char) < 0x4000000) { \
      (First) = 0xf8; \
      (Len) = 5; \
    } \
    else { \
      (First) = 0xfc; \
      (Len) = 6; \
    } \
  } while (0)
562 
564 {
565  /* If this gets modified, also update the copy in g_string_insert_unichar() */
566  uint len = 0;
567  uint first;
568 
569  UTF8_VARS_FROM_CHAR32(c, first, len);
570  (void)first;
571 
572  return len;
573 }
574 
575 size_t BLI_str_utf8_from_unicode(uint c, char *outbuf, const size_t outbuf_len)
576 
577 {
578  /* If this gets modified, also update the copy in g_string_insert_unichar() */
579  uint len = 0;
580  uint first;
581 
582  UTF8_VARS_FROM_CHAR32(c, first, len);
583 
584  if (UNLIKELY(outbuf_len < len)) {
585  /* NULL terminate instead of writing a partial byte. */
586  memset(outbuf, 0x0, outbuf_len);
587  return outbuf_len;
588  }
589 
590  for (uint i = len - 1; i > 0; i--) {
591  outbuf[i] = (c & 0x3f) | 0x80;
592  c >>= 6;
593  }
594  outbuf[0] = c | first;
595 
596  return len;
597 }
598 
599 size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w,
600  const char *__restrict src_c,
601  const size_t maxncpy)
602 {
603  const size_t maxlen = maxncpy - 1;
604  size_t len = 0;
605 
606  BLI_assert(maxncpy != 0);
607 
608 #ifdef DEBUG_STRSIZE
609  memset(dst_w, 0xff, sizeof(*dst_w) * maxncpy);
610 #endif
611 
612  const size_t src_c_len = strlen(src_c);
613  const char *src_c_end = src_c + src_c_len;
614  size_t index = 0;
615  while ((index < src_c_len) && (len != maxlen)) {
616  const uint unicode = BLI_str_utf8_as_unicode_step_or_error(src_c, src_c_len, &index);
617  if (unicode != BLI_UTF8_ERR) {
618  *dst_w = unicode;
619  }
620  else {
621  *dst_w = '?';
622  const char *src_c_next = BLI_str_find_next_char_utf8(src_c + index, src_c_end);
623  index = (size_t)(src_c_next - src_c);
624  }
625  dst_w++;
626  len++;
627  }
628 
629  *dst_w = 0;
630 
631  return len;
632 }
633 
634 size_t BLI_str_utf32_as_utf8(char *__restrict dst,
635  const char32_t *__restrict src,
636  const size_t maxncpy)
637 {
638  BLI_assert(maxncpy != 0);
639  size_t len = 0;
640 #ifdef DEBUG_STRSIZE
641  memset(dst, 0xff, sizeof(*dst) * maxncpy);
642 #endif
643  while (*src && len < maxncpy) {
644  len += BLI_str_utf8_from_unicode((uint)*src++, dst + len, maxncpy - len);
645  }
646  dst[len] = '\0';
647  /* Return the correct length when part of the final byte did not fit into the string. */
648  while ((len > 0) && UNLIKELY(dst[len - 1] == '\0')) {
649  len--;
650  }
651  return len;
652 }
653 
654 size_t BLI_str_utf32_as_utf8_len(const char32_t *src)
655 {
656  size_t len = 0;
657 
658  while (*src) {
660  }
661 
662  return len;
663 }
664 
/**
 * Step back from \a p to the start of the previous character.
 *
 * Originally `g_utf8_find_prev_char` in GLIB.
 *
 * \param p: Position inside the string, must not be before \a str_start.
 * \param str_start: First byte of the string, never stepped over.
 * \return The closest position before \a p that does not hold a continuation byte.
 * When there is none (\a p is already at \a str_start, or only continuation bytes
 * precede it) \a str_start is returned, so the result never points before the string.
 */
const char *BLI_str_find_prev_char_utf8(const char *p, const char *str_start)
{
  BLI_assert(p >= str_start);

  /* Only move while still after the start, so the pointer is never decremented
   * below `str_start`. */
  while (p > str_start) {
    p--;
    if ((*p & 0xc0) != 0x80) {
      return p;
    }
  }
  return p;
}
679 
/**
 * Step forward from \a p to the start of the next character.
 *
 * Originally `g_utf8_find_next_char` in GLIB.
 *
 * \param p: Position inside the string, must not be after \a str_end.
 * \param str_end: End of the string, never stepped over.
 * \return \a p itself when it is at \a str_end or at a NUL byte, otherwise the first
 * position after \a p that is either \a str_end or does not hold a continuation byte.
 */
const char *BLI_str_find_next_char_utf8(const char *p, const char *str_end)
{
  BLI_assert(p <= str_end);

  if ((p >= str_end) || (*p == '\0')) {
    /* Nowhere to go. */
    return p;
  }

  /* Always step over the current byte, then over any continuation bytes. */
  do {
    p++;
  } while ((p < str_end) && ((*p & 0xc0) == 0x80));

  return p;
}
692 
693 size_t BLI_str_partition_utf8(const char *str,
694  const uint delim[],
695  const char **sep,
696  const char **suf)
697 {
698  return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, false);
699 }
700 
701 size_t BLI_str_rpartition_utf8(const char *str,
702  const uint delim[],
703  const char **sep,
704  const char **suf)
705 {
706  return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, true);
707 }
708 
709 size_t BLI_str_partition_ex_utf8(const char *str,
710  const char *end,
711  const uint delim[],
712  const char **sep,
713  const char **suf,
714  const bool from_right)
715 {
716  const size_t str_len = end ? (size_t)(end - str) : strlen(str);
717  if (end == NULL) {
718  end = str + str_len;
719  }
720 
721  /* Note that here, we assume end points to a valid utf8 char! */
722  BLI_assert((end >= str) && (BLI_str_utf8_as_unicode(end) != BLI_UTF8_ERR));
723 
724  *suf = (char *)(str + str_len);
725 
726  size_t index;
727  for (*sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(end, str) : str), index = 0;
728  from_right ? (*sep > str) : ((*sep < end) && (**sep != '\0'));
729  *sep = (char *)(from_right ? (str != *sep ? BLI_str_find_prev_char_utf8(*sep, str) : NULL) :
730  str + index)) {
731  size_t index_ofs = 0;
732  const uint c = BLI_str_utf8_as_unicode_step_or_error(*sep, (size_t)(end - *sep), &index_ofs);
733  index += index_ofs;
734 
735  if (c == BLI_UTF8_ERR) {
736  *suf = *sep = NULL;
737  break;
738  }
739 
740  for (const uint *d = delim; *d != '\0'; d++) {
741  if (*d == c) {
742  /* *suf is already correct in case from_right is true. */
743  if (!from_right) {
744  *suf = (char *)(str + index);
745  }
746  return (size_t)(*sep - str);
747  }
748  }
749 
750  *suf = *sep; /* Useful in 'from_right' case! */
751  }
752 
753  *suf = *sep = NULL;
754  return str_len;
755 }
756 
757 /* -------------------------------------------------------------------- */
/**
 * Convert a byte offset in \a str into a character index.
 *
 * \param str: UTF-8 string.
 * \param offset: Byte offset, expected to be at the start of a character.
 * \return The number of characters that start before \a offset.
 */
int BLI_str_utf8_offset_to_index(const char *str, int offset)
{
  int index = 0, pos = 0;
  /* `<` rather than `!=` so an offset inside a sequence (or a negative one)
   * can not cause the loop to run on beyond `offset`. */
  while (pos < offset) {
    /* Step over one character, the "safe" size always advances by at least one byte. */
    pos += BLI_str_utf8_size_safe(str + pos);
    index++;
  }
  return index;
}
770 
/**
 * Convert a character index in \a str into a byte offset.
 *
 * \param str: UTF-8 string.
 * \param index: Character index, expected to be within the string.
 * \return The byte offset at which character number \a index starts.
 */
int BLI_str_utf8_offset_from_index(const char *str, int index)
{
  int offset = 0, pos = 0;
  /* `<` rather than `!=` so a negative index returns immediately. */
  while (pos < index) {
    /* Step over one character, the "safe" size always advances by at least one byte. */
    offset += BLI_str_utf8_size_safe(str + offset);
    pos++;
  }
  return offset;
}
780 
/**
 * Convert a byte offset in \a str into a display column.
 *
 * \param str: UTF-8 string.
 * \param offset: Byte offset into \a str.
 * \return The summed #BLI_str_utf8_char_width_safe of the characters
 * that start before \a offset.
 */
int BLI_str_utf8_offset_to_column(const char *str, int offset)
{
  int column = 0, pos = 0;
  while (pos < offset) {
    column += BLI_str_utf8_char_width_safe(str + pos);
    /* Advance to the next character, without this the loop never terminates. */
    pos += BLI_str_utf8_size_safe(str + pos);
  }
  return column;
}
790 
/**
 * Convert a display column into a byte offset in \a str.
 *
 * Stops at the end of the string, or before a character that would extend past \a column.
 *
 * \param str: NUL terminated UTF-8 string.
 * \param column: Target display column.
 * \return The byte offset of the last character start that does not exceed \a column.
 */
int BLI_str_utf8_offset_from_column(const char *str, int column)
{
  int offset = 0, pos = 0, col;
  while (*(str + offset) && pos < column) {
    /* Width of the character at the current offset. */
    col = BLI_str_utf8_char_width_safe(str + offset);
    if (pos + col > column) {
      break;
    }
    /* Advance over the character, without this the loop never terminates. */
    offset += BLI_str_utf8_size_safe(str + offset);
    pos += col;
  }
  return offset;
}
804 
#define BLI_assert(a)
Definition: BLI_assert.h:46
#define BLI_UTF8_ERR
unsigned int uint
Definition: BLI_sys_types.h:67
#define UNLIKELY(x)
#define ELEM(...)
SyclQueue void void * src
SyclQueue void void size_t num_bytes void
int len
Definition: draw_manager.c:108
#define str(s)
uint pos
uint col
ccl_gpu_kernel_postfix ccl_global float int int int int float bool int offset
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)
Definition: math_float4.h:513
static unsigned c
Definition: RandGen.cpp:83
T length(const vec_base< T, Size > &a)
#define UTF8_COMPUTE(Char, Mask, Len, Err)
Definition: string_utf8.c:409
ptrdiff_t BLI_str_utf8_invalid_byte(const char *str, size_t length)
Definition: string_utf8.c:46
int BLI_wcwidth(char32_t ucs)
Definition: string_utf8.c:364
char * BLI_strncpy_utf8(char *__restrict dst, const char *__restrict src, size_t maxncpy)
Definition: string_utf8.c:236
size_t BLI_str_utf8_from_unicode_len(const uint c)
Definition: string_utf8.c:563
size_t BLI_strncpy_utf8_rlen(char *__restrict dst, const char *__restrict src, size_t maxncpy)
Definition: string_utf8.c:252
size_t BLI_strlen_utf8_ex(const char *strc, size_t *r_len_bytes)
Definition: string_utf8.c:304
size_t BLI_strnlen_utf8_ex(const char *strc, const size_t maxlen, size_t *r_len_bytes)
Definition: string_utf8.c:323
size_t BLI_str_partition_ex_utf8(const char *str, const char *end, const uint delim[], const char **sep, const char **suf, const bool from_right)
Definition: string_utf8.c:709
size_t BLI_strnlen_utf8(const char *strc, const size_t maxlen)
Definition: string_utf8.c:342
size_t BLI_str_utf32_as_utf8(char *__restrict dst, const char32_t *__restrict src, const size_t maxncpy)
Definition: string_utf8.c:634
int BLI_str_utf8_offset_from_column(const char *str, int column)
Definition: string_utf8.c:791
size_t BLI_strlen_utf8(const char *strc)
Definition: string_utf8.c:317
#define UTF8_VARS_FROM_CHAR32(Char, First, Len)
Definition: string_utf8.c:536
size_t BLI_str_utf32_as_utf8_len(const char32_t *src)
Definition: string_utf8.c:654
#define BLI_STR_UTF8_CPY(dst, src, maxncpy)
Definition: string_utf8.c:207
const char * BLI_str_find_next_char_utf8(const char *p, const char *str_end)
Definition: string_utf8.c:680
static const size_t utf8_skip_data[256]
Definition: string_utf8.c:35
const char * BLI_str_find_prev_char_utf8(const char *p, const char *str_start)
Definition: string_utf8.c:665
size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst, const wchar_t *__restrict src, const size_t maxncpy)
Definition: string_utf8.c:273
int BLI_str_utf8_offset_to_index(const char *str, int offset)
Definition: string_utf8.c:761
size_t BLI_str_partition_utf8(const char *str, const uint delim[], const char **sep, const char **suf)
Definition: string_utf8.c:693
size_t BLI_wstrlen_utf8(const wchar_t *src)
Definition: string_utf8.c:293
int BLI_str_utf8_char_width(const char *p)
Definition: string_utf8.c:378
size_t BLI_str_rpartition_utf8(const char *str, const uint delim[], const char **sep, const char **suf)
Definition: string_utf8.c:701
#define UTF8_GET(Result, Chars, Count, Mask, Len, Err)
Definition: string_utf8.c:440
int BLI_wcswidth(const char32_t *pwcs, size_t n)
Definition: string_utf8.c:373
int BLI_str_utf8_size(const char *p)
Definition: string_utf8.c:452
int BLI_str_utf8_invalid_strip(char *str, size_t length)
Definition: string_utf8.c:181
uint BLI_str_utf8_as_unicode(const char *p)
Definition: string_utf8.c:478
int BLI_str_utf8_size_safe(const char *p)
Definition: string_utf8.c:466
uint BLI_str_utf8_as_unicode_step(const char *__restrict p, const size_t p_len, size_t *__restrict index)
Definition: string_utf8.c:521
int BLI_str_utf8_offset_to_column(const char *str, int offset)
Definition: string_utf8.c:781
uint BLI_str_utf8_as_unicode_step_or_error(const char *__restrict p, const size_t p_len, size_t *__restrict index)
Definition: string_utf8.c:496
size_t BLI_str_utf8_from_unicode(uint c, char *outbuf, const size_t outbuf_len)
Definition: string_utf8.c:575
int BLI_str_utf8_char_width_safe(const char *p)
Definition: string_utf8.c:388
size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w, const char *__restrict src_c, const size_t maxncpy)
Definition: string_utf8.c:599
size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w, const char *__restrict src_c, const size_t maxncpy)
Definition: string_utf8.c:348
int BLI_str_utf8_offset_from_index(const char *str, int index)
Definition: string_utf8.c:771
int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16)
Definition: utfconv.c:181