Blender  V3.3
string_cursor_utf8.c
Go to the documentation of this file.
1 /* SPDX-License-Identifier: GPL-2.0-or-later
2  * Copyright 2011 Blender Foundation. All rights reserved. */
3 
8 #include <stdio.h>
9 #include <stdlib.h>
10 
11 #include "BLI_string_utf8.h"
12 #include "BLI_utildefines.h"
13 
14 #include "BLI_string_cursor_utf8.h" /* own include */
15 
16 #ifdef __GNUC__
17 # pragma GCC diagnostic error "-Wsign-conversion"
18 #endif
19 
20 typedef enum eStrCursorDelimType {
30 
32 {
33  switch (uch) {
34  case ',':
35  case '.':
36  return STRCUR_DELIM_PUNCT;
37 
38  case '{':
39  case '}':
40  case '[':
41  case ']':
42  case '(':
43  case ')':
44  return STRCUR_DELIM_BRACE;
45 
46  case '+':
47  case '-':
48  case '=':
49  case '~':
50  case '%':
51  case '/':
52  case '<':
53  case '>':
54  case '^':
55  case '*':
56  case '&':
57  case '|':
58  return STRCUR_DELIM_OPERATOR;
59 
60  case '\'':
61  case '\"':
62  return STRCUR_DELIM_QUOTE;
63 
64  case ' ':
65  case '\t':
66  case '\n':
68 
69  case '\\':
70  case '@':
71  case '#':
72  case '$':
73  case ':':
74  case ';':
75  case '?':
76  case '!':
77  case 0xA3: /* pound */
78  case 0x80: /* euro */
79  /* case '_': */ /* special case, for python */
80  return STRCUR_DELIM_OTHER;
81 
82  default:
83  break;
84  }
85  return STRCUR_DELIM_ALPHANUMERIC; /* Not quite true, but ok for now */
86 }
87 
88 static eStrCursorDelimType cursor_delim_type_utf8(const char *ch_utf8,
89  const size_t ch_utf8_len,
90  const int pos)
91 {
92  /* for full unicode support we really need to have large lookup tables to figure
93  * out what's what in every possible char set - and python, glib both have these. */
94  size_t index = (size_t)pos;
95  uint uch = BLI_str_utf8_as_unicode_step_or_error(ch_utf8, ch_utf8_len, &index);
96  return cursor_delim_type_unicode(uch);
97 }
98 
/* Move the cursor `*pos` (a byte offset into `str`) forward to the position
 * reported by BLI_str_find_next_char_utf8().
 *
 * str:    buffer the cursor moves in.
 * maxlen: upper limit for the cursor; the result is clamped to it.
 * pos:    cursor byte offset, updated in place on success.
 *
 * Returns false (leaving `*pos` untouched) when the cursor is already at or
 * past `maxlen`, or when BLI_str_find_next_char_utf8() returns the end
 * pointer. Returns true otherwise. */
bool BLI_str_cursor_step_next_utf8(const char *str, size_t maxlen, int *pos)
{
  /* Nothing to step over once the cursor has reached the end. This mirrors
   * cursor_step_next_utf32() and avoids forming `str_pos` beyond `str_end`. */
  if ((*pos) >= (int)maxlen) {
    return false;
  }

  /* The end pointer is one past `str[maxlen]`, so the byte at `maxlen`
   * is still inside the searched range. */
  const char *str_end = str + (maxlen + 1);
  const char *str_pos = str + (*pos);
  const char *str_next = BLI_str_find_next_char_utf8(str_pos, str_end);
  if (str_next == str_end) {
    return false;
  }

  (*pos) += (int)(str_next - str_pos);
  /* Never leave the cursor beyond `maxlen`. */
  if ((*pos) > (int)maxlen) {
    (*pos) = (int)maxlen;
  }
  return true;
}
114 
/* Move the cursor `*pos` (a byte offset into `str`) backward to the position
 * reported by BLI_str_find_prev_char_utf8().
 *
 * Returns false without touching `*pos` when the cursor is not greater than
 * zero. Otherwise the cursor is moved back by the distance between its
 * current position and the pointer returned by the helper, and true is
 * returned. `maxlen` is unused. */
bool BLI_str_cursor_step_prev_utf8(const char *str, size_t UNUSED(maxlen), int *pos)
{
  if ((*pos) <= 0) {
    return false;
  }

  const char *cursor = str + (*pos);
  const char *prev_char = BLI_str_find_prev_char_utf8(cursor, str);
  (*pos) -= (cursor - prev_char);
  return true;
}
126 
127 void BLI_str_cursor_step_utf8(const char *str,
128  size_t maxlen,
129  int *pos,
130  eStrCursorJumpDirection direction,
132  bool use_init_step)
133 {
134  const int pos_orig = *pos;
135 
136  if (direction == STRCUR_DIR_NEXT) {
137  if (use_init_step) {
139  }
140  else {
142  }
143 
144  if (jump != STRCUR_JUMP_NONE) {
145  const eStrCursorDelimType delim_type = (*pos) < maxlen ?
146  cursor_delim_type_utf8(str, maxlen, *pos) :
148  /* jump between special characters (/,\,_,-, etc.),
149  * look at function cursor_delim_type_unicode() for the complete
150  * list of special characters, ctr -> */
151  while ((*pos) < maxlen) {
152  if (BLI_str_cursor_step_next_utf8(str, maxlen, pos)) {
153  if (*pos == maxlen) {
154  break;
155  }
156  if ((jump != STRCUR_JUMP_ALL) &&
157  (delim_type != cursor_delim_type_utf8(str, maxlen, *pos))) {
158  break;
159  }
160  }
161  else {
162  break; /* unlikely but just in case */
163  }
164  }
165  }
166  }
167  else if (direction == STRCUR_DIR_PREV) {
168  if (use_init_step) {
170  }
171  else {
173  }
174 
175  if (jump != STRCUR_JUMP_NONE) {
176  const eStrCursorDelimType delim_type = (*pos) > 0 ?
177  cursor_delim_type_utf8(str, maxlen, *pos - 1) :
179  /* jump between special characters (/,\,_,-, etc.),
180  * look at function cursor_delim_type_unicode() for the complete
181  * list of special characters, ctr -> */
182  while ((*pos) > 0) {
183  const int pos_prev = *pos;
184  if (BLI_str_cursor_step_prev_utf8(str, maxlen, pos)) {
185  if ((jump != STRCUR_JUMP_ALL) &&
186  (delim_type != cursor_delim_type_utf8(str, maxlen, *pos))) {
187  /* left only: compensate for index/change in direction */
188  if ((pos_orig - (*pos)) >= 1) {
189  *pos = pos_prev;
190  }
191  break;
192  }
193  }
194  else {
195  break;
196  }
197  }
198  }
199  }
200  else {
202  }
203 }
204 
205 /* UTF32 version of BLI_str_cursor_step_utf8 (keep in sync!)
206  * less complex since it doesn't need to do multi-byte stepping.
207  */
208 
209 /* helper funcs so we can match BLI_str_cursor_step_utf8 */
/* UTF-32 counterpart of BLI_str_cursor_step_next_utf8(): every character is a
 * single element, so stepping forward is a plain increment.
 *
 * Returns false (cursor unchanged) when `*pos` is already at or beyond
 * `maxlen`, true after incrementing it otherwise. `str` is unused. */
static bool cursor_step_next_utf32(const char32_t *UNUSED(str), size_t maxlen, int *pos)
{
  const bool can_advance = (*pos) < (int)maxlen;
  if (can_advance) {
    (*pos)++;
  }
  return can_advance;
}
218 
219 static bool cursor_step_prev_utf32(const char32_t *UNUSED(str), size_t UNUSED(maxlen), int *pos)
220 {
221  if ((*pos) <= 0) {
222  return false;
223  }
224  (*pos)--;
225  return true;
226 }
227 
228 void BLI_str_cursor_step_utf32(const char32_t *str,
229  size_t maxlen,
230  int *pos,
231  eStrCursorJumpDirection direction,
233  bool use_init_step)
234 {
235  const int pos_orig = *pos;
236 
237  if (direction == STRCUR_DIR_NEXT) {
238  if (use_init_step) {
239  cursor_step_next_utf32(str, maxlen, pos);
240  }
241  else {
243  }
244 
245  if (jump != STRCUR_JUMP_NONE) {
246  const eStrCursorDelimType delim_type = (*pos) < maxlen ?
249  /* jump between special characters (/,\,_,-, etc.),
250  * look at function cursor_delim_type_unicode() for the complete
251  * list of special characters, ctr -> */
252  while ((*pos) < maxlen) {
253  if (cursor_step_next_utf32(str, maxlen, pos)) {
254  if ((jump != STRCUR_JUMP_ALL) &&
255  (delim_type != cursor_delim_type_unicode((uint)str[*pos]))) {
256  break;
257  }
258  }
259  else {
260  break; /* unlikely but just in case */
261  }
262  }
263  }
264  }
265  else if (direction == STRCUR_DIR_PREV) {
266  if (use_init_step) {
267  cursor_step_prev_utf32(str, maxlen, pos);
268  }
269  else {
271  }
272 
273  if (jump != STRCUR_JUMP_NONE) {
274  const eStrCursorDelimType delim_type = (*pos) > 0 ?
277  /* jump between special characters (/,\,_,-, etc.),
278  * look at function cursor_delim_type_unicode() for the complete
279  * list of special characters, ctr -> */
280  while ((*pos) > 0) {
281  const int pos_prev = *pos;
282  if (cursor_step_prev_utf32(str, maxlen, pos)) {
283  if ((jump != STRCUR_JUMP_ALL) &&
284  (delim_type != cursor_delim_type_unicode((uint)str[*pos]))) {
285  /* left only: compensate for index/change in direction */
286  if ((pos_orig - (*pos)) >= 1) {
287  *pos = pos_prev;
288  }
289  break;
290  }
291  }
292  else {
293  break;
294  }
295  }
296  }
297  }
298  else {
300  }
301 }
#define BLI_assert_unreachable()
Definition: BLI_assert.h:93
#define BLI_assert(a)
Definition: BLI_assert.h:46
eStrCursorJumpDirection
@ STRCUR_DIR_NEXT
@ STRCUR_DIR_PREV
eStrCursorJumpType
@ STRCUR_JUMP_ALL
@ STRCUR_JUMP_NONE
@ STRCUR_JUMP_DELIM
const char const char * BLI_str_find_next_char_utf8(const char *p, const char *str_end) ATTR_WARN_UNUSED_RESULT ATTR_RETURNS_NONNULL ATTR_NONNULL(1
const char * BLI_str_find_prev_char_utf8(const char *p, const char *str_start) ATTR_WARN_UNUSED_RESULT ATTR_RETURNS_NONNULL ATTR_NONNULL(1
unsigned int unsigned int BLI_str_utf8_as_unicode_step_or_error(const char *__restrict p, size_t p_len, size_t *__restrict index) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1
unsigned int uint
Definition: BLI_sys_types.h:67
#define UNUSED(x)
void jump(const btVector3 &v=btVector3(0, 0, 0))
#define str(s)
uint pos
void BLI_str_cursor_step_utf8(const char *str, size_t maxlen, int *pos, eStrCursorJumpDirection direction, eStrCursorJumpType jump, bool use_init_step)
static eStrCursorDelimType cursor_delim_type_unicode(const uint uch)
static bool cursor_step_prev_utf32(const char32_t *UNUSED(str), size_t UNUSED(maxlen), int *pos)
bool BLI_str_cursor_step_next_utf8(const char *str, size_t maxlen, int *pos)
eStrCursorDelimType
@ STRCUR_DELIM_OTHER
@ STRCUR_DELIM_BRACE
@ STRCUR_DELIM_QUOTE
@ STRCUR_DELIM_WHITESPACE
@ STRCUR_DELIM_ALPHANUMERIC
@ STRCUR_DELIM_PUNCT
@ STRCUR_DELIM_NONE
@ STRCUR_DELIM_OPERATOR
void BLI_str_cursor_step_utf32(const char32_t *str, size_t maxlen, int *pos, eStrCursorJumpDirection direction, eStrCursorJumpType jump, bool use_init_step)
static bool cursor_step_next_utf32(const char32_t *UNUSED(str), size_t maxlen, int *pos)
static eStrCursorDelimType cursor_delim_type_utf8(const char *ch_utf8, const size_t ch_utf8_len, const int pos)
bool BLI_str_cursor_step_prev_utf8(const char *str, size_t UNUSED(maxlen), int *pos)