Package Martel :: Module msre_constants
[hide private]
[frames | no frames]

Source Code for Module Martel.msre_constants

  1  # 
  2  # Secret Labs' Regular Expression Engine 
  3  # 
  4  # various symbols used by the regular expression engine. 
  5  # run this script to update the _sre include files! 
  6  # 
  7  # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved. 
  8  # 
  9  # See the sre.py file in Python 2.1 for information on usage and 
 10  # redistribution. 
 11  # 
 12  # Changes for Martel Copyright 2000-2001 by Dalke Scientific Software, LLC 
 13  # Distributed under the Biopython License Agreement (see the LICENSE file). 
 14   
 15  # update when constants are added or removed 
 16   
 17  MAGIC = 20010320 
 18   
 19  # max code word in this release 
 20   
 21  MAXREPEAT = 65535 
 22   
 23  # SRE standard exception (access as sre.error) 
 24  # should this really be here? 
 25   
class error(Exception):
    """SRE standard exception; callers access it as ``sre.error``."""
# operators
#
# Every symbol is a plain string.  Note that MAX_REPEAT, MIN_REPEAT and
# NEWLINE are defined here but are not listed in OPCODES below.

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ANY_ALL = "any_all"
ASSERT = "assert"
ASSERT_NOT = "assert_not"
AT = "at"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NEWLINE = "newline"  # Martel specific extension
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"

# positions
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"
AT_LOC_BOUNDARY = "at_loc_boundary"
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
AT_UNI_BOUNDARY = "at_uni_boundary"
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"

# categories
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"

# Defined here but not listed in CHCODES below.
CATEGORY_NEWLINE = "category_newline"  # Martel specific extension

# The position of a symbol in each list below is significant: the lists
# are later turned into {symbol: index} tables.
OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET,
    GROUPREF, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN

]

ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
    AT_UNI_NON_BOUNDARY
]

CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK
]
def makedict(list):
    """Map each item of a sequence to its position in that sequence.

    Args:
        list: Iterable of hashable items.  The parameter name shadows the
            builtin; it is kept unchanged so existing keyword callers keep
            working.

    Returns:
        A dict of ``{item: index}`` with indices starting at 0.  If an item
        occurs more than once, the index of its last occurrence is stored.
    """
    return {item: index for index, item in enumerate(list)}
# Rebind each code list to a {symbol: index} lookup table.
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)

# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE,
}

# position replacements: line-based variants of the begin/end positions
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE,
}

# position replacements: locale variants of the boundary positions
AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY,
}

# position replacements: unicode variants of the boundary positions
AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY,
}

# category replacements for locale mode; only the word categories change,
# every other category maps to itself
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK,
}

# category replacements for unicode mode; every category gets its
# CATEGORY_UNI_* counterpart
CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK,
}

# flags
SRE_FLAG_TEMPLATE = 1  # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2  # case insensitive
SRE_FLAG_LOCALE = 4  # honour system locale
SRE_FLAG_MULTILINE = 8  # treat target as multiline string
SRE_FLAG_DOTALL = 16  # treat target as a single string
SRE_FLAG_UNICODE = 32  # use unicode locale
SRE_FLAG_VERBOSE = 64  # ignore whitespace and comments
SRE_FLAG_DEBUG = 128  # debugging

# flags for INFO primitive
SRE_INFO_PREFIX = 1  # has prefix
SRE_INFO_LITERAL = 2  # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4  # pattern starts with character from given set

if __name__ == "__main__":
    # Running this module as a script is deliberately refused; the import
    # below is never reached because of the raise.
    raise NotImplementedError("Not supported by Martel's modified sre_constants")
    import string
def dump(f, d, prefix):
    """Write one C ``#define`` line per entry of *d*, ordered by value.

    Each line has the form ``#define <prefix>_<NAME> <value>``, where
    ``<NAME>`` is the upper-cased key.

    Args:
        f: Object with a ``write(str)`` method (e.g. an open text file).
        d: Dict mapping symbol names (strings) to their values.
        prefix: Text placed before each upper-cased name, joined with ``_``.
    """
    # sorted(key=...) and str.upper() work on both Python 2 and Python 3,
    # unlike a cmp-function sort on dict.items() and string.upper().
    for name, value in sorted(d.items(), key=lambda pair: pair[1]):
        f.write("#define %s_%s %s\n" % (prefix, name.upper(), value))
if __name__ == "__main__":
    # Generates the C header "sre_constants.h" from the tables in this
    # module.  NOTE: the module's earlier __main__ guard raises
    # NotImplementedError first, so this code is not reached in Martel; it
    # is kept guarded here so that importing the module never writes a file.
    #
    # The `with` block closes the file even if a write fails.
    with open("sre_constants.h", "w") as f:
        f.write("""\
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * NOTE: This file is generated by sre_constants.py. If you need
 * to change anything in here, edit sre_constants.py and run it.
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */

""")

        f.write("#define SRE_MAGIC %d\n" % MAGIC)

        dump(f, OPCODES, "SRE_OP")
        dump(f, ATCODES, "SRE")
        dump(f, CHCODES, "SRE")

        f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
        f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
        f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
        f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
        f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
        f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
        f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)

        f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
        f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
        f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)

    # print() with a single argument behaves the same on Python 2 and 3;
    # the bare print statement is a syntax error on Python 3.
    print("done")