#!/usr/bin/env python # SOSID - Switch On String Identifier for C/C++ # # Copyright (C) 2007 Tim Janik # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # If you have not received a copy of the GNU General Public License # along with this program, see: http://www.gnu.org/licenses/ import sys, re, itertools # Usage: sosid.py [output-filename] < c_or_cpp_source VERSION="SOSID-0.4" # create output file if len (sys.argv) >= 2: fout_name = sys.argv[1] fout = open (fout_name, 'w') else: fout = sys.stdout # mark territory ;) print >>fout, "/* Switch On String code generated by", VERSION, "*/" # collect all string identifiers from input stream string_list = re.findall (r'\bSOSID\s*\(\s*([a-zA-Z0-9_]+)\s*\)', sys.stdin.read()) # sort the keys so that a binary search can use 2-stage comparisons where the # first order cmp is case insensitive, and the second order cmp can be case # sensitive if needed. this allows us to use a single structure (SOSID_STRINGS) # for case sensitive and case insensitive matches. def case2cmp (a, b): # make case a second order key r = cmp (a.lower(), b.lower()) if r == 0: r = cmp (a, b) return r string_list.sort (cmp = case2cmp) # remove duplicate symbols that CPP would barf on string_list = [k for k,g in itertools.groupby (string_list)] # SOSID_STRINGS print >>fout, r'#define SOSID_N_STRINGS (%u)' % len (string_list) print >>fout, r'#define SOSID_STRINGS ("\000" ', for s in string_list: print >>fout, '"' + s + r'\000"', print >>fout, ")" # SOSID_OFFSETS ofs = 1 print >>fout, r'#define SOSID_OFFSETS { %u,' % ofs, for s in string_list: ofs += len (s) + 1 print >>fout, '%d,' % ofs, print >>fout, '}' # SOSID__* ofs = 1 for s in string_list: print >>fout, '#define SOSID__%s' % s, ' ' * max (0, 23 - len (s)), '(%u)' % ofs ofs += len (s) + 1 # boilerplate print >>fout, r""" #define SOSID(identifier/**/) SOSID__ ## identifier #define SOSID_LOOKUP(STRING) SOSID_LOOKUP__INTERNAL (STRING, 0) #define SOSID_LOOKUP_ICASE(STRING) SOSID_LOOKUP__INTERNAL (STRING, 1) #define SOSID_LOOKUP__INTERNAL(STRING,ICASE) ({ \ const char *sosid_haystack = SOSID_STRINGS, *sosid__needle = STRING; \ const int sosid_offsets[] = SOSID_OFFSETS; \ int sosid_n_elements = SOSID_N_STRINGS, sosid_offset = 0, sosid_result = 0; \ while (sosid_offset < sosid_n_elements) \ { \ int sosid_i = (sosid_offset + sosid_n_elements) >> 1; \ const char *sosid_current = sosid_haystack + sosid_offsets[sosid_i]; \ int sosid_cmp = strcasecmp (sosid__needle, sosid_current); \ if (!ICASE && sosid_cmp == 0) \ sosid_cmp = strcmp (sosid__needle, sosid_current); \ if (sosid_cmp == 0) \ { sosid_result = sosid_offsets[sosid_i]; break; } \ else if (sosid_cmp < 0) \ sosid_n_elements = sosid_i; \ else /* (sosid_cmp > 0) */ \ sosid_offset = sosid_i + 1; \ } \ sosid_result; \ }) """ # clean up (and announce output file) if fout != sys.stdout: fout.close() print fout_name