diff options
-rwxr-xr-x | contrib/struct_endianess.py | 369 |
1 files changed, 369 insertions, 0 deletions
#!/usr/bin/env python3

'''Using mad regexes, automatically make sure that all structs with sub-byte
integers have matching big-endian definitions. The idea is to save a lot of
manual effort, and to automatically verify that there are no errors.
This script most certainly has numerous holes and shortcomings, but actually,
if you hit problems with it, rather adjust your coding style so that this
script can deal with it...

Usage: struct_endianess.py [FILE_OR_DIR ...]
Without arguments, the current directory is walked. Only files ending in
.h, .c or .cpp are examined; they are rewritten in place when the generated
content differs from what is on disk.'''

import re
import sys
import codecs
import os.path

# 'struct foo {' at the start of a line opens a top-level struct; a line
# starting with '}' and ending in ';' closes it.
re_struct_start = re.compile(r'^struct\s*[a-zA-Z_][a-zA-Z_0-9]*\s*{\s*$')
re_struct_end = re.compile(r'^}[^;]*;\s*$')

# anonymous, indented 'struct {' ... '} name;' nested in a top-level struct.
re_substruct_start = re.compile(r'^\s+struct\s*{\s*$')
re_substruct_end = re.compile(r'^\s+}\s*([^;]*\s)[a-zA-Z_][a-zA-Z_0-9]*\s*;\s*$')

# one integer definition: group(1) is the leading whitespace plus type words,
# the last group is the member list including the terminating ';'.
re_int_def = re.compile(r'(^\s*((const|unsigned|signed|char|int|long|int[0-9]+_t|uint[0-9]_t)\s+)+\s*)([^;]*;)',
                        re.DOTALL | re.MULTILINE)
# a single member of such a definition, either 'name' or 'name:bits'.
re_int_members = re.compile(r'([a-zA-Z_][a-zA-Z_0-9]*|[a-zA-Z_][a-zA-Z_0-9]*\s*:\s*[0-9]+)\s*[,;]\s*', re.DOTALL | re.MULTILINE)

re_little_endian_ifdef = re.compile(r'#\s*(if|elif)\s+OSMO_IS_LITTLE_ENDIAN\s*(==\s*1\s*|)')
re_big_endian_ifdef = re.compile(r'#\s*(if|elif)\s+OSMO_IS_BIG_ENDIAN\s*')
re_else = re.compile(r'#\s*else\s*')
re_endif = re.compile(r'#\s*endif\s*')

# A block comment (non-greedy, may span lines and contain '*') or a line
# comment up to, but not including, the end of its line. The single capturing
# group makes re.split() return comments at the odd indexes.
re_c_comment = re.compile(r'(/\*.*?\*/|//[^\n]*)', re.DOTALL)


def remove_c_comments(code_str):
    '''Return code_str with all /* block */ and // line comments removed.
    The newline that ends a line comment is kept.'''
    return ''.join(re_c_comment.split(code_str)[::2])


def section_struct_body(struct_body_lines):
    '''Divide a top-level struct body into alternating sections.

    struct_body_lines: list of source lines between 'struct foo {' and the
    closing '} ...;' line. The list is not modified.

    Return a list of lists of lines in which
      - even indexes hold "arbitrary" lines: anything other than struct
        member definitions, like an indented 'struct {' or '} sub_name;',
      - odd indexes hold "body" lines: lines that define struct members
        (possibly with comments).
    Aim: handle each sub-struct on its own, and if there already are ifdefs
    for little and big endian, keep just the little endian bit (without the
    preprocessor lines) so that big endian can be derived from it again.
    '''

    # The '#else' handling below overwrites entries; do that on a private
    # copy so that the caller's list stays untouched.
    lines = list(struct_body_lines)

    struct_body_parts = []
    arbitrary_part = []
    def_part = []

    def end_def():
        '''If there are recorded definitions, flush out the recorded parts
        (arbitrary_part, then def_part) and start new ones. In short, cut a
        section boundary.'''
        nonlocal arbitrary_part, def_part

        if def_part:
            struct_body_parts.append(arbitrary_part)
            arbitrary_part = []
            struct_body_parts.append(def_part)
            def_part = []

    j = 0
    while j < len(lines):
        line = lines[j]

        if (re_substruct_start.fullmatch(line)
            or re_substruct_end.fullmatch(line)):
            end_def()
            arbitrary_part.append(line)
            j += 1
            continue

        if re_big_endian_ifdef.fullmatch(line):
            end_def()
            # discard the big endian section
            j += 1
            while j < len(lines):
                line = lines[j]
                if re_endif.fullmatch(line):
                    end_def()
                    j += 1
                    break
                if re_little_endian_ifdef.fullmatch(line):
                    end_def()
                    # keep the start of the little endian section: no j += 1,
                    # the outer loop picks it up.
                    break
                if re_else.fullmatch(line):
                    # there's an '#else' after big-endian. Shim a
                    # little-endian header in just for the outer loop.
                    lines[j] = '#if OSMO_IS_LITTLE_ENDIAN\n'
                    break
                j += 1
            continue

        if re_little_endian_ifdef.fullmatch(line):
            end_def()
            # skip the preprocessor line itself, keep the definitions
            j += 1
            while j < len(lines):
                line = lines[j]
                if re_endif.fullmatch(line):
                    end_def()
                    j += 1
                    break
                if re_big_endian_ifdef.fullmatch(line):
                    end_def()
                    # keep the start of the big endian section: no j += 1,
                    # the outer loop picks it up and discards the section.
                    break
                if re_else.fullmatch(line):
                    # there's an '#else' after little-endian. Shim a
                    # big-endian header in just for the outer loop.
                    lines[j] = '#if OSMO_IS_BIG_ENDIAN\n'
                    break
                def_part.append(line)
                j += 1

            continue

        def_part.append(line)
        j += 1

    # flush the last section remaining that didn't see an explicit end
    end_def()
    # end_def() only flushes arbitrary_part if there was a def_part, so:
    if arbitrary_part:
        struct_body_parts.append(arbitrary_part)

    return struct_body_parts


def struct_body_to_big_endian(body_str):
    r'''Derive the big-endian variant of a struct body.

    body_str: a multi-line string containing the body of a struct, i.e.
    without sub-structs and without #if OSMO_IS_BIG_ENDIAN, like

      '\tconst char *foo;\n\tuint8_t moo:3, goo:2;\n\tuint8_t loo:3;\n\tvoid *baz;\n'

    Return None if there are no sub-byte integers, to indicate that no
    little/big endian split is required. Otherwise return a multi-line string
    (without trailing newline) of the big-endian version of this same struct
    body, where sub-byte ints are reversed at byte boundaries, and all other
    definitions are copied 1:1. Comments are not carried over.

    Raise Exception if the sub-byte members do not add up to whole bytes, if
    the integer type changes in the middle of a byte, or if a definition
    mixes sub-byte members with plain ones.'''

    # kick comments out of the code analysis. They will end up being stripped
    # from big-endian only.
    body_str = remove_c_comments(body_str)

    def_strs = ('%s;' % def_str for def_str in body_str.split(';') if def_str.strip())

    # classify defs as containing sub-byte members or not
    # defs = [ (True, 'uint8_t foo:3, bar:5;', 'uint8_t ', ['foo:3', 'bar:5']),
    #          (False, 'int baz;'),...]
    defs = []
    any_sub_byte_ints = False
    for one_def in def_strs:
        int_def = re_int_def.fullmatch(one_def)
        if not int_def:
            # not even a number, same for big and little endian
            defs.append((False, one_def))
            continue

        int_type = int_def.group(1)
        members_str = int_def.groups()[-1]
        members = [m.group(1) for m in re_int_members.finditer(members_str)]

        if any(':' in member for member in members):
            defs.append((True, one_def, int_type, members))
            any_sub_byte_ints = True
        else:
            defs.append((False, one_def))

    if not any_sub_byte_ints:
        return None

    # now the interesting part, go over the defs, and reverse the sub-byte ints
    # at byte boundaries.

    got_bits = 0
    byte_type = None
    members_within_a_byte = []
    big_endian_defs = []

    for classified_def in defs:
        if not classified_def[0]:
            if got_bits:
                raise Exception('sub-byte members do not add up to clean byte bounds: %r' % members_within_a_byte)
            big_endian_defs.append(classified_def[1])
            continue

        _, one_def, int_type, members = classified_def

        # a byte started by a previous definition must be continued with the
        # same integer type.
        if got_bits and byte_type.strip() != int_type.strip():
            raise Exception('mismatching type continuation after incomplete byte: %r %r to %r'
                            % (byte_type, members_within_a_byte, int_type))

        bytes_emitted = 0
        for member in members:
            if ':' not in member:
                raise Exception('cannot mix sub-byte and whole integers in one definition: %r' % one_def)
            member_name, bits_str = member.split(':')
            member_name = member_name.strip()
            bits = int(bits_str)
            member = '%s:%d' % (member_name, bits)
            members_within_a_byte.append(member)
            got_bits += bits

            if got_bits == 8:
                # reverse these. Further bytes completed within the same
                # definition go on their own line; a surplus empty line is
                # stripped again below.
                big_endian_defs.append('%s%s%s;' % ('\n' if bytes_emitted else '',
                                                    int_type,
                                                    ', '.join(reversed(members_within_a_byte))))
                bytes_emitted += 1
                members_within_a_byte = []
                got_bits = 0

            elif got_bits > 8:
                raise Exception('sub-byte int breaks clean byte bounds: %s -- %d + %d = %d bits'
                                % (member, got_bits - bits, bits, got_bits))

        # remembered for the type check of a byte continued by the next def
        byte_type = int_type

    if got_bits:
        # an incomplete byte at the very end would otherwise silently vanish
        # from the big endian variant.
        raise Exception('sub-byte members do not add up to clean byte bounds: %r' % members_within_a_byte)

    # strip empty lines, and clean the lines' trailing whitespace that we
    # might have taken in with the type names
    lines = [l.rstrip(' \t') for l in ''.join(big_endian_defs).split('\n') if l.strip()]
    return '\n'.join(lines)


def handle_struct_body(body_str):
    '''Return body_str wrapped in '#if OSMO_IS_LITTLE_ENDIAN', followed by a
    generated '#elif OSMO_IS_BIG_ENDIAN' variant and '#endif'. If
    struct_body_to_big_endian() yields nothing for body_str, return body_str
    unchanged.'''

    big_endian_body_str = struct_body_to_big_endian(body_str)

    if not big_endian_body_str:
        return body_str

    return ''.join((
        '#if OSMO_IS_LITTLE_ENDIAN\n',
        body_str,
        '#elif OSMO_IS_BIG_ENDIAN\n'
        '/* auto-generated from the little endian part above (libosmocore/contrib/struct_endianess.py) */\n',
        big_endian_body_str,
        '\n#endif\n',
    ))


def _check_file(f):
    '''Add or regenerate the big-endian variants of all packed structs in the
    file at path f, and make sure that osmocom/core/endian.h is included if
    the endianness macros are used. Files not ending in .h, .c or .cpp are
    ignored. The file is written back only if its content changed.

    Raise Exception if a struct cannot be handled, or if no place for the
    include could be found.'''

    if not f.endswith(('.h', '.c', '.cpp')):
        return

    with codecs.open(f, "r", "utf-8") as fd:
        file_lines = fd.readlines()

    # section the file into
    # [ ["no struct def"], ["struct {...};"], ["no struct def"], ... ]
    # so that structs always sit at the odd indexes.
    sections = []
    in_struct = False
    buf = []
    for line in file_lines:

        if not in_struct and re_struct_start.fullmatch(line):
            # flush whatever might still be in buf from before (even if
            # empty, to keep structs at odd indexes)
            sections.append(buf)
            # start an in_struct section
            buf = [line]
            in_struct = True
        elif in_struct and re_struct_end.fullmatch(line):
            # add this end to the in_struct section and then start a non-struct section
            buf.append(line)
            sections.append(buf)
            in_struct = False
            buf = []
        else:
            buf.append(line)
    # flush any leftovers in buf
    if buf:
        sections.append(buf)

    # examine each struct, i.e. every second item in 'sections'
    for i in range(1, len(sections), 2):
        struct = sections[i]

        # If the struct isn't packed, we need not bother.
        # The practical use of this: in some structs we have booleans in the
        # form of
        #     integer flag:1;
        # and these don't add up to bytes, and cause errors. So let's skip all
        # non-packed structs, then all of those are out of the picture.
        if 'packed' not in struct[-1]:
            continue

        try:
            # assume the 'struct foo {' is on the first line, the closing brace
            # '} __attribute...;' on the last, and the rest are individual
            # definitions split by ';'.
            struct_body_parts = section_struct_body(struct[1:-1])

            new_struct_body_parts = []
            for j, part_lines in enumerate(struct_body_parts):
                part = ''.join(part_lines)
                # odd indexes are member definitions, even ones are copied
                new_struct_body_parts.append(handle_struct_body(part) if j & 1 else part)

            sections[i] = [struct[0], ''.join(new_struct_body_parts), struct[-1]]
        except Exception as e:
            raise Exception('ERROR in struct %r' % struct[0]) from e

    # phew. result.
    result = ''.join(''.join(s) for s in sections)

    # see if osmocom/core/endian.h is needed and included.
    if (not f.endswith('endian.h')
        and 'OSMO_IS_LITTLE_ENDIAN' in result
        and '#include <osmocom/core/endian.h>' not in result):
        # add the include after the last 'osmocom/core' include
        last_include_start = result.rfind('#include <osmocom/core/')
        if last_include_start < 0:
            last_include_start = result.rfind('#include <osmocom/')
        if last_include_start < 0:
            last_include_start = result.rfind('#include')

        if last_include_start < 0:
            raise Exception('do not know where to include osmocom/core/endian.h in %r' % f)

        insert_at = result.find('\n', last_include_start)
        if insert_at < 0:
            # the include is the last line and lacks a newline
            insert_at = len(result)

        result = result[:insert_at] + '\n#include <osmocom/core/endian.h>' + result[insert_at:]

    # do not touch files that are already fine
    if result != ''.join(file_lines):
        with codecs.open(f, "w", "utf-8") as fd:
            fd.write(result)


def check_file(f):
    '''Run _check_file() on path f; on any failure raise an Exception naming
    the file, chained to the original error.'''
    try:
        _check_file(f)
    except Exception as e:
        raise Exception('ERROR IN FILE %r' % f) from e


def main(argv=None):
    '''Check all files and directory trees named in argv (default:
    sys.argv[1:]); without any, check the current directory.'''
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        args = ['.']

    for f in args:
        if os.path.isdir(f):
            for parent_path, _subdirs, files in os.walk(f):
                for ff in files:
                    check_file(os.path.join(parent_path, ff))
        else:
            check_file(f)


if __name__ == '__main__':
    main()

# vim: tabstop=4 shiftwidth=4 expandtab