Replace HexRaysCodeXplorer and HexraysInvertIf with HexRaysPyTools

This commit is contained in:
ecx86
2018-08-05 01:08:36 -04:00
parent 035b4ae117
commit 454ae02a36
22 changed files with 5639 additions and 0 deletions

View File

@@ -0,0 +1,339 @@
import logging
import idaapi
import idc
import Const
import Helper
import TemporaryStructure
import HexRaysPyTools.Api as Api
# Module-level logger named after this module
logger = logging.getLogger(__name__)
# If disabled then recursion will be triggered only for the variable passed as the first argument to a function
SETTING_SCAN_ALL_ARGUMENTS = True
# Global set which is populated during deep scanning and cleared after completion
scanned_functions = set()
# NOTE(review): not referenced in the visible part of this module; presumably collects
# scan-tree records for debugging purposes -- confirm against the code that fills it
debug_scan_tree = []
class ScannedObject(object):
    """
    Base record describing one place where a scanned object was encountered.

    Instances are hashable and compare equal when they share the same
    function address, name and expression address. Subclasses implement
    `apply_type` for their particular kind of object.
    """

    def __init__(self, name, expression_address, origin, applicable=True):
        """
        :param name: Object name
        :param expression_address: ea_t
        :param origin: which offset had structure at scan moment
        :param applicable: whether to apply type after creating structure
        """
        self.name = name
        self.expression_address = expression_address
        # Function start address is looked up from the expression address
        self.func_ea = idc.get_func_attr(self.expression_address, idc.FUNCATTR_START)
        self.origin = origin
        self._applicable = applicable

    @property
    def function_name(self):
        """ Short name of the function stored in `func_ea` """
        return idaapi.get_short_name(self.func_ea)

    def apply_type(self, tinfo):
        """
        Finally apply Class'es tinfo to this variable

        :param tinfo: type to apply
        :raises NotImplementedError: always; subclasses provide the implementation
        """
        # `NotImplemented` is a comparison sentinel, not an exception class;
        # raising it fails with TypeError instead of signalling "abstract method".
        raise NotImplementedError

    @staticmethod
    def create(obj, expression_address, origin, applicable):
        """
        Creates suitable instance of ScannedObject depending on obj

        :param obj: scan object; its `id` selects the concrete class
        :param expression_address: ea_t
        :param origin: which offset had structure at scan moment
        :param applicable: whether to apply type after creating structure
        :raises AssertionError: when `obj.id` is not one of the handled kinds
        """
        if obj.id == Api.SO_GLOBAL_OBJECT:
            return ScannedGlobalObject(obj.ea, obj.name, expression_address, origin, applicable)
        elif obj.id == Api.SO_LOCAL_VARIABLE:
            return ScannedVariableObject(obj.lvar, obj.name, expression_address, origin, applicable)
        elif obj.id in (Api.SO_STRUCT_REFERENCE, Api.SO_STRUCT_POINTER):
            # The constructor takes (struct_name, struct_offset, name, ...). Leaving
            # `name` out shifted every following positional argument by one slot.
            return ScannedStructureMemberObject(
                obj.struct_name, obj.offset, obj.name, expression_address, origin, applicable
            )
        else:
            raise AssertionError

    def to_list(self):
        """ Creates list that is acceptable to MyChoose2 viewer """
        return [
            "0x{0:04X}".format(self.origin),
            self.function_name,
            self.name,
            Helper.to_hex(self.expression_address)
        ]

    def __eq__(self, other):
        # Let Python fall back to its default handling for unrelated types
        # instead of failing with AttributeError on a missing attribute.
        if not isinstance(other, ScannedObject):
            return NotImplemented
        return self.func_ea == other.func_ea and self.name == other.name and \
            self.expression_address == other.expression_address

    def __ne__(self, other):
        # Python 2 does not derive `!=` from `__eq__`, so define it explicitly.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Must stay in sync with the fields compared in __eq__
        return hash((self.func_ea, self.name, self.expression_address))

    def __repr__(self):
        return "{} : {}".format(self.name, Helper.to_hex(self.expression_address))
class ScannedGlobalObject(ScannedObject):
    """ Scan record for an object identified by an address (`obj_ea`) """

    def __init__(self, obj_ea, name, expression_address, origin, applicable=True):
        """
        :param obj_ea: address of the object the type will be applied to
        :param name: Object name
        :param expression_address: ea_t
        :param origin: which offset had structure at scan moment
        :param applicable: whether to apply type after creating structure
        """
        super(ScannedGlobalObject, self).__init__(name, expression_address, origin, applicable)
        self.__obj_ea = obj_ea

    def apply_type(self, tinfo):
        """ Set `tinfo` on the stored address unless the record is not applicable """
        if not self._applicable:
            return
        idaapi.set_tinfo2(self.__obj_ea, tinfo)
class ScannedVariableObject(ScannedObject):
    """ Scan record for a local variable of a decompiled function """

    def __init__(self, lvar, name, expression_address, origin, applicable=True):
        """
        :param lvar: local variable; only its `location` and `defea` are kept
        :param name: Object name
        :param expression_address: ea_t
        :param origin: which offset had structure at scan moment
        :param applicable: whether to apply type after creating structure
        """
        super(ScannedVariableObject, self).__init__(name, expression_address, origin, applicable)
        # Keep a locator instead of the lvar itself so the variable can be
        # found again in a freshly opened pseudocode view.
        self.__lvar = idaapi.lvar_locator_t(lvar.location, lvar.defea)

    def apply_type(self, tinfo):
        """
        Open the pseudocode of the owning function and set `tinfo` on the saved variable.

        Does nothing when the record is not applicable or the view cannot be opened;
        logs a warning when the variable is no longer found.
        """
        if not self._applicable:
            return

        hx_view = idaapi.open_pseudocode(self.func_ea, -1)
        if hx_view:
            logger.debug("Applying tinfo to variable {0} in function {1}".format(self.name, self.function_name))
            # Find the lvar of the new view that matches the saved locator.
            # A real list is required: the result is truth-tested and indexed,
            # which does not work with a lazy `filter` object.
            matches = [lvar for lvar in hx_view.cfunc.get_lvars() if lvar == self.__lvar]
            if matches:
                logger.debug("Successful")
                hx_view.set_lvar_type(matches[0], tinfo)
            else:
                logger.warning("Failed to find previously scanned local variable {} from {}".format(
                    self.name, Helper.to_hex(self.expression_address)))
class ScannedStructureMemberObject(ScannedObject):
    """ Scan record for a field of a structure, identified by structure name and offset """

    def __init__(self, struct_name, struct_offset, name, expression_address, origin, applicable=True):
        """
        :param struct_name: name of the structure owning the field
        :param struct_offset: offset of the field inside that structure
        :param name: Object name
        :param expression_address: ea_t
        :param origin: which offset had structure at scan moment
        :param applicable: whether to apply type after creating structure
        """
        super(ScannedStructureMemberObject, self).__init__(name, expression_address, origin, applicable)
        self.__struct_name = struct_name
        self.__struct_offset = struct_offset

    def apply_type(self, tinfo):
        """ Not implemented for structure fields: only logs a warning when applicable """
        if not self._applicable:
            return
        address = Helper.to_hex(self.expression_address)
        logger.warn("Changing type of structure field is not yet implemented. Address - {}".format(address))
class SearchVisitor(Api.ObjectVisitor):
    """
    Visitor that turns every visited use of the scanned object into a member of
    a temporary structure.

    For each expression handed to `_manipulate` the chain of parent expressions
    is inspected to work out the offset being accessed and the type of the
    access; the resulting member is added to `temporary_structure`.
    """

    def __init__(self, cfunc, origin, obj, temporary_structure):
        """
        :param cfunc: decompiled function to scan
        :param origin: which offset had structure at scan moment
        :param obj: object whose uses are scanned
        :param temporary_structure: receiver of created members (via `add_row`)
        """
        super(SearchVisitor, self).__init__(cfunc, obj, None, True)
        self.__origin = origin
        self.__temporary_structure = temporary_structure

    def _manipulate(self, cexpr, obj):
        """ Create a member from one use of `obj` (expression `cexpr`) and store it """
        super(SearchVisitor, self)._manipulate(cexpr, obj)

        if obj.tinfo and not Helper.is_legal_type(obj.tinfo):
            # The variable name is interpolated; previously the literal text
            # "obj.name" was logged instead of the actual name.
            logger.warning("Variable {} has weird type at {}".format(
                obj.name, Helper.to_hex(self._find_asm_address(cexpr))))
            return

        if cexpr.type.is_ptr():
            member = self.__extract_member_from_pointer(cexpr, obj)
        else:
            member = self.__extract_member_from_xword(cexpr, obj)

        if member:
            logger.debug("\tCreating member with type {}, {}, offset - {}".format(
                member.type_name, member.scanned_variables, member.offset))
            self.__temporary_structure.add_row(member)

    def _get_member(self, offset, cexpr, obj, tinfo=None, obj_ea=None):
        """
        Build the member object for an access at `offset`.

        :param offset: offset of the access; must not be negative
        :param cexpr: expression the access was derived from
        :param obj: scanned object
        :param tinfo: type of the access, if known
        :param obj_ea: address of the object being assigned, if any
        :return: VirtualTable, Member or VoidMember from TemporaryStructure
        :raises AssertionError: when `offset` is negative
        """
        if offset < 0:
            logger.error("Considered to be imposible: offset - {}, obj - {}".format(
                offset, Helper.to_hex(self._find_asm_address(cexpr))))
            raise AssertionError

        applicable = not self.crippled
        cexpr_ea = self._find_asm_address(cexpr)
        scan_obj = ScannedObject.create(obj, cexpr_ea, self.__origin, applicable)

        if obj_ea:
            if TemporaryStructure.VirtualTable.check_address(obj_ea):
                return TemporaryStructure.VirtualTable(offset, obj_ea, scan_obj, self.__origin)
            if Helper.is_code_ea(obj_ea):
                # Address of code is stored: use a pointer to the function type
                # when it decompiles, a dummy function type otherwise.
                cfunc = Api.decompile_function(obj_ea)
                if cfunc:
                    tinfo = cfunc.type
                    tinfo.create_ptr(tinfo)
                else:
                    tinfo = Const.DUMMY_FUNC
                return TemporaryStructure.Member(offset, tinfo, scan_obj, self.__origin)

        if not tinfo or tinfo.equals_to(Const.VOID_TINFO) or tinfo.equals_to(Const.CONST_VOID_TINFO):
            return TemporaryStructure.VoidMember(offset, scan_obj, self.__origin)

        if tinfo.equals_to(Const.CHAR_TINFO):
            return TemporaryStructure.VoidMember(offset, scan_obj, self.__origin, char=True)

        # Strip constness so that the created member is writable
        if tinfo.equals_to(Const.CONST_PCHAR_TINFO):
            tinfo = Const.PCHAR_TINFO
        elif tinfo.equals_to(Const.CONST_PVOID_TINFO):
            tinfo = Const.PVOID_TINFO
        else:
            tinfo.clr_const()
        return TemporaryStructure.Member(offset, tinfo, scan_obj, self.__origin)

    def _parse_call(self, call_cexpr, arg_cexpr, offset):
        """
        Type of the member passed as argument `arg_cexpr` of call `call_cexpr`.

        `offset` is unused here; it is part of the hook signature.
        """
        _, tinfo = Helper.get_func_argument_info(call_cexpr, arg_cexpr)
        if tinfo:
            return self.__deref_tinfo(tinfo)
        # TODO: Find example with UTF-16 strings
        return Const.CHAR_TINFO

    def _parse_left_assignee(self, cexpr, offset):
        """ Hook receiving the left side of `other_obj = (TYPE)(var + offset)`; no-op here """
        pass

    def __parent_chain(self):
        """
        Return (parents, parents_type): parent expressions from the innermost
        outwards, skipping the first entry of `self.parents`, together with
        their ctype names. Real lists are returned because callers index into
        them and delete leading entries.
        """
        items = list(self.parents)[:0:-1]
        parents = [item.cexpr for item in items]
        parents_type = [idaapi.get_ctype_name(expr.op) for expr in parents]
        return parents, parents_type

    def __extract_member_from_pointer(self, cexpr, obj):
        """ Handle a use of `obj` whose expression has pointer type """
        parents, parents_type = self.__parent_chain()

        logger.debug("Parsing expression {}. Parents - {}".format(obj.name, parents_type))

        # Extracting offset and removing expression parents making this offset
        if parents_type[0] in ('idx', 'add'):
            # `obj[idx]' or `(TYPE *) + x'
            if parents[0].y.op != idaapi.cot_num:
                # There's no way to handle with dynamic offset
                return
            offset = parents[0].y.numval() * cexpr.type.get_ptrarr_objsize()
            cexpr = self.parent_expr()
            if parents_type[0] == 'add':
                del parents_type[0]
                del parents[0]
        elif parents_type[0:2] == ['cast', 'add']:
            # (TYPE *)obj + offset or (TYPE)obj + offset
            if parents[1].y.op != idaapi.cot_num:
                return
            if parents[0].type.is_ptr():
                size = parents[0].type.get_ptrarr_objsize()
            else:
                size = 1
            offset = parents[1].theother(parents[0]).numval() * size
            cexpr = parents[1]
            del parents_type[0:2]
            del parents[0:2]
        else:
            offset = 0

        return self.__extract_member(cexpr, obj, offset, parents, parents_type)

    def __extract_member_from_xword(self, cexpr, obj):
        """ Handle a use of `obj` whose expression does not have pointer type """
        parents, parents_type = self.__parent_chain()

        logger.debug("Parsing expression {}. Parents - {}".format(obj.name, parents_type))

        if parents_type[0] == 'add':
            if parents[0].theother(cexpr).op != idaapi.cot_num:
                # Dynamic offset cannot be handled
                return
            offset = parents[0].theother(cexpr).numval()
            cexpr = self.parent_expr()
            del parents_type[0]
            del parents[0]
        else:
            offset = 0

        return self.__extract_member(cexpr, obj, offset, parents, parents_type)

    def __extract_member(self, cexpr, obj, offset, parents, parents_type):
        """
        Decide the member type from the remaining parent chain and create the member.

        `parents` / `parents_type` are consumed (leading entries deleted) as
        casts and dereferences are peeled off.
        """
        if parents_type[0] == 'cast':
            default_tinfo = parents[0].type
            cexpr = parents[0]
            del parents_type[0]
            del parents[0]
        else:
            default_tinfo = Const.PX_WORD_TINFO

        if parents_type[0] in ('idx', 'ptr'):
            if parents_type[1] == 'cast':
                default_tinfo = parents[1].type
                cexpr = parents[0]
                del parents_type[0]
                del parents[0]
            else:
                default_tinfo = self.__deref_tinfo(default_tinfo)

            if parents_type[1] == 'asg':
                if parents[1].x == parents[0]:
                    # *(TYPE *)(var + x) = ???
                    obj_ea = self.__extract_obj_ea(parents[1].y)
                    return self._get_member(offset, cexpr, obj, default_tinfo, obj_ea)
                return self._get_member(offset, cexpr, obj, parents[1].x.type)
            elif parents_type[1] == 'call':
                if parents[1].x == parents[0]:
                    # ((type (__some_call *)(..., ..., ...)var[idx])(..., ..., ...)
                    # ((type (__some_call *)(..., ..., ...)*(TYPE *)(var + x))(..., ..., ...)
                    return self._get_member(offset, cexpr, obj, parents[0].type)
                _, tinfo = Helper.get_func_argument_info(parents[1], parents[0])
                if tinfo is None:
                    tinfo = Const.PCHAR_TINFO
                return self._get_member(offset, cexpr, obj, tinfo)
            return self._get_member(offset, cexpr, obj, default_tinfo)

        elif parents_type[0] == 'call':
            # call(..., (TYPE)(var + x), ...)
            tinfo = self._parse_call(parents[0], cexpr, offset)
            return self._get_member(offset, cexpr, obj, tinfo)

        elif parents_type[0] == 'asg':
            if parents[0].y == cexpr:
                # other_obj = (TYPE) (var + offset)
                # parents[0] is the assignment itself, so its left side is
                # parents[0].x (parents[1] is whatever encloses the assignment).
                self._parse_left_assignee(parents[0].x, offset)

        # NOTE(review): treated as the fallback for every parent shape not
        # handled above -- confirm the intended nesting of this return.
        return self._get_member(offset, cexpr, obj, self.__deref_tinfo(default_tinfo))

    @staticmethod
    def __extract_obj_ea(cexpr):
        """ Address of the object referenced by `cexpr` (`obj` or `&obj`), or None """
        if cexpr.op == idaapi.cot_ref:
            cexpr = cexpr.x
        if cexpr.op == idaapi.cot_obj:
            if cexpr.obj_ea != idaapi.BADADDR:
                return cexpr.obj_ea

    @staticmethod
    def __deref_tinfo(tinfo):
        """
        Type obtained by dereferencing `tinfo`.

        Non-pointer types are returned unchanged. Pointers to one-byte objects
        give CHAR_TINFO for (const) char pointers and None otherwise.
        """
        if tinfo.is_ptr():
            if tinfo.get_ptrarr_objsize() == 1:
                if tinfo.equals_to(Const.PCHAR_TINFO) or tinfo.equals_to(Const.CONST_PCHAR_TINFO):
                    return Const.CHAR_TINFO
                return None     # Turns into VoidMember
            return tinfo.get_pointed_object()
        return tinfo
class NewShallowSearchVisitor(SearchVisitor, Api.ObjectDownwardsVisitor):
    """ SearchVisitor combined with Api.ObjectDownwardsVisitor """

    def __init__(self, cfunc, origin, obj, temporary_structure):
        """ Forward all arguments unchanged to the next initializer in the MRO """
        super(NewShallowSearchVisitor, self).__init__(
            cfunc,
            origin,
            obj,
            temporary_structure
        )
class NewDeepSearchVisitor(SearchVisitor, Api.RecursiveObjectDownwardsVisitor):
    """ SearchVisitor combined with Api.RecursiveObjectDownwardsVisitor """

    def __init__(self, cfunc, origin, obj, temporary_structure):
        """ Forward all arguments unchanged to the next initializer in the MRO """
        super(NewDeepSearchVisitor, self).__init__(
            cfunc,
            origin,
            obj,
            temporary_structure
        )
class DeepReturnVisitor(NewDeepSearchVisitor):
    """
    Deep search visitor that scans the functions calling `cfunc` rather than
    `cfunc` itself.

    Caller addresses come from `Helper.get_funcs_calling_address`; each call of
    the internal scanner preparation moves on to the next caller that can be
    decompiled.
    """

    def __init__(self, cfunc, origin, obj, temporary_structure):
        """
        :param cfunc: decompiled function whose callers will be scanned
        :param origin: which offset had structure at scan moment
        :param obj: object to look for in the callers
        :param temporary_structure: receiver of created members
        """
        super(DeepReturnVisitor, self).__init__(cfunc, origin, obj, temporary_structure)
        self.__callers_ea = Helper.get_funcs_calling_address(cfunc.entry_ea)
        self.__call_obj = obj
        # One shared generator: building a new one on every request would hand
        # out the same first caller again and again.
        self.__pending_callers = self.__iter_callers()

    def _start(self):
        """
        Register every caller in the scan tree and prepare the first scan.

        :raises AssertionError: when no caller could be decompiled
        """
        for ea in self.__callers_ea:
            self._add_scan_tree_info(ea, -1)
        # The call has a required side effect, so it must not live inside an
        # `assert` statement (those are stripped when Python runs with -O).
        if not self.__prepare_scanner():
            raise AssertionError("No decompilable caller found")

    def _finish(self):
        """ Scan the next caller, if there is one """
        if self.__prepare_scanner():
            self._recursive_process()

    def __prepare_scanner(self):
        """ Point the scanner at the next decompiled caller; False when none is left """
        # Yielded values are always truthy, so None safely marks exhaustion
        cfunc = next(self.__pending_callers, None)
        if cfunc is None:
            return False

        self.prepare_new_scan(cfunc, -1, self.__call_obj)
        return True

    def __iter_callers(self):
        """ Lazily yield decompiled callers, skipping those that fail to decompile """
        for ea in self.__callers_ea:
            cfunc = Api.decompile_function(ea)
            if cfunc:
                yield cfunc