340 lines
14 KiB
Python
340 lines
14 KiB
Python
import logging
|
|
import idaapi
|
|
import idc
|
|
import Const
|
|
import Helper
|
|
import TemporaryStructure
|
|
import HexRaysPyTools.Api as Api
|
|
|
|
logger = logging.getLogger(__name__)

# When this flag is off, recursion is triggered only for a variable that is
# passed as the first argument to a function.
SETTING_SCAN_ALL_ARGUMENTS = True

# Module-wide set: filled in during deep scanning, emptied once it completes.
scanned_functions = set()
debug_scan_tree = []
|
|
|
|
|
|
class ScannedObject(object):
    """
    Base record describing one expression in which a scanned object was used.

    Subclasses are expected to override `apply_type`; use `create` to get the
    subclass that matches a given Api object.
    """

    def __init__(self, name, expression_address, origin, applicable=True):
        """
        :param name: Object name
        :param expression_address: ea_t
        :param origin: which offset had structure at scan moment
        :param applicable: whether to apply type after creating structure
        """
        self.name = name
        self.expression_address = expression_address
        # Start address of the function that contains the expression
        self.func_ea = idc.get_func_attr(self.expression_address, idc.FUNCATTR_START)
        self.origin = origin
        self._applicable = applicable

    @property
    def function_name(self):
        """ Short name of the function at `func_ea`, as returned by idaapi.get_short_name """
        return idaapi.get_short_name(self.func_ea)

    def apply_type(self, tinfo):
        """ Finally apply Class'es tinfo to this variable """
        # `NotImplemented` is the comparison sentinel, not an exception class;
        # raising it produces a TypeError instead of the intended error.
        raise NotImplementedError

    @staticmethod
    def create(obj, expression_address, origin, applicable):
        """
        Creates suitable instance of ScannedObject depending on obj

        :param obj: object whose `id` is compared against the Api.SO_* constants
        :param expression_address: ea_t
        :param origin: which offset had structure at scan moment
        :param applicable: whether to apply type after creating structure
        :raises AssertionError: if `obj.id` is none of the handled kinds
        """
        if obj.id == Api.SO_GLOBAL_OBJECT:
            return ScannedGlobalObject(obj.ea, obj.name, expression_address, origin, applicable)
        if obj.id == Api.SO_LOCAL_VARIABLE:
            return ScannedVariableObject(obj.lvar, obj.name, expression_address, origin, applicable)
        if obj.id in (Api.SO_STRUCT_REFERENCE, Api.SO_STRUCT_POINTER):
            # The constructor signature is (struct_name, struct_offset, name, ...).
            # Without `obj.name` every following positional argument was shifted by one.
            return ScannedStructureMemberObject(
                obj.struct_name, obj.offset, obj.name, expression_address, origin, applicable)
        raise AssertionError

    def to_list(self):
        """ Creates list that is acceptable to MyChoose2 viewer """
        return [
            "0x{0:04X}".format(self.origin),
            self.function_name,
            self.name,
            Helper.to_hex(self.expression_address)
        ]

    def __eq__(self, other):
        """ Two records are equal when function, name and expression address all match """
        if not isinstance(other, ScannedObject):
            # Let Python try the reflected comparison instead of failing on a missing attribute
            return NotImplemented
        return self.func_ea == other.func_ea and self.name == other.name and \
            self.expression_address == other.expression_address

    def __ne__(self, other):
        """ Inverse of `__eq__`; Python 2 does not derive it automatically """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        """ Hash over the same fields that `__eq__` compares """
        return hash((self.func_ea, self.name, self.expression_address))

    def __repr__(self):
        return "{} : {}".format(self.name, Helper.to_hex(self.expression_address))
|
|
|
|
|
|
class ScannedGlobalObject(ScannedObject):
    """ Scanned object that additionally remembers the address of the object itself """

    def __init__(self, obj_ea, name, expression_address, origin, applicable=True):
        """
        :param obj_ea: address that `apply_type` passes to idaapi.set_tinfo2
        :param name: Object name
        :param expression_address: ea_t
        :param origin: which offset had structure at scan moment
        :param applicable: whether to apply type after creating structure
        """
        super(ScannedGlobalObject, self).__init__(name, expression_address, origin, applicable)
        self.__obj_ea = obj_ea

    def apply_type(self, tinfo):
        """ Sets tinfo at the stored address; does nothing when the object is not applicable """
        if not self._applicable:
            return
        idaapi.set_tinfo2(self.__obj_ea, tinfo)
|
|
|
|
|
|
class ScannedVariableObject(ScannedObject):
    """ Scanned object backed by a local variable of a decompiled function """

    def __init__(self, lvar, name, expression_address, origin, applicable=True):
        """
        :param lvar: local variable; only its `location` and `defea` are kept
        :param name: Object name
        :param expression_address: ea_t
        :param origin: which offset had structure at scan moment
        :param applicable: whether to apply type after creating structure
        """
        super(ScannedVariableObject, self).__init__(name, expression_address, origin, applicable)
        # Keep a locator built from the lvar's location/defea rather than the lvar itself
        self.__lvar = idaapi.lvar_locator_t(lvar.location, lvar.defea)

    def apply_type(self, tinfo):
        """
        Opens pseudocode of the owning function and sets tinfo on the saved variable.

        Does nothing when the object is not applicable or the pseudocode view
        could not be opened; logs a warning when the variable is not found.
        """
        if not self._applicable:
            return

        hx_view = idaapi.open_pseudocode(self.func_ea, -1)
        if not hx_view:
            return

        logger.debug("Applying tinfo to variable {0} in function {1}".format(self.name, self.function_name))
        # Find the lvars of the freshly opened view that compare equal to the saved locator.
        # A real list is required here: the result is truth-tested and indexed,
        # which a lazy `filter` object does not support.
        matches = [lvar for lvar in hx_view.cfunc.get_lvars() if lvar == self.__lvar]
        if matches:
            logger.debug("Successful")
            hx_view.set_lvar_type(matches[0], tinfo)
        else:
            logger.warning("Failed to find previously scanned local variable {} from {}".format(
                self.name, Helper.to_hex(self.expression_address)))
|
|
|
|
|
|
class ScannedStructureMemberObject(ScannedObject):
    """ Scanned object that refers to a field of an existing structure """

    def __init__(self, struct_name, struct_offset, name, expression_address, origin, applicable=True):
        """
        :param struct_name: name of the structure the field belongs to
        :param struct_offset: offset of the field
        :param name: Object name
        :param expression_address: ea_t
        :param origin: which offset had structure at scan moment
        :param applicable: whether to apply type after creating structure
        """
        super(ScannedStructureMemberObject, self).__init__(name, expression_address, origin, applicable)
        self.__struct_name = struct_name
        self.__struct_offset = struct_offset

    def apply_type(self, tinfo):
        """ Not implemented for structure fields: only logs a warning when applicable """
        if self._applicable:
            # `Logger.warn` is a deprecated alias of `Logger.warning`
            logger.warning("Changing type of structure field is not yet implemented. Address - {}".format(
                Helper.to_hex(self.expression_address)))
|
|
|
|
|
|
class SearchVisitor(Api.ObjectVisitor):
    """
    Visitor that converts usages of the tracked object into members of a
    temporary structure.

    For every expression handed to `_manipulate` the constant offset and the
    accessed type are derived from the expression's parents, and the resulting
    member is added to `temporary_structure`.
    """

    def __init__(self, cfunc, origin, obj, temporary_structure):
        """
        :param cfunc: decompiled function, forwarded to the base visitor
        :param origin: which offset had structure at scan moment
        :param obj: tracked object, forwarded to the base visitor
        :param temporary_structure: receives found members through `add_row`
        """
        super(SearchVisitor, self).__init__(cfunc, obj, None, True)
        self.__origin = origin
        self.__temporary_structure = temporary_structure

    def _manipulate(self, cexpr, obj):
        """ Extracts a member from one usage of obj and adds it to the temporary structure """
        super(SearchVisitor, self)._manipulate(cexpr, obj)

        if obj.tinfo and not Helper.is_legal_type(obj.tinfo):
            # The name used to be embedded literally as "obj.name" instead of being formatted in
            logger.warning("Variable {} has weird type at {}".format(
                obj.name, Helper.to_hex(self._find_asm_address(cexpr))))
            return
        if cexpr.type.is_ptr():
            member = self.__extract_member_from_pointer(cexpr, obj)
        else:
            member = self.__extract_member_from_xword(cexpr, obj)
        if member:
            logger.debug("\tCreating member with type {}, {}, offset - {}".format(
                member.type_name, member.scanned_variables, member.offset))
            self.__temporary_structure.add_row(member)

    def _get_member(self, offset, cexpr, obj, tinfo=None, obj_ea=None):
        """
        Builds the TemporaryStructure member that corresponds to one access.

        :param offset: non-negative offset of the access
        :param cexpr: expression used to find the assembler address of the access
        :param obj: object that was accessed
        :param tinfo: type of the access; None or void produce a VoidMember
        :param obj_ea: address of the object stored at this offset, if known
        :return: VirtualTable, VoidMember or Member from TemporaryStructure
        :raises AssertionError: if offset is negative
        """
        if offset < 0:
            logger.error("Considered to be imposible: offset - {}, obj - {}".format(
                offset, Helper.to_hex(self._find_asm_address(cexpr))))
            raise AssertionError

        applicable = not self.crippled
        cexpr_ea = self._find_asm_address(cexpr)
        scan_obj = ScannedObject.create(obj, cexpr_ea, self.__origin, applicable)
        if obj_ea:
            if TemporaryStructure.VirtualTable.check_address(obj_ea):
                return TemporaryStructure.VirtualTable(offset, obj_ea, scan_obj, self.__origin)
            if Helper.is_code_ea(obj_ea):
                # A code address is stored: the member becomes a pointer to that function
                cfunc = Api.decompile_function(obj_ea)
                if cfunc:
                    tinfo = cfunc.type
                    tinfo.create_ptr(tinfo)
                else:
                    tinfo = Const.DUMMY_FUNC
                return TemporaryStructure.Member(offset, tinfo, scan_obj, self.__origin)

        if not tinfo or tinfo.equals_to(Const.VOID_TINFO) or tinfo.equals_to(Const.CONST_VOID_TINFO):
            return TemporaryStructure.VoidMember(offset, scan_obj, self.__origin)

        if tinfo.equals_to(Const.CHAR_TINFO):
            return TemporaryStructure.VoidMember(offset, scan_obj, self.__origin, char=True)

        # Strip constness so the member gets a non-const type
        if tinfo.equals_to(Const.CONST_PCHAR_TINFO):
            tinfo = Const.PCHAR_TINFO
        elif tinfo.equals_to(Const.CONST_PVOID_TINFO):
            tinfo = Const.PVOID_TINFO
        else:
            tinfo.clr_const()
        return TemporaryStructure.Member(offset, tinfo, scan_obj, self.__origin)

    def _parse_call(self, call_cexpr, arg_cexpr, offset):
        """
        Returns the dereferenced type of the call argument arg_cexpr, or
        Const.CHAR_TINFO when the argument type is unknown.
        """
        _, tinfo = Helper.get_func_argument_info(call_cexpr, arg_cexpr)
        if tinfo:
            return self.__deref_tinfo(tinfo)
        # TODO: Find example with UTF-16 strings
        return Const.CHAR_TINFO

    def _parse_left_assignee(self, cexpr, offset):
        """ Hook receiving the left side of `other_obj = (TYPE)(var + offset)`; no-op here """
        pass

    def __parent_chain(self):
        """
        Returns `(parents, parents_type)`: parent expressions taken from
        `self.parents` in reverse order with its first element dropped, and
        their ctype names. Both are real lists because callers index, slice
        and delete from them.
        """
        ancestors = list(self.parents)[:0:-1]
        parents = [item.cexpr for item in ancestors]
        parents_type = [idaapi.get_ctype_name(expr.op) for expr in parents]
        return parents, parents_type

    def __extract_member_from_pointer(self, cexpr, obj):
        """ Handles usages where the expression has pointer type; returns a member or None """
        parents, parents_type = self.__parent_chain()

        logger.debug("Parsing expression {}. Parents - {}".format(obj.name, parents_type))

        # Extracting offset and removing expression parents making this offset
        if parents_type[0] in ('idx', 'add'):
            # `obj[idx]' or `(TYPE *) + x'
            if parents[0].y.op != idaapi.cot_num:
                # There's no way to handle with dynamic offset
                return
            offset = parents[0].y.numval() * cexpr.type.get_ptrarr_objsize()
            cexpr = self.parent_expr()
            if parents_type[0] == 'add':
                del parents_type[0]
                del parents[0]
        elif parents_type[0:2] == ['cast', 'add']:
            # (TYPE *)obj + offset or (TYPE)obj + offset
            if parents[1].y.op != idaapi.cot_num:
                return
            if parents[0].type.is_ptr():
                size = parents[0].type.get_ptrarr_objsize()
            else:
                size = 1
            offset = parents[1].theother(parents[0]).numval() * size
            cexpr = parents[1]
            del parents_type[0:2]
            del parents[0:2]
        else:
            offset = 0

        return self.__extract_member(cexpr, obj, offset, parents, parents_type)

    def __extract_member_from_xword(self, cexpr, obj):
        """ Handles usages where the expression is not a pointer; returns a member or None """
        parents, parents_type = self.__parent_chain()

        logger.debug("Parsing expression {}. Parents - {}".format(obj.name, parents_type))

        if parents_type[0] == 'add':
            if parents[0].theother(cexpr).op != idaapi.cot_num:
                # Offset is not a constant number
                return
            offset = parents[0].theother(cexpr).numval()
            cexpr = self.parent_expr()
            del parents_type[0]
            del parents[0]
        else:
            offset = 0

        return self.__extract_member(cexpr, obj, offset, parents, parents_type)

    def __extract_member(self, cexpr, obj, offset, parents, parents_type):
        """
        Determines the member type from the remaining parents once the offset is known.

        NOTE(review): `parents_type[0]` / `parents_type[1]` are indexed without
        length checks, so a too-short parent chain raises IndexError.

        :return: member built by `_get_member`, or None when no pattern matches
        """
        if parents_type[0] == 'cast':
            default_tinfo = parents[0].type
            cexpr = parents[0]
            del parents_type[0]
            del parents[0]
        else:
            default_tinfo = Const.PX_WORD_TINFO

        if parents_type[0] in ('idx', 'ptr'):
            if parents_type[1] == 'cast':
                default_tinfo = parents[1].type
                cexpr = parents[0]
                del parents_type[0]
                del parents[0]
            else:
                default_tinfo = self.__deref_tinfo(default_tinfo)

            if parents_type[1] == 'asg':
                if parents[1].x == parents[0]:
                    # *(TYPE *)(var + x) = ???
                    obj_ea = self.__extract_obj_ea(parents[1].y)
                    return self._get_member(offset, cexpr, obj, default_tinfo, obj_ea)
                return self._get_member(offset, cexpr, obj, parents[1].x.type)
            elif parents_type[1] == 'call':
                if parents[1].x == parents[0]:
                    # ((type (__some_call *)(..., ..., ...)var[idx])(..., ..., ...)
                    # ((type (__some_call *)(..., ..., ...)*(TYPE *)(var + x))(..., ..., ...)
                    return self._get_member(offset, cexpr, obj, parents[0].type)
                _, tinfo = Helper.get_func_argument_info(parents[1], parents[0])
                if tinfo is None:
                    tinfo = Const.PCHAR_TINFO
                return self._get_member(offset, cexpr, obj, tinfo)
            return self._get_member(offset, cexpr, obj, default_tinfo)

        elif parents_type[0] == 'call':
            # call(..., (TYPE)(var + x), ...)
            tinfo = self._parse_call(parents[0], cexpr, offset)
            return self._get_member(offset, cexpr, obj, tinfo)

        elif parents_type[0] == 'asg':
            if parents[0].y == cexpr:
                # other_obj = (TYPE) (var + offset)
                # parents[0] is the assignment itself, so its `x` is the left
                # assignee (previously the parent of the assignment was used).
                self._parse_left_assignee(parents[0].x, offset)
            return self._get_member(offset, cexpr, obj, self.__deref_tinfo(default_tinfo))

    @staticmethod
    def __extract_obj_ea(cexpr):
        """ Returns address of the object referenced by `obj` / `&obj`, or None """
        if cexpr.op == idaapi.cot_ref:
            cexpr = cexpr.x
        if cexpr.op == idaapi.cot_obj:
            if cexpr.obj_ea != idaapi.BADADDR:
                return cexpr.obj_ea

    @staticmethod
    def __deref_tinfo(tinfo):
        """
        Returns the pointed-to type for pointers and tinfo itself otherwise.
        Pointers to 1-byte objects give Const.CHAR_TINFO for (const) char
        pointers and None for everything else.
        """
        if tinfo.is_ptr():
            if tinfo.get_ptrarr_objsize() == 1:
                if tinfo.equals_to(Const.PCHAR_TINFO) or tinfo.equals_to(Const.CONST_PCHAR_TINFO):
                    return Const.CHAR_TINFO
                return None  # Turns into VoidMember
            return tinfo.get_pointed_object()
        return tinfo
|
|
|
|
|
|
class NewShallowSearchVisitor(SearchVisitor, Api.ObjectDownwardsVisitor):
    """ SearchVisitor combined with Api.ObjectDownwardsVisitor """

    def __init__(self, cfunc, origin, obj, temporary_structure):
        """ Passes all arguments unchanged to the next initializer in the MRO """
        super(NewShallowSearchVisitor, self).__init__(
            cfunc,
            origin,
            obj,
            temporary_structure
        )
|
|
|
|
|
|
class NewDeepSearchVisitor(SearchVisitor, Api.RecursiveObjectDownwardsVisitor):
    """ SearchVisitor combined with Api.RecursiveObjectDownwardsVisitor """

    def __init__(self, cfunc, origin, obj, temporary_structure):
        """ Passes all arguments unchanged to the next initializer in the MRO """
        super(NewDeepSearchVisitor, self).__init__(
            cfunc,
            origin,
            obj,
            temporary_structure
        )
|
|
|
|
|
|
class DeepReturnVisitor(NewDeepSearchVisitor):
    """ Deep search visitor that scans the functions calling `cfunc` """

    def __init__(self, cfunc, origin, obj, temporary_structure):
        """
        :param cfunc: decompiled function whose callers are looked up by entry_ea
        :param origin: which offset had structure at scan moment
        :param obj: object handed to `prepare_new_scan` for every caller
        :param temporary_structure: receives found members
        """
        super(DeepReturnVisitor, self).__init__(cfunc, origin, obj, temporary_structure)
        self.__callers_ea = Helper.get_funcs_calling_address(cfunc.entry_ea)
        self.__call_obj = obj

    def _start(self):
        """
        Registers every caller in the scan tree and prepares the first scan.

        :raises AssertionError: if no caller could be decompiled
        """
        for ea in self.__callers_ea:
            self._add_scan_tree_info(ea, -1)
        # The call has a required side effect, so it must not live inside an
        # `assert` statement (asserts are stripped when Python runs with -O).
        if not self.__prepare_scanner():
            raise AssertionError("No caller could be prepared for scanning")

    def _finish(self):
        """ Continues recursive processing when another scan could be prepared """
        if self.__prepare_scanner():
            self._recursive_process()

    def __prepare_scanner(self):
        """
        Prepares a new scan for the first caller that decompiles successfully.

        NOTE(review): every call creates a fresh generator, so iteration over
        callers restarts from the beginning each time - confirm this is intended.

        :return: True if a scan was prepared, False if no caller decompiled
        """
        # Yielded cfuncs are always truthy, so None safely marks exhaustion
        cfunc = next(self.__iter_callers(), None)
        if cfunc is None:
            return False

        self.prepare_new_scan(cfunc, -1, self.__call_obj)
        return True

    def __iter_callers(self):
        """ Yields decompiled callers, skipping those that fail to decompile """
        for ea in self.__callers_ea:
            cfunc = Api.decompile_function(ea)
            if cfunc:
                yield cfunc
|