Files
ida-scripts/plugins/HexRaysPyTools/Core/StructXrefs.py

182 lines
6.0 KiB
Python

import time
import logging
from collections import namedtuple
import json
import idaapi
import Helper
import HexRaysPyTools.Settings as Settings
logger = logging.getLogger(__name__)
XrefInfo = namedtuple('XrefInfo', ['func_ea', 'offset', 'line', 'type'])
def singleton(cls):
instances = {}
def get_instance():
if cls not in instances:
instances[cls] = cls()
return instances[cls]
return get_instance
@singleton
class XrefStorage(object):
ARRAY_NAME = "$HexRaysPyTools:XrefStorage"
def __init__(self):
self.storage = None
def open(self):
if not Settings.STORE_XREFS:
self.storage = {}
return
result = Helper.load_long_str_from_idb(self.ARRAY_NAME)
if result:
try:
self.storage = json.loads(result, object_hook=self.json_keys_to_str)
return
except ValueError:
logger.error("Failed to read previous info about Xrefs. Try Ctrl+F5 to cache data")
self.storage = {}
def close(self):
self.save()
self.storage = None
def save(self):
if not Settings.STORE_XREFS:
return
if self.storage:
Helper.save_long_str_to_idb(self.ARRAY_NAME, json.dumps(self.storage))
def update_structure_info(self, ordinal, function_address, data):
""" Accepts data in form dictionary {structure offset -> list(offsets within function with field appealing) """
if ordinal not in self.storage:
self.storage[ordinal] = {}
image_base = idaapi.get_imagebase()
function_offset = function_address - image_base
self.storage[ordinal][function_offset] = data
def get_structure_info(self, ordinal, struct_offset):
""" By given ordinal and offset within a structure returns dictionary {func_address -> list(offsets)} """
result = []
if ordinal not in self.storage:
return result
for func_offset, data in self.storage[ordinal].items():
if struct_offset in data:
func_ea = func_offset + idaapi.get_imagebase()
for xref_info in data[struct_offset]:
offset, line, usage_type = xref_info
result.append(XrefInfo(func_ea, offset, line, usage_type))
return result
@staticmethod
def json_keys_to_str(x):
if isinstance(x, dict):
return {int(k): v for k, v in x.items()}
return x
def __len__(self):
return len(str(self.storage))
class StructXrefVisitor(idaapi.ctree_parentee_t):
def __init__(self, cfunc):
super(StructXrefVisitor, self).__init__()
self.__cfunc = cfunc
self.__image_base = idaapi.get_imagebase()
self.__function_address = cfunc.entry_ea
self.__result = {}
self.__storage = XrefStorage()
def visit_expr(self, expression):
# Checks if expression is reference by pointer or by value
if expression.op == idaapi.cot_memptr:
struct_type = expression.x.type.get_pointed_object()
elif expression.op == idaapi.cot_memref:
struct_type = expression.x.type
else:
return 0
# Getting information about structure, field offset, address and one line corresponding to code
ordinal = struct_type.get_ordinal()
if ordinal == 0:
t = idaapi.tinfo_t()
struct_name = struct_type.dstr().split()[-1] # Get rid of `struct` prefix or something else
t.get_named_type(idaapi.cvar.idati, struct_name)
ordinal = t.get_ordinal()
field_offset = expression.m
ea = self.__find_ref_address(expression)
usage_type = self.__get_type(expression)
if ea == idaapi.BADADDR or not ordinal:
logger.warning("Failed to parse at address {0}, ordinal - {1}, type - {2}".format(
Helper.to_hex(ea), ordinal, struct_type.dstr()
))
one_line = self.__get_line()
occurrence_offset = ea - self.__function_address
xref_info = (occurrence_offset, one_line, usage_type)
# Saving results
if ordinal not in self.__result:
self.__result[ordinal] = {field_offset: [xref_info]}
elif field_offset not in self.__result[ordinal]:
self.__result[ordinal][field_offset] = [xref_info]
else:
self.__result[ordinal][field_offset].append(xref_info)
return 0
def process(self):
t = time.time()
self.apply_to(self.__cfunc.body, None)
for ordinal, data in self.__result.items():
self.__storage.update_structure_info(ordinal, self.__function_address, data)
storage_mb_size = len(self.__storage) * 1.0 / 1024 ** 2
logger.debug("Xref processing: %f seconds passed, storage size - %.2f MB ", (time.time() - t), storage_mb_size)
def __find_ref_address(self, cexpr):
""" Returns most close virtual address corresponding to cexpr """
ea = cexpr.ea
if ea != idaapi.BADADDR:
return ea
for p in reversed(self.parents):
if p.ea != idaapi.BADADDR:
return p.ea
def __get_type(self, cexpr):
""" Returns one of the following types: 'R' - read value, 'W' - write value, 'A' - function argument"""
child = cexpr
for p in reversed(self.parents):
assert p, "Failed to get type at " + Helper.to_hex(self.__function_address)
if p.cexpr.op == idaapi.cot_call:
return 'Arg'
if not p.is_expr():
return 'R'
if p.cexpr.op == idaapi.cot_asg:
if p.cexpr.x == child:
return 'W'
return 'R'
child = p.cexpr
def __get_line(self):
for p in reversed(self.parents):
if not p.is_expr():
return idaapi.tag_remove(p.print1(self.__cfunc))
AssertionError("Parent instruction is not found")