Surrogate EDR Models: ATP / MDE Part 1
Atlan Team
This post is an introduction to an approach and gives a taste of some of the work we have been doing on reverse engineering machine-learning models; perhaps some of the community will begin to do the same.
While much work has been done on defeating EDR mechanisms related to hooking, we don't believe enough work has been done to deeply understand the models.
Our approach has been twofold:
1. Our GAN, which identifies and corrects code in a malicious implant, using the ML model as a discriminator while it stays a black box. With this we can infer what the model is looking at.
2. Actually trying to scrape the model from memory and investigate.
While we have done much more work on the first one, we are moving on to the second now.
The first step we took was to fire up a virtual machine with Microsoft's ATP or Defender MDE running and then get process hacker on the box. Elevating the PPL level of Process Hacker means that we can dump the whole process memory.
Rather than debugging the program we set about writing some scripts to do various things:
1. Find JSON keys. When we analysed numerous Defender MDE process dumps for JSON keys, we found these keys:
{'fld', 'origin', 'a', 'keypath', 'UUID', 'popuprect', 'lastFreActionTimestamp', 'button', 'startup_list', 'tele', 'bg_color', 'fe', 'utf8', 'inlinemode', 'ext', 'sessionInfo', 'ids', 'from', 'lock', 'err', 'include_level', '4', 'bot', 'sms', 'o_ibn2', 'watch', 'userid', 'sys', 'telemen', 'dlls', 'submit', 'encode', '_4', 'npagenum', 'profile_name', 'shell', 'pr_key', 'assets', 'dmn', 'subdomain', 'valueof', 'x_capec_version', 'productArch', 'isfolder', '1', 'protectionbodystatictext', 'addr', 'process_path', 'is64', 'chunk_data', 'remoteMetadataTelemetryThrottle', 'windowtype', 'cipherType', 'ment', 'fullAgentPath', 'distributername', 'windowsversion', 'algo', 'ocoords', 'cardId', 's', 'tp', 'wscri', 'charcode', '64', 'country_code', 'x_mitre_attack_spec_version', 'manifest_version', 'BeaconInterval', 'balance', 'ahr0cdovl3zpcc1jbgluawmucmf6cmfib3rrys5ies9hym91df9jzw50zxivte10qlrjtegwcegxb1boatkv', 'eorezoappName', 'shellScripts', 'snmp_mib2syscontact', 'upnpnatt', 'temperature', 'profiles', 'agname', 'zf', 'HQ', 'compname', 'urls_to_restore_on_startup', 'encryptedPassword', 'ppb', 'js', 'g', 'de', 'deleted', 'all_frames', 'wht', 'user_id', 'sample', 'cmType', 'vuln_test', 'tc', 'h', 'component', 'ni', 'inject', 'format', 'licensekey', '0x', 'PrimaryOwnerName', 'enblupnp', 'locale', 'InitialTransportType', 'fls', 'result', 'izt', 'feed', 'xe', 't', 'nr', 'dirs', 'file_id', 'anti_forensic', 'configurations', 'usererror', 'totalCount', 'rop2', 'gid', 'pkgauth', 'hash', 'nbody', 'enabled', 'channelid', 'gbdhkasgdhksagd', 'safebrowsing', 'wipe', 'opcode', 'macTest', 'HMAC', 'prc', 'machine_id', 'auth_server_pw', 'eof', 'language', '9', 'run_at', 'channel', 'timestamp', 's5', 'properties', 'profile_description', 'fuchsia', 'chunk_size', 'harcode', 'cert_buffer', 'table', 'entitytype', 'dwld', 'position', 'cve_ids', 'email', 'creation_flags', 'doc', 'psw', 'ff', 'number', 'session_id', 'concatemoji', 'rl', 'hidden', 'bccList', 'root', 'EventType', 'webkit', 'files', 
'tabid', 'ip', 'servers', 'GUID', 'level', '3', 'miner', 'IV', 'chunk_num', '_1', 'di', 'x1556a', 'chkboxchkprotectionpage', '500', 'cam', 'update_url', 'pools', 'cpu_info', 'appPartner', 'uri', 'computer_name', 'EncryptedMessage', 'contents', 'durationTelemetryThrottle', 'version', 'n', 'mode', 'images', 'history', 'model_id', 'r', 'command', 'I', 'cfn', 'eventName', 'querystring', 'id', 'eventcategory', 'uid', 'authors', 'api', 'profile_date', 'detection', 'T', 'sione', 'encsuffix', 'loaderName', 'telem', 'ementsb', '9192939495969798999A9a9', 'accl', 'depends', 'intThumbs', 'statistics', 'busid', 'Type', 'yo', 'productname', 'scripts', 'fg', 'logonid', 'exp', 'jsonrpc', 'aspnet1', 'Message', 'user', 'tvid', 'rect', 'lst', 'reasons', 'event', 'original_id', 'ApiRsvUrl', 'key', 'guid', 'process_id', 'width', 'affiliate_id', 'friendly', 'val', 'at', 'feedCompositionCategory', 'kl', 'pwd', 'rop1x', 'rootThumbs', 'action', 'name', 'cudaid', 'cmdline', 'note', 'extension_bypass', 'ua_ver', 'ok', 'xplatTest', 'folder', 'linuxTest', 'yvj', 'filter', 'om', 'solver', 'youtubeextension', 'eventtime', 'beacon', 'caption', 'commentStatus', 'os_info', 'duration', 'w', 'Note', 'Ct', 'entsbyt', 'tdt_version', '_3', 'saltSize', 'samss', 'username', 'src', 'anti_revere', 'kw', 'ersh', 'jaq', 'cros', 'silent', 'silentLoader', 'dbgvahsksadgka', 'urlgithubfed', 'messagetype', 'support_email', 'cmd', 'authorization', 'computername', 'ccList', 'description', 'procInfo', 'environment', 'server', 'usage', 'group', 'scenario_id', 'iename', 'sk', 'reason', 'setall', 'isLocalContent', 'title', 'wfld', 'service_worker', 'source', 'view_id', 'comment', 'entsb', 'ncc', 'info', 'encryptedpassword', 'Deep', 'password', 'isFeatured', 'phone', 'cname', 'upload', 'urlinterface', 'token', 'background', 'windows', 'e', 'popupopen', 'domain', 'u', 'pk', 'content_scripts', 'plugins', 'key_buffer', 'parents', 'nonce', 'o', 'payloaduuid', 'rop1', 'gpuid', 'price_unit', 'kill_services', 'originalurl', 
'passerror', 'references', 'ppid', 'cuserid', 'ransomware', 'Ht', 'hta', 'KeyStr', 'EventName', 'enterprise_store_url', 'svc', 'd', '_2', 'fingerprint', 'imp_url', 'entityname', 'concat', 'Meta', 'abstract', 'agent', 'corp', 'os', 'error', 'exec', 'details', 'rank', 'ssf', 'eventtypename', 'p', 'WEBSITE_PROJECT', 'unique_id', 'abd_process_status', 'os_name', 'success', 'ev', 'pdf', 'pass', 'toList', 'os_flavor', 'loginname', 'fd', 'productversion', 'publishedDateTime', 'rop3', 'eventType', 'weight', '2', 'fileSystemInfo', 'composerId', 'code', '729', 'azid', 'state', 'height', 'loaderId', 'response', 'microsoftofficeerror', 'pubkey', 'w32', 'wait', 'icon', 'ctrigger', 'ssish', 'PayloadName', 'installer_id', 'install_time', 'support_alternativea', 'published', 'appVersion', 'homepageURL', 'tag', 'gpu_status', 'sub', 'jhp', 'nicehash', 'plugin', 'emen', 'target_extensions', 'rpf', 'posturi', 'et', 'server_id', 'x_mitre_version', 'str', 'W', 'white_files', 'currentwindow', 'contenttype', 'return_code', 'focalRegion', 'hashType', 'imagePath', 'date', 'b_listdir', 'excecute', 'countryid_at_install', 'responses', 'commandid', 'autosave', 'submitdebugmsg', 'host', 'verifySsl', 'b_deldir', 'label', 'page', 'cookie', 'confirmurl', 'rlhttp', 'cfieldtype', 'permissions', 'avExcludedEntities', 'files_name', 'warning', 'data_finish', 'uuid', 'ver', 'i', 'proxyhost', 'cd', 'aa', 'km0', 'parent_process_id', 'leme', 'tid', 'dev', 'socks', 'recoId', 'type', 'enterprise_store_name', 'content', 'status', 'parameters', 'P', 'pid', 'tvpw', '__main__', 'S', 'ajaxurl', 'subject', 'path', 'nname', 'script', 'port', 'bye', 'file', 'jitter', 'feedName', 'img', 'appinstanceuid', 'dbg', 'isadmin', 'creditcard', 'text', 'file_extension', 'body', 'SECURITY_TIMES', 'login', 'lrugifnocotua', 'l', 'data', 'ftrimchromebssourl', 'runas', 'params', 'externally_connectable', 'paypal', 'soft', 'TransportModule', 'method', 'Jitter', 'job_id', 'homepage', 'miningrequestid', 'c', 'note_file_name', 'bi', 
'urls', '9192939495979', 'packer', 'last_prompted_google_url', 'soft_id', 'cipher_alg', 'bene', 'freebsd', 'mlap', 'get', 'country', 'url', 'profile_author', 'pcname', 'vendors', 'devices', 'cat_code', 'pathdst', 'ri', 'distinct_id', 'loginuser', 'matches', 'license', 'syncGUID', 'ca_cert_buffer', 'module', 'appName', 'scenarios', 'entsbytagn'}
Potential Model 1: Network Activity Features
Key | Description |
---|---|
origin | Source of the network request (e.g., IP address, domain) |
tele | Telemetry data from network activity |
telemen | Telemetry metrics related to network activity |
upnpnatt | Information on UPnP NAT traversal attempts |
ip | IP address involved in the network request |
uri | Uniform Resource Identifier of the network request |
url | URL accessed or requested |
imp_url | Important or significant URL in the context of the network flow |
proxyhost | Proxy server used for the network request |
server | Information about the server involved in the network activity |
servers | List of servers involved in the network activity |
host | Host information for the network request |
port | Network port used for the communication |
confirmurl | URL confirmation status |
referer | Referer URL indicating the source of the request |
method | HTTP method used (e.g., GET, POST) |
response | HTTP response code received |
ajaxurl | URL for AJAX requests |
Potential Model 2: File Attributes Features
Key | Description |
---|---|
file_id | Unique identifier for the file |
filename | Name of the file |
files | List of files involved |
file_extension | File extension (e.g., .exe, .doc) |
filepath | Path to the file on the system |
fileSystemInfo | Information about the file system |
hash | Cryptographic hash of the file for integrity verification |
file_path | Path where the file is located |
fileSystem | File system type (e.g., NTFS, FAT32) |
files_name | Names of the files |
directory | Directory containing the file |
dir | Directory path |
directories | List of directories involved |
folder | Folder containing the file |
subdirectory | Subdirectory information |
Potential Model 3: System Information Features
Key | Description |
---|---|
os | Operating system (e.g., Windows, Linux) |
os_name | Name of the operating system |
os_flavor | Specific flavor or distribution of the OS |
windows | Information specific to Windows OS |
windowsversion | Version of Windows OS |
process_path | Path of the running process |
sys | System information |
system | Overall system-related information |
cpu_info | Information about the CPU |
hardware | Hardware specifications |
device | Device information |
machine_id | Unique identifier for the machine |
computer_name | Name of the computer |
pcname | PC name |
TorchScript
TorchScript is an intermediate representation of a PyTorch model that can be run in a high-performance environment independent of Python. It enables the exporting of PyTorch models for use in production, providing a way to serialize the model and run it efficiently in C++ environments, among others. Its key features:
- Serialization: TorchScript models can be saved and loaded in a format that preserves the model structure and weights.
- Optimization: TorchScript can optimize models for performance.
- Deployability: TorchScript models can be run in environments where Python is not available, such as in mobile or embedded systems.
If you need to deploy the model in a C++ environment, you can load it using the C++ API. This requires linking against PyTorch’s C++ libraries.
#include <torch/script.h>

#include <iostream>
#include <vector>

// Minimal LibTorch program: load a serialized TorchScript module, run one
// forward pass on a random input, and print the first five output values.
int main() {
  // Deserialize the ScriptModule from a file using torch::jit::load().
  // Current LibTorch returns the module by value (not as a shared_ptr), and
  // reports a missing or corrupt file by throwing c10::Error.
  torch::jit::script::Module module;
  try {
    module = torch::jit::load("model_traced.pt");
  } catch (const c10::Error& e) {
    std::cerr << "error loading the model: " << e.what() << '\n';
    return 1;
  }

  // Create an input tensor.
  // NOTE(review): the {1, 3, 224, 224} shape assumes an image-style model;
  // adjust it to whatever the loaded module actually expects.
  std::vector<torch::jit::IValue> inputs;
  inputs.push_back(torch::rand({1, 3, 224, 224}));

  // Execute the model and turn its output into a tensor.
  at::Tensor output = module.forward(inputs).toTensor();
  std::cout << output.slice(/*dim=*/1, /*start=*/0, /*end=*/5) << '\n';
  return 0;
}
ONNX
ONNX (Open Neural Network Exchange) is an open-source format designed to represent machine learning models. It allows models to be shared across different frameworks, making it easier to move models from one environment to another.
- Interoperability: Models can be transferred between different machine learning frameworks.
- Optimization: ONNX Runtime offers various optimizations for improving model performance.
- Deployability: ONNX models can be deployed across multiple platforms including cloud, mobile, and embedded devices.
You can also load and run ONNX models in a C++ environment using ONNX Runtime's C++ API. This requires linking against ONNX Runtime's C++ libraries.
#include <onnxruntime/core/session/onnxruntime_cxx_api.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

// Minimal ONNX Runtime program: load "model.onnx", run it once on a
// synthetic float input, and print up to the first ten output values.
int main() {
  try {
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");

    // Initialize the ONNX runtime session.
    // The model path is ORTCHAR_T, which is wchar_t on Windows; ORT_TSTR
    // produces the right literal type on every platform.
    Ort::SessionOptions session_options;
    Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);

    // Create input tensor object from data values.
    // NOTE(review): the shape assumes an image-style model; adjust as needed.
    std::vector<float> input_tensor_values(1 * 3 * 224 * 224);
    std::vector<int64_t> input_shape = {1, 3, 224, 224};

    // Fill the input tensor with some values (0, 1, 2, ...).
    std::iota(input_tensor_values.begin(), input_tensor_values.end(), 0.0f);

    // Create memory info describing a CPU-side buffer.
    Ort::MemoryInfo memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);

    // Create input tensor; it wraps input_tensor_values without copying, so
    // that vector must outlive the Run() call below.
    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
        memory_info, input_tensor_values.data(), input_tensor_values.size(),
        input_shape.data(), input_shape.size());

    // Prepare inputs and outputs.
    // NOTE(review): "input"/"output" must match the names stored in the
    // model's graph; confirm them for the model being loaded.
    const char* input_names[] = {"input"};
    const char* output_names[] = {"output"};

    // Run the model.
    auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_names,
                                      &input_tensor, 1, output_names, 1);

    // Access the output tensor, never reading past the number of elements
    // the model actually produced.
    float* float_array = output_tensors[0].GetTensorMutableData<float>();
    const std::size_t element_count =
        output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
    const std::size_t to_print = std::min<std::size_t>(element_count, 10);
    for (std::size_t i = 0; i < to_print; i++) {
      std::cout << float_array[i] << std::endl;
    }
    return 0;
  } catch (const Ort::Exception& e) {
    // ONNX Runtime reports failures (missing model, bad names, shape
    // mismatch) by throwing Ort::Exception.
    std::cerr << "ONNX Runtime error: " << e.what() << '\n';
    return 1;
  }
}
import torch
import io
from concurrent.futures import ThreadPoolExecutor, as_completed
def is_torchscript_model(byte_seq):
    """Return True if *byte_seq* can be loaded as a TorchScript model.

    The bytes are wrapped in an in-memory buffer and handed to
    ``torch.jit.load``; any loader failure is treated as "not a model".

    Args:
        byte_seq: Candidate bytes (for example a slice of a memory dump).

    Returns:
        True when ``torch.jit.load`` succeeds, False otherwise.
    """
    # NOTE(review): the candidate bytes come from a scraped process dump, so
    # they are untrusted input to the deserializer; run this in a throwaway VM.
    try:
        # map_location="cpu" keeps a model that was saved on an accelerator
        # from being rejected just because that device is absent here.
        torch.jit.load(io.BytesIO(byte_seq), map_location="cpu")
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt/SystemExit
        # must still be able to stop a long brute-force scan.
        return False
    return True
def process_chunk(start_index, memory_dump, initial_chunk_size, max_chunk_size,
                  is_model=None):
    """Find the shortest slice starting at *start_index* that loads as a model.

    Candidate slices ``memory_dump[start_index:end]`` are tried with ``end``
    increasing one byte at a time, beginning 100 bytes after *start_index*.
    The search window is the largest size reachable by repeatedly doubling
    *initial_chunk_size* without exceeding *max_chunk_size*; the window's
    upper bound is exclusive and the scan never runs past the end of the dump.

    Each candidate end offset is tested exactly once. An earlier version
    restarted from the beginning after every doubling and kept doubling after
    hitting the end of the dump, repeating work without changing the result.

    Args:
        start_index: Offset in *memory_dump* where the candidate model starts.
        memory_dump: Bytes-like object holding the whole dump.
        initial_chunk_size: First window size, in bytes; must be positive.
        max_chunk_size: Largest window size the doubling may reach, in bytes.
        is_model: Optional predicate taking a bytes slice and returning a
            truthy value when it is a valid model. Defaults to
            ``is_torchscript_model``.

    Returns:
        The end offset (exclusive) of the first slice accepted by *is_model*,
        or None when no slice in the window is accepted.

    Raises:
        ValueError: If *initial_chunk_size* is not positive while still being
            within *max_chunk_size* (the doubling could never terminate).
    """
    if is_model is None:
        # Resolved at call time so the default validator is the one defined
        # alongside this function.
        is_model = is_torchscript_model

    if initial_chunk_size > max_chunk_size:
        # The doubling schedule never starts: nothing to scan.
        return None
    if initial_chunk_size <= 0:
        raise ValueError("initial_chunk_size must be a positive number of bytes")

    # Shortest slice worth testing; shorter candidates are skipped.
    min_length = 100

    # Largest window the doubling schedule reaches without passing the cap.
    window = initial_chunk_size
    while window * 2 <= max_chunk_size:
        window *= 2

    # The last candidate end is one below the window bound and never beyond
    # the end of the dump.
    last_end = min(start_index + window - 1, len(memory_dump))
    for current_end in range(start_index + min_length, last_end + 1):
        if is_model(memory_dump[start_index:current_end]):
            return current_end
    return None
def find_zip_signatures(memory_dump, signature):
    """Return every offset at which *signature* occurs in *memory_dump*.

    Matches are non-overlapping: after a hit, the search resumes immediately
    after the matched bytes.

    Args:
        memory_dump: Bytes-like object to search.
        signature: Non-empty byte pattern to look for.

    Returns:
        A list of start offsets in ascending order (empty when none match).

    Raises:
        ValueError: If *signature* is empty; an empty pattern matches at
            every position and would otherwise never let the search advance.
    """
    if not signature:
        raise ValueError("signature must not be empty")

    indices = []
    step = len(signature)
    position = memory_dump.find(signature)
    while position != -1:
        indices.append(position)
        position = memory_dump.find(signature, position + step)
    return indices
# Path to the memory dump file
memory_dump_path = 'MsMpEng.dmp_torchscript-win.pt'
# ZIP file signature (50 4B 03 04 for local file header); every occurrence in
# the dump is treated as the possible start of an embedded model archive.
zip_signature = b'\x50\x4B\x03\x04'


def main():
    """Scan the memory dump for an embedded TorchScript model and report it.

    Reads the whole dump into memory, locates every ZIP local-file-header
    signature, and brute-forces the end offset of a loadable model from each
    of those starts in a thread pool. Prints the end offset of the first
    candidate that loads, or a failure message when none does.
    """
    # Read the memory dump
    with open(memory_dump_path, 'rb') as f:
        memory_dump = f.read()

    # Find all ZIP file signatures
    zip_starts = find_zip_signatures(memory_dump, zip_signature)
    if not zip_starts:
        print("No ZIP file signatures found.")
        return

    print(f"Found {len(zip_starts)} ZIP file signatures at indices: {zip_starts}")

    initial_chunk_size = 1 * 1024 * 1024  # Start with 1 MB chunks
    max_chunk_size = 100 * 1024 * 1024  # Maximum chunk size of 100 MB

    found = False
    with ThreadPoolExecutor() as executor:
        # One search task per ZIP signature
        tasks = [
            executor.submit(process_chunk, start_index, memory_dump,
                            initial_chunk_size, max_chunk_size)
            for start_index in zip_starts
        ]

        # Process the results as they complete
        for future in as_completed(tasks):
            result = future.result()
            if result is not None:
                print(f"TorchScript model successfully loaded with byte sequence up to index {result}")
                found = True
                # Leaving the ``with`` block waits for every submitted task,
                # so drop the ones that have not started yet; scans already
                # running cannot be interrupted and are allowed to finish.
                for pending in tasks:
                    pending.cancel()
                break

    if not found:
        print("Failed to load any TorchScript model from the memory dump.")


if __name__ == "__main__":
    main()