Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kernelcache kext extraction #50

Merged
merged 6 commits into from
Jan 30, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Kext extraction
Muirey03 committed Jan 29, 2023
commit 10a8aabd959386f54cc081ee2754d4f7c213852e
1 change: 1 addition & 0 deletions bin/DyldExtractor
253 changes: 253 additions & 0 deletions bin/kextex
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
#!/usr/bin/env python3

import progressbar
import argparse
import pathlib
import logging
import os
import sys
from typing import List, BinaryIO

# Both the legacy "progressbar" package and "progressbar2" are imported under
# the module name `progressbar`; the `streams` attribute is used here as the
# marker that the required progressbar2 flavour is the one installed.
try:
    progressbar.streams
except AttributeError:
    print("progressbar is installed but progressbar2 required.", file=sys.stderr)
    # Use sys.exit rather than the `exit` builtin: the latter is injected by
    # the `site` module and is not guaranteed to exist (e.g. `python -S`).
    sys.exit(1)

from DyldExtractor.extraction_context import ExtractionContext
from DyldExtractor.macho.macho_context import MachOContext
from DyldExtractor.kc.kc_context import KCContext

from DyldExtractor.dyld.dyld_structs import (
dyld_cache_image_info
)

from DyldExtractor.converter import (
slide_info,
macho_offset,
linkedit_optimizer,
stub_fixer,
)


class _DyldExtractorArgs(argparse.Namespace):
    """Typed view of the command line options declared in ``_getArguments``.

    Only annotations are declared; no attribute gets a value until argparse
    fills the namespace in. Options without an explicit default (``extract``,
    ``output``, ``filter``, ``lookup``) are ``None`` when omitted.
    """

    kc_path: pathlib.Path
    extract: str
    output: pathlib.Path
    list_extensions: bool
    filter: str
    # The three options below are defined by the parser and read by main().
    addresses: bool
    basenames: bool
    lookup: str
    verbosity: int


def _getArguments():
    """Parse the command line.

    Returns:
        A ``_DyldExtractorArgs`` instance populated from ``sys.argv``.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "kc_path",
        type=pathlib.Path,
        help="A path to the target kernelcache. Only MH_FILESET caches are supported."  # noqa
    )
    parser.add_argument(
        "-e", "--extract",
        help="The name of the kext to extract."  # noqa
    )
    parser.add_argument(
        "-o", "--output",
        # Converted to a Path so the value matches the declared namespace type
        # and callers can use `.parent` on it directly.
        type=pathlib.Path,
        help="Specify the output path for the extracted kext. By default it extracts to the binaries folder."  # noqa
    )
    parser.add_argument(
        "-l", "--list-extensions", action="store_true",
        help="List all extensions in the cache."
    )
    parser.add_argument(
        "-f", "--filter",
        help="Filter out extensions when listing them."
    )
    parser.add_argument(
        "-a", "--addresses", action="store_true",
        help="List addresses along with extension paths. Only applies when --list-extensions is specified."  # noqa
    )
    parser.add_argument(
        "-b", "--basenames", action="store_true",
        help="Print only the basenames of each extension. Only applies when --list-extensions is specified."  # noqa
    )
    parser.add_argument(
        "--lookup",
        help="Find the library that an address lives in. E.g. kextex --lookup 0xfffffff009bbe250 kernelcache.release.iPhone14,6."  # noqa
    )
    parser.add_argument(
        "-v", "--verbosity", type=int, choices=[0, 1, 2, 3], default=1,
        help="Increase verbosity, Option 1 is the default. | 0 = None | 1 = Critical Error and Warnings | 2 = 1 + Info | 3 = 2 + debug |"  # noqa
    )

    # Hand argparse an instance: passing the class itself would make it store
    # every parsed value as a class attribute, shared by all later users.
    return parser.parse_args(namespace=_DyldExtractorArgs())


def _extractImage(
    dyldFilePath: pathlib.Path,
    dyldCtx: KCContext,
    image: dyld_cache_image_info,
    outputPath: str
) -> None:
    """Extract an image and save it.

    The order of converters is essentially a reverse of Apple's AppCacheBuilder

    Args:
        dyldFilePath: Path of the kernelcache; not used by this function.
        dyldCtx: The opened kernelcache the image lives in.
        image: The image to extract; only its ``address`` is read here.
        outputPath: Where to write the extracted MachO file (handed to
            ``open``).

    Raises:
        ValueError: If the image address is not covered by any mapping of
            ``dyldCtx``.
    """

    logger = logging.getLogger()

    statusBar = progressbar.ProgressBar(
        prefix="{variables.unit} >> {variables.status} :: [",
        variables={"unit": "--", "status": "--"},
        widgets=[progressbar.widgets.AnimatedMarker(), "]"],
        redirect_stdout=True
    )

    # convertAddr returns None for an unmapped address; fail with a clear
    # message instead of an unpacking TypeError.
    location = dyldCtx.convertAddr(image.address)
    if location is None:
        raise ValueError(
            f"Image address {hex(image.address)} is not mapped in the kernelcache."  # noqa
        )

    # get a writable copy of the MachOContext
    machoOffset, context = location
    machoCtx = MachOContext(context.fileObject, machoOffset, True)

    extractionCtx = ExtractionContext(dyldCtx, machoCtx, statusBar, logger)

    # NOTE(review): slide_info.processSlideInfo and
    # linkedit_optimizer.optimizeLinkedit were left disabled by the author,
    # presumably because they do not handle kernelcaches yet - confirm before
    # enabling them.
    # slide_info.processSlideInfo(extractionCtx)
    # linkedit_optimizer.optimizeLinkedit(extractionCtx)
    stub_fixer.fixStubs(extractionCtx)

    writeProcedures = macho_offset.optimizeOffsets(extractionCtx)

    # Write the MachO file
    with open(outputPath, "wb") as outFile:
        statusBar.update(unit="Extractor", status="Writing file")

        # Each procedure copies `size` bytes from `readOffset` of its source
        # context to `writeOffset` of the output file.
        for procedure in writeProcedures:
            outFile.seek(procedure.writeOffset)
            outFile.write(
                procedure.fileCtx.getBytes(procedure.readOffset, procedure.size)
            )

    statusBar.update(unit="Extractor", status="Done")


def _filterImages(imagePaths: List[str], filterTerm: str):
    """Return the paths that contain ``filterTerm``, shortest first.

    Matching is a case-insensitive substring test. The result is a new list
    ordered by path length; paths of equal length keep their input order.
    """
    needle = filterTerm.lower()
    matches = [
        candidate for candidate in imagePaths
        if needle in candidate.lower()
    ]
    matches.sort(key=len)
    return matches


def main():
    """Entry point of the kextex command line tool.

    Opens the kernelcache given on the command line and performs the first
    requested action, in this order of precedence: ``--lookup``,
    ``--list-extensions``, ``--extract``. Does nothing if none is given.
    """
    args = _getArguments()

    # Configure Logging
    level = logging.WARNING  # default option

    if args.verbosity == 0:
        # Set the log level so high that it doesn't do anything
        level = 100
    elif args.verbosity == 2:
        level = logging.INFO
    elif args.verbosity == 3:
        level = logging.DEBUG

    # needed for logging compatibility
    progressbar.streams.wrap_stderr()  # type:ignore

    logging.basicConfig(
        format="{asctime}:{msecs:3.0f} [{levelname:^9}] {filename}:{lineno:d} : {message}",  # noqa
        datefmt="%H:%M:%S",
        style="{",
        level=level
    )

    # Everything below stays inside the `with` block: the context keeps
    # reading from the open file object.
    with open(args.kc_path, "rb") as f:
        dyldCtx = KCContext(f)

        # enumerate images, create a map of paths and images
        imageMap = {}
        for imageData in dyldCtx.images:
            path = dyldCtx.readString(imageData.pathFileOffset)
            path = path[0:-1]  # remove null terminator
            path = path.decode("utf-8")

            imageMap[path] = imageData

        # Find the image that an address lives in
        if args.lookup:
            # base 0 lets int() accept 0x-prefixed as well as decimal input
            lookupAddr = int(args.lookup, 0)

            # sort the paths so they're in VM address order
            sortedPaths = sorted(
                imageMap, key=lambda path: imageMap[path].address
            )

            # The owner is the last image that starts at or below the address.
            previousImagePath = None
            for path in sortedPaths:
                imageAddr = imageMap[path].address
                if lookupAddr < imageAddr:
                    if previousImagePath is None:
                        print("Error: address before first image!", file=sys.stderr)  # noqa
                        sys.exit(1)
                    print(os.path.basename(previousImagePath) if args.basenames else previousImagePath)  # noqa
                    return
                else:
                    previousImagePath = path

            # We got to the end of the list, must be the last image
            # NOTE(review): no upper bound is checked, so an address past the
            # end of the last image is still attributed to it.
            path = sortedPaths[-1]
            print(os.path.basename(path) if args.basenames else path)
            return

        # list images option
        if args.list_extensions:
            imagePaths = imageMap.keys()

            # filter if needed
            if args.filter:
                filterTerm = args.filter.strip().lower()
                imagePaths = set(_filterImages(imagePaths, filterTerm))

            # sort the paths so they're displayed in VM address order
            sortedPaths = sorted(
                imagePaths, key=lambda path: imageMap[path].address
            )

            print("Listing Images\n--------------")
            for fullpath in sortedPaths:
                path = os.path.basename(fullpath) if args.basenames else fullpath
                if args.addresses:
                    print(f"{hex(imageMap[fullpath].address)} : {path}")
                else:
                    print(path)

            return

        # extract image option
        if args.extract:
            extractionTarget = args.extract.strip()
            targetPaths = _filterImages(imageMap.keys(), extractionTarget)
            if len(targetPaths) == 0:
                print(f"Unable to find image \"{extractionTarget}\"")
                return

            # Normalize to a Path in both cases so `.parent` is always
            # available and the destination directory is always created,
            # whether or not --output was given.
            if args.output is None:
                outputPath = pathlib.Path("binaries/" + extractionTarget)
            else:
                outputPath = pathlib.Path(args.output)
            os.makedirs(outputPath.parent, exist_ok=True)

            # _filterImages orders matches by length: take the shortest one.
            print(f"Extracting {targetPaths[0]}")
            _extractImage(args.kc_path, dyldCtx, imageMap[targetPaths[0]], outputPath)  # noqa
            return


# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions dyld
1 change: 1 addition & 0 deletions kc
45 changes: 45 additions & 0 deletions src/DyldExtractor/cache_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import pathlib
from typing import (
List,
Tuple,
BinaryIO
)

from DyldExtractor.file_context import FileContext
from DyldExtractor.dyld.dyld_structs import (
dyld_cache_header,
dyld_cache_mapping_info,
dyld_cache_image_info,
dyld_subcache_entry,
dyld_subcache_entry2,
)


class CacheContext(FileContext):
    """Base class for cache-like files described by address mappings.

    ``convertAddr`` iterates ``self.mappings`` as ``(mapping, context)``
    pairs. This class never assigns that attribute itself, so it has to be
    provided on the instance (e.g. by a subclass) before ``convertAddr`` is
    called.
    """

    def __init__(self, fileObject: BinaryIO, copyMode: bool = False) -> None:
        super().__init__(fileObject, copyMode=copyMode)

    def convertAddr(self, vmaddr: int) -> Tuple[int, "CacheContext"]:
        """Translate a VM address into a file offset.

        Returns:
            ``(fileOffset, context)`` for the first mapping whose
            ``[address, address + size)`` range contains ``vmaddr``, or
            ``None`` when no mapping contains it.
        """
        for mapping, owner in self.mappings:
            start = mapping.address
            end = start + mapping.size

            if start <= vmaddr < end:
                return mapping.fileOffset + (vmaddr - start), owner

        # no mapping covers the address
        return None

    def hasSubCaches(self) -> bool:
        """Always ``False`` for this base class."""
        return False

    def isFileset(self) -> bool:
        """Always ``False`` for this base class."""
        return False
22 changes: 22 additions & 0 deletions src/DyldExtractor/converter/slide_info.py
Original file line number Diff line number Diff line change
@@ -330,6 +330,14 @@ def _getMappingInfo(

class PointerSlider(object):

def __new__(cls, extractionCtx: ExtractionContext) -> object:
    """Create the slider implementation suited to ``extractionCtx``.

    Returns a ``KCPointerSlider`` when the cache reports itself as a
    fileset, otherwise a regular instance of ``cls``.
    """
    if extractionCtx.dyldCtx.isFileset():
        # KCPointerSlider is not a subclass of this class, so Python will
        # not call __init__ on the returned object; do it explicitly.
        slider = KCPointerSlider.__new__(KCPointerSlider)
        slider.__init__(extractionCtx)
        return slider

    # object.__new__ raises TypeError when given extra arguments by a class
    # that overrides __new__, so forward only `cls`. __init__ still receives
    # extractionCtx through the normal construction protocol.
    return super().__new__(cls)

def __init__(self, extractionCtx: ExtractionContext) -> None:
"""Provides a way to slide individual pointers.
"""
@@ -414,6 +422,20 @@ def slideStruct(

return structData

class KCPointerSlider(object):
    """Pointer slider used for kernelcaches."""

    def __init__(self, extractionCtx: ExtractionContext) -> None:
        """Provides a way to slide individual pointers in kernelcaches.
        """
        self._dyldCtx = extractionCtx.dyldCtx

    def slideAddress(self, address: int) -> int:
        """Read the pointer stored at ``address``.

        Returns:
            The little-endian 64-bit value found at the file location
            backing ``address``, or ``None`` if the address cannot be
            converted to a file offset.
        """
        # TODO(muirey03): This doesn't yet deal with chained pointers
        location = self._dyldCtx.convertAddr(address)
        if not location:
            return None

        fileOffset, owner = location
        (value, *_) = owner.readFormat("<Q", fileOffset)
        return value


def processSlideInfo(extractionCtx: ExtractionContext) -> None:
"""Process and remove rebase info.
73 changes: 56 additions & 17 deletions src/DyldExtractor/converter/stub_fixer.py
Original file line number Diff line number Diff line change
@@ -57,7 +57,7 @@ def __init__(self, extractionCtx: ExtractionContext) -> None:
pass

self._enumerateExports()
self._enumerateSymbols()
self._enumerateSymbols(self._machoCtx)
pass

def symbolizeAddr(self, addr: int) -> List[bytes]:
@@ -96,11 +96,18 @@ def _enumerateExports(self) -> None:
reExports: List[dyld_trie.ExportInfo] = []

# get an initial list of dependencies
if dylibs := self._machoCtx.getLoadCommand(DEP_LCS, multiple=True):
for dylib in dylibs:
if depInfo := self._getDepInfo(dylib, self._machoCtx):
depsQueue.append(depInfo)
pass
# assume every image in a fileset is a dependency:
if self._dyldCtx.isFileset():
for image in self._dyldCtx.images:
machoOffset, context = self._dyldCtx.convertAddr(image.address)
context = MachOContext(context.fileObject, machoOffset)
self._enumerateSymbols(context)
else:
if dylibs := self._machoCtx.getLoadCommand(DEP_LCS, multiple=True):
for dylib in dylibs:
if depInfo := self._getDepInfo(dylib, self._machoCtx):
depsQueue.append(depInfo)
pass

while len(depsQueue):
self._statusBar.update()
@@ -248,19 +255,19 @@ def _cacheDepExports(
self._symbolCache[functionAddr] = [bytes(export.name)]
pass

def _enumerateSymbols(self) -> None:
def _enumerateSymbols(self, machoCtx) -> None:
"""Cache potential symbols in the symbol table.
"""

symtab: symtab_command = self._machoCtx.getLoadCommand(
symtab: symtab_command = machoCtx.getLoadCommand(
(LoadCommands.LC_SYMTAB,)
)
if not symtab:
self._logger.warning("Unable to find LC_SYMTAB.")
return

linkeditFile = self._machoCtx.ctxForAddr(
self._machoCtx.segments[b"__LINKEDIT"].seg.vmaddr
linkeditFile = machoCtx.ctxForAddr(
machoCtx.segments[b"__LINKEDIT"].seg.vmaddr
)

for i in range(symtab.nsyms):
@@ -275,7 +282,7 @@ def _enumerateSymbols(self) -> None:

if symbolAddr == 0:
continue
if not self._machoCtx.containsAddr(symbolAddr):
if not machoCtx.containsAddr(symbolAddr):
self._logger.warning(f"Invalid address: {symbolAddr}, for symbol entry: {symbol}.") # noqa
continue

@@ -1266,6 +1273,39 @@ def _addToMap(stubName: bytes, stubAddr: int):
for segment in self._machoCtx.segmentsI:
for sect in segment.sectsI:
if sect.flags & SECTION_TYPE == S_SYMBOL_STUBS:
if sect.size == 0 and self._dyldCtx.isFileset():
# fileset stubs section was nuked, rebuild it
# here I expand the __TEXT_EXEC section
# we can assume that we have enough space for this
# as the area after will belong to another binary
sect.offset = segment.seg.fileoff + segment.seg.filesize
sect.reserved2 = 16
sect.size = sect.reserved2 * len(symbolPtrs)
segment.seg.vmsize += sect.size
segment.seg.filesize += sect.size
self._machoCtx.writeBytes(sect._fileOff_, sect)
self._machoCtx.writeBytes(segment.seg._fileOff_, segment.seg)

for i, (key, targets) in enumerate(symbolPtrs.items()):
self._statusBar.update(status="Fixing Stubs")

stubAddr = sect.addr + (i * sect.reserved2)
symPtrAddr = targets[0]

symPtrOff = self._dyldCtx.convertAddr(symPtrAddr)[0]
if not symbolPtrFile:
symbolPtrFile = self._machoCtx.ctxForAddr(symPtrAddr)
pass
symbolPtrFile.writeBytes(symPtrOff, struct.pack("<Q", stubAddr))

newStub = self._arm64Utils.generateAuthStubNormal(stubAddr, symPtrAddr)
stubOff, ctx = self._dyldCtx.convertAddr(stubAddr)
textFile.writeBytes(stubOff, newStub)

_addToMap(key, stubAddr)
pass
continue

for i in range(int(sect.size / sect.reserved2)):
self._statusBar.update(status="Fixing Stubs")

@@ -1400,13 +1440,12 @@ def _addToMap(stubName: bytes, stubAddr: int):
return stubMap

def _fixCallsites(self, stubMap: Dict[bytes, Tuple[int]]) -> None:
if (
b"__TEXT" not in self._machoCtx.segments
or b"__text" not in self._machoCtx.segments[b"__TEXT"].sects
):
raise _StubFixerError("Unable to get __text section.")
textSect = self._machoCtx.segments.get(b"__TEXT", {}).sects.get(b"__text", None)
if not textSect:
textSect = self._machoCtx.segments.get(b"__TEXT_EXEC", {}).sects.get(b"__text", None)

textSect = self._machoCtx.segments[b"__TEXT"].sects[b"__text"]
if not textSect:
raise _StubFixerError("Unable to get __text section.")

textAddr = textSect.addr
# Section offsets by section_64.offset are sometimes
21 changes: 2 additions & 19 deletions src/DyldExtractor/dyld/dyld_context.py
Original file line number Diff line number Diff line change
@@ -13,9 +13,10 @@
dyld_subcache_entry,
dyld_subcache_entry2,
)
from DyldExtractor.cache_context import CacheContext


class DyldContext(FileContext):
class DyldContext(CacheContext):

def __init__(self, fileObject: BinaryIO, copyMode: bool = False) -> None:
"""A wrapper around a dyld file.
@@ -60,24 +61,6 @@ def __init__(self, fileObject: BinaryIO, copyMode: bool = False) -> None:
self._subCaches: List[DyldContext] = []
pass

def convertAddr(self, vmaddr: int) -> Tuple[int, "DyldContext"]:
"""Convert a vmaddr to its file offset
Returns:
The file offset and the DyldContext, but if not found, `None`.
"""

for mapping, ctx in self.mappings:
lowBound = mapping.address
highBound = mapping.address + mapping.size

if vmaddr >= lowBound and vmaddr < highBound:
mappingOff = vmaddr - lowBound
return mapping.fileOffset + mappingOff, ctx

# didn't find the address in any mappings...
return None

def headerContainsField(self, field: str) -> bool:
"""Check to see if the header contains the given field.
6 changes: 3 additions & 3 deletions src/DyldExtractor/extraction_context.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import progressbar
import logging

from DyldExtractor.dyld.dyld_context import DyldContext
from DyldExtractor.cache_context import CacheContext
from DyldExtractor.macho.macho_context import MachOContext


class ExtractionContext(object):
"""Holds state information for extraction
"""

dyldCtx: DyldContext
dyldCtx: CacheContext
machoCtx: MachOContext

# The update method of the progress bar has
@@ -33,7 +33,7 @@ class ExtractionContext(object):

def __init__(
self,
dyldCtx: DyldContext,
dyldCtx: CacheContext,
machoCtx: MachOContext,
statusBar: progressbar.ProgressBar,
logger: logging.Logger
Empty file.
78 changes: 78 additions & 0 deletions src/DyldExtractor/kc/kc_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import pathlib
from typing import (
List,
Tuple,
BinaryIO
)

from DyldExtractor.file_context import FileContext
from DyldExtractor.dyld.dyld_structs import (
dyld_cache_header,
dyld_cache_mapping_info,
dyld_cache_image_info,
dyld_subcache_entry,
dyld_subcache_entry2,
)

from DyldExtractor.macho.macho_context import MachOContext
from DyldExtractor.cache_context import CacheContext

from DyldExtractor.macho.macho_structs import (
LoadCommandMap,
LoadCommands,
load_command,
UnknownLoadCommand,
mach_header_64,
segment_command_64
)

class KCContext(CacheContext):
    """Cache context backed by an MH_FILESET kernelcache."""

    def __init__(self, fileObject: BinaryIO, copyMode: bool = False) -> None:
        """A wrapper around a kernelcache file.

        Provides convenient methods and attributes for a given kernelcache
        file.

        Args:
            fileObject: an open kernelcache file.
            copyMode: forwarded unchanged to the parent constructor.

        Raises:
            Exception: if the file is not an MH_FILESET MachO, or if it has
                no LC_FILESET_ENTRY load commands.
        """

        super().__init__(fileObject, copyMode=copyMode)

        # The kernelcache itself is parsed as a MachO starting at offset 0.
        self._machoCtx = MachOContext(fileObject, 0, False)
        outer = self._machoCtx
        self.header = outer.header

        # Check filetype
        MH_FILESET = 0xc
        if not self.header.filetype == MH_FILESET:
            raise Exception("Only MH_FILESET kernelcaches are supported!")

        # One mapping per segment of the outer MachO, all backed by this
        # context.
        self.mappings: List[Tuple[dyld_cache_mapping_info, KCContext]] = []
        for segment in outer.segmentsI:
            command = segment.seg

            mapping = dyld_cache_mapping_info()
            mapping.address = command.vmaddr
            mapping.size = command.vmsize
            mapping.fileOffset = command.fileoff
            self.mappings.append((mapping, self))

        # get images
        self.images: List[dyld_cache_image_info] = []

        filesetEntries = outer.getLoadCommand(
            (LoadCommands.LC_FILESET_ENTRY,), multiple=True
        )
        if not filesetEntries:
            raise Exception("Kernelcache does not contain any fileset entries!")

        # One image per fileset entry.
        for entry in filesetEntries:
            image = dyld_cache_image_info()
            # entry_id.offset is added to the entry's own file offset to
            # locate the entry's name string.
            image.pathFileOffset = entry._fileOff_ + entry.entry_id.offset
            image.address = entry.vmaddr
            self.images.append(image)

    def isFileset(self) -> bool:
        """Always ``True`` for a kernelcache context."""
        return True