forked from kennytm/Miscellaneous
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdyld_decache.cpp
1536 lines (1339 loc) · 55.5 KB
/
dyld_decache.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
dyld_decache.cpp ... Extract dylib files from shared cache.
Copyright (C) 2011 KennyTM~ <[email protected]>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
With reference to DHowett's dyldcache.cc, with the following condition:
"if you find it useful, do whatever you want with it. just don't forget that
somebody helped."
see http://blog.howett.net/?p=75 for detail.
*/
/*
Part of code is referenced from Apple's dyld project, with the following li-
cense:
*/
/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
*
* Copyright (c) 2006-2008 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
//------------------------------------------------------------------------------
// END LEGALESE
//------------------------------------------------------------------------------
// g++ -o dyld_decache -O3 -Wall -Wextra -std=c++98 /usr/local/lib/libboost_filesystem-mt.a /usr/local/lib/libboost_system-mt.a dyld_decache.cpp DataFile.cpp
#include <getopt.h>
#include <stdint.h>
#include <unistd.h>

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <string>
#include <utility>
#include <vector>

#define BOOST_FILESYSTEM_VERSION 3
#include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <boost/unordered_map.hpp>

#include "DataFile.h"
// Header record of a dyld shared cache file (third-party layout; keep the
// field order and widths exactly as they are).
// ProgramContext keeps a 'const dyld_cache_header*' next to its mapping and
// image-info pointers.
// NOTE(review): mappingOffset/mappingCount and imagesOffset/imagesCount
// presumably locate the shared_file_mapping_np and dyld_cache_image_info
// arrays inside the cache file -- confirm against the ProgramContext code.
struct dyld_cache_header {
char magic[16];
uint32_t mappingOffset;
uint32_t mappingCount;
uint32_t imagesOffset;
uint32_t imagesCount;
uint64_t dyldBaseAddress;
};
// Fixed-width stand-ins for the Mach VM scalar types used by the structures
// below: addresses, offsets and sizes are 64-bit, protections are 32-bit.
typedef uint64_t mach_vm_address_t;
typedef uint64_t mach_vm_offset_t;
typedef uint64_t mach_vm_size_t;
typedef int32_t vm_prot_t;
// One mapping record of the shared cache (third-party layout).
// NOTE(review): presumably describes how sfm_size bytes at sfm_file_offset in
// the cache file map to VM address sfm_address -- confirm against the
// ProgramContext code that consumes it.
struct shared_file_mapping_np {
mach_vm_address_t sfm_address;
mach_vm_size_t sfm_size;
mach_vm_offset_t sfm_file_offset;
vm_prot_t sfm_max_prot;
vm_prot_t sfm_init_prot;
};
// Per-image record of the shared cache (third-party layout).
// NOTE(review): 'address' is presumably the VM address of the image's
// mach_header and 'pathFileOffset' the file offset of its path string --
// confirm against the ProgramContext code that consumes it.
struct dyld_cache_image_info {
uint64_t address;
uint64_t modTime;
uint64_t inode;
uint32_t pathFileOffset;
uint32_t pad;
};
// 32-bit integer aliases used for the CPU type fields of mach_header.
typedef int32_t integer_t;
typedef integer_t cpu_type_t;
typedef integer_t cpu_subtype_t;
// 32-bit Mach-O file header (third-party layout; do not reorder fields).
// How this file uses it:
//  - 'magic' is compared against 0xfeedface before an image is processed;
//  - the load commands start immediately after the header ('_header + 1') and
//    'ncmds' of them are walked by MachOFile::foreach_command;
//  - 'sizeofcmds' is rewritten in the output when extra sections are added.
struct mach_header {
uint32_t magic;
cpu_type_t cputype;
cpu_subtype_t cpusubtype;
uint32_t filetype;
uint32_t ncmds;
uint32_t sizeofcmds;
uint32_t flags;
};
// Common prefix of every load command. 'cmd' is one of the LC_* constants and
// 'cmdsize' is the byte distance to the next command (foreach_command adds it
// to its cursor). All *_command structs below derive from this one.
struct load_command {
uint32_t cmd;
uint32_t cmdsize;
};
// Load command identifiers, matched against load_command::cmd (see
// DecachingFile::fix_file_offsets). Some identifiers have the LC_REQ_DYLD bit
// OR-ed in, so always compare against the full macro value.
#define LC_REQ_DYLD 0x80000000
#define LC_SEGMENT 0x1
#define LC_SYMTAB 0x2
#define LC_SYMSEG 0x3
#define LC_THREAD 0x4
#define LC_UNIXTHREAD 0x5
#define LC_LOADFVMLIB 0x6
#define LC_IDFVMLIB 0x7
#define LC_IDENT 0x8
#define LC_FVMFILE 0x9
#define LC_PREPAGE 0xa
#define LC_DYSYMTAB 0xb
#define LC_LOAD_DYLIB 0xc
#define LC_ID_DYLIB 0xd
#define LC_LOAD_DYLINKER 0xe
#define LC_ID_DYLINKER 0xf
#define LC_PREBOUND_DYLIB 0x10
#define LC_ROUTINES 0x11
#define LC_SUB_FRAMEWORK 0x12
#define LC_SUB_UMBRELLA 0x13
#define LC_SUB_CLIENT 0x14
#define LC_SUB_LIBRARY 0x15
#define LC_TWOLEVEL_HINTS 0x16
#define LC_PREBIND_CKSUM 0x17
#define LC_LOAD_WEAK_DYLIB (0x18 | LC_REQ_DYLD)
#define LC_SEGMENT_64 0x19
#define LC_ROUTINES_64 0x1a
#define LC_UUID 0x1b
#define LC_RPATH (0x1c | LC_REQ_DYLD)
#define LC_CODE_SIGNATURE 0x1d
#define LC_SEGMENT_SPLIT_INFO 0x1e
#define LC_REEXPORT_DYLIB (0x1f | LC_REQ_DYLD)
#define LC_LAZY_LOAD_DYLIB 0x20
#define LC_ENCRYPTION_INFO 0x21
#define LC_DYLD_INFO 0x22
#define LC_DYLD_INFO_ONLY (0x22|LC_REQ_DYLD)
#define LC_LOAD_UPWARD_DYLIB (0x23|LC_REQ_DYLD)
#define LC_VERSION_MIN_MACOSX 0x24
#define LC_VERSION_MIN_IPHONEOS 0x25
#define LC_FUNCTION_STARTS 0x26
#define LC_DYLD_ENVIRONMENT 0x27
// LC_SEGMENT (32-bit) load command.
// In this file: 'nsects' section records are read from the bytes immediately
// following the struct (fix_file_offsets), and vmaddr/vmsize/fileoff drive
// the VM-address-to-file-offset conversions in MachOFile and DecachingFile.
struct segment_command : public load_command {
char segname[16];
uint32_t vmaddr;
uint32_t vmsize;
uint32_t fileoff;
uint32_t filesize;
vm_prot_t maxprot;
vm_prot_t initprot;
uint32_t nsects;
uint32_t flags;
};
// 32-bit section record that follows a segment_command.
// ExtraStringRepository fills one of these (segname, sectname, flags, align,
// addr, offset, size) for each synthesized extra section, and
// fix_file_offsets remaps 'offset' and 'reloff' for the existing ones.
struct section {
char sectname[16];
char segname[16];
uint32_t addr;
uint32_t size;
uint32_t offset;
uint32_t align;
uint32_t reloff;
uint32_t nreloc;
uint32_t flags;
uint32_t reserved1;
uint32_t reserved2;
};
// LC_SYMTAB load command. fix_file_offsets replaces symoff, stroff and
// strsize with the values recorded in _new_linkedit_offsets.
struct symtab_command : public load_command {
uint32_t symoff;
uint32_t nsyms;
uint32_t stroff;
uint32_t strsize;
};
// LC_SYMSEG load command layout. Not referenced by the code visible in this
// part of the file.
struct symseg_command : public load_command {
uint32_t offset;
uint32_t size;
};
// LC_DYSYMTAB load command. fix_file_offsets rewrites the six table offsets
// (tocoff, modtaboff, extrefsymoff, indirectsymoff, extreloff, locreloff);
// every other field is copied through unchanged.
struct dysymtab_command : public load_command {
uint32_t ilocalsym;
uint32_t nlocalsym;
uint32_t iextdefsym;
uint32_t nextdefsym;
uint32_t iundefsym;
uint32_t nundefsym;
uint32_t tocoff;
uint32_t ntoc;
uint32_t modtaboff;
uint32_t nmodtab;
uint32_t extrefsymoff;
uint32_t nextrefsyms;
uint32_t indirectsymoff;
uint32_t nindirectsyms;
uint32_t extreloff;
uint32_t nextrel;
uint32_t locreloff;
uint32_t nlocrel;
};
// LC_TWOLEVEL_HINTS load command. fix_file_offsets passes 'offset' through
// fix_offset before writing the command back out.
struct twolevel_hints_command : public load_command {
uint32_t offset;
uint32_t nhints;
};
// LC_SEGMENT_64 load command layout. Only referenced from the commented-out
// LC_SEGMENT_64 case in fix_file_offsets; 64-bit images are rejected by the
// 0xfeedface magic check.
struct segment_command_64 : public load_command {
char segname[16];
uint64_t vmaddr;
uint64_t vmsize;
uint64_t fileoff;
uint64_t filesize;
vm_prot_t maxprot;
vm_prot_t initprot;
uint32_t nsects;
uint32_t flags;
};
// 64-bit counterpart of 'section'. Only referenced from the commented-out
// LC_SEGMENT_64 case in fix_file_offsets.
struct section_64 {
char sectname[16];
char segname[16];
uint64_t addr;
uint64_t size;
uint32_t offset;
uint32_t align;
uint32_t reloff;
uint32_t nreloc;
uint32_t flags;
uint32_t reserved1;
uint32_t reserved2;
uint32_t reserved3;
};
// Layout shared by LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO and
// LC_FUNCTION_STARTS. fix_file_offsets replaces 'dataoff' with the matching
// dataoff_cs / dataoff_ssi / dataoff_fs value.
struct linkedit_data_command : public load_command {
uint32_t dataoff;
uint32_t datasize;
};
// LC_ENCRYPTION_INFO load command. fix_file_offsets passes 'cryptoff' through
// fix_offset before writing the command back out.
struct encryption_info_command : public load_command {
uint32_t cryptoff;
uint32_t cryptsize;
uint32_t cryptid;
};
// LC_DYLD_INFO / LC_DYLD_INFO_ONLY load command. fix_file_offsets replaces
// the five *_off fields and bind_size with the values recorded in
// _new_linkedit_offsets; the remaining sizes are copied through unchanged.
struct dyld_info_command : public load_command {
uint32_t rebase_off;
uint32_t rebase_size;
uint32_t bind_off;
uint32_t bind_size;
uint32_t weak_bind_off;
uint32_t weak_bind_size;
uint32_t lazy_bind_off;
uint32_t lazy_bind_size;
uint32_t export_off;
uint32_t export_size;
};
// Library description embedded in dylib_command (third-party layout).
// NOTE(review): 'name' is presumably a byte offset from the start of the
// enclosing load command to the library path string -- confirm where it is
// consumed (not visible in this part of the file).
struct dylib {
uint32_t name;
uint32_t timestamp;
uint32_t current_version;
uint32_t compatibility_version;
};
// Load command wrapping a 'dylib' record.
// NOTE(review): presumably used for the LC_*_DYLIB commands -- confirm where
// it is consumed (not visible in this part of the file).
struct dylib_command : public load_command {
struct dylib dylib;
};
// 32-bit symbol table entry (third-party layout).
// NOTE(review): n_strx is presumably an index into the string table and
// n_value the symbol's address -- confirm where it is consumed (not visible
// in this part of the file).
struct nlist {
int32_t n_strx;
uint8_t n_type;
uint8_t n_sect;
int16_t n_desc;
uint32_t n_value;
};
// Objective-C class record with every field stored as a 32-bit value.
// NOTE(review): the fields presumably hold 32-bit VM addresses -- confirm
// against the Objective-C patching code (not visible in this part of the
// file).
struct class_t {
uint32_t isa;
uint32_t superclass;
uint32_t cache;
uint32_t vtable;
uint32_t data;
};
// Objective-C read-only class data record, all fields 32-bit.
// NOTE(review): 'name', 'baseMethods', 'baseProtocols', 'ivars' and
// 'baseProperties' presumably hold VM addresses of further records --
// confirm against the Objective-C patching code (not visible here).
struct class_ro_t {
uint32_t flags;
uint32_t instanceStart;
uint32_t instanceSize;
uint32_t ivarLayout;
uint32_t name;
uint32_t baseMethods;
uint32_t baseProtocols;
uint32_t ivars;
uint32_t weakIvarLayout;
uint32_t baseProperties;
};
// Objective-C method record, all fields 32-bit.
// NOTE(review): presumably selector name, type encoding and implementation
// addresses -- confirm against the Objective-C patching code (not visible
// here).
struct method_t {
uint32_t name;
uint32_t types;
uint32_t imp;
};
// Objective-C property record, both fields 32-bit.
// NOTE(review): presumably addresses of the name and attribute strings --
// confirm against the Objective-C patching code (not visible here).
struct property_t {
uint32_t name;
uint32_t attributes;
};
// Objective-C protocol record, all fields 32-bit.
// NOTE(review): the list fields presumably hold VM addresses of method /
// property / protocol lists -- confirm against the Objective-C patching code
// (not visible here).
struct protocol_t {
uint32_t isa;
uint32_t name;
uint32_t protocols;
uint32_t instanceMethods;
uint32_t classMethods;
uint32_t optionalInstanceMethods;
uint32_t optionalClassMethods;
uint32_t instanceProperties;
};
// Objective-C category record, all fields 32-bit.
// NOTE(review): the list fields presumably hold VM addresses of method /
// property / protocol lists -- confirm against the Objective-C patching code
// (not visible here).
struct category_t {
uint32_t name;
uint32_t cls;
uint32_t instanceMethods;
uint32_t classMethods;
uint32_t protocols;
uint32_t instanceProperties;
};
// Bind-stream opcodes. Each opcode byte carries the operation in its high
// nibble (BIND_OPCODE_MASK) and a small immediate in its low nibble
// (BIND_IMMEDIATE_MASK); ExtraBindRepository::optimize_and_write builds
// bytes as 'opcode | immediate'.
#define BIND_OPCODE_MASK 0xF0
#define BIND_IMMEDIATE_MASK 0x0F
#define BIND_OPCODE_DONE 0x00
#define BIND_OPCODE_SET_DYLIB_ORDINAL_IMM 0x10
#define BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB 0x20
#define BIND_OPCODE_SET_DYLIB_SPECIAL_IMM 0x30
#define BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM 0x40
#define BIND_OPCODE_SET_TYPE_IMM 0x50
#define BIND_OPCODE_SET_ADDEND_SLEB 0x60
#define BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 0x70
#define BIND_OPCODE_ADD_ADDR_ULEB 0x80
#define BIND_OPCODE_DO_BIND 0x90
#define BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB 0xA0
#define BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED 0xB0
#define BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB 0xC0
//------------------------------------------------------------------------------
// END THIRD-PARTY STRUCTURES
//------------------------------------------------------------------------------
// Compares at most the first 16 characters of 'x' and 'y' and reports whether
// they match. Segment and section names live in fixed char[16] fields that
// are not necessarily NUL-terminated, hence the bounded comparison.
static bool streq(const char x[16], const char* y) {
    const int difference = strncmp(x, y, 16);
    return !difference;
}
// Writes 'u' to 'f' as a ULEB128 number: 7 payload bits per byte, least
// significant group first, with the high bit set on every byte except the
// last one.
//
// Returns the number of bytes emitted (always at least 1).
static long write_uleb128(FILE* f, unsigned u) {
    uint8_t buf[16];
    int byte_count = 0;
    // A do/while guarantees that u == 0 still produces the single byte 0x00.
    // The previous 'while (u)' loop produced no bytes for zero and then
    // cleared the continuation bit of buf[-1], i.e. wrote out of bounds.
    do {
        uint8_t group = static_cast<uint8_t>(u & 0x7f);
        u >>= 7;
        if (u != 0)
            group |= 0x80;  // more groups follow
        buf[byte_count++] = group;
    } while (u != 0);
    fwrite(buf, sizeof(*buf), byte_count, f);
    return byte_count;
}
// Reduces 'the_path' to its stem and keeps taking the stem until the result
// no longer has an extension (so "a/b/libfoo.1.dylib" ends up as "libfoo").
// The stem is always taken at least once, even if there is no extension.
static boost::filesystem::path remove_all_extensions(const char* the_path) {
    boost::filesystem::path stripped(the_path);
    for (;;) {
        stripped = stripped.stem();
        if (stripped.extension().empty())
            break;
    }
    return stripped;
}
// Forward declaration: MachOFile stores a 'const ProgramContext*' before the
// class itself is defined further down in this file.
class ProgramContext;
// When dyld creates the cache file, Objective-C strings and methods that it
// recognizes as common across different libraries are coalesced. This causes
// big trouble when decaching, because references into the other library
// become dangling pointers. This class stores these external references so
// that they can be put back in an extra section of the decached library.
// ("String" is a misnomer because it can also store non-strings.)
class ExtraStringRepository {
struct Entry {
const char* string;
size_t size;
uint32_t new_address;
std::vector<uint32_t> override_addresses;
};
boost::unordered_map<const char*, int> _indices;
std::vector<Entry> _entries;
size_t _total_size;
section _template;
public:
ExtraStringRepository(const char* segname, const char* sectname, uint32_t flags, uint32_t alignment) {
memset(&_template, 0, sizeof(_template));
strncpy(_template.segname, segname, 16);
strncpy(_template.sectname, sectname, 16);
_template.flags = flags;
_template.align = alignment;
}
// Insert a piece of external data referred from 'override_address' to the
// repository.
void insert(const char* string, size_t size, uint32_t override_address) {
boost::unordered_map<const char*, int>::const_iterator it = _indices.find(string);
if (it != _indices.end()) {
_entries[it->second].override_addresses.push_back(override_address);
} else {
Entry entry;
entry.string = string;
entry.size = size;
entry.new_address = this->next_vmaddr();
entry.override_addresses.push_back(override_address);
_indices.insert(std::make_pair(string, _entries.size()));
_entries.push_back(entry);
_template.size += size;
}
}
void insert(const char* string, uint32_t override_address) {
this->insert(string, strlen(string) + 1, override_address);
}
// Iterate over all external data in this repository.
template <typename Object>
void foreach_entry(const Object* self, void (Object::*action)(const char* string, size_t size, uint32_t new_address, const std::vector<uint32_t>& override_addresses) const) const {
BOOST_FOREACH(const Entry& e, _entries) {
(self->*action)(e.string, e.size, e.new_address, e.override_addresses);
}
}
void increase_size_by(size_t delta) { _template.size += delta; }
size_t total_size() const { return _template.size; }
bool has_content() const { return _template.size != 0; }
// Get the 'section' structure for the extra section this repository
// represents.
section section_template() const { return _template; }
void set_section_vmaddr(uint32_t vmaddr) { _template.addr = vmaddr; }
void set_section_fileoff(uint32_t fileoff) { _template.offset = fileoff; }
uint32_t next_vmaddr() const { return _template.addr + _template.size; }
};
// Collects extra bindings (symbol + library ordinal + the places that must be
// bound to it) and serializes them as a bind opcode stream.
class ExtraBindRepository {
    struct Entry {
        std::string symname;    // symbol to bind to
        int libord;             // library ordinal the symbol comes from
        // (segment number, offset inside that segment) of every location to bind.
        std::vector<std::pair<int, uint32_t> > replace_offsets;
    };

    // Keyed by the target VM address the locations currently point at.
    boost::unordered_map<uint32_t, Entry> _entries;

public:
    // True if a binding for 'target_address' has already been recorded.
    bool contains(uint32_t target_address) const {
        return (_entries.find(target_address) != _entries.end());
    }

    // Records that the location 'replace_offset' (segment number, offset)
    // must be bound to whatever lives at 'target_address'. The first time a
    // target is seen, 'addr_info_getter' is called on 'self' to obtain the
    // symbol name and library ordinal for it.
    template <typename Object>
    void insert(uint32_t target_address, std::pair<int, uint32_t> replace_offset, const Object* self, void (Object::*addr_info_getter)(uint32_t addr, std::string* p_symname, int* p_libord) const) {
        boost::unordered_map<uint32_t, Entry>::iterator it = _entries.find(target_address);
        if (it != _entries.end()) {
            it->second.replace_offsets.push_back(replace_offset);
        } else {
            Entry entry;
            entry.replace_offsets.push_back(replace_offset);
            (self->*addr_info_getter)(target_address, &entry.symname, &entry.libord);
            _entries.insert(std::make_pair(target_address, entry));
        }
    }

    // Writes all recorded bindings to 'f' as bind opcodes, grouped by library
    // ordinal, and returns the number of bytes written.
    //
    // For each symbol the locations are sorted, and consecutive locations in
    // the same segment are chained with the "bind, then skip" opcodes. A
    // location is only bound by the opcode emitted *after* it, so there is
    // always one pending bind that has to be flushed.
    long optimize_and_write(FILE* f) {
        typedef boost::unordered_map<uint32_t, Entry>::value_type V;
        typedef boost::unordered_map<int, std::vector<const Entry*> > M;
        typedef std::pair<int, uint32_t> P;

        M entries_by_libord;
        BOOST_FOREACH(V& pair, _entries) {
            Entry& entry = pair.second;
            std::sort(entry.replace_offsets.begin(), entry.replace_offsets.end());
            entries_by_libord[entry.libord].push_back(&entry);
        }

        fputc(BIND_OPCODE_SET_TYPE_IMM | 1, f);
        long size = 1;

        BOOST_FOREACH(const M::value_type& pair, entries_by_libord) {
            int libord = pair.first;
            if (libord < 0x10) {
                // Small or negative ("special") ordinals fit in the immediate nibble.
                unsigned char imm = libord & BIND_IMMEDIATE_MASK;
                unsigned char opcode = libord < 0 ? BIND_OPCODE_SET_DYLIB_SPECIAL_IMM : BIND_OPCODE_SET_DYLIB_ORDINAL_IMM;
                fputc(opcode | imm, f);
                ++ size;
            } else {
                fputc(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB, f);
                size += 1 + write_uleb128(f, libord);
            }

            BOOST_FOREACH(const Entry* entry, pair.second) {
                size_t string_len = entry->symname.size();
                size += string_len + 2;     // opcode byte + name + NUL
                fputc(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, f);
                fwrite(entry->symname.c_str(), string_len+1, 1, f);

                int segnum = -1;
                uint32_t last_offset = 0;
                BOOST_FOREACH(P offset, entry->replace_offsets) {
                    if (offset.first != segnum) {
                        // Moving on to another segment: the last location of
                        // the previous segment is still pending and would be
                        // lost once the address is reset, so bind it first.
                        if (segnum != -1) {
                            fputc(BIND_OPCODE_DO_BIND, f);
                            ++ size;
                        }
                        segnum = offset.first;
                        last_offset = offset.second + 4;
                        fputc(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | segnum, f);
                        size += 1 + write_uleb128(f, offset.second);
                    } else {
                        // Bind the pending location and skip ahead to this one.
                        uint32_t delta = offset.second - last_offset;
                        unsigned imm_scale = delta % 4 == 0 ? delta / 4 : ~0u;
                        if (imm_scale == 0) {
                            fputc(BIND_OPCODE_DO_BIND, f);
                        } else if (imm_scale < 0x10u) {
                            fputc(BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED | imm_scale, f);
                        } else {
                            fputc(BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB, f);
                            size += write_uleb128(f, delta);
                        }
                        ++ size;
                        last_offset = offset.second + 4;
                    }
                }
                // Flush the bind for the final location of this symbol.
                fputc(BIND_OPCODE_DO_BIND, f);
                ++ size;
            }
        }
        return size;
    }
};
// A simple structure which only provides services related to VM address.
class MachOFile {
protected:
const mach_header* _header;
const ProgramContext* _context;
std::vector<const segment_command*> _segments;
uint32_t _image_vmaddr;
private:
boost::unordered_map<std::string, int> _libords;
int _cur_libord;
boost::unordered_map<uint32_t, std::string> _exports;
protected:
template <typename T>
void foreach_command(void(T::*action)(const load_command* cmd)) {
const unsigned char* cur_cmd = reinterpret_cast<const unsigned char*>(_header + 1);
for (uint32_t i = 0; i < _header->ncmds; ++ i) {
const load_command* cmd = reinterpret_cast<const load_command*>(cur_cmd);
cur_cmd += cmd->cmdsize;
(static_cast<T*>(this)->*action)(cmd);
}
}
// Convert VM address to file offset of the decached file _before_ inserting
// the extra sections.
long from_vmaddr(uint32_t vmaddr) const {
BOOST_FOREACH(const segment_command* segcmd, _segments) {
if (segcmd->vmaddr <= vmaddr && vmaddr < segcmd->vmaddr + segcmd->vmsize)
return vmaddr - segcmd->vmaddr + segcmd->fileoff;
}
return -1;
}
private:
void retrieve_segments_and_libords(const load_command* cmd);
public:
// Checks if the VM address is included in the decached file _before_
// inserting the extra sections.
bool contains_address(uint32_t vmaddr) const {
BOOST_FOREACH(const segment_command* segcmd, _segments) {
if (segcmd->vmaddr <= vmaddr && vmaddr < segcmd->vmaddr + segcmd->vmsize)
return true;
}
return false;
}
MachOFile(const mach_header* header, const ProgramContext* context, uint32_t image_vmaddr = 0)
: _header(header), _context(context), _image_vmaddr(image_vmaddr), _cur_libord(0)
{
if (header->magic != 0xfeedface)
return;
this->foreach_command(&MachOFile::retrieve_segments_and_libords);
}
const mach_header* header() const { return _header; }
int libord_with_name(const char* libname) const {
boost::unordered_map<std::string, int>::const_iterator cit = _libords.find(libname);
if (cit == _libords.end())
return 0;
else
return cit->second;
}
std::string exported_symbol(uint32_t vmaddr) const {
boost::unordered_map<uint32_t, std::string>::const_iterator cit = _exports.find(vmaddr);
if (cit != _exports.end())
return cit->second;
else
return "";
}
};
// This class represents one file going to be decached.
// Decaching is performed in several phases:
// 1. Search for all Objective-C selectors and methods that point outside of
// this library, and put this into an ExtraStringRepository.
// 2. Write out the __TEXT and __DATA segments, including the data from the
// ExtraStringRepository.
// 3. Inspect the DYLD_INFO, SYMTAB and DYSYMTAB commands to collect the
// relevant parts of global __LINKEDIT segment and copy them to the output
// file.
// 4. Revisit the output file to fix the file offsets. All file offsets were
// originally pointing to locations in the cache file, but in the decached
// file these will be no longer meaningful if not fixed.
// 5. Append the extra 'section' header to the corresponding segments, if there
// are external Objective-C selectors or methods.
// 6. Go through the Objective-C sections and rewire the external references.
class DecachingFile : public MachOFile {
struct FileoffFixup {
uint32_t sourceBegin;
uint32_t sourceEnd;
int32_t negDelta;
};
struct ObjcExtraString {
const char* string;
size_t entry_size;
uint32_t new_address;
off_t override_offset;
};
struct {
long rebase_off, bind_off, weak_bind_off,
lazy_bind_off, export_off, // dyld_info
symoff, stroff, // symtab
tocoff, modtaboff, extrefsymoff,
indirectsymoff, extreloff, locreloff, // dysymtab
dataoff, // linkedit_data_command (dummy)
dataoff_cs, dataoff_ssi, dataoff_fs;
long bind_size;
int32_t strsize;
} _new_linkedit_offsets;
private:
uint32_t _linkedit_offset, _linkedit_size;
uint32_t _imageinfo_address, _imageinfo_replacement;
FILE* _f;
std::vector<FileoffFixup> _fixups;
std::vector<segment_command> _new_segments;
ExtraStringRepository _extra_text, _extra_data;
std::vector<uint32_t> _nullify_patches;
ExtraBindRepository _extra_bind;
private:
void open_file(const boost::filesystem::path& filename) {
boost::filesystem::create_directories(filename.parent_path());
_f = fopen(filename.c_str(), "wb");
if (!_f) {
perror("Error");
fprintf(stderr, "Error: Cannot write to '%s'.\n", filename.c_str());
}
}
void write_extrastr(const char* string, size_t size, uint32_t, const std::vector<uint32_t>&) const {
fwrite(string, size, 1, _f);
}
void write_segment_content(const segment_command* cmd);
ExtraStringRepository* repo_for_segname(const char* segname) {
if (!strcmp(segname, "__DATA"))
return &_extra_data;
else if (!strcmp(segname, "__TEXT"))
return &_extra_text;
return NULL;
}
template<typename T>
void fix_offset(T& fileoff) const {
if (fileoff == 0)
return;
BOOST_REVERSE_FOREACH(const FileoffFixup& fixup, _fixups) {
if (fixup.sourceBegin <= fileoff && fileoff < fixup.sourceEnd) {
fileoff -= fixup.negDelta;
return;
}
}
}
void write_real_linkedit(const load_command* cmd);
void fix_file_offsets(const load_command* cmd) {
switch (cmd->cmd) {
default:
fwrite(cmd, cmd->cmdsize, 1, _f);
break;
case LC_SEGMENT: {
segment_command segcmd = *static_cast<const segment_command*>(cmd);
if (streq(segcmd.segname, "__LINKEDIT")) {
segcmd.vmsize = _linkedit_size;
segcmd.fileoff = _linkedit_offset;
segcmd.filesize = _linkedit_size;
fwrite(&segcmd, sizeof(segcmd), 1, _f);
} else {
const ExtraStringRepository* extra_repo = this->repo_for_segname(segcmd.segname);
bool has_extra_sect = extra_repo && extra_repo->has_content();
this->fix_offset(segcmd.fileoff);
section* sects = new section[segcmd.nsects + has_extra_sect];
memcpy(sects, 1 + static_cast<const segment_command*>(cmd), segcmd.nsects * sizeof(*sects));
for (uint32_t i = 0; i < segcmd.nsects; ++ i) {
this->fix_offset(sects[i].offset);
this->fix_offset(sects[i].reloff);
}
if (has_extra_sect) {
uint32_t extra_sect_size = extra_repo->total_size();
sects[segcmd.nsects] = extra_repo->section_template();
segcmd.cmdsize += sizeof(*sects);
segcmd.vmsize += extra_sect_size;
segcmd.filesize += extra_sect_size;
segcmd.nsects += 1;
}
fwrite(&segcmd, sizeof(segcmd), 1, _f);
fwrite(sects, sizeof(*sects), segcmd.nsects, _f);
delete[] sects;
}
_new_segments.push_back(segcmd);
break;
}
case LC_SYMTAB: {
symtab_command symcmd = *static_cast<const symtab_command*>(cmd);
symcmd.symoff = _new_linkedit_offsets.symoff;
symcmd.stroff = _new_linkedit_offsets.stroff;
symcmd.strsize = _new_linkedit_offsets.strsize;
fwrite(&symcmd, sizeof(symcmd), 1, _f);
break;
}
case LC_DYSYMTAB: {
dysymtab_command dycmd = *static_cast<const dysymtab_command*>(cmd);
dycmd.tocoff = _new_linkedit_offsets.tocoff;
dycmd.modtaboff = _new_linkedit_offsets.modtaboff;
dycmd.extrefsymoff = _new_linkedit_offsets.extrefsymoff;
dycmd.indirectsymoff = _new_linkedit_offsets.indirectsymoff;
dycmd.extreloff = _new_linkedit_offsets.extreloff;
dycmd.locreloff = _new_linkedit_offsets.locreloff;
fwrite(&dycmd, sizeof(dycmd), 1, _f);
break;
}
case LC_TWOLEVEL_HINTS: {
twolevel_hints_command tlcmd = *static_cast<const twolevel_hints_command*>(cmd);
this->fix_offset(tlcmd.offset);
fwrite(&tlcmd, sizeof(tlcmd), 1, _f);
break;
}
/*
case LC_SEGMENT_64: {
segment_command_64 segcmd = *static_cast<const segment_command_64*>(cmd);
this->fix_offset(segcmd.fileoff);
fwrite(&segcmd, sizeof(segcmd), 1, _f);
section_64* sects = new section_64[segcmd.nsects];
memcpy(sects, 1 + static_cast<const segment_command_64*>(cmd), segcmd.nsects * sizeof(*sects));
for (uint32_t i = 0; i < segcmd.nsects; ++ i) {
this->fix_offset(sects[i].offset);
this->fix_offset(sects[i].reloff);
}
fwrite(sects, sizeof(*sects), segcmd.nsects, _f);
delete[] sects;
break;
}
*/
case LC_CODE_SIGNATURE:
case LC_SEGMENT_SPLIT_INFO:
case LC_FUNCTION_STARTS: {
linkedit_data_command ldcmd = *static_cast<const linkedit_data_command*>(cmd);
if (ldcmd.cmd == LC_CODE_SIGNATURE)
ldcmd.dataoff = _new_linkedit_offsets.dataoff_cs;
else if (ldcmd.cmd == LC_SEGMENT_SPLIT_INFO)
ldcmd.dataoff = _new_linkedit_offsets.dataoff_ssi;
else if (ldcmd.cmd == LC_FUNCTION_STARTS)
ldcmd.dataoff = _new_linkedit_offsets.dataoff_fs;
fwrite(&ldcmd, sizeof(ldcmd), 1, _f);
break;
}
case LC_ENCRYPTION_INFO: {
encryption_info_command eicmd = *static_cast<const encryption_info_command*>(cmd);
this->fix_offset(eicmd.cryptoff);
fwrite(&eicmd, sizeof(eicmd), 1, _f);
break;
}
case LC_DYLD_INFO:
case LC_DYLD_INFO_ONLY: {
dyld_info_command dicmd = *static_cast<const dyld_info_command*>(cmd);
dicmd.rebase_off = _new_linkedit_offsets.rebase_off;
dicmd.bind_off = _new_linkedit_offsets.bind_off;
dicmd.weak_bind_off = _new_linkedit_offsets.weak_bind_off;
dicmd.lazy_bind_off = _new_linkedit_offsets.lazy_bind_off;
dicmd.export_off = _new_linkedit_offsets.export_off;
dicmd.bind_size = _new_linkedit_offsets.bind_size;
fwrite(&dicmd, sizeof(dicmd), 1, _f);
break;
}
}
}
// Convert VM address to file offset of the decached file _after_ inserting
// the extra sections.
long from_new_vmaddr(uint32_t vmaddr) const {
std::vector<segment_command>::const_iterator nit;
std::vector<const segment_command*>::const_iterator oit;
std::vector<segment_command>::const_iterator end = _new_segments.end();
for (nit = _new_segments.begin(), oit = _segments.begin(); nit != end; ++ nit, ++ oit) {
if (nit->vmaddr <= vmaddr && vmaddr < nit->vmaddr + nit->vmsize) {
uint32_t retval = vmaddr - nit->vmaddr + nit->fileoff;
// This mess is added to solve the __DATA,__bss section issue.
// This section is zero-filled, causing the segment's vmsize
// larger than the filesize. Since the __extradat section is
// placed after the __bss section, using just the formula above
// will cause the imaginary size comes from that section to be
// included as well. The "-=" below attempts to fix it.
if (vmaddr >= (*oit)->vmaddr + (*oit)->vmsize)
retval -= (*oit)->vmsize - (*oit)->filesize;
return retval;
}
}
return -1;
}
// Get the segment number and offset from that segment given a VM address.
std::pair<int, uint32_t> segnum_and_offset(uint32_t vmaddr) const {
int i = 0;
BOOST_FOREACH(const segment_command* segcmd, _segments) {
if (segcmd->vmaddr <= vmaddr && vmaddr < segcmd->vmaddr + segcmd->vmsize)
return std::make_pair(i, vmaddr - segcmd->vmaddr);
++ i;
}
return std::make_pair(-1, ~0u);
}
template <typename T>
void prepare_patch_objc_list(uint32_t list_vmaddr, uint32_t override_vmaddr);
void prepare_objc_extrastr(const segment_command* segcmd);
void get_address_info(uint32_t vmaddr, std::string* p_name, int* p_libord) const;
void add_extlink_to(uint32_t vmaddr, uint32_t override_vmaddr);
void patch_objc_sects_callback(const char*, size_t, uint32_t new_address, const std::vector<uint32_t>& override_addresses) const {
BOOST_FOREACH(uint32_t vmaddr, override_addresses) {
long actual_offset = this->from_new_vmaddr(vmaddr);
fseek(_f, actual_offset, SEEK_SET);
fwrite(&new_address, 4, 1, _f);
}
}
void patch_objc_sects() const {
_extra_text.foreach_entry(this, &DecachingFile::patch_objc_sects_callback);
_extra_data.foreach_entry(this, &DecachingFile::patch_objc_sects_callback);
this->patch_objc_sects_callback(NULL, 0, 0, _nullify_patches);
if (_imageinfo_address) {
long actual_offset = this->from_new_vmaddr(_imageinfo_address);
fseek(_f, actual_offset, SEEK_SET);
fwrite(&_imageinfo_replacement, 4, 1, _f);
}
}
public:
DecachingFile(const boost::filesystem::path& filename, const mach_header* header, const ProgramContext* context) :
MachOFile(header, context), _imageinfo_address(0),
_extra_text("__TEXT", "__objc_extratxt", 2, 0),
_extra_data("__DATA", "__objc_extradat", 0, 2)
{
if (header->magic != 0xfeedface) {
fprintf(stderr,
"Error: Cannot dump '%s'. Only 32-bit little-endian single-file\n"
" Mach-O objects are supported.\n", filename.c_str());
return;
}
memset(&_new_linkedit_offsets, 0, sizeof(_new_linkedit_offsets));
this->open_file(filename);
if (!_f)
return;
// phase 1
BOOST_FOREACH(const segment_command* segcmd, _segments) {
ExtraStringRepository* repo = this->repo_for_segname(segcmd->segname);
if (repo)
repo->set_section_vmaddr(segcmd->vmaddr + segcmd->vmsize);
}
BOOST_FOREACH(const segment_command* segcmd, _segments)
this->prepare_objc_extrastr(segcmd);
// phase 2
BOOST_FOREACH(const segment_command* segcmd, _segments)
this->write_segment_content(segcmd);
// phase 3
_linkedit_offset = static_cast<uint32_t>(ftell(_f));
this->foreach_command(&DecachingFile::write_real_linkedit);
_linkedit_size = static_cast<uint32_t>(ftell(_f)) - _linkedit_offset;
// phase 4 & 5
fseek(_f, offsetof(mach_header, sizeofcmds), SEEK_SET);
uint32_t new_sizeofcmds = _header->sizeofcmds + (_extra_text.has_content() + _extra_data.has_content()) * sizeof(section);
fwrite(&new_sizeofcmds, sizeof(new_sizeofcmds), 1, _f);
fseek(_f, sizeof(*header), SEEK_SET);
this->foreach_command(&DecachingFile::fix_file_offsets);
// phase 6
this->patch_objc_sects();
}
~DecachingFile() {
if (_f)
fclose(_f);
}
bool is_open() const { return _f != NULL; }
};
class ProgramContext {
const char* _folder;
char* _filename;
DataFile* _f;
bool _printmode;
std::vector<boost::filesystem::path> _namefilters;
boost::unordered_map<const mach_header*, boost::filesystem::path> _already_dumped;
const dyld_cache_header* _header;
const shared_file_mapping_np* _mapping;
const dyld_cache_image_info* _images;
std::vector<MachOFile> _macho_files;
public:
ProgramContext() :
_folder("libraries"),
_filename(NULL),
_f(NULL),
_printmode(false)
{}