Skip to content

Commit

Permalink
Handle CACHE instructions in decompyle, and suppress them in disassem…
Browse files Browse the repository at this point in the history
…bly by default.

Showing CACHE instructions and the extra PyCode fields in pycdas is now
controlled by command-line arguments.
  • Loading branch information
zrax committed Jan 19, 2023
1 parent 5d855f0 commit 60e4eb6
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 32 deletions.
6 changes: 6 additions & 0 deletions ASTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2510,6 +2510,12 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
case Pyc::SETUP_ANNOTATIONS:
variable_annotations = true;
break;
case Pyc::CACHE:
/* These "fake" opcodes are used as placeholders for optimizing
certain opcodes in Python 3.11+. Since we have no need for
that during disassembly/decompilation, we can just treat these
as no-ops. */
break;
default:
fprintf(stderr, "Unsupported opcode: %s\n", Pyc::OpcodeName(opcode & 0xFF));
cleanBuild = false;
Expand Down
12 changes: 7 additions & 5 deletions bytecode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int&
}
}

void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent)
void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent, unsigned flags)
{
static const char *cmp_strings[] = {
"<", "<=", "==", "!=", ">", ">=", "in", "not in", "is", "is not",
Expand All @@ -358,12 +358,14 @@ void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent)
int opcode, operand;
int pos = 0;
while (!source.atEof()) {
int start_pos = pos;
bc_next(source, mod, opcode, operand, pos);
if (opcode == Pyc::CACHE && (flags & Pyc::DISASM_SHOW_CACHES) == 0)
continue;

for (int i=0; i<indent; i++)
fputs(" ", pyc_output);
fprintf(pyc_output, "%-7d ", pos); // Current bytecode position

bc_next(source, mod, opcode, operand, pos);
fprintf(pyc_output, "%-30s", Pyc::OpcodeName(opcode));
fprintf(pyc_output, "%-7d %-30s", start_pos, Pyc::OpcodeName(opcode));

if (opcode >= Pyc::PYC_HAVE_ARG) {
if (Pyc::IsConstArg(opcode)) {
Expand Down
7 changes: 6 additions & 1 deletion bytecode.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ enum Opcode {
PYC_INVALID_OPCODE = -1,
};

/* Bit flags that adjust disassembly output. Each flag occupies its own bit,
   so callers can OR them together into the unsigned `flags` value that is
   handed to bc_disasm(). */
enum DisassemblyFlags {
    // Also print the extra PyCode tables (locals kinds, line number table,
    // exception table) when dumping a code object.
    DISASM_PYCODE_VERBOSE = 1 << 0,
    // Keep CACHE instructions in the listing instead of skipping them.
    DISASM_SHOW_CACHES = 1 << 1,
};

const char* OpcodeName(int opcode);
int ByteToOpcode(int maj, int min, int opcode);

Expand All @@ -32,4 +37,4 @@ bool IsCompareArg(int opcode);

void print_const(PycRef<PycObject> obj, PycModule* mod, const char* parent_f_string_quote = nullptr);
void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos);
void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent);
void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent, unsigned flags);
57 changes: 31 additions & 26 deletions pycdas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@
# define PATHSEP '/'
#endif

// Set this to 1 to print extra details on PyCode objects
#define PRINT_EXTRA_PYCODE_FIELDS 0

static const char* flag_names[] = {
"CO_OPTIMIZED", "CO_NEWLOCALS", "CO_VARARGS", "CO_VARKEYWORDS",
"CO_NESTED", "CO_GENERATOR", "CO_NOFREE", "CO_COROUTINE",
Expand Down Expand Up @@ -74,7 +71,8 @@ static void iprintf(int indent, const char* fmt, ...)
va_end(varargs);
}

void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)
void output_object(PycRef<PycObject> obj, PycModule* mod, int indent,
unsigned flags)
{
if (obj == NULL) {
iputs(indent, "<NULL>");
Expand Down Expand Up @@ -107,52 +105,48 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)

iputs(indent + 1, "[Names]\n");
for (int i=0; i<codeObj->names()->size(); i++)
output_object(codeObj->names()->get(i), mod, indent + 2);
output_object(codeObj->names()->get(i), mod, indent + 2, flags);

if (mod->verCompare(1, 3) >= 0 && mod->verCompare(3, 11) < 0) {
if (mod->verCompare(3, 11) >= 0)
iputs(indent + 1, "[Locals+Names]\n");
else
iputs(indent + 1, "[Var Names]\n");
for (int i=0; i<codeObj->localNames()->size(); i++)
output_object(codeObj->localNames()->get(i), mod, indent + 2);
output_object(codeObj->localNames()->get(i), mod, indent + 2, flags);
}

#if PRINT_EXTRA_PYCODE_FIELDS
if (mod->verCompare(3, 11) >= 0) {
if (mod->verCompare(3, 11) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) {
iputs(indent + 1, "[Locals+Kinds]\n");
output_object(codeObj->localKinds().cast<PycObject>(), mod, indent + 2);
output_object(codeObj->localKinds().cast<PycObject>(), mod, indent + 2, flags);
}
#endif

if (mod->verCompare(2, 1) >= 0 && mod->verCompare(3, 11) < 0) {
iputs(indent + 1, "[Free Vars]\n");
for (int i=0; i<codeObj->freeVars()->size(); i++)
output_object(codeObj->freeVars()->get(i), mod, indent + 2);
output_object(codeObj->freeVars()->get(i), mod, indent + 2, flags);

iputs(indent + 1, "[Cell Vars]\n");
for (int i=0; i<codeObj->cellVars()->size(); i++)
output_object(codeObj->cellVars()->get(i), mod, indent + 2);
output_object(codeObj->cellVars()->get(i), mod, indent + 2, flags);
}

iputs(indent + 1, "[Constants]\n");
for (int i=0; i<codeObj->consts()->size(); i++)
output_object(codeObj->consts()->get(i), mod, indent + 2);
output_object(codeObj->consts()->get(i), mod, indent + 2, flags);

iputs(indent + 1, "[Disassembly]\n");
bc_disasm(codeObj, mod, indent + 2);
bc_disasm(codeObj, mod, indent + 2, flags);

#if PRINT_EXTRA_PYCODE_FIELDS
if (mod->verCompare(1, 5) >= 0) {
if (mod->verCompare(1, 5) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) {
iputs(indent + 1, "[Line Number Table]\n");
output_object(codeObj->lnTable().cast<PycObject>(), mod, indent + 2);
output_object(codeObj->lnTable().cast<PycObject>(), mod, indent + 2, flags);
}

if (mod->verCompare(3, 11) >= 0) {
if (mod->verCompare(3, 11) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) {
iputs(indent + 1, "[Exception Table]\n");
output_object(codeObj->exceptTable().cast<PycObject>(), mod, indent + 2);
output_object(codeObj->exceptTable().cast<PycObject>(), mod, indent + 2, flags);
}
#endif
}
break;
case PycObject::TYPE_STRING:
Expand Down Expand Up @@ -182,15 +176,15 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)
{
iputs(indent, "(\n");
for (const auto& val : obj.cast<PycTuple>()->values())
output_object(val, mod, indent + 1);
output_object(val, mod, indent + 1, flags);
iputs(indent, ")\n");
}
break;
case PycObject::TYPE_LIST:
{
iputs(indent, "[\n");
for (const auto& val : obj.cast<PycList>()->values())
output_object(val, mod, indent + 1);
output_object(val, mod, indent + 1, flags);
iputs(indent, "]\n");
}
break;
Expand All @@ -202,8 +196,8 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)
PycDict::key_t::const_iterator ki = keys.begin();
PycDict::value_t::const_iterator vi = values.begin();
while (ki != keys.end()) {
output_object(*ki, mod, indent + 1);
output_object(*vi, mod, indent + 2);
output_object(*ki, mod, indent + 1, flags);
output_object(*vi, mod, indent + 2, flags);
++ki, ++vi;
}
iputs(indent, "}\n");
Expand All @@ -213,7 +207,7 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)
{
iputs(indent, "{\n");
for (const auto& val : obj.cast<PycSet>()->values())
output_object(val, mod, indent + 1);
output_object(val, mod, indent + 1, flags);
iputs(indent, "}\n");
}
break;
Expand Down Expand Up @@ -259,6 +253,8 @@ int main(int argc, char* argv[])
const char* infile = nullptr;
bool marshalled = false;
const char* version = nullptr;
unsigned disasm_flags = 0;

for (int arg = 1; arg < argc; ++arg) {
if (strcmp(argv[arg], "-o") == 0) {
if (arg + 1 < argc) {
Expand All @@ -283,14 +279,23 @@ int main(int argc, char* argv[])
fputs("Option '-v' requires a version\n", stderr);
return 1;
}
} else if (strcmp(argv[arg], "--pycode-extra") == 0) {
disasm_flags |= Pyc::DISASM_PYCODE_VERBOSE;
} else if (strcmp(argv[arg], "--show-caches") == 0) {
disasm_flags |= Pyc::DISASM_SHOW_CACHES;
} else if (strcmp(argv[arg], "--help") == 0 || strcmp(argv[arg], "-h") == 0) {
fprintf(stderr, "Usage: %s [options] input.pyc\n\n", argv[0]);
fputs("Options:\n", stderr);
fputs(" -o <filename> Write output to <filename> (default: stdout)\n", stderr);
fputs(" -c Specify loading a compiled code object. Requires the version to be set\n", stderr);
fputs(" -v <x.y> Specify a Python version for loading a compiled code object\n", stderr);
fputs(" --pycode-extra Show extra fields in PyCode object dumps\n", stderr);
fputs(" --show-caches Don't suppress CACHE instructions in Python 3.11+ disassembly\n", stderr);
fputs(" --help Show this help text and then exit\n", stderr);
return 0;
} else if (argv[arg][0] == '-') {
fprintf(stderr, "Error: Unrecognized argument %s\n", argv[arg]);
return 1;
} else {
infile = argv[arg];
}
Expand Down Expand Up @@ -329,7 +334,7 @@ int main(int argc, char* argv[])
fprintf(pyc_output, "%s (Python %d.%d%s)\n", dispname, mod.majorVer(), mod.minorVer(),
(mod.majorVer() < 3 && mod.isUnicode()) ? " -U" : "");
try {
output_object(mod.code().try_cast<PycObject>(), &mod, 0);
output_object(mod.code().try_cast<PycObject>(), &mod, 0, disasm_flags);
} catch (std::exception& ex) {
fprintf(stderr, "Error disassembling %s: %s\n", infile, ex.what());
return 1;
Expand Down

0 comments on commit 60e4eb6

Please sign in to comment.