Skip to content

Commit

Permalink
Merge branch 'zrax:master' into master
Browse files (browse the repository at this point in the history)
  • Loading branch information
merdw authored Aug 5, 2024
2 parents 7500547 + c925daf commit d4a93ab
Show file tree
Hide file tree
Showing 25 changed files with 186 additions and 76 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/linux-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ jobs:
run: |
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Debug ..
make -j2
make -j4
- name: Test
run: |
cd build
make check
make check JOBS=4
1 change: 1 addition & 0 deletions .github/workflows/msvc-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ jobs:
bash.exe ..\..\tests\all_tests.sh
env:
PYTHON_EXE: python.exe
JOBS: 4

- name: Upload artifact
uses: actions/upload-artifact@v3
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
*.kdev4
/.kdev4
__pycache__
tests-out
4 changes: 3 additions & 1 deletion ASTNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,9 @@ class ASTFormattedValue : public ASTNode {
STR = 1,
REPR = 2,
ASCII = 3,
FMTSPEC = 4
CONVERSION_MASK = 0x03,

HAVE_FMT_SPEC = 4,
};

ASTFormattedValue(PycRef<ASTNode> val, ConversionFlag conversion,
Expand Down
64 changes: 33 additions & 31 deletions ASTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -978,29 +978,14 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
case Pyc::FORMAT_VALUE_A:
{
auto conversion_flag = static_cast<ASTFormattedValue::ConversionFlag>(operand);
switch (conversion_flag) {
case ASTFormattedValue::ConversionFlag::NONE:
case ASTFormattedValue::ConversionFlag::STR:
case ASTFormattedValue::ConversionFlag::REPR:
case ASTFormattedValue::ConversionFlag::ASCII:
{
auto val = stack.top();
stack.pop();
stack.push(new ASTFormattedValue(val, conversion_flag, nullptr));
}
break;
case ASTFormattedValue::ConversionFlag::FMTSPEC:
{
auto format_spec = stack.top();
stack.pop();
auto val = stack.top();
stack.pop();
stack.push(new ASTFormattedValue(val, conversion_flag, format_spec));
}
break;
default:
fprintf(stderr, "Unsupported FORMAT_VALUE_A conversion flag: %d\n", operand);
PycRef<ASTNode> format_spec = nullptr;
if (conversion_flag & ASTFormattedValue::HAVE_FMT_SPEC) {
format_spec = stack.top();
stack.pop();
}
auto val = stack.top();
stack.pop();
stack.push(new ASTFormattedValue(val, conversion_flag, format_spec));
}
break;
case Pyc::GET_AWAITABLE:
Expand Down Expand Up @@ -1892,6 +1877,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
// Ignore
break;
case Pyc::SETUP_WITH_A:
case Pyc::WITH_EXCEPT_START:
{
PycRef<ASTBlock> withblock = new ASTWithBlock(pos+operand);
blocks.push(withblock);
Expand Down Expand Up @@ -2471,6 +2457,24 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
case Pyc::GEN_START_A:
stack.pop();
break;
case Pyc::SWAP_A:
{
unpack = operand;
ASTTuple::value_t values;
ASTTuple::value_t next_tuple;
values.resize(operand);
for (int i = 0; i < operand; i++) {
values[operand - i - 1] = stack.top();
stack.pop();
}
auto tup = new ASTTuple(values);
tup->setRequireParens(false);
auto next_tup = new ASTTuple(next_tuple);
next_tup->setRequireParens(false);
stack.push(tup);
stack.push(next_tup);
}
break;
default:
fprintf(stderr, "Unsupporteddwhere opcode: %s\n", Pyc::OpcodeName(opcode & 0xFF));
cleanBuild = false;
Expand Down Expand Up @@ -2648,23 +2652,21 @@ void print_formatted_value(PycRef<ASTFormattedValue> formatted_value, PycModule*
pyc_output << "{";
print_src(formatted_value->val(), mod, pyc_output);

switch (formatted_value->conversion()) {
case ASTFormattedValue::ConversionFlag::NONE:
switch (formatted_value->conversion() & ASTFormattedValue::CONVERSION_MASK) {
case ASTFormattedValue::NONE:
break;
case ASTFormattedValue::ConversionFlag::STR:
case ASTFormattedValue::STR:
pyc_output << "!s";
break;
case ASTFormattedValue::ConversionFlag::REPR:
case ASTFormattedValue::REPR:
pyc_output << "!r";
break;
case ASTFormattedValue::ConversionFlag::ASCII:
case ASTFormattedValue::ASCII:
pyc_output << "!a";
break;
case ASTFormattedValue::ConversionFlag::FMTSPEC:
}
if (formatted_value->conversion() & ASTFormattedValue::HAVE_FMT_SPEC) {
pyc_output << ":" << formatted_value->format_spec().cast<ASTObject>()->object().cast<PycString>()->value();
break;
default:
fprintf(stderr, "Unsupported NODE_FORMATTEDVALUE conversion flag: %d\n", formatted_value->conversion());
}
pyc_output << "}";
}
Expand Down
3 changes: 2 additions & 1 deletion README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ https://github.com/zrax/pycdc
* Build the generated project or makefile
* For projects (e.g. MSVC), open the generated project file and build it
* For makefiles, just run `make`
* To run tests (on \*nix or MSYS), run `make check`
* To run tests (on \*nix or MSYS), run `make check JOBS=4` (optionally add
`FILTER=xxxx` to run only the tests whose file names contain `xxxx`)

## Usage
**To run pycdas**, the PYC Disassembler:
Expand Down
19 changes: 18 additions & 1 deletion bytecode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ void print_const(std::ostream& pyc_output, PycRef<PycObject> obj, PycModule* mod
formatted_print(pyc_output, "%d", obj.cast<PycInt>()->value());
break;
case PycObject::TYPE_LONG:
formatted_print(pyc_output, "%s", obj.cast<PycLong>()->repr().c_str());
formatted_print(pyc_output, "%s", obj.cast<PycLong>()->repr(mod).c_str());
break;
case PycObject::TYPE_FLOAT:
formatted_print(pyc_output, "%s", obj.cast<PycFloat>()->value());
Expand Down Expand Up @@ -331,6 +331,11 @@ void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
};
static const size_t intrinsic2_names_len = sizeof(intrinsic2_names) / sizeof(intrinsic2_names[0]);

static const char *format_value_names[] = {
"FVC_NONE", "FVC_STR", "FVC_REPR", "FVC_ASCII",
};
static const size_t format_value_names_len = sizeof(format_value_names) / sizeof(format_value_names[0]);

PycBuffer source(code->code()->value(), code->code()->length());

int opcode, operand;
Expand Down Expand Up @@ -530,6 +535,18 @@ void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
else
formatted_print(pyc_output, "%d (UNKNOWN)", operand);
break;
case Pyc::FORMAT_VALUE_A:
{
auto conv = static_cast<size_t>(operand & 0x03);
const char *flag = (operand & 0x04) ? " | FVS_HAVE_SPEC" : "";
if (conv < format_value_names_len) {
formatted_print(pyc_output, "%d (%s%s)", operand,
format_value_names[conv], flag);
} else {
formatted_print(pyc_output, "%d (UNKNOWN)", operand);
}
}
break;
default:
formatted_print(pyc_output, "%d", operand);
break;
Expand Down
2 changes: 1 addition & 1 deletion bytecode_ops.inl
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ OPCODE_A(BUILD_MAP_UNPACK_WITH_CALL) // Python 3.5 A=(count
OPCODE_A(BUILD_TUPLE_UNPACK) // Python 3.5 - 3.8 A=count
OPCODE_A(BUILD_SET_UNPACK) // Python 3.5 - 3.8 A=count
OPCODE_A(SETUP_ASYNC_WITH) // Python 3.5 - 3.10 rel jmp +A
OPCODE_A(FORMAT_VALUE) // Python 3.6 -> A=conversion_type
OPCODE_A(FORMAT_VALUE) // Python 3.6 -> A=(conversion_type&0x3)+(flags)
OPCODE_A(BUILD_CONST_KEY_MAP) // Python 3.6 -> A=count
OPCODE_A(BUILD_STRING) // Python 3.6 -> A=count
OPCODE_A(BUILD_TUPLE_UNPACK_WITH_CALL) // Python 3.6 - 3.8 A=count
Expand Down
7 changes: 7 additions & 0 deletions pyc_code.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ void PycCode::load(PycData* stream, PycModule* mod)
else
m_flags = 0;

if (mod->verCompare(3, 8) < 0) {
// Remap flags to new values introduced in 3.8
if (m_flags & 0xF0000000)
throw std::runtime_error("Cannot remap unexpected flags");
m_flags = (m_flags & 0xFFFF) | ((m_flags & 0xFFF0000) << 4);
}

m_code = LoadObject(stream, mod).cast<PycString>();
m_consts = LoadObject(stream, mod).cast<PycSequence>();
m_names = LoadObject(stream, mod).cast<PycSequence>();
Expand Down
40 changes: 23 additions & 17 deletions pyc_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,29 @@ class PycCode : public PycObject {
public:
typedef std::vector<PycRef<PycString>> globals_t;
enum CodeFlags {
CO_OPTIMIZED = 0x1,
CO_NEWLOCALS = 0x2,
CO_VARARGS = 0x4,
CO_VARKEYWORDS = 0x8,
CO_NESTED = 0x10,
CO_GENERATOR = 0x20,
CO_NOFREE = 0x40,
CO_COROUTINE = 0x80,
CO_ITERABLE_COROUTINE = 0x100,
CO_GENERATOR_ALLOWED = 0x1000,
CO_FUTURE_DIVISION = 0x2000,
CO_FUTURE_ABSOLUTE_IMPORT = 0x4000,
CO_FUTURE_WITH_STATEMENT = 0x8000,
CO_FUTURE_PRINT_FUNCTION = 0x10000,
CO_FUTURE_UNICODE_LITERALS = 0x20000,
CO_FUTURE_BARRY_AS_BDFL = 0x40000,
CO_FUTURE_GENERATOR_STOP = 0x80000,
CO_OPTIMIZED = 0x1, // 1.3 ->
CO_NEWLOCALS = 0x2, // 1.3 ->
CO_VARARGS = 0x4, // 1.3 ->
CO_VARKEYWORDS = 0x8, // 1.3 ->
CO_NESTED = 0x10, // 2.1 ->
CO_GENERATOR = 0x20, // 2.2 ->
CO_NOFREE = 0x40, // 2.3 ->
CO_COROUTINE = 0x80, // 3.5 ->
CO_ITERABLE_COROUTINE = 0x100, // 3.5 ->
CO_ASYNC_GENERATOR = 0x200, // 3.6 ->
CO_GENERATOR_ALLOWED = 0x1000, // 2.3 only

// The FUTURE flags are shifted left 4 bits starting from Python 3.8
// Older versions are automatically mapped to the new values in load()
CO_FUTURE_DIVISION = 0x20000, // 2.3 - 2.7, 3.1 ->
CO_FUTURE_ABSOLUTE_IMPORT = 0x40000, // 2.5 - 2.7, 3.1 ->
CO_FUTURE_WITH_STATEMENT = 0x80000, // 2.5 - 2.7, 3.1 ->
CO_FUTURE_PRINT_FUNCTION = 0x100000, // 2.6 - 2.7, 3.1 ->
CO_FUTURE_UNICODE_LITERALS = 0x200000, // 2.6 - 2.7, 3.1 ->
CO_FUTURE_BARRY_AS_BDFL = 0x400000, // 3.1 ->
CO_FUTURE_GENERATOR_STOP = 0x800000, // 3.5 ->
CO_FUTURE_ANNOTATIONS = 0x1000000, // 3.7 ->
CO_NO_MONITORING_EVENTS = 0x2000000, // 3.13 ->
};

PycCode(int type = TYPE_CODE)
Expand Down
7 changes: 4 additions & 3 deletions pyc_numeric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,13 @@ bool PycLong::isEqual(PycRef<PycObject> obj) const
return true;
}

std::string PycLong::repr() const
std::string PycLong::repr(PycModule* mod) const
{
// Longs are printed as hex, since it's easier (and faster) to convert
// arbitrary-length integers to a power of two than an arbitrary base

if (m_size == 0)
return "0x0L";
return (mod->verCompare(3, 0) >= 0) ? "0x0" : "0x0L";

// Realign to 32 bits, since Python uses only 15
std::vector<unsigned> bits;
Expand Down Expand Up @@ -90,7 +90,8 @@ std::string PycLong::repr() const
aptr += snprintf(aptr, 9, "%X", *iter++);
while (iter != bits.rend())
aptr += snprintf(aptr, 9, "%08X", *iter++);
*aptr++ = 'L';
if (mod->verCompare(3, 0) < 0)
*aptr++ = 'L';
*aptr = 0;
return accum;
}
Expand Down
2 changes: 1 addition & 1 deletion pyc_numeric.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class PycLong : public PycObject {
int size() const { return m_size; }
const std::vector<int>& value() const { return m_value; }

std::string repr() const;
std::string repr(PycModule* mod) const;

private:
int m_size;
Expand Down
20 changes: 12 additions & 8 deletions pycdas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,11 @@ static const char* flag_names[] = {
"CO_OPTIMIZED", "CO_NEWLOCALS", "CO_VARARGS", "CO_VARKEYWORDS",
"CO_NESTED", "CO_GENERATOR", "CO_NOFREE", "CO_COROUTINE",
"CO_ITERABLE_COROUTINE", "<0x200>", "<0x400>", "<0x800>",
"CO_GENERATOR_ALLOWED", "CO_FUTURE_DIVISION",
"CO_FUTURE_ABSOLUTE_IMPORT", "CO_FUTURE_WITH_STATEMENT",
"CO_FUTURE_PRINT_FUNCTION", "CO_FUTURE_UNICODE_LITERALS",
"CO_FUTURE_BARRY_AS_BDFL", "CO_FUTURE_GENERATOR_STOP",
"<0x100000>", "<0x200000>", "<0x400000>", "<0x800000>",
"<0x1000000>", "<0x2000000>", "<0x4000000>", "<0x8000000>",
"CO_GENERATOR_ALLOWED", "<0x2000>", "<0x4000>", "<0x8000>",
"<0x10000>", "CO_FUTURE_DIVISION", "CO_FUTURE_ABSOLUTE_IMPORT", "CO_FUTURE_WITH_STATEMENT",
"CO_FUTURE_PRINT_FUNCTION", "CO_FUTURE_UNICODE_LITERALS", "CO_FUTURE_BARRY_AS_BDFL",
"CO_FUTURE_GENERATOR_STOP",
"CO_FUTURE_ANNOTATIONS", "CO_NO_MONITORING_EVENTS", "<0x4000000>", "<0x8000000>",
"<0x10000000>", "<0x20000000>", "<0x40000000>", "<0x80000000>"
};

Expand Down Expand Up @@ -102,7 +101,12 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent,
if (mod->verCompare(1, 5) >= 0)
iprintf(pyc_output, indent + 1, "Stack Size: %d\n", codeObj->stackSize());
if (mod->verCompare(1, 3) >= 0) {
iprintf(pyc_output, indent + 1, "Flags: 0x%08X", codeObj->flags());
unsigned int orig_flags = codeObj->flags();
if (mod->verCompare(3, 8) < 0) {
// Remap flags back to the value stored in the PyCode object
orig_flags = (orig_flags & 0xFFFF) | ((orig_flags & 0xFFF00000) >> 4);
}
iprintf(pyc_output, indent + 1, "Flags: 0x%08X", orig_flags);
print_coflags(codeObj->flags(), pyc_output);
}

Expand Down Expand Up @@ -223,7 +227,7 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent,
iprintf(pyc_output, indent, "%d\n", obj.cast<PycInt>()->value());
break;
case PycObject::TYPE_LONG:
iprintf(pyc_output, indent, "%s\n", obj.cast<PycLong>()->repr().c_str());
iprintf(pyc_output, indent, "%s\n", obj.cast<PycLong>()->repr(mod).c_str());
break;
case PycObject::TYPE_FLOAT:
iprintf(pyc_output, indent, "%s\n", obj.cast<PycFloat>()->value());
Expand Down
15 changes: 6 additions & 9 deletions tests/all_tests.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
#!/bin/bash
set -e

srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)"
jobs=${JOBS:-4}
filter=${FILTER:-""}

test_status=0
test_files=( "$srcdir"/tests/tokenized/*.txt )
for tf in "${test_files[@]}"; do
test_name="$(basename "$tf")"
test_name="${test_name%.txt}"
"$srcdir"/tests/decompyle_test.sh $test_name tests || test_status=1
done

exit $test_status
find "${srcdir}/tests/tokenized" -type f -name '*.txt' -a -name "*${filter}*" -print0 | \
xargs -0 -I '{}' -P $jobs \
bash -c 'o=$('"$srcdir"'/tests/decompyle_test.sh "$(basename -s .txt "{}")" tests-out) r=$?; echo "$o"; exit $r'
Binary file modified tests/compiled/f-string.3.7.pyc
Binary file not shown.
Binary file added tests/compiled/swap.3.11.pyc
Binary file not shown.
Binary file added tests/compiled/test_integers_py3.3.12.pyc
Binary file not shown.
Binary file added tests/compiled/test_integers_py3.3.5.pyc
Binary file not shown.
3 changes: 2 additions & 1 deletion tests/input/f-string.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@
print(f'''some {{braces}} {"inner literal: {braces} {{double braces}}"}''')
print(f'''f-string dict {some_dict[2]} and {{function call in expression}}: {max([1,20,3])}''')
print(f'{(lambda x: x*2)(3)}')
print(f'{var3!s:4.5}')
msg = (
f'a {var1}'
f'cool'
f'multiline {var2}\n'
f'f-string {var3}'
)
print(f'{now:%Y-%m-%d %H:%M}')
print(f'{now:%Y-%m-%d %H:%M}')
12 changes: 12 additions & 0 deletions tests/input/swap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
def SWAP():
my_array = [
1,
2,
3,
4,
5,
6,
8]
i = 1
j = 3
my_array[i], my_array[j], my_array[2] = my_array[j], my_array[i], my_array[4]
Loading

0 comments on commit d4a93ab

Please sign in to comment.