pycを読む(スクリプト味)
前回はpycを低レベルな命令に変換して読む方法を説明した.
あれはあれで読めはするが,アセンブリ言語と同様の読みづらさはある.
今回は,pycを直接Pythonのスクリプトにまで変換(デコンパイル)する方法を紹介する.
変換するライブラリは複数あるが,ここでは uncompyle
を使った.
結論として,精度はいい感じ.pycにソース保護を期待するのはやめましょう.
uncompyle
python byte-code decompiler.
uncompyle2とuncompyle6がある.
前者はPython 2系のみサポート,後者は2系・3系の両方で使える.
今回は後者のuncompyle6を使っている.
環境
各種バージョン
以下のとおり.
$ uname -a Linux poppycompass 4.18.6-arch1-1-ARCH #1 SMP PREEMPT Wed Sep 5 11:54:09 UTC 2018 x86_64 GNU/Linux $ python2 Python 2.7.15 (default, Jun 27 2018, 13:05:28) [GCC 8.1.1 20180531] on linux2
Arch Linuxの64ビット版.
つまりただのLinux,pythonは2系.
環境作成
$ virtualenv2 ENV $ . ENV/bin/activate $ pip install uncompyle6
サンプル
単純なやつ
適当に組んだ.
# sample.py import string def add(a, b): return a+b a = "abc" b = "def" c = "ghi" d = a + b + c sum = 0 for i in range(100): sum += 0 add(1,2)
pyc
を生成して,戻してみる.
$ python2 -m compileall sample.py # pyc生成 Compiling sample.py ... $ ENV/bin/uncompyle6 ./sample.pyc # uncompyleで変換 # uncompyle6 version 3.2.3 # Python bytecode 2.7 (62211) # Decompiled from: Python 2.7.15 (default, Jun 27 2018, 13:05:28) # [GCC 8.1.1 20180531] # Embedded file name: sample.py # Compiled at: 2018-09-26 06:49:02 import string def add(a, b): return a + b a = 'abc' b = 'def' c = 'ghi' d = a + b + c sum = 0 for i in range(100): sum += 0 add(1, 2) # okay decompiling ./sample.pyc
見た感じ,完璧に戻されている.
まともなやつ
簡単なやつだけじゃつまらないから,Oxyry Python Obfuscatorから取ってきたNQueensで試してみる.
オリジナルコードは以下.
__all__ = [] class NQueens: """Generate all valid solutions for the n queens puzzle""" def __init__(self, size): # Store the puzzle (problem) size and the number of valid solutions self.__size = size self.__solutions = 0 self.__solve() def __solve(self): """Solve the n queens puzzle and print the number of solutions""" positions = [-1] * self.__size self.__put_queen(positions, 0) print("Found", self.__solutions, "solutions.") def __put_queen(self, positions, target_row): """ Try to place a queen on target_row by checking all N possible cases. If a valid place is found the function calls itself trying to place a queen on the next row until all N queens are placed on the NxN board. """ # Base (stop) case - all N rows are occupied if target_row == self.__size: self.__show_full_board(positions) self.__solutions += 1 else: # For all N columns positions try to place a queen for column in range(self.__size): # Reject all invalid positions if self.__check_place(positions, target_row, column): positions[target_row] = column self.__put_queen(positions, target_row + 1) def __check_place(self, positions, ocuppied_rows, column): """ Check if a given position is under attack from any of the previously placed queens (check column and diagonal positions) """ for i in range(ocuppied_rows): if positions[i] == column or \ positions[i] - i == column - ocuppied_rows or \ positions[i] + i == column + ocuppied_rows: return False return True def __show_full_board(self, positions): """Show the full NxN board""" for row in range(self.__size): line = "" for column in range(self.__size): if positions[row] == column: line += "Q " else: line += ". " print(line) print("\n") def __show_short_board(self, positions): """ Show the queens positions on the board in compressed form, each number represent the occupied column position in the corresponding row. 
""" line = "" for i in range(self.__size): line += str(positions[i]) + " " print(line) def main(): """Initialize and solve the n queens puzzle""" NQueens(8) if __name__ == "__main__": # execute only if run as a script main()
戻した結果
__all__ = [] class NQueens: """Generate all valid solutions for the n queens puzzle""" def __init__(self, size): self.__size = size self.__solutions = 0 self.__solve() def __solve(self): """Solve the n queens puzzle and print the number of solutions""" positions = [ -1] * self.__size self.__put_queen(positions, 0) print ('Found', self.__solutions, 'solutions.') def __put_queen(self, positions, target_row): """ Try to place a queen on target_row by checking all N possible cases. If a valid place is found the function calls itself trying to place a queen on the next row until all N queens are placed on the NxN board. """ if target_row == self.__size: self.__show_full_board(positions) self.__solutions += 1 else: for column in range(self.__size): if self.__check_place(positions, target_row, column): positions[target_row] = column self.__put_queen(positions, target_row + 1) def __check_place(self, positions, ocuppied_rows, column): """ Check if a given position is under attack from any of the previously placed queens (check column and diagonal positions) """ for i in range(ocuppied_rows): if positions[i] == column or positions[i] - i == column - ocuppied_rows or positions[i] + i == column + ocuppied_rows: return False return True def __show_full_board(self, positions): """Show the full NxN board""" for row in range(self.__size): line = '' for column in range(self.__size): if positions[row] == column: line += 'Q ' else: line += '. ' print line print '\n' def __show_short_board(self, positions): """ Show the queens positions on the board in compressed form, each number represent the occupied column position in the corresponding row. """ line = '' for i in range(self.__size): line += str(positions[i]) + ' ' print line def main(): """Initialize and solve the n queens puzzle""" NQueens(8) if __name__ == '__main__': main()
やはり,記述はほぼ完璧に戻されている.
面白いのは,"""で書いたdocstringは復元でき,#で書いたコメントは復元できなかったこと.
docstringは__doc__として実行時に参照できるよう文字列のままpycに残るが,#のコメントはコンパイル時に捨てられるためだ.
可読性のためにつける行継続の\も消える.そりゃそんな情報残すわけ無いか.
難読化すると・・・?
さっきのやつをOxyry Python Obfuscatorで難読化した結果も試してみる.
""#line:4 __all__ =[]#line:6 class O0000OO0000OOO0OO :#line:8 ""#line:9 def __init__ (O00OO0O0O0O00OO00 ,O00OO0OO00OOO00OO ):#line:11 O00OO0O0O0O00OO00 .__OO00OO0OO00OOO00O =O00OO0OO00OOO00OO #line:13 O00OO0O0O0O00OO00 .__O00OOO00O0O000000 =0 #line:14 O00OO0O0O0O00OO00 .__O0OO00000O0O0O0O0 ()#line:15 def __O0OO00000O0O0O0O0 (OOOOO0O00OO0O0O0O ):#line:17 ""#line:18 O0OO0000OO0OO0000 =[-1 ]*OOOOO0O00OO0O0O0O .__OO00OO0OO00OOO00O #line:19 OOOOO0O00OO0O0O0O .__OO0OOOO0000O0O00O (O0OO0000OO0OO0000 ,0 )#line:20 print ("Found",OOOOO0O00OO0O0O0O .__O00OOO00O0O000000 ,"solutions.")#line:21 def __OO0OOOO0000O0O00O (O0OO00O0000O000O0 ,O0OOOOO0OOOOO000O ,OO0OOOO00OO00OOO0 ):#line:23 ""#line:28 if OO0OOOO00OO00OOO0 ==O0OO00O0000O000O0 .__OO00OO0OO00OOO00O :#line:30 O0OO00O0000O000O0 .__OOO0OO0O0000O00OO (O0OOOOO0OOOOO000O )#line:31 O0OO00O0000O000O0 .__O00OOO00O0O000000 +=1 #line:32 else :#line:33 for OO000O0OOOOO0O0OO in range (O0OO00O0000O000O0 .__OO00OO0OO00OOO00O ):#line:35 if O0OO00O0000O000O0 .__O00O0O00000O0O00O (O0OOOOO0OOOOO000O ,OO0OOOO00OO00OOO0 ,OO000O0OOOOO0O0OO ):#line:37 O0OOOOO0OOOOO000O [OO0OOOO00OO00OOO0 ]=OO000O0OOOOO0O0OO #line:38 O0OO00O0000O000O0 .__OO0OOOO0000O0O00O (O0OOOOO0OOOOO000O ,OO0OOOO00OO00OOO0 +1 )#line:39 def __O00O0O00000O0O00O (OO0OOO0OOO0O0OO0O ,O00000O0O00OOOOOO ,O00O0O000000OOOOO ,OOOO00OOO000OO0OO ):#line:42 ""#line:46 for OOO000O0OO0O00000 in range (O00O0O000000OOOOO ):#line:47 if O00000O0O00OOOOOO [OOO000O0OO0O00000 ]==OOOO00OOO000OO0OO or O00000O0O00OOOOOO [OOO000O0OO0O00000 ]-OOO000O0OO0O00000 ==OOOO00OOO000OO0OO -O00O0O000000OOOOO or O00000O0O00OOOOOO [OOO000O0OO0O00000 ]+OOO000O0OO0O00000 ==OOOO00OOO000OO0OO +O00O0O000000OOOOO :#line:50 return False #line:52 return True #line:53 def __OOO0OO0O0000O00OO (OOOO0OOO0OOO0OO00 ,OO0OO0O00OOO00OO0 ):#line:55 ""#line:56 for O0OOO0O0O0OO00O0O in range (OOOO0OOO0OOO0OO00 .__OO00OO0OO00OOO00O ):#line:57 OOO00OO0OOOOOOOOO =""#line:58 for OO00O0O0OOO0O0000 in range (OOOO0OOO0OOO0OO00 
.__OO00OO0OO00OOO00O ):#line:59 if OO0OO0O00OOO00OO0 [O0OOO0O0O0OO00O0O ]==OO00O0O0OOO0O0000 :#line:60 OOO00OO0OOOOOOOOO +="Q "#line:61 else :#line:62 OOO00OO0OOOOOOOOO +=". "#line:63 print (OOO00OO0OOOOOOOOO )#line:64 print ("\n")#line:65 def __OO0O00O0O0OOO0OOO (OO0OO000O00O0OO00 ,O00O00O00OO0O0OOO ):#line:67 ""#line:71 OOO000OOO00OO0OOO =""#line:72 for O00OOO000OO0O00OO in range (OO0OO000O00O0OO00 .__OO00OO0OO00OOO00O ):#line:73 OOO000OOO00OO0OOO +=str (O00O00O00OO0O0OOO [O00OOO000OO0O00OO ])+" "#line:74 print (OOO000OOO00OO0OOO )#line:75 def OOO0O0OO0OOO000OO ():#line:77 ""#line:78 O0000OO0000OOO0OO (8 )#line:79 if __name__ =="__main__":#line:81 OOO0O0OO0OOO000OO ()#line:83
戻す
__all__ = [] class O0000OO0000OOO0OO: """""" def __init__(O00OO0O0O0O00OO00, O00OO0OO00OOO00OO): O00OO0O0O0O00OO00.__OO00OO0OO00OOO00O = O00OO0OO00OOO00OO O00OO0O0O0O00OO00.__O00OOO00O0O000000 = 0 O00OO0O0O0O00OO00.__O0OO00000O0O0O0O0() def __O0OO00000O0O0O0O0(OOOOO0O00OO0O0O0O): """""" O0OO0000OO0OO0000 = [ -1] * OOOOO0O00OO0O0O0O.__OO00OO0OO00OOO00O OOOOO0O00OO0O0O0O.__OO0OOOO0000O0O00O(O0OO0000OO0OO0000, 0) print ('Found', OOOOO0O00OO0O0O0O.__O00OOO00O0O000000, 'solutions.') def __OO0OOOO0000O0O00O(O0OO00O0000O000O0, O0OOOOO0OOOOO000O, OO0OOOO00OO00OOO0): """""" if OO0OOOO00OO00OOO0 == O0OO00O0000O000O0.__OO00OO0OO00OOO00O: O0OO00O0000O000O0.__OOO0OO0O0000O00OO(O0OOOOO0OOOOO000O) O0OO00O0000O000O0.__O00OOO00O0O000000 += 1 else: for OO000O0OOOOO0O0OO in range(O0OO00O0000O000O0.__OO00OO0OO00OOO00O): if O0OO00O0000O000O0.__O00O0O00000O0O00O(O0OOOOO0OOOOO000O, OO0OOOO00OO00OOO0, OO000O0OOOOO0O0OO): O0OOOOO0OOOOO000O[OO0OOOO00OO00OOO0] = OO000O0OOOOO0O0OO O0OO00O0000O000O0.__OO0OOOO0000O0O00O(O0OOOOO0OOOOO000O, OO0OOOO00OO00OOO0 + 1) def __O00O0O00000O0O00O(OO0OOO0OOO0O0OO0O, O00000O0O00OOOOOO, O00O0O000000OOOOO, OOOO00OOO000OO0OO): """""" for OOO000O0OO0O00000 in range(O00O0O000000OOOOO): if O00000O0O00OOOOOO[OOO000O0OO0O00000] == OOOO00OOO000OO0OO or O00000O0O00OOOOOO[OOO000O0OO0O00000] - OOO000O0OO0O00000 == OOOO00OOO000OO0OO - O00O0O000000OOOOO or O00000O0O00OOOOOO[OOO000O0OO0O00000] + OOO000O0OO0O00000 == OOOO00OOO000OO0OO + O00O0O000000OOOOO: return False return True def __OOO0OO0O0000O00OO(OOOO0OOO0OOO0OO00, OO0OO0O00OOO00OO0): """""" for O0OOO0O0O0OO00O0O in range(OOOO0OOO0OOO0OO00.__OO00OO0OO00OOO00O): OOO00OO0OOOOOOOOO = '' for OO00O0O0OOO0O0000 in range(OOOO0OOO0OOO0OO00.__OO00OO0OO00OOO00O): if OO0OO0O00OOO00OO0[O0OOO0O0O0OO00O0O] == OO00O0O0OOO0O0000: OOO00OO0OOOOOOOOO += 'Q ' else: OOO00OO0OOOOOOOOO += '. ' print OOO00OO0OOOOOOOOO print '\n' def __OO0O00O0O0OOO0OOO(OO0OO000O00O0OO00, O00O00O00OO0O0OOO): """""" OOO000OOO00OO0OOO = '' for O00OOO000OO0O00OO in range(OO0OO000O00O0OO00.__OO00OO0OO00OOO00O): OOO000OOO00OO0OOO += str(O00O00O00OO0O0OOO[O00OOO000OO0O00OO]) + ' ' print OOO000OOO00OO0OOO def OOO0O0OO0OOO000OO(): """""" O0000OO0000OOO0OO(8) if __name__ == '__main__': OOO0O0OO0OOO000OO()
まあ,やっぱりしっかりと戻された.ただし戻ってくるのはあくまで難読化後のコードで,元の識別子名やdocstringの中身までは復元されない.
おわりに
pythonのソース保護としてpycで配布するのが良いという意見があるが,バイトコードにするだけではほぼ無意味なようだ.
本気で保護したいのなら,難読化する,別の言語で書く,クラウドサービスとして提供してソースが一切利用者の手元に渡らないようにする,くらいだろうか.
今回紹介したuncompyle
以外にも,unpyclib
というものもあるようだ.
試してみたらエラーが出て上手く行かなかったから,こちらはまた今度.
リバースエンジニアリング対策にpycをstripする方法はあるのだろうか?
参考
python - Is it possible to decompile a compiled .pyc file into a .py file? - Stack Overflow
GitHub - wibiti/uncompyle2: Python 2.7 decompiler
Oxyry Python Obfuscator - The most reliable python obfuscator in the world
どうやったらPythonアプリのソースコードを保護できますか? - Quora
Oxyry Python Obfuscator - The most reliable python obfuscator in the world