A PHP Error was encountered

Severity: 8192

Message: Function create_function() is deprecated

Filename: geshi/geshi.php

Line Number: 4698

Backtrace:

File: /home/httpd/vhosts/scratchbook.ch/geopaste.scratchbook.ch/application/libraries/geshi/geshi.php
Line: 4698
Function: _error_handler

File: /home/httpd/vhosts/scratchbook.ch/geopaste.scratchbook.ch/application/libraries/geshi/geshi.php
Line: 4621
Function: _optimize_regexp_list_tokens_to_string

File: /home/httpd/vhosts/scratchbook.ch/geopaste.scratchbook.ch/application/libraries/geshi/geshi.php
Line: 1655
Function: optimize_regexp_list

File: /home/httpd/vhosts/scratchbook.ch/geopaste.scratchbook.ch/application/libraries/geshi/geshi.php
Line: 2029
Function: optimize_keyword_group

File: /home/httpd/vhosts/scratchbook.ch/geopaste.scratchbook.ch/application/libraries/geshi/geshi.php
Line: 2168
Function: build_parse_cache

File: /home/httpd/vhosts/scratchbook.ch/geopaste.scratchbook.ch/application/libraries/Process.php
Line: 45
Function: parse_code

File: /home/httpd/vhosts/scratchbook.ch/geopaste.scratchbook.ch/application/models/Pastes.php
Line: 517
Function: syntax

File: /home/httpd/vhosts/scratchbook.ch/geopaste.scratchbook.ch/application/controllers/Main.php
Line: 693
Function: getPaste

File: /home/httpd/vhosts/scratchbook.ch/geopaste.scratchbook.ch/index.php
Line: 315
Function: require_once

checksummer.py - Stikked
From Claude, 12 Years ago, written in Python.
Embed
  1. #!/usr/bin/env python
  2.  
  3. from __future__ import print_function
  4. import os, sys, subprocess
  5. from os.path import join, getsize
  6. from datetime import datetime
  7. import hashlib
  8. import sqlite3
  9.  
  10.  
  11.  
  12. # utils
  13.  
  def run(cmd):
      """Execute *cmd* through the shell and return what it wrote to stdout.

      cmd -- command line as a single string; it is passed to the shell as is,
             so it must never be assembled from untrusted input

      A non-zero exit status is reported by ``subprocess.check_output`` as
      ``subprocess.CalledProcessError``.
      """
      output = subprocess.check_output(cmd, shell=True)
      return output
  16.  
  def pager(string, autoquit = False):
      """Show *string* through the ``less`` pager.

      string   -- object to display; it is converted with ``str()`` before the
                  pager is started
      autoquit -- when true, ``less`` is started with ``--quit-if-one-screen``

      Pipe errors while writing or closing are ignored, so leaving the pager
      early is not treated as a failure.
      """
      text = str(string)
      command = 'less -X --quit-if-one-screen' if autoquit else 'less -X'
      pipe = os.popen(command, 'w')
      try:
          pipe.write(text)
      except (IOError, OSError, KeyboardInterrupt):
          # The reader went away (or the user hit Ctrl-C) before all text was
          # written; the remaining text is simply dropped.
          pass
      finally:
          try:
              # Closing flushes buffered text and can hit the same broken pipe.
              pipe.close()
          except (IOError, OSError):
              pass
  27.  
  def byteformat(num):
      """Format a byte count as a short human-readable string such as ``1.5KB``.

      num -- anything ``float()`` accepts; values that cannot be converted
             (``None``, non-numeric strings, ...) are treated as 0

      Returns a string with one decimal place and a 1024-based unit suffix
      from ``B`` up to ``YB``.
      """
      try:
          num = float(num)
      except (TypeError, ValueError, OverflowError):
          num = 0.0
      units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']
      for ext in units[:-1]:
          if num < 1024.0:
              return "%3.1f%s" % (num, ext)
          num /= 1024.0
      # Largest unit: report whatever is left rather than running off the end
      # of the loop and returning None.
      return "%3.1f%s" % (num, units[-1])
  37.  
  def dateformat(num):
      """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` in local time.

      num -- anything ``int()`` accepts; values that cannot be converted
             (``None``, non-numeric strings, ...) are treated as timestamp 0
      """
      try:
          num = int(num)
      except (TypeError, ValueError, OverflowError):
          # Only conversion failures fall back to 0; interpreter exits and
          # Ctrl-C are no longer swallowed here.
          num = 0
      return datetime.fromtimestamp(num).strftime('%Y-%m-%d %H:%M:%S')
  45.  
  def hash_file(filename):
      """Return the hexadecimal SHA-256 digest of the file at *filename*.

      The file is opened in binary mode and read in pieces of 8192 bytes.
      Errors raised while opening or reading it propagate to the caller.
      """
      with open(filename, 'rb') as handle:
          digest = hashlib.sha256()
          while True:
              block = handle.read(8192)
              if block == b'':
                  break
              digest.update(block)
      return digest.hexdigest()
  55.  
  56.  
  57.  
  58. # checksummer
  59.  
  class Checksummer:
      """Interactive file inventory backed by a SQLite database.

      The ``files`` table holds one row per collected file: its path relative
      to the base path, size, mtime, SHA-256 checksum and the two status flags
      ``file_found`` and ``checksum_ok``. The ``options`` table holds key/value
      settings; the only one used here is ``basepath``.
      """

      def __init__(self, database):
          """Open *database*, create missing tables and load the base path.

          database -- path handed to ``sqlite3.connect``

          If no base path is stored yet the user is asked for one.
          """
          self.database = database
          self.db = sqlite3.connect(database)
          # OptimizedUnicode is not provided by every Python version, so it is
          # only installed when the sqlite3 module has it.
          text_factory = getattr(sqlite3, 'OptimizedUnicode', None)
          if text_factory is not None:
              self.db.text_factory = text_factory
          self.init_db()

          # get basepath
          self.basepath = self.get_option('basepath')
          if not self.basepath:
              self.change_basepath()

      # small helpers

      def _prompt(self, message):
          """Read one line from the user (``raw_input`` if it exists, else ``input``)."""
          try:
              reader = raw_input
          except NameError:
              reader = input
          return reader(message)

      def _to_text(self, value):
          """Decode a UTF-8 byte string to text; other values are returned unchanged."""
          if isinstance(value, bytes):
              return value.decode('utf-8')
          return value

      def _to_native(self, value):
          """Return *value* as the native ``str`` type (UTF-8 bytes where ``str`` is ``bytes``)."""
          if str is bytes and not isinstance(value, str):
              return value.encode('utf-8')
          return value

      def _full_path(self, filename):
          """Join the base path and a stored relative *filename* into one native path."""
          return self._to_native(self.basepath) + self._to_native(filename)

      def _page_sizes(self, query, params = (), autoquit = False):
          """Run *query* (columns: filename, filesize) and show ``size<TAB>name`` lines in the pager."""
          c = self.db.cursor()
          c.execute(query, params)
          res = [byteformat(size) + "\t" + self._to_native(name) for name, size in c.fetchall()]
          pager("\n".join(res), autoquit)

      # menu

      def main(self):
          """Run the interactive menu until the user selects ``q``.

          The menu is redrawn in a loop after every action, so a long session
          does not pile up nested calls.
          """
          # The menu prints the base path, so one has to be set before drawing it.
          while not self.basepath:
              self.change_basepath()

          while True:
              self._menu_round()

      def _menu_round(self):
          """Draw the menu once, read one selection and run the matching action."""
          if os.geteuid() != 0:
              print('You are not root. Collecting files is NOT recommended!')
              self._prompt('Press Enter to continue...')
          files_check = self.check('files')
          filesize_check = self.check('filesize')
          checksum_check = self.check('checksum')
          deleted = self.check('deleted')
          changed = self.check('changed')
          totalsize = self.check('totalsize')

          os.system('clear')
          print('')
          print('basepath is: ' + self.basepath)
          print('total size: ' + totalsize)
          print('')
          print('=== Collecting ===')
          print('[cf] collect files')
          if files_check:
              print('[cs] collect filestats')
          if filesize_check:
              print('[mc] make checksums')
          if checksum_check:
              print('[rc] reindex & check all files')
          print('')
          print('=== Stats ===')
          if files_check:
              print('[s] search files')
          if filesize_check:
              print('[r] rank by filesize')
              print('[m] recently modified files')
          if checksum_check:
              print('[ld] list duplicate files')
          if deleted > 0:
              print('[d] show ' + str(deleted) + ' deleted files')
              print('[pd] prune deleted files')
          if changed > 0:
              print('[ch] show ' + str(changed) + ' changed files')
              print('[pc] prune changed files')
          print('')
          print('[cb] change basepath')
          print('[q] exit')
          print('')
          choice = self._prompt('Select: ')

          if choice == 'q':
              sys.exit()

          # These print progress to the terminal, so wait before redrawing the menu.
          collecting = {
              'cf': self.collect_files,
              'cs': self.collect_filestats,
              'mc': self.make_checksums,
              'rc': self.reindex,
          }
          # These return straight to the menu when done.
          other = {
              's': self.search,
              'r': self.filesize_stats,
              'm': self.mtime_stats,
              'ld': self.duplicate_stats,
              'd': self.deleted_stats,
              'pd': self.prune_deleted,
              'ch': self.changed_stats,
              'pc': self.prune_changed,
              'cb': self.change_basepath,
          }

          if choice in collecting:
              collecting[choice]()
              self._prompt('Press Enter to continue...')
          elif choice in other:
              other[choice]()
          # any other input just redraws the menu

      # database

      def init_db(self):
          """Create the ``files`` and ``options`` tables unless they already exist."""
          c = self.db.cursor()
          # IF NOT EXISTS keeps an existing table from preventing the creation
          # of the other one.
          c.execute("""CREATE TABLE IF NOT EXISTS files (
              id INTEGER PRIMARY KEY AUTOINCREMENT,
              filename TEXT UNIQUE,
              checksum_sha256 TEXT,
              filesize INTEGER,
              mtime INTEGER,
              file_found INTEGER,
              checksum_ok INTEGER
              )""")
          c.execute("""CREATE TABLE IF NOT EXISTS options (
              id INTEGER PRIMARY KEY AUTOINCREMENT,
              o_name TEXT UNIQUE,
              o_value TEXT
              )""")
          self.db.commit()

      def check(self, subject):
          """Answer one question about the current database content.

          subject -- one of:
              'files'     -> True if at least one file row exists
              'filesize'  -> True if at least one row has a file size
              'checksum'  -> True if at least one row has a checksum
              'deleted'   -> number of rows with file_found = 0
              'changed'   -> number of rows with checksum_ok = 0
              'totalsize' -> formatted sum of all file sizes

          Any other subject returns None.
          """
          c = self.db.cursor()

          if subject == 'files':
              c.execute("""SELECT id FROM files LIMIT 1""")
              return c.fetchone() is not None

          if subject == 'filesize':
              c.execute("""SELECT id FROM files WHERE filesize IS NOT NULL LIMIT 1""")
              return c.fetchone() is not None

          if subject == 'checksum':
              c.execute("""SELECT id FROM files WHERE checksum_sha256 IS NOT NULL LIMIT 1""")
              return c.fetchone() is not None

          # A 'duplicates' check (GROUP BY checksum_sha256 HAVING COUNT > 1)
          # was left out on purpose: too laggy with sqlite3.

          if subject == 'deleted':
              c.execute("""SELECT COUNT(*) FROM files WHERE file_found = '0'""")
              return c.fetchone()[0]

          if subject == 'changed':
              c.execute("""SELECT COUNT(*) FROM files WHERE checksum_ok = '0'""")
              return c.fetchone()[0]

          if subject == 'totalsize':
              c.execute("""SELECT SUM(filesize) FROM files""")
              return byteformat(c.fetchone()[0])

          return None

      # collecting

      def collect_files(self):
          """Walk the base path and insert every file name that is not stored yet."""
          print('collecting files...')
          c = self.db.cursor()
          count = 0
          prefix_length = len(self.basepath)

          for root, dirs, files in os.walk(self.basepath):
              for name in files:
                  # Every root produced by os.walk starts with the base path,
                  # so slicing strips exactly that prefix and leaves later
                  # occurrences of the same text in the path alone.
                  filename = self._to_text(os.path.join(root, name)[prefix_length:])

                  try:
                      c.execute("""INSERT INTO files(filename) VALUES(?)""", [filename])
                  except sqlite3.IntegrityError:
                      # filename is UNIQUE: this file is already in the table
                      pass

                  count += 1
                  if (count % 10000) == 0:
                      print(str(count), end="\n")
                      self.db.commit()

          self.db.commit()

      def collect_filestats(self):
          """Store size and mtime of every known file; flag files that cannot be stat'ed."""
          print('collecting filestats...')
          c = self.db.cursor()
          uc = self.db.cursor()
          c.execute("""SELECT id, filename FROM files""")

          allfiles = c.fetchall()
          count = len(allfiles)
          for r in allfiles:
              file_id = r[0]

              try:
                  stat = os.stat(self._full_path(r[1]))
              except OSError:
                  # file not found
                  uc.execute("""UPDATE files SET file_found = 0 WHERE id = ?""", [file_id])
              else:
                  uc.execute("""UPDATE files SET filesize = ?, mtime = ?, file_found = 1 WHERE id = ?""", [stat.st_size, stat.st_mtime, file_id])

              # count runs downwards, so this prints the number of files left
              if (count % 10000) == 0:
                  print(str(count), end="\n")
                  self.db.commit()
              count -= 1

          self.db.commit()

      def make_checksums(self):
          """Compute the SHA-256 checksum of every found file that has none yet."""
          c = self.db.cursor()
          uc = self.db.cursor()
          c.execute("""SELECT id, filename, filesize FROM files WHERE checksum_sha256 IS NULL AND file_found = '1'""")

          allfiles = c.fetchall()
          count = len(allfiles)
          for r in allfiles:
              file_id = r[0]
              filename = self._to_native(r[1])
              filesize = byteformat(r[2])

              print('(' + str(count) + ') making checksum: ' + filename + ' (' + filesize + ')')
              try:
                  checksum = hash_file(self._full_path(r[1]))
              except (IOError, OSError):
                  # file not found (or not readable)
                  uc.execute("""UPDATE files SET file_found = 0 WHERE id = ?""", [file_id])
              else:
                  uc.execute("""UPDATE files SET checksum_sha256 = ? WHERE id = ?""", [checksum, file_id])

              if (count % 1000) == 0:
                  self.db.commit()
              count -= 1

          self.db.commit()

      def reindex(self):
          """Refresh the inventory, then re-hash every found file and record whether it still matches."""
          self.collect_files()
          self.collect_filestats()
          self.make_checksums()

          # set to check
          print('preparing to check files...')
          uc = self.db.cursor()
          uc.execute("""UPDATE files SET checksum_ok = NULL WHERE file_found = '1'""")
          self.db.commit()

          # check checksum
          c = self.db.cursor()
          c.execute("""SELECT id, filename, filesize, checksum_sha256 FROM files WHERE checksum_ok IS NULL AND file_found = '1'""")

          allfiles = c.fetchall()
          count = len(allfiles)
          for r in allfiles:
              file_id = r[0]
              filename = self._to_native(r[1])
              filesize = byteformat(r[2])
              checksum_sha256 = r[3]

              print('checking checksum: (' + filesize + ') ' + filename)
              try:
                  checksum = hash_file(self._full_path(r[1]))
              except (IOError, OSError):
                  # file not found (or not readable)
                  uc.execute("""UPDATE files SET file_found = 0 WHERE id = ?""", [file_id])
              else:
                  if checksum == checksum_sha256:
                      uc.execute("""UPDATE files SET checksum_ok = '1' WHERE id = ?""", [file_id])
                  else:
                      print('checksum mismatch')
                      uc.execute("""UPDATE files SET checksum_ok = '0' WHERE id = ?""", [file_id])

              if (count % 1000) == 0:
                  self.db.commit()
              count -= 1

          self.db.commit()

      # stats

      def search(self, searchterm = '', autoquit = False):
          """Page all files whose name contains *searchterm*, largest first.

          searchterm -- text to look for; the user is asked when it is empty
          autoquit   -- passed on to the pager
          """
          if searchterm == '':
              searchterm = self._prompt('Enter searchterm: ')

          self._page_sizes(
              """SELECT filename, filesize FROM files WHERE filename LIKE ? ORDER BY filesize DESC""",
              ['%' + self._to_text(searchterm) + '%'],
              autoquit)

      def filesize_stats(self):
          """Page all files with a known size, largest first."""
          print('populating list...')
          self._page_sizes("""SELECT filename, filesize FROM files WHERE filesize IS NOT NULL ORDER BY filesize DESC""")

      def mtime_stats(self):
          """Page all found files, most recently modified first."""
          print('populating list...')
          c = self.db.cursor()
          c.execute("""SELECT filename, filesize, mtime FROM files WHERE file_found = '1' ORDER BY mtime DESC""")
          res = []

          for r in c.fetchall():
              res.append(dateformat(r[2]) + "\t" + byteformat(r[1]) + "\t" + self._to_native(r[0]))

          pager("\n".join(res))

      def duplicate_stats(self):
          """Page one file name per checksum that occurs more than once, with its count."""
          print('populating list...')
          c = self.db.cursor()
          c.execute("""SELECT filename, COUNT(checksum_sha256) AS count
              FROM files
              GROUP BY checksum_sha256
              HAVING (COUNT(checksum_sha256) > 1)
              ORDER BY count DESC""")
          res = []

          for r in c.fetchall():
              res.append(str(r[1]) + "\t" + self._to_native(r[0]))

          pager("\n".join(res))

      def deleted_stats(self):
          """Page all files flagged as not found, largest first."""
          self._page_sizes("""SELECT filename, filesize FROM files WHERE file_found = '0' ORDER BY filesize DESC""")

      def prune_deleted(self):
          """Delete the rows of all files flagged as not found."""
          print('pruning deleted files...')
          c = self.db.cursor()
          c.execute("""DELETE FROM files WHERE file_found = '0'""")
          self.db.commit()

      def changed_stats(self):
          """Page all files whose checksum no longer matches, largest first."""
          self._page_sizes("""SELECT filename, filesize FROM files WHERE checksum_ok = '0' ORDER BY filesize DESC""")

      def prune_changed(self):
          """Clear checksum, check result and size of all changed files."""
          print('removing checksum from changed files...')
          c = self.db.cursor()
          c.execute("""UPDATE files SET checksum_sha256 = NULL, checksum_ok = NULL, filesize = NULL WHERE checksum_ok = 0""")
          self.db.commit()

      # options

      def change_basepath(self):
          """Ask for a new base path and store it; empty input keeps the current one."""
          print('Choose base path')
          basepath = self._prompt('(enter full path, without trailing slash): ')
          if basepath != '':
              self.basepath = basepath
              self.set_option('basepath', self._to_text(basepath))

      def set_option(self, key, val):
          """Store *val* under option name *key*, replacing an existing value."""
          c = self.db.cursor()
          try:
              c.execute("""INSERT INTO options(o_name, o_value) VALUES(?, ?)""", [key, val])
          except sqlite3.IntegrityError:
              # o_name is UNIQUE: the option exists already, so overwrite it
              c.execute("""UPDATE options SET o_value = ? WHERE o_name = ?""", [val, key])
          self.db.commit()

      def get_option(self, key):
          """Return the stored value of option *key*, or False when it is not set."""
          c = self.db.cursor()
          c.execute("""SELECT o_value FROM options WHERE o_name = ?""", [key])
          val = c.fetchone()
          if val:
              return val[0]
          else:
              return False
  482.  
  483.  
  484.  
  485. # run
  486.  
  # Without a database argument there is nothing to open: show the usage and stop.
  if len(sys.argv) < 2 or sys.argv[1] == '':
      print('Usage:   ' + __file__ + ' sqlite3.db [search arguments]')
      print('')
      print('Example: ' + __file__ + ' myfiles.db')
      print('')
      sys.exit()

  database = sys.argv[1]

  c = Checksummer(database)

  # Everything after the database name is one search term; with a term the
  # script searches once, without one it opens the interactive menu.
  searchterm = ' '.join(sys.argv[2:])
  if searchterm == '':
      c.main()
  else:
      c.search(searchterm, autoquit = True)
  503.