pushdump.patch

diff --git a/scripts/iterate_folder.py b/scripts/iterate_folder.py
index 27a7fbd..c3bf2cf 100644
--- a/scripts/iterate_folder.py
+++ b/scripts/iterate_folder.py
@@ -25,7 +25,7 @@ def read_and_decode(reader, chunk_size, max_window_size, previous_chunk=None, by
except UnicodeDecodeError:
if bytes_read > max_window_size:
raise UnicodeError(f"Unable to decode frame after reading {bytes_read:,} bytes")

-     log.info(f"Decoding error with {bytes_read:,} bytes, reading another chunk")
+     #log.info(f"Decoding error with {bytes_read:,} bytes, reading another chunk")
      return read_and_decode(reader, chunk_size, max_window_size, chunk, bytes_read)
    

@@ -59,7 +59,7 @@ for subdir, dirs, files in os.walk(input_folder):
total_size += file_size
input_files.append([input_path, file_size])

-log.info(f"Processing {len(input_files)} files of {(total_size / (2**30)):.2f} gigabytes")
+#log.info(f"Processing {len(input_files)} files of {(total_size / (2**30)):.2f} gigabytes")

total_lines = 0
total_bytes_processed = 0
@@ -69,14 +69,17 @@ for input_file in input_files:
created = None
for line, file_bytes_processed in read_lines_zst(input_file[0]):
obj = json.loads(line)

+     if obj['author'] == 'spez':  # your reddit username here w/o the u/
+      #print(obj)
+      print(obj['id'])
      created = datetime.utcfromtimestamp(int(obj['created_utc']))
      file_lines += 1
    
-     if file_lines == 1:
-     	log.info(f"{created.strftime('%Y-%m-%d %H:%M:%S')} : {file_lines + total_lines:,} : 0% : {(total_bytes_processed / total_size) * 100:.0f}%")
-     if file_lines % 100000 == 0:
-     	log.info(f"{created.strftime('%Y-%m-%d %H:%M:%S')} : {file_lines + total_lines:,} : {(file_bytes_processed / input_file[1]) * 100:.0f}% : {(total_bytes_processed / total_size) * 100:.0f}%")
+     #if file_lines == 1:
+     	#log.info(f"{created.strftime('%Y-%m-%d %H:%M:%S')} : {file_lines + total_lines:,} : 0% : {(total_bytes_processed / total_size) * 100:.0f}%")
+     #if file_lines % 100000 == 0:
+     	#log.info(f"{created.strftime('%Y-%m-%d %H:%M:%S')} : {file_lines + total_lines:,} : {(file_bytes_processed / input_file[1]) * 100:.0f}% : {(total_bytes_processed / total_size) * 100:.0f}%")
    
    total_lines += file_lines
    total_bytes_processed += input_file[1]
-   log.info(f"{created.strftime('%Y-%m-%d %H:%M:%S')} : {total_lines:,} : 100% : {(total_bytes_processed / total_size) * 100:.0f}%")
+   #log.info(f"{created.strftime('%Y-%m-%d %H:%M:%S')} : {total_lines:,} : 100% : {(total_bytes_processed / total_size) * 100:.0f}%")

-log.info(f"Total: {total_lines}")
+#log.info(f"Total: {total_lines}")

  • All
  • Subscribed
  • Moderated
  • Favorites
  • DataDumps
  • DreamBathrooms
  • everett
  • osvaldo12
  • magazineikmin
  • thenastyranch
  • rosin
  • normalnudes
  • Youngstown
  • Durango
  • slotface
  • ngwrru68w68
  • kavyap
  • mdbf
  • InstantRegret
  • JUstTest
  • ethstaker
  • GTA5RPClips
  • tacticalgear
  • Leos
  • anitta
  • modclub
  • khanakhh
  • cubers
  • cisconetworking
  • provamag3
  • megavids
  • tester
  • lostlight
  • All magazines