From 2bd1393ad5cd09903193ea9686491533f044cf43 Mon Sep 17 00:00:00 2001
From: Bulat Kurbanov <kurbanovbul@gmail.com>
Date: Sun, 11 Aug 2024 03:24:45 +0200
Subject: [PATCH] Unit

---
 README.md        |  4 +++
 input/.gitkeep   |  0
 main.py          | 68 ++++++++++++++++++++++++++++++++++++++++++++++++
 output/.gitkeep  |  0
 requirements.txt |  1 +
 run.bat          |  2 ++
 6 files changed, 75 insertions(+)
 create mode 100644 input/.gitkeep
 create mode 100644 main.py
 create mode 100644 output/.gitkeep
 create mode 100644 requirements.txt
 create mode 100644 run.bat

diff --git a/README.md b/README.md
index d98546c..d39ca10 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,6 @@
 # image_cutter
 
+
+## Run
+
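+Put the files to scan in `input/`, then run `run.bat` (Windows): it installs the requirements and runs `main.py`, which writes the extracted images to `output/`.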
diff --git a/input/.gitkeep b/input/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..2f11df5
--- /dev/null
+++ b/main.py
@@ -0,0 +1,68 @@
+from PIL import Image, UnidentifiedImageError
+import io
+
+import os
+
+
+def extract_images(prefix, byte_data):
+    """Carve JPEG/PNG images out of *byte_data* and save them to ./output.
+
+    The buffer is scanned left to right. Each candidate runs from a start
+    marker to the first matching end marker after it; candidates Pillow
+    rejects are reported and skipped. Accepted ones are re-saved through
+    Pillow as ``./output/{prefix}_image_{n}.jpg`` or ``.png`` (n counts
+    from 1 over the accepted images). Scanning stops at the first start
+    marker that has no end marker after it.
+    """
+    jpeg_start_marker, jpeg_end_marker = b'\xFF\xD8', b'\xFF\xD9'
+    png_start_marker = b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'
+    png_end_marker = b'IEND\xAE\x42\x60\x82'
+
+    images = []
+    start = 0
+    while start < len(byte_data):
+        jpeg_start = byte_data.find(jpeg_start_marker, start)
+        png_start = byte_data.find(png_start_marker, start)
+
+        # Earliest start marker wins; find() returns -1 when there is none.
+        if jpeg_start != -1 and (png_start == -1 or jpeg_start < png_start):
+            begin, end_marker = jpeg_start, jpeg_end_marker
+        elif png_start != -1:
+            begin, end_marker = png_start, png_end_marker
+        else:
+            break  # No more images found
+
+        # Test find()'s result BEFORE adding the marker length: the sum is
+        # never -1, and the bogus small end it yields would move `start`
+        # backwards and rescan the same bytes forever.
+        end_pos = byte_data.find(end_marker, begin)
+        if end_pos == -1:
+            break  # No end marker follows: truncated or not an image
+        start = end_pos + len(end_marker)  # Resume after this candidate
+        image_data = byte_data[begin:start]
+
+        try:
+            with Image.open(io.BytesIO(image_data)) as image:
+                image.verify()
+            images.append(image_data)
+        except (IOError, SyntaxError, UnidentifiedImageError):
+            # SyntaxError: raised by Pillow's PNG verifier for broken chunks.
+            print(f"Skipping invalid image data at position {begin}")
+
+    for idx, image_data in enumerate(images):
+        # verify() leaves the Image unusable, so open the bytes again.
+        image = Image.open(io.BytesIO(image_data))
+        # Anything Pillow does not identify as JPEG is written out as PNG.
+        extension = 'jpg' if image.format == 'JPEG' else 'png'
+        image.save(f"./output/{prefix}_image_{idx+1}.{extension}")
+        print(f"Extracted {image.format} image {idx+1}")
+
+
+
+# Run the extractor on every regular file in ./input/, using the file name
+# without its extension as the output prefix.
+for entry in os.listdir("./input/"):
+    path = os.path.join("./input/", entry)
+    if os.path.isfile(path):  # open() would raise on a sub-directory
+        with open(path, "rb") as stream:
+            extract_images(os.path.splitext(entry)[0], stream.read())
diff --git a/output/.gitkeep b/output/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3868fb1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+pillow
diff --git a/run.bat b/run.bat
new file mode 100644
index 0000000..f8e7856
--- /dev/null
+++ b/run.bat
@@ -0,0 +1,2 @@
+python -m pip install -r requirements.txt
+python main.py