diff --git a/MathCaptchaSolver.py b/MathCaptchaSolver.py index ecb4d78..d7823b2 100644 --- a/MathCaptchaSolver.py +++ b/MathCaptchaSolver.py @@ -11,51 +11,53 @@ def __init__(self, image_path): def enhance_legibility(self, cropped_image): gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY) - _, mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU) - return cv2.erode(cv2.blur(mask, (2, 2)), self.kernel, iterations=1) + return gray - def math_operation(self, left_number, right_number, operation='+'): - if left_number.isdigit() and right_number.isdigit(): - return eval(f"{left_number} {operation} {right_number}") - else: - return None - def math_operation_for_both_signs(self, left_number, right_number): - if left_number.isdigit() and right_number.isdigit(): + def math_operation(self, left_number, right_number): + if right_number.isdigit(): + return eval(f"{left_number} + {right_number}") + return None + + def resolve(self, left_image, right_image,left_image_twice,right_image_twice): + left_number = pipe(left_image_twice)[0]['generated_text'] + if left_number.isdigit(): left_number = int(left_number) - right_number = int(right_number) - return [left_number - right_number, left_number + right_number] - else: - return None - def resolve(self, left_image, right_image, sign_image, negative_sign_right_image): - sign = pipe(sign_image)[0]['generated_text'] - left_number = pipe(left_image)[0]['generated_text'] - - if sign in {'+', '@', '4','*'}: - right_number = pipe(right_image)[0]['generated_text'] - return self.math_operation(left_number, right_number) - elif sign in {'-', '='}: - right_number = pipe(negative_sign_right_image)[0]['generated_text'] - return self.math_operation(left_number, right_number, '-') + if left_number<10 or left_number==None or left_number=="": + left_number = pipe(left_image)[0]['generated_text'] + right_number = pipe(right_image_twice)[0]['generated_text'] + if right_number.isdigit() and int(right_number)>10: + return 
self.math_operation(left_number, right_number) + else: + right_number = pipe(right_image)[0]['generated_text'] + return self.math_operation(left_number, right_number) + elif left_number>=10 : + right_number = pipe(right_image_twice)[0]['generated_text'] + return self.math_operation(left_number, right_number) else: - unfixed_right_number = ''.join(char for char in pipe(right_image)[0]['generated_text'] if char.isdigit()) - return self.math_operation_for_both_signs(left_number, unfixed_right_number) + left_number = pipe(left_image)[0]['generated_text'] + if left_number.isdigit(): + right_number = pipe(right_image)[0]['generated_text'] + return self.math_operation(left_number, right_number) + def solve_captcha(self): - positions = {'left': 5, 'right': 60, 'sign': 39, 'negative_sign_right': 56} - dimensions = {'width': 25, 'height': 20, 'width_sign': 15, 'height_sign': 15, 'width_negative_sign': 18} - - left_image = self.image[7:27, positions['left']:positions['left']+dimensions['width']] - right_image = self.image[7:27, positions['right']:positions['right']+dimensions['width']] - sign_image = self.image[10:25, positions['sign']:positions['sign']+dimensions['width_sign']] - negative_sign_right_image = self.image[7:27, positions['negative_sign_right']:positions['negative_sign_right']+dimensions['width_negative_sign']] + positions = {'left': 5, 'right_unit': 57 , 'right_twice' : 71} + dimensions = {'width_twice': 31, 'width_unit': 19, 'height': 20} - left_enhanced = self.enhance_legibility(left_image) - right_enhanced = self.enhance_legibility(right_image) - negative_sign_right_enhanced = self.enhance_legibility(negative_sign_right_image) + left_image_for_unit_number = self.image[7:30, positions['left']:positions['left']+dimensions['width_unit']] + left_image_for_twice_number = self.image[7:30, positions['left']:positions['left']+dimensions['width_twice']] + right_image_for_left_twice_number = self.image[7:30, 
positions['right_twice']:positions['right_twice']+dimensions['width_twice']] + right_image_for_left_unit_number = self.image[7:30, positions['right_unit']:positions['right_unit']+dimensions['width_twice']] + + left_enhanced = self.enhance_legibility(left_image_for_unit_number) + left_enhanced_for_twice_number = self.enhance_legibility(left_image_for_twice_number) + right_enhanced = self.enhance_legibility(right_image_for_left_unit_number) + right_enhanced_for_twice_number = self.enhance_legibility(right_image_for_left_twice_number) + + cv2.imwrite('left_number.png', left_enhanced) + cv2.imwrite('left_image_for_twice_number.png', left_enhanced_for_twice_number) + cv2.imwrite('right_number.png', right_enhanced) + cv2.imwrite('right_image_for_twice_number.png', right_enhanced_for_twice_number) - cv2.imwrite('left_number.jpg', left_enhanced) - cv2.imwrite('right_number.jpg', right_enhanced) - cv2.imwrite('sign.jpg', sign_image) - cv2.imwrite('negative_sign_right_number.jpg', negative_sign_right_enhanced) - return self.resolve('left_number.jpg', 'right_number.jpg', 'sign.jpg', 'negative_sign_right_number.jpg') \ No newline at end of file + return self.resolve('left_number.png', 'right_number.png','left_image_for_twice_number.png',"right_image_for_twice_number.png") diff --git a/README.md b/README.md index b043987..67b61b7 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,10 @@ -
- - Logo - - -

Math Captcha Solver

- -

- A great tool for solving math captchas ! -

-
- - -
Table of Contents
  1. About The Project -
  2. - How it processes @@ -30,119 +12,24 @@
  3. Usage
  4. License
  5. Contact
  6. -
  7. Kindness
  8. -
## About The Project -This project is a tool to process mathematical captcha images and calculate the answer using deep learning models and image processing using Python language. - -An example of captchas used in this project: - -![captcha example](images/captcha-example.jpg) - -If you are looking for solving captchas of the following types: - -![captcha example](images/other-type.png) - -go to this repo: - -[MathCaptchaSolver](https://github.com/AmireNoori/MathCaptchaSolver-v2) - - -### Built With - -The deep learning model used in this tool: It is `TrOCR (large-sized model, fine-tuned on SROIE)`, which you can download from the link [https://huggingface.co/microsoft/trocr-large-printed](https://huggingface.co/microsoft/trocr-large-printed) and read about it. -The TrOCR model is an encoder-decoder model, consisting of an image Transformer as encoder, and a text Transformer as decoder. -Python language and opecv library -* https://huggingface.co -* https://opencv.org -* https://www.python.org - -## How it processes - -The processing of numbers and signs in the image is performed by the powerful model that I introduced above. But to process this type of captcha images, this model alone was not able to correctly extract the numbers and calculate the operation. So we had to use image processing to improve the result. - -* First, let's see what will be returned if we give the complete captcha image to the model: - -![captcha example](images/first-cap.jpg) - -* Output - ```sh - [{'generated_text': '40 % & @'}] - ``` - -As you can see, the output returned to us is `40 % & @`, which is not a very good result. - -So we first used morphological operations to erode the image and erase the extra lines to some extent, then we gave the image a more normal state using the blur method. -And finally, we used the crop technique to save two numbers separately with the sign between them with fixed coordinates of each number in all captcha images. 
- -Now we have three pictures, two numbers and a math symbol : - -![left number](images/left_number.jpg) ![sign](images/sign.jpg) ![right number](images/right_number.jpg) - - -Next, according to the extracted numbers in the image, the function performs the subtraction or addition operation and returns the answer to us. +![captcha example](images/captcha-example.png) -The output of the tool is in 3 different types. +This type of captcha is very similar to the captcha in [MathCaptchaSolver](https://github.com/AmireNoori/MathCaptchaSolver) main branch. +So if you want to know more about the details of the project, refer to this link: -* 1-The first type of output is an `integer` that is the sum or subtraction of two numbers.this output is displayed when the math sign is correctly recognized - -Example: - -
- captcha example -
- - -* Output - ```sh - 99 - ``` - -* 2-The second type of output is a `list` containing two numbers. - -This output is displayed when the tool is able to recognize the numbers, but despite the possible filters that have been set for the sign, it is not able to recognize the sign and is forced to add or subtract the numbers together, and the list it displays is the total result.you can test both answers in the input according to the output of the tool - -Example: - -
- captcha example -
- - -* Output - ```sh - [3, 19] - ``` - -* 3-The third type of output is `None`. - -This output is displayed when the tool is not able to recognize one of the two numbers and as a result could not calculate the numbers together. -This case happens very rarely and the tool detects the numbers correctly in most cases, but it may still show you such an output. - - -
- captcha example -
- - -* Output - ```sh - None - ``` - -* Execution Time -Captcha processing and calculation time in a system without GPU is between `16` seconds and `20` seconds from the time of execution to the end time, which is much less in systems with GPU and the processing speed is faster. +[MathCaptchaSolver](https://github.com/AmireNoori/MathCaptchaSolver) ### Installation 1. Clone the repo ```sh - git clone https://github.com/AmireNoori/MathCaptchaSolver + git clone -b v2 https://github.com/AmireNoori/MathCaptchaSolver ``` You must have installed `transformers`, `opencv-python` and `numpy` libraries. For this, you can install each of them separately or install this tool using the `requirements.txt` file by entering the following command in the terminal. @@ -177,7 +64,7 @@ _Note: When you run the program for the first time, your system must be connecte This screenshot is an example of the results obtained from the 20 captcha images shared for you
- screenshot + screenshot
## License @@ -215,7 +102,3 @@ SOFTWARE. Amir Noori - [@AmireNoori1](https://t.me/AmireNoori1) - noorifardam@gmail.com Project Link: [https://github.com/AmireNoori/MathCaptchaSolver](https://github.com/AmireNoori/MathCaptchaSolver) - -## Kindness - -Thank you if this tool was useful for you and you used it, give it a star ⭐ and make me happy by following my profile❤️. diff --git a/images/captcha-example.png b/images/captcha-example.png new file mode 100644 index 0000000..7ea4a2c Binary files /dev/null and b/images/captcha-example.png differ diff --git a/images/results.jpg b/images/results.jpg new file mode 100644 index 0000000..9d68469 Binary files /dev/null and b/images/results.jpg differ