Extract cow number from image

前端 未结 5 2020
庸人自扰
庸人自扰 2020-12-13 07:13

Every now and then my mom has to sift through these types of photos, extract the number from each image, and rename the file to that number.

5条回答
  •  挽巷
    挽巷 (楼主)
    2020-12-13 08:07

    Using the PIL (Python Imaging Library) you can easily load images and process them. Converting the image from RGB to grayscale should make it easier to detect levels. If you want to threshold the image (to detect the white boards), there is a point() function which lets you map the colors.

    On the other hand, you could write a simple program, which lets you

    • select, and show an image
    • mark the area where the boards are
    • crop the image
    • apply tesseract or whatever,
    • save the image with the detected number

    That should facilitate the process a lot! Writing this should be relatively easy using TkInter, PyGTK, PyQt or some other windowing toolkit.

    EDIT: I needed a similar program to categorize images here - though not to OCR them. So I finally decided this was as good a time as any and made a first attempt (with OCR!). Make a backup of your images before trying it out! Quick manual:

    • Top left: Select the work folder; the image list will appear if there are any images in the folder.
    • Select image. Select area of image with number. Coordinates will appear in lower left corner, and program will call Tesseract.
    • Edit the OCR'd number in the dialog, if necessary.
    • Click Ok to accept - image will be renamed.

    Here's the pre-alpha program:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    #
    #  test_pil.py
    #  
    #  Copyright 2015 John Coppens 
    #  
    #  This program is free software; you can redistribute it and/or modify
    #  it under the terms of the GNU General Public License as published by
    #  the Free Software Foundation; either version 2 of the License, or
    #  (at your option) any later version.
    #  
    #  This program is distributed in the hope that it will be useful,
    #  but WITHOUT ANY WARRANTY; without even the implied warranty of
    #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    #  GNU General Public License for more details.
    #  
    #  You should have received a copy of the GNU General Public License
    #  along with this program; if not, write to the Free Software
    #  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
    #  MA 02110-1301, USA.
    #  
    #  
    
    import pygtk
    import gtk
    import glob
    import os.path as osp
    from os import rename
    import re
    import subprocess as sp
    
    # Scratch file that receives the cropped selection before Tesseract reads it.
    temp_image = "/tmp/test_pil.png"
    # File extensions treated as images; applied with re.search to file names.
    # Raw string so that "\." is not parsed as a (invalid) string escape.
    image_re = r"\.(?:jpe?g|png|gif)$"
    
    class RecognizeDigits():
        """ Crops the selected area out of a gtk.Image, runs Tesseract on it and
            lets the user confirm or correct the number that was read.
        """
        def __init__(self):
            pass

        @staticmethod
        def _selection_box(x0, y0, x1, y1):
            """ Convert two opposite corners into (left, top, width, height),
                independently of the direction in which the area was dragged.
            """
            return min(x0, x1), min(y0, y1), abs(x1 - x0), abs(y1 - y0)

        def process(self, img, x0, y0, x1, y1):
            """ Receive the gtk.Image, and the limits of the selected area (in
                window coordinates!)
                Call Tesseract on the area, and give the possibility to edit the
                result.
                Returns None if NO is pressed or if the selected area is empty,
                and the OCR'd (and edited) text if YES is pressed.
            """
            left, top, width, height = self._selection_box(x0, y0, x1, y1)
            if width == 0 or height == 0:
                # A click without a drag selects nothing; there is nothing to crop
                return None

            pb = img.get_pixbuf().subpixbuf(left, top, width, height)
            pb.save(temp_image, "png")

            # The option and its value must be separate argv entries; as a single
            # "-psm 7" string it is not recognized as the -psm option.
            # NOTE(review): newer Tesseract releases spell this "--psm" - confirm
            # against the installed version.
            try:
                out = sp.check_output(("tesseract", temp_image, "stdout",
                                       "-psm", "7", "digits"))
            except (OSError, sp.CalledProcessError) as err:
                # Tesseract missing or failed: still let the user type the number
                print("Tesseract failed: %s" % err)
                out = ""
            out = out.replace(" ", "").strip()

            dlg = gtk.MessageDialog(type = gtk.MESSAGE_QUESTION,
                                    flags = gtk.DIALOG_MODAL,
                                    buttons = gtk.BUTTONS_YES_NO,
                                    message_format = "The number read is:")
            entry = gtk.Entry()
            entry.set_text(out)
            dlg.get_message_area().pack_start(entry)
            entry.show()
            response = dlg.run()
            # Read the (possibly edited) text before the dialog is destroyed
            nr = entry.get_text()

            dlg.destroy()

            if response == gtk.RESPONSE_YES:
                return nr
            return None
    
    class FileSelector(gtk.VBox):
        """ Provides a folder selector (at the top) and a list of files in the
            selected folder. On selecting a file, the FileSelector calls the
            function provided to the constructor (image_viewer) with the full
            path of that file.
        """
        def __init__(self, image_viewer):
            gtk.VBox.__init__(self)
            self.image_viewer = image_viewer
            self.imgdir = None          # Set once a folder has been chosen

            fc = gtk.FileChooserButton('Select a folder')
            fc.set_action(gtk.FILE_CHOOSER_ACTION_SELECT_FOLDER)
            fc.connect("selection-changed", self.on_file_set)
            self.pack_start(fc, expand = False, fill = True)

            # Single text column holding the base names of the images
            self.tstore = gtk.ListStore(str)
            self.tview = gtk.TreeView(self.tstore)
            self.tsel = self.tview.get_selection()
            self.tsel.connect("changed", self.on_selection_changed)
            renderer = gtk.CellRendererText()
            col = gtk.TreeViewColumn(None, renderer, text = 0)
            self.tview.append_column(col)

            scrw = gtk.ScrolledWindow()
            scrw.add(self.tview)
            self.pack_start(scrw, expand = True, fill = True)

        def on_file_set(self, fcb):
            """ Refill the list with the image files of the newly chosen folder. """
            self.tstore.clear()
            self.imgdir = fcb.get_filename()
            if self.imgdir is None:
                # Nothing selected (yet): leave the list empty
                return

            # Sorted so that the list order does not depend on the file system;
            # case-insensitive so that e.g. ".JPG" files are listed as well.
            for f in sorted(glob.glob(osp.join(self.imgdir, "*"))):
                if re.search(image_re, f, re.IGNORECASE):
                    self.tstore.append([osp.basename(f)])

        def on_selection_changed(self, sel):
            """ Hand the full path of the selected file to the image viewer. """
            model, itr = sel.get_selected()
            if itr is None:
                # Selection was cleared (e.g. the list was just emptied)
                return
            base = model.get(itr, 0)
            self.image_viewer(osp.join(self.imgdir, base[0]))
    
    class Status(gtk.Table):
        """ Small status panel showing the coordinates of the area selected
            in the image: one row for the top left corner, one for the bottom
            right corner, with an X and a Y column.
        """
        def __init__(self):
            gtk.Table.__init__(self)

            # Column headers and row captions: (text, left, right, top, bottom)
            captions = (("X", 1, 2, 0, 1),
                        ("Y", 2, 3, 0, 1),
                        ("Top left:", 0, 1, 1, 2),
                        ("Bottom right:", 0, 1, 2, 3))
            for text, left, right, top, bottom in captions:
                self.attach(gtk.Label(text), left, right, top, bottom,
                            yoptions = gtk.FILL)

            # One entry per coordinate, stored under its name
            self.entries = {}
            cells = (("x0", 1, 2, 1, 2), ("y0", 2, 3, 1, 2),
                     ("x1", 1, 2, 2, 3), ("y1", 2, 3, 2, 3))
            for key, left, right, top, bottom in cells:
                field = gtk.Entry()
                self.entries[key] = field
                field.set_width_chars(6)
                self.attach(field, left, right, top, bottom,
                            yoptions = gtk.FILL)

        def set_top_left(self, x0, y0):
            """ Remember and display the top left corner. """
            self.x0, self.y0 = x0, y0
            self.entries["x0"].set_text(str(x0))
            self.entries["y0"].set_text(str(y0))

        def set_bottom_right(self, x1, y1):
            """ Remember and display the bottom right corner. """
            self.x1, self.y1 = x1, y1
            self.entries["x1"].set_text(str(x1))
            self.entries["y1"].set_text(str(y1))
    
    class ImageViewer(gtk.ScrolledWindow):
        """ Provides a scrollwindow to move the image around. It also detects
            button press and release events (left button), will call status
            to update the coordinates, and will call task on button release.
            If the task returns a text, the image file is renamed to that text
            (keeping its folder and extension).
        """
        def __init__(self, status, task = None):
            gtk.ScrolledWindow.__init__(self)

            self.task = task
            self.status = status
            self.drawing = False        # True between left press and release
            self.prev_rect = None
            self.imagename = None       # No image loaded yet

            self.viewport = gtk.Viewport()
            self.viewport.connect("button-press-event", self.on_button_pressed)
            self.viewport.connect("button-release-event", self.on_button_released)
            self.viewport.set_events(gtk.gdk.BUTTON_PRESS_MASK | \
                                     gtk.gdk.BUTTON_RELEASE_MASK)

            self.img = gtk.Image()
            self.viewport.add(self.img)
            self.add(self.viewport)

        def set_image(self, fname):
            """ Show the image stored in file fname. """
            self.imagename = fname
            self.img.set_from_file(fname)

        def on_button_pressed(self, viewport, event):
            """ Left button pressed: remember the start of the rectangle. """
            if event.button == 1:
                # NOTE(review): the event coordinates are used as delivered to
                # the viewport; presumably they only match image pixels when the
                # image is not offset inside the viewport - confirm (an earlier
                # attempt used translate_coordinates here).
                self.x0, self.y0 = int(event.x), int(event.y)
                self.status.set_top_left(self.x0, self.y0)
                self.drawing = True

        def on_button_released(self, viewport, event):
            """ Left button released: finish the rectangle, run the task on it
                and rename the image file to the text the task returns.
            """
            if event.button != 1 or not self.drawing:
                # Not the left button, or a release without a matching press
                return
            self.drawing = False

            self.x1, self.y1 = int(event.x), int(event.y)
            self.status.set_bottom_right(self.x1, self.y1)

            if self.task is None or self.imagename is None:
                return

            res = self.task().process(self.img, self.x0, self.y0, self.x1, self.y1)
            if not res:
                # Cancelled, or an empty name which would hide the file
                return

            # Keep folder and extension, so a PNG or GIF is not mislabelled
            folder = osp.dirname(self.imagename)
            ext = osp.splitext(self.imagename)[1]
            newname = osp.join(folder, res + ext)

            if newname == self.imagename:
                return
            if osp.exists(newname):
                # rename() would silently replace the other image
                print("Not renaming %s: %s already exists" % (self.imagename, newname))
                return

            rename(self.imagename, newname)
            print("Renaming  %s %s" % (self.imagename, newname))
            # Follow the file, so a second selection on this image still works
            self.imagename = newname
    
    class MainWindow(gtk.Window):
        """ Top level window: file selector and status panel on the left,
            image viewer on the right.
        """
        def __init__(self):
            gtk.Window.__init__(self)
            self.connect("delete-event", self.on_delete_event)
            self.set_size_request(600, 300)

            grid = gtk.Table()

            # Image selector
            files = FileSelector(self.update_image)
            grid.attach(files, 0, 1, 0, 1,
                               yoptions = gtk.FILL | gtk.EXPAND, xoptions = gtk.FILL)

            # Some status information
            self.status = Status()
            grid.attach(self.status, 0, 1, 1, 2,
                                     yoptions = gtk.FILL, xoptions = gtk.FILL)

            # The image viewer (spans both rows of the right column)
            self.viewer = ImageViewer(self.status, RecognizeDigits)
            grid.attach(self.viewer, 1, 2, 0, 2)
            self.add(grid)

            self.show_all()

        def update_image(self, fname):
            """ Callback for the file selector: show the selected image. """
            self.viewer.set_image(fname)

        def on_delete_event(self, wdg, data):
            """ Window closed: leave the GTK main loop. """
            gtk.main_quit()

        def run(self):
            """ Enter the GTK main loop; returns after gtk.main_quit(). """
            # gtk.main() replaces the deprecated gtk.mainloop() alias
            gtk.main()
    
    def main():
        """ Create the main window, run it until it is closed, and return 0. """
        MainWindow().run()
        return 0

    # Only start the GUI when executed as a script, not when imported
    if __name__ == '__main__':
        main()
    

提交回复
热议问题