From 4efbfae2a0c052141e5d404829a728bb3b9a15ba Mon Sep 17 00:00:00 2001
From: ZoranPavlovic
Date: Sat, 6 Jan 2018 23:55:16 +0200
Subject: [PATCH 1/3] Moved gpageseg processing into class

Added logging module
Added constants for default values
---
 ocropus-gpageseg | 883 +++++++++++++++++++++++++----------------------
 1 file changed, 478 insertions(+), 405 deletions(-)

diff --git a/ocropus-gpageseg b/ocropus-gpageseg
index 7828c756..c9d96ec3 100755
--- a/ocropus-gpageseg
+++ b/ocropus-gpageseg
@@ -24,458 +24,531 @@ from multiprocessing import Pool
 import numpy as np
 from scipy.ndimage import measurements
 from scipy.misc import imsave
-from scipy.ndimage.filters import gaussian_filter,uniform_filter,maximum_filter
+from scipy.ndimage.filters import gaussian_filter, uniform_filter, maximum_filter
 
 import ocrolib
-from ocrolib import psegutils,morph,sl
+from ocrolib import psegutils, morph, sl
 from ocrolib.exceptions import OcropusException
 from ocrolib.toplevel import *
+import logging
+
+if __name__ == '__main__':
+    logging.basicConfig()
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_MIN_SCALE = 12.0
+DEFAULT_MAX_LINES = 300
+
+DEFAULT_SCALE = 0.0
+DEFAULT_H_SCALE = 1.0
+DEFAULT_V_SCALE = 1.0
+
+DEFAULT_THRESHOLD = 0.2
+DEFAULT_NOISE = 8
+
+DEFAULT_MAX_SEPS = 0
+DEFAULT_SEP_WIDEN = 10
+
+DEFAULT_MAX_COL_SEPS = 3
+DEFAULT_CS_MIN_HEIGHT = 10
+DEFAULT_CS_MIN_ASPECT = 1.1
+
+DEFAULT_PAD = 3
+DEFAULT_EXPAND = 3
+
 parser = argparse.ArgumentParser(add_help=False)
 
 # error checking
 group_error_checking = parser.add_argument_group('error checking')
-group_error_checking.add_argument('-n','--nocheck',action="store_true",
-                    help="disable error checking on inputs")
+group_error_checking.add_argument('-n', '--nocheck', action="store_true",
+                                  help="disable error checking on inputs")
 # limits
-group_error_checking.add_argument('--minscale',type=float,default=12.0,
-                    help='minimum scale permitted, default: %(default)s')
-group_error_checking.add_argument('--maxlines',type=float,default=300,
-                    help='maximum # lines permitted, default: %(default)s')
+group_error_checking.add_argument('--minscale', type=float, default=DEFAULT_MIN_SCALE,
+                                  help='minimum scale permitted, default: %(default)s')
+group_error_checking.add_argument('--maxlines', type=float, default=DEFAULT_MAX_LINES,
+                                  help='maximum # lines permitted, default: %(default)s')
 
 # scale parameters
 group_scale = parser.add_argument_group('scale parameters')
-group_scale.add_argument('--scale',type=float,default=0.0,
-                    help='the basic scale of the document (roughly, xheight) 0=automatic, default: %(default)s')
-group_scale.add_argument('--hscale',type=float,default=1.0,
-                    help='non-standard scaling of horizontal parameters, default: %(default)s')
-group_scale.add_argument('--vscale',type=float,default=1.0,
-                    help='non-standard scaling of vertical parameters, default: %(default)s')
+group_scale.add_argument('--scale', type=float, default=DEFAULT_SCALE,
+                         help='the basic scale of the document (roughly, xheight) 0=automatic, default: %(default)s')
+group_scale.add_argument('--hscale', type=float, default=DEFAULT_H_SCALE,
+                         help='non-standard scaling of horizontal parameters, default: %(default)s')
+group_scale.add_argument('--vscale', type=float, default=DEFAULT_V_SCALE,
+                         help='non-standard scaling of vertical parameters, default: %(default)s')
 
 # line parameters
 group_line = parser.add_argument_group('line parameters')
-group_line.add_argument('--threshold',type=float,default=0.2,
-                    help='baseline threshold, default: %(default)s')
-group_line.add_argument('--noise',type=int,default=8,
-                    help="noise threshold for removing small components from lines, default: %(default)s")
-group_line.add_argument('--usegauss',action='store_true',
-                    help='use gaussian instead of uniform, default: %(default)s')
+group_line.add_argument('--threshold', type=float, default=DEFAULT_THRESHOLD,
+                        help='baseline threshold, default: %(default)s')
+group_line.add_argument('--noise', type=int, default=DEFAULT_NOISE,
+                        help="noise threshold for removing small components from lines, default: %(default)s")
+group_line.add_argument('--usegauss', action='store_true',
+                        help='use gaussian instead of uniform, default: %(default)s')
 
 # column parameters
 group_column = parser.add_argument_group('column parameters')
-group_column.add_argument('--maxseps',type=int,default=0,
-                    help='maximum black column separators, default: %(default)s')
-group_column.add_argument('--sepwiden',type=int,default=10,
-                    help='widen black separators (to account for warping), default: %(default)s')
+group_column.add_argument('--maxseps', type=int, default=DEFAULT_MAX_SEPS,
+                          help='maximum black column separators, default: %(default)s')
+group_column.add_argument('--sepwiden', type=int, default=DEFAULT_SEP_WIDEN,
+                          help='widen black separators (to account for warping), default: %(default)s')
 # Obsolete parameter for 'also check for black column separators'
 # which can now be triggered simply by a positive maxseps value.
-group_column.add_argument('-b','--blackseps',action="store_true",
-                    help=argparse.SUPPRESS)
+group_column.add_argument('-b', '--blackseps', action="store_true",
+                          help=argparse.SUPPRESS)
 
 # whitespace column separators
-group_column.add_argument('--maxcolseps',type=int,default=3,
-                    help='maximum # whitespace column separators, default: %(default)s')
-group_column.add_argument('--csminheight',type=float,default=10,
-                    help='minimum column height (units=scale), default: %(default)s')
+group_column.add_argument('--maxcolseps', type=int, default=DEFAULT_MAX_COL_SEPS,
+                          help='maximum # whitespace column separators, default: %(default)s')
+group_column.add_argument('--csminheight', type=float, default=DEFAULT_CS_MIN_HEIGHT,
+                          help='minimum column height (units=scale), default: %(default)s')
 # Obsolete parameter for the 'minimum aspect ratio for column separators'
 # used in the obsolete function compute_colseps_morph
-group_column.add_argument('--csminaspect',type=float,default=1.1,
-                    help=argparse.SUPPRESS)
+group_column.add_argument('--csminaspect', type=float, default=DEFAULT_CS_MIN_ASPECT,
+                          help=argparse.SUPPRESS)
 
 # output parameters
 group_output = parser.add_argument_group('output parameters')
-group_output.add_argument('--gray',action='store_true',
-                    help='output grayscale lines as well, default: %(default)s')
-group_output.add_argument('-p','--pad',type=int,default=3,
-                    help='padding for extracted lines, default: %(default)s')
-group_output.add_argument('-e','--expand',type=int,default=3,
-                    help='expand mask for grayscale extraction, default: %(default)s')
+group_output.add_argument('--gray', action='store_true',
+                          help='output grayscale lines as well, default: %(default)s')
+group_output.add_argument('-p', '--pad', type=int, default=DEFAULT_PAD,
+                          help='padding for extracted lines, default: %(default)s')
+group_output.add_argument('-e', '--expand', type=int, default=DEFAULT_EXPAND,
+                          help='expand mask for grayscale extraction, default: %(default)s')
 
 # other parameters
 group_others = parser.add_argument_group('others')
-group_others.add_argument('-q','--quiet',action='store_true',
-                    help='be less verbose, default: %(default)s')
-group_others.add_argument('-Q','--parallel',type=int,default=0,
-                    help="number of CPUs to use")
-group_others.add_argument('-d','--debug',action="store_true")
+group_others.add_argument('-q', '--quiet', action='store_true',
+                          help='be less verbose, default: %(default)s')
+group_others.add_argument('-Q', '--parallel', type=int, default=0,
+                          help="number of CPUs to use")
+group_others.add_argument('-d', '--debug', action="store_true")
 group_others.add_argument("-h", "--help", action="help", help="show this help message and exit")
 
 # input files
-parser.add_argument('files',nargs='+')
-
-args = parser.parse_args()
-args.files = ocrolib.glob_all(args.files)
-
-def find(condition):
-    "Return the indices where ravel(condition) is true"
-    res, = np.nonzero(np.ravel(condition))
-    return res
-
-def norm_max(v):
-    return v/np.amax(v)
-
-
-def check_page(image):
-    if len(image.shape)==3: return "input image is color image %s"%(image.shape,)
-    if np.mean(image)<np.median(image): return "image may be inverted"
-    h,w = image.shape
-    if h<600: return "image not tall enough for a page image %s"%(image.shape,)
-    if h>10000: return "image too tall for a page image %s"%(image.shape,)
-    if w<600: return "image too narrow for a page image %s"%(image.shape,)
-    if w>10000: return "line too wide for a page image %s"%(image.shape,)
-    slots = int(w*h*1.0/(30*30))
-    _,ncomps = measurements.label(image>np.mean(image))
-    if ncomps<10: return "too few connected components for a page image (got %d)"%(ncomps,)
-    if ncomps>slots: return "too many connnected components for a page image (%d > %d)"%(ncomps,slots)
-    return None
-
-
-def print_info(*objs):
-    print("INFO: ", *objs, file=sys.stdout)
-
-
-def print_error(*objs):
-    print("ERROR: ", *objs, file=sys.stderr)
-
-
-if len(args.files)<1:
-    parser.print_help()
-    sys.exit(0)
-
-print_info("")
-print_info("#"*10,(" ".join(sys.argv))[:60])
-print_info("")
-
-if args.parallel>1:
-    args.quiet = 1
-
-
-def B(a):
-    if a.dtype==np.dtype('B'): return a
-    return np.array(a,'B')
-
-
-def DSAVE(title,image):
-    if not args.debug: return
-    if type(image)==list:
-        assert len(image)==3
-        image = np.transpose(np.array(image),[1,2,0])
-    fname = "_"+title+".png"
-    print_info("debug " + fname)
-    imsave(fname,image)
-
-
-################################################################
-### Column finding.
-###
-### This attempts to find column separators, either as extended
-### vertical black lines or extended vertical whitespace.
-### It will work fairly well in simple cases, but for unusual
-### documents, you need to tune the parameters.
-################################################################
-
-def compute_separators_morph(binary,scale):
-    """Finds vertical black lines corresponding to column separators."""
-    d0 = int(max(5,scale/4))
-    d1 = int(max(5,scale))+args.sepwiden
-    thick = morph.r_dilation(binary,(d0,d1))
-    vert = morph.rb_opening(thick,(10*scale,1))
-    vert = morph.r_erosion(vert,(d0//2,args.sepwiden))
-    vert = morph.select_regions(vert,sl.dim1,min=3,nbest=2*args.maxseps)
-    vert = morph.select_regions(vert,sl.dim0,min=20*scale,nbest=args.maxseps)
-    return vert
-
-
-def compute_colseps_morph(binary,scale,maxseps=3,minheight=20,maxwidth=5):
-    """Finds extended vertical whitespace corresponding to column separators
-    using morphological operations."""
-    boxmap = psegutils.compute_boxmap(binary,scale,dtype='B')
-    bounds = morph.rb_closing(B(boxmap),(int(5*scale),int(5*scale)))
-    bounds = np.maximum(B(1-bounds),B(boxmap))
-    cols = 1-morph.rb_closing(boxmap,(int(20*scale),int(scale)))
-    cols = morph.select_regions(cols,sl.aspect,min=args.csminaspect)
-    cols = morph.select_regions(cols,sl.dim0,min=args.csminheight*scale,nbest=args.maxcolseps)
-    cols = morph.r_erosion(cols,(int(0.5+scale),0))
-    cols = morph.r_dilation(cols,(int(0.5+scale),0),origin=(int(scale/2)-1,0))
-    return cols
-
-
-def compute_colseps_mconv(binary,scale=1.0):
-    """Find column separators using a combination of morphological
-    operations and convolution."""
-    h,w = binary.shape
-    smoothed = gaussian_filter(1.0*binary,(scale,scale*0.5))
-    smoothed = uniform_filter(smoothed,(5.0*scale,1))
-    thresh = (smoothed<np.amax(smoothed)*0.1)
-    DSAVE("1thresh",thresh)
-    blocks = morph.rb_closing(binary,(int(4*scale),int(4*scale)))
-    DSAVE("2blocks",blocks)
-    seps = np.minimum(blocks,thresh)
-    seps = morph.select_regions(seps,sl.dim0,min=args.csminheight*scale,nbest=args.maxcolseps)
-    DSAVE("3seps",seps)
-    blocks = morph.r_dilation(blocks,(5,5))
-    DSAVE("4blocks",blocks)
-    seps = np.maximum(seps,1-blocks)
-    DSAVE("5combo",seps)
-    return seps
-
-
-def compute_colseps_conv(binary,scale=1.0):
-    """Find column separators by convolution and
-    thresholding."""
-    h,w = binary.shape
-    # find vertical whitespace by thresholding
-    smoothed = gaussian_filter(1.0*binary,(scale,scale*0.5))
-    smoothed = uniform_filter(smoothed,(5.0*scale,1))
-    thresh = (smoothed<np.amax(smoothed)*0.1)
-    DSAVE("1thresh",thresh)
-    # find column edges by filtering
-    grad = gaussian_filter(1.0*binary,(scale,scale*0.5),order=(0,1))
-    grad = uniform_filter(grad,(10.0*scale,1))
-    # grad = abs(grad) # use this for finding both edges
-    grad = (grad>0.5*np.amax(grad))
-    DSAVE("2grad",grad)
-    # combine edges and whitespace
-    seps = np.minimum(thresh,maximum_filter(grad,(int(scale),int(5*scale))))
-    seps = maximum_filter(seps,(int(2*scale),1))
-    DSAVE("3seps",seps)
-    # select only the biggest column separators
-    seps = morph.select_regions(seps,sl.dim0,min=args.csminheight*scale,nbest=args.maxcolseps)
-    DSAVE("4seps",seps)
-    return seps
-
-
-def compute_colseps(binary,scale):
-    """Computes column separators either from vertical black lines or whitespace."""
-    print_info("considering at most %g whitespace column separators" % args.maxcolseps)
-    colseps = compute_colseps_conv(binary,scale)
-    DSAVE("colwsseps",0.7*colseps+0.3*binary)
-    if args.blackseps and args.maxseps == 0:
-        # simulate old behaviour of blackseps when the default value
-        # for maxseps was 2, but only when the maxseps-value is still zero
-        # and not set manually to a non-zero value
-        args.maxseps = 2
-    if args.maxseps > 0:
-        print_info("considering at most %g black column separators" % args.maxseps)
-        seps = compute_separators_morph(binary,scale)
-        DSAVE("colseps",0.7*seps+0.3*binary)
-        #colseps = compute_colseps_morph(binary,scale)
-        colseps = np.maximum(colseps,seps)
-        binary = np.minimum(binary,1-seps)
-    return colseps,binary
-
-
-################################################################
-### Text Line Finding.
-###
-### This identifies the tops and bottoms of text lines by
-### computing gradients and performing some adaptive thresholding.
-### Those components are then used as seeds for the text lines.
-################################################################
-
-def compute_gradmaps(binary,scale):
-    # use gradient filtering to find baselines
-    boxmap = psegutils.compute_boxmap(binary,scale)
-    cleaned = boxmap*binary
-    DSAVE("cleaned",cleaned)
-    if args.usegauss:
-        # this uses Gaussians
-        grad = gaussian_filter(1.0*cleaned,(args.vscale*0.3*scale,
-                                            args.hscale*6*scale),order=(1,0))
-    else:
-        # this uses non-Gaussian oriented filters
-        grad = gaussian_filter(1.0*cleaned,(max(4,args.vscale*0.3*scale),
-                                            args.hscale*scale),order=(1,0))
-        grad = uniform_filter(grad,(args.vscale,args.hscale*6*scale))
-    bottom = ocrolib.norm_max((grad<0)*(-grad))
-    top = ocrolib.norm_max((grad>0)*grad)
-    return bottom,top,boxmap
-
-
-def compute_line_seeds(binary,bottom,top,colseps,scale):
-    """Base on gradient maps, computes candidates for baselines
-    and xheights.  Then, it marks the regions between the two
-    as a line seed."""
-    t = args.threshold
-    vrange = int(args.vscale*scale)
-    bmarked = maximum_filter(bottom==maximum_filter(bottom,(vrange,0)),(2,2))
-    bmarked = bmarked*(bottom>t*np.amax(bottom)*t)*(1-colseps)
-    tmarked = maximum_filter(top==maximum_filter(top,(vrange,0)),(2,2))
-    tmarked = tmarked*(top>t*np.amax(top)*t/2)*(1-colseps)
-    tmarked = maximum_filter(tmarked,(1,20))
-    seeds = np.zeros(binary.shape,'i')
-    delta = max(3,int(scale/2))
-    for x in range(bmarked.shape[1]):
-        transitions = sorted([(y,1) for y in find(bmarked[:,x])]+[(y,0) for y in find(tmarked[:,x])])[::-1]
-        transitions += [(0,0)]
-        for l in range(len(transitions)-1):
-            y0,s0 = transitions[l]
-            if s0==0: continue
-            seeds[y0-delta:y0,x] = 1
-            y1,s1 = transitions[l+1]
-            if s1==0 and (y0-y1)<5*scale: seeds[y1:y0,x] = 1
-    seeds = maximum_filter(seeds,(1,int(1+scale)))
-    seeds = seeds*(1-colseps)
-    DSAVE("lineseeds",[seeds,0.3*tmarked+0.7*bmarked,binary])
-    seeds,_ = morph.label(seeds)
-    return seeds
-
-
-################################################################
-### The complete line segmentation process.
-################################################################
-
-def remove_hlines(binary,scale,maxsize=10):
-    labels,_ = morph.label(binary)
-    objects = morph.find_objects(labels)
-    for i,b in enumerate(objects):
-        if sl.width(b)>maxsize*scale:
-            labels[b][labels[b]==i+1] = 0
-    return np.array(labels!=0,'B')
-
-
-def compute_segmentation(binary,scale):
-    """Given a binary image, compute a complete segmentation into
-    lines, computing both columns and text lines."""
-    binary = np.array(binary,'B')
-
-    # start by removing horizontal black lines, which only
-    # interfere with the rest of the page segmentation
-    binary = remove_hlines(binary,scale)
-
-    # do the column finding
-    if not args.quiet: print_info("computing column separators")
-    colseps,binary = compute_colseps(binary,scale)
-
-    # now compute the text line seeds
-    if not args.quiet: print_info("computing lines")
-    bottom,top,boxmap = compute_gradmaps(binary,scale)
-    seeds = compute_line_seeds(binary,bottom,top,colseps,scale)
-    DSAVE("seeds",[bottom,top,boxmap])
-
-    # spread the text line seeds to all the remaining
-    # components
-    if not args.quiet: print_info("propagating labels")
-    llabels = morph.propagate_labels(boxmap,seeds,conflict=0)
-    if not args.quiet: print_info("spreading labels")
-    spread = morph.spread_labels(seeds,maxdist=scale)
-    llabels = np.where(llabels>0,llabels,spread*binary)
-    segmentation = llabels*binary
-    return segmentation
-
-
-################################################################
-### Processing each file.
-################################################################
-
-def process1(job):
-    fname,i = job
-    global base
-    base,_ = ocrolib.allsplitext(fname)
-    outputdir = base
+parser.add_argument('files', nargs='+')
+
+
+class ProcessingException(Exception):
+    pass
+
+
+class GPageSeg(object):
+    def __init__(self, arg_in):
+        self.args = arg_in
+
+    def find(self, condition):
+        "Return the indices where ravel(condition) is true"
+        res, = np.nonzero(np.ravel(condition))
+        return res
+
+    def norm_max(self, v):
+        return v / np.amax(v)
+
+    def check_page(self, image):
+        fail_checks = []
+        if len(image.shape) == 3:
+            fail_checks.append("input image is color image %s" % (image.shape,))
+
+        if np.mean(image) < np.median(image):
+            fail_checks.append("image may be inverted")
+
+        h, w = image.shape
+
+        if h < 600:
+            fail_checks.append("image too short [%s]" % h)
+
+        if h > 10000:
+            fail_checks.append("image too tall [%s]" % h)
+
+        if w < 600:
+            fail_checks.append("image too narrow [%s]" % w)
+
+        if w > 10000:
+            fail_checks.append("image too wide [%s]" % w)
+
+        slots = int(w * h * 1.0 / (30 * 30))
+        _, ncomps = measurements.label(image > np.mean(image))
+
+        if ncomps < 10:
+            fail_checks.append("too few connected components (got %d)" % (ncomps,))
+
+        if ncomps > slots:
+            fail_checks.append("too many connected components (%d > %d)" % (ncomps, slots))
+
+        return fail_checks
+
+    def CheckConvertBinaryArray(self, a):
+        if a.dtype == np.dtype('B'):
+            return a
+        return np.array(a, 'B')
+
+    def DSAVE(self, title, image):
+        if not self.args.debug:
+            return
+        if type(image) == list:
+            assert len(image) == 3
+            image = np.transpose(np.array(image), [1, 2, 0])
+        fname = "_" + title + ".png"
+        logger.info("debug " + fname)
+        imsave(fname, image)
+
+    ################################################################
+    ### Column finding.
+    ###
+    ### This attempts to find column separators, either as extended
+    ### vertical black lines or extended vertical whitespace.
+    ### It will work fairly well in simple cases, but for unusual
+    ### documents, you need to tune the parameters.
+    ################################################################
+
+    def compute_separators_morph(self, binary, scale):
+        """Finds vertical black lines corresponding to column separators."""
+        d0 = int(max(5, scale / 4))
+        d1 = int(max(5, scale)) + self.args.sepwiden
+        thick = morph.r_dilation(binary, (d0, d1))
+        vert = morph.rb_opening(thick, (10 * scale, 1))
+        vert = morph.r_erosion(vert, (d0 // 2, self.args.sepwiden))
+        vert = morph.select_regions(vert, sl.dim1, min=3, nbest=2 * self.args.maxseps)
+        vert = morph.select_regions(vert, sl.dim0, min=20 * scale, nbest=self.args.maxseps)
+        return vert
+
+    def compute_colseps_morph(self, binary, scale, maxseps=3, minheight=20, maxwidth=5):
+        """Finds extended vertical whitespace corresponding to column separators
+        using morphological operations."""
+        boxmap = psegutils.compute_boxmap(binary, scale, dtype='B')
+        bounds = morph.rb_closing(self.CheckConvertBinaryArray(boxmap), (int(5 * scale), int(5 * scale)))
+        bounds = np.maximum(self.CheckConvertBinaryArray(1 - bounds), self.CheckConvertBinaryArray(boxmap))
+        cols = 1 - morph.rb_closing(boxmap, (int(20 * scale), int(scale)))
+        cols = morph.select_regions(cols, sl.aspect, min=self.args.csminaspect)
+        cols = morph.select_regions(cols, sl.dim0, min=self.args.csminheight * scale, nbest=self.args.maxcolseps)
+        cols = morph.r_erosion(cols, (int(0.5 + scale), 0))
+        cols = morph.r_dilation(cols, (int(0.5 + scale), 0), origin=(int(scale / 2) - 1, 0))
+        return cols
+
+    def compute_colseps_mconv(self, binary, scale=1.0):
+        """Find column separators using a combination of morphological
+        operations and convolution."""
+        h, w = binary.shape
+        smoothed = gaussian_filter(1.0 * binary, (scale, scale * 0.5))
+        smoothed = uniform_filter(smoothed, (5.0 * scale, 1))
+        thresh = (smoothed < np.amax(smoothed) * 0.1)
+        self.DSAVE("1thresh", thresh)
+        blocks = morph.rb_closing(binary, (int(4 * scale), int(4 * scale)))
+        self.DSAVE("2blocks", blocks)
+        seps = np.minimum(blocks, thresh)
+        seps = morph.select_regions(seps, sl.dim0, min=self.args.csminheight * scale, nbest=self.args.maxcolseps)
+        self.DSAVE("3seps", seps)
+        blocks = morph.r_dilation(blocks, (5, 5))
+        self.DSAVE("4blocks", blocks)
+        seps = np.maximum(seps, 1 - blocks)
+        self.DSAVE("5combo", seps)
+        return seps
+
+    def compute_colseps_conv(self, binary, scale=1.0):
+        """Find column separators by convolution and
+        thresholding."""
+        h, w = binary.shape
+        # find vertical whitespace by thresholding
+        smoothed = gaussian_filter(1.0 * binary, (scale, scale * 0.5))
+        smoothed = uniform_filter(smoothed, (5.0 * scale, 1))
+        thresh = (smoothed < np.amax(smoothed) * 0.1)
+        self.DSAVE("1thresh", thresh)
+        # find column edges by filtering
+        grad = gaussian_filter(1.0 * binary, (scale, scale * 0.5), order=(0, 1))
+        grad = uniform_filter(grad, (10.0 * scale, 1))
+        # grad = abs(grad)  # use this for finding both edges
+        grad = (grad > 0.5 * np.amax(grad))
+        self.DSAVE("2grad", grad)
+        # combine edges and whitespace
+        seps = np.minimum(thresh, maximum_filter(grad, (int(scale), int(5 * scale))))
+        seps = maximum_filter(seps, (int(2 * scale), 1))
+        self.DSAVE("3seps", seps)
+        # select only the biggest column separators
+        seps = morph.select_regions(seps, sl.dim0, min=self.args.csminheight * scale, nbest=self.args.maxcolseps)
+        self.DSAVE("4seps", seps)
+        return seps
+
+    def compute_colseps(self, binary, scale):
+        """Computes column separators either from vertical black lines or whitespace."""
+        logger.info("considering at most %g whitespace column separators" % self.args.maxcolseps)
+        colseps = self.compute_colseps_conv(binary, scale)
+        self.DSAVE("colwsseps", 0.7 * colseps + 0.3 * binary)
+        if self.args.blackseps and self.args.maxseps == 0:
+            # simulate old behaviour of blackseps when the default value
+            # for maxseps was 2, but only when the maxseps-value is still zero
+            # and not set manually to a non-zero value
+            self.args.maxseps = 2
+        if self.args.maxseps > 0:
+            logger.info("considering at most %g black column separators" % self.args.maxseps)
+            seps = self.compute_separators_morph(binary, scale)
+            self.DSAVE("colseps", 0.7 * seps + 0.3 * binary)
+            # colseps = compute_colseps_morph(binary,scale)
+            colseps = np.maximum(colseps, seps)
+            binary = np.minimum(binary, 1 - seps)
+        return colseps, binary
+
+    ################################################################
+    ### Text Line Finding.
+    ###
+    ### This identifies the tops and bottoms of text lines by
+    ### computing gradients and performing some adaptive thresholding.
+    ### Those components are then used as seeds for the text lines.
+    ################################################################
+
+    def compute_gradmaps(self, binary, scale):
+        # use gradient filtering to find baselines
+        boxmap = psegutils.compute_boxmap(binary, scale)
+        cleaned = boxmap * binary
+        self.DSAVE("cleaned", cleaned)
+        if self.args.usegauss:
+            # this uses Gaussians
+            grad = gaussian_filter(1.0 * cleaned, (self.args.vscale * 0.3 * scale,
+                                                   self.args.hscale * 6 * scale), order=(1, 0))
+        else:
+            # this uses non-Gaussian oriented filters
+            grad = gaussian_filter(1.0 * cleaned, (max(4, self.args.vscale * 0.3 * scale),
+                                                   self.args.hscale * scale), order=(1, 0))
+            grad = uniform_filter(grad, (self.args.vscale, self.args.hscale * 6 * scale))
+        bottom = ocrolib.norm_max((grad < 0) * (-grad))
+        top = ocrolib.norm_max((grad > 0) * grad)
+        return bottom, top, boxmap
+
+    def compute_line_seeds(self, binary, bottom, top, colseps, scale):
+        """Base on gradient maps, computes candidates for baselines
+        and xheights. Then, it marks the regions between the two
+        as a line seed."""
+        t = self.args.threshold
+        vrange = int(self.args.vscale * scale)
+        bmarked = maximum_filter(bottom == maximum_filter(bottom, (vrange, 0)), (2, 2))
+        bmarked = bmarked * (bottom > t * np.amax(bottom) * t) * (1 - colseps)
+        tmarked = maximum_filter(top == maximum_filter(top, (vrange, 0)), (2, 2))
+        tmarked = tmarked * (top > t * np.amax(top) * t / 2) * (1 - colseps)
+        tmarked = maximum_filter(tmarked, (1, 20))
+        seeds = np.zeros(binary.shape, 'i')
+        delta = max(3, int(scale / 2))
+        for x in range(bmarked.shape[1]):
+            transitions = sorted([(y, 1) for y in self.find(bmarked[:, x])] + [(y, 0) for y in self.find(tmarked[:, x])])[::-1]
+            transitions += [(0, 0)]
+            for l in range(len(transitions) - 1):
+                y0, s0 = transitions[l]
+                if s0 == 0: continue
+                seeds[y0 - delta:y0, x] = 1
+                y1, s1 = transitions[l + 1]
+                if s1 == 0 and (y0 - y1) < 5 * scale: seeds[y1:y0, x] = 1
+        seeds = maximum_filter(seeds, (1, int(1 + scale)))
+        seeds = seeds * (1 - colseps)
+        self.DSAVE("lineseeds", [seeds, 0.3 * tmarked + 0.7 * bmarked, binary])
+        seeds, _ = morph.label(seeds)
+        return seeds
+
+    ################################################################
+    ### The complete line segmentation process.
+    ################################################################
+
+    def remove_hlines(self, binary, scale, maxsize=10):
+        labels, _ = morph.label(binary)
+        objects = morph.find_objects(labels)
+        for i, b in enumerate(objects):
+            if sl.width(b) > maxsize * scale:
+                labels[b][labels[b] == i + 1] = 0
+        return np.array(labels != 0, 'B')
+
+    def compute_segmentation(self, binary, scale):
+        """Given a binary image, compute a complete segmentation into
+        lines, computing both columns and text lines."""
+        binary = np.array(binary, 'B')
+
+        # start by removing horizontal black lines, which only
+        # interfere with the rest of the page segmentation
+        binary = self.remove_hlines(binary, scale)
+
+        # do the column finding
+        logger.info("computing column separators")
+        colseps, binary = self.compute_colseps(binary, scale)
+
+        # now compute the text line seeds
+        logger.info("computing lines")
+        bottom, top, boxmap = self.compute_gradmaps(binary, scale)
+        seeds = self.compute_line_seeds(binary, bottom, top, colseps, scale)
+        self.DSAVE("seeds", [bottom, top, boxmap])
+
+        # spread the text line seeds to all the remaining
+        # components
+        logger.info("propagating labels")
+        llabels = morph.propagate_labels(boxmap, seeds, conflict=0)
+        logger.info("spreading labels")
+        spread = morph.spread_labels(seeds, maxdist=scale)
+        llabels = np.where(llabels > 0, llabels, spread * binary)
+        segmentation = llabels * binary
+        return segmentation
+
+    ################################################################
+    ### Processing each file.
+    ################################################################
+
+    def process1(self, job):
+        fname, i = job
+        base, _ = ocrolib.allsplitext(fname)
+        outputdir = base
 
-    try:
-        binary = ocrolib.read_image_binary(base+".bin.png")
-    except IOError:
         try:
-            binary = ocrolib.read_image_binary(fname)
+            binary = ocrolib.read_image_binary(base + ".bin.png")
         except IOError:
-            if ocrolib.trace: traceback.print_exc()
-            print_error("cannot open either %s.bin.png or %s" % (base, fname))
-            return
+            try:
+                binary = ocrolib.read_image_binary(fname)
+            except IOError:
+                if ocrolib.trace: traceback.print_exc()
+                raise ProcessingException("cannot open either %s.bin.png or %s" % (base, fname))
+
+        checktype(binary, ABINARY2)
+
+        if not self.args.nocheck:
+            check_failures = self.check_page(np.amax(binary) - binary)
+            if len(check_failures) > 0:
+                raise ProcessingException("Failed checks [%s]" % (", ".join(check_failures)))
+
+        if self.args.gray:
+            gray_path = base + ".nrm.png"
+            if os.path.exists(gray_path):
+                logger.info("Loading gray image")
+                gray = ocrolib.read_image_gray(gray_path)
+                checktype(gray, GRAYSCALE)
+
+        binary = 1 - binary  # invert
+
+        if self.args.scale == 0:
+            scale = psegutils.estimate_scale(binary)
+        else:
+            scale = self.args.scale
 
-    checktype(binary,ABINARY2)
+        logger.info("scale %f" % (scale))
+        if np.isnan(scale) or scale > 1000.0:
+            raise ProcessingException("bad scale (%g)" % scale)
 
-    if not args.nocheck:
-        check = check_page(np.amax(binary)-binary)
-        if check is not None:
-            print_error("%s SKIPPED %s (use -n to disable this check)" % (fname, check))
-            return
+        if scale < self.args.minscale:
+            raise ProcessingException("scale (%g) less than --minscale [%g]" % (scale, self.args.minscale))
 
-    if args.gray:
-        if os.path.exists(base+".nrm.png"):
-            gray = ocrolib.read_image_gray(base+".nrm.png")
-            checktype(gray,GRAYSCALE)
+        logger.info("computing segmentation")
 
-    binary = 1-binary # invert
+        segmentation = self.compute_segmentation(binary, scale)
+        line_count = np.amax(segmentation)
 
-    if args.scale==0:
-        scale = psegutils.estimate_scale(binary)
-    else:
-        scale = args.scale
-    print_info("scale %f" % (scale))
-    if np.isnan(scale) or scale>1000.0:
-        print_error("%s: bad scale (%g); skipping\n" % (fname, scale))
-        return
-    if scale<args.minscale:
-        print_error("%s: scale (%g) less than --minscale; skipping\n" % (fname, scale))
-        return
-
-    # find columns and text lines
-    if not args.quiet: print_info("computing segmentation")
-    segmentation = compute_segmentation(binary,scale)
-    if np.amax(segmentation)>args.maxlines:
-        print_error("%s: too many lines %g" % (fname, np.amax(segmentation)))
-        return
-    if not args.quiet: print_info("number of lines %g" % np.amax(segmentation))
-
-    # compute the reading order
-
-    if not args.quiet: print_info("finding reading order")
-    lines = psegutils.compute_lines(segmentation,scale)
-    order = psegutils.reading_order([l.bounds for l in lines])
-    lsort = psegutils.topsort(order)
-
-    # renumber the labels so that they conform to the specs
-
-    nlabels = np.amax(segmentation)+1
-    renumber = np.zeros(nlabels,'i')
-    for i,v in enumerate(lsort): renumber[lines[v].label] = 0x010000+(i+1)
-    segmentation = renumber[segmentation]
-
-    # finally, output everything
-
-    if not args.quiet: print_info("writing lines")
-    if not os.path.exists(outputdir):
-        os.mkdir(outputdir)
-    lines = [lines[i] for i in lsort]
-    ocrolib.write_page_segmentation("%s.pseg.png"%outputdir,segmentation)
-    cleaned = ocrolib.remove_noise(binary,args.noise)
-    for i,l in enumerate(lines):
-        binline = psegutils.extract_masked(1-cleaned,l,pad=args.pad,expand=args.expand)
-        ocrolib.write_image_binary("%s/01%04x.bin.png"%(outputdir,i+1),binline)
-        if args.gray:
-            grayline = psegutils.extract_masked(gray,l,pad=args.pad,expand=args.expand)
-            ocrolib.write_image_gray("%s/01%04x.nrm.png"%(outputdir,i+1),grayline)
-    print_info("%6d %s %4.1f %d" % (i, fname, scale, len(lines)))
-
-if len(args.files)==1 and os.path.isdir(args.files[0]):
-    files = glob.glob(args.files[0]+"/????.png")
-else:
-    files = args.files
-
-
-def safe_process1(job):
-    fname,i = job
+        if line_count > self.args.maxlines:
+            raise ProcessingException("%s: too many lines %g" % (fname, line_count))
+
+        logger.info("number of lines %g" % line_count)
+
+        # compute the reading order
+
+        logger.info("finding reading order")
+        lines = psegutils.compute_lines(segmentation, scale)
+        order = psegutils.reading_order([l.bounds for l in lines])
+        lsort = psegutils.topsort(order)
+
+        # renumber the labels so that they conform to the specs
+
+        nlabels = np.amax(segmentation) + 1
+        renumber = np.zeros(nlabels, 'i')
+        for i, v in enumerate(lsort): renumber[lines[v].label] = 0x010000 + (i + 1)
+        segmentation = renumber[segmentation]
+
+        # finally, output everything
+
+        logger.info("writing lines")
+        if not os.path.exists(outputdir):
+            os.mkdir(outputdir)
+        lines = [lines[i] for i in lsort]
+        ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
+        cleaned = ocrolib.remove_noise(binary, self.args.noise)
+        for i, l in enumerate(lines):
+            binline = psegutils.extract_masked(1 - cleaned, l, pad=self.args.pad, expand=self.args.expand)
+            ocrolib.write_image_binary("%s/01%04x.bin.png" % (outputdir, i + 1), binline)
+            if self.args.gray:
+                grayline = psegutils.extract_masked(gray, l, pad=self.args.pad, expand=self.args.expand)
+                ocrolib.write_image_gray("%s/01%04x.nrm.png" % (outputdir, i + 1), grayline)
+            logger.info("%6d %s %4.1f %d" % (i, fname, scale, len(lines)))
+
+        logger.info("%4.1f %d" % (scale, len(lines)))
+
+
+def safe_process1(job, args):
+    fname, i = job
     try:
-        process1(job)
+        page_segmenter = GPageSeg(args)
+        page_segmenter.process1(job)
     except OcropusException as e:
         if e.trace: traceback.print_exc()
         else:
-            print_info(fname+":"+e)
+            logger.info(fname + ":" + e)
     except Exception as e:
         traceback.print_exc()
 
-if args.parallel<2:
-    count = 0
-    for i,f in enumerate(files):
-        if args.parallel==0: print_info(f)
-        count += 1
-        safe_process1((f,i+1))
-else:
-    pool = Pool(processes=args.parallel)
-    jobs = []
-    for i,f in enumerate(files): jobs += [(f,i+1)]
-    result = pool.map(process1,jobs)
+
+def process1(job, args):
+    fname, i = job
+    try:
+        page_segmenter = GPageSeg(args)
+        page_segmenter.process1(job)
+    except ProcessingException as ex:
+        logger.error("%s: %s" (fname, ex.message))
+    except Exception as ex:
+        logger.exception(ex.message)
+
+
+def setup_logging(quiet):
+    log_handler = logging.StreamHandler()
+    formatter = logging.Formatter('%(levelname)s: %(message)s')
+    log_handler.setFormatter(formatter)
+
+    if not quiet:
+        logger.setLevel(logging.DEBUG)
+        log_handler.setLevel(logging.DEBUG)
+    else:
+        logger.setLevel(logging.WARN)
+        log_handler.setLevel(logging.WARN)
+
+    logger.addHandler(log_handler)
+    logger.propagate = False
+
+
+def do_script():
+    args = parser.parse_args()
+    args.files = ocrolib.glob_all(args.files)
+
+    if len(args.files) < 1:
+        parser.print_help()
+        sys.exit(0)
+
+    setup_logging(args.quiet)
+
+    logger.info("")
+    logger.info("%s %s" % ("#" * 10, (" ".join(sys.argv))[-60:]))
+    logger.info("")
+
+    if args.parallel > 1:
+        args.quiet = 1
+
+    if len(args.files) == 1 and os.path.isdir(args.files[0]):
+        files = glob.glob(args.files[0] + "/????.png")
+    else:
+        files = args.files
+
+    if args.parallel < 2:
+        count = 0
+        for i, f in enumerate(files):
+            if args.parallel == 0:
+                logger.info(f)
+            count += 1
+            safe_process1((f, i + 1), args)
+    else:
+        pool = Pool(processes=args.parallel)
+        jobs = []
+        for i, f in enumerate(files):
+            jobs += [(f, i + 1)]
+        result = pool.map(process1, jobs)
+
+
+if __name__ == '__main__':
+    do_script()

From c2f994a3ff50e286d23e4c1dd3619a0ad1eeca4a Mon Sep 17 00:00:00 2001
From: ZoranPavlovic
Date: Sun, 7 Jan 2018 00:02:48 +0200
Subject: [PATCH 2/3] Removed unused variables, parameters and imports.

Fixed a minor formatting issue.
---
 ocropus-gpageseg | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/ocropus-gpageseg b/ocropus-gpageseg
index c9d96ec3..c5cffcfd 100755
--- a/ocropus-gpageseg
+++ b/ocropus-gpageseg
@@ -15,9 +15,7 @@ from __future__ import print_function
 
 import argparse
 import glob
-import os
 import os.path
-import sys
 import traceback
 from multiprocessing import Pool
 
@@ -215,12 +213,10 @@ class GPageSeg(object):
         vert = morph.select_regions(vert, sl.dim0, min=20 * scale, nbest=self.args.maxseps)
         return vert
 
-    def compute_colseps_morph(self, binary, scale, maxseps=3, minheight=20, maxwidth=5):
+    def compute_colseps_morph(self, binary, scale):
         """Finds extended vertical whitespace corresponding to column separators
         using morphological operations."""
         boxmap = psegutils.compute_boxmap(binary, scale, dtype='B')
-        bounds = morph.rb_closing(self.CheckConvertBinaryArray(boxmap), (int(5 * scale), int(5 * scale)))
-        bounds = np.maximum(self.CheckConvertBinaryArray(1 - bounds), self.CheckConvertBinaryArray(boxmap))
         cols = 1 - morph.rb_closing(boxmap, (int(20 * scale), int(scale)))
         cols = morph.select_regions(cols, sl.aspect, min=self.args.csminaspect)
         cols = morph.select_regions(cols, sl.dim0, min=self.args.csminheight * scale, nbest=self.args.maxcolseps)
@@ -231,7 +227,6 @@ class GPageSeg(object):
     def compute_colseps_mconv(self, binary, scale=1.0):
         """Find column separators using a combination of morphological
         operations and convolution."""
-        h, w = binary.shape
         smoothed = gaussian_filter(1.0 * binary, (scale, scale * 0.5))
         smoothed = uniform_filter(smoothed, (5.0 * scale, 1))
         thresh = (smoothed < np.amax(smoothed) * 0.1)
@@ -250,7 +245,6 @@ class GPageSeg(object):
     def compute_colseps_conv(self, binary, scale=1.0):
         """Find column separators by convolution and
         thresholding."""
-        h, w = binary.shape
         # find vertical whitespace by thresholding
         smoothed = gaussian_filter(1.0 * binary, (scale, scale * 0.5))
         smoothed = uniform_filter(smoothed, (5.0 * scale, 1))
@@ -483,7 +477,7 @@ def safe_process1(job, args):
         else:
             logger.info(fname + ":" + e)
     except Exception as e:
-        traceback.print_exc()
+        logger.exception(e)
 
 
 def process1(job, args):
@@ -492,7 +486,7 @@
         page_segmenter = GPageSeg(args)
         page_segmenter.process1(job)
     except ProcessingException as ex:
-        logger.error("%s: %s" (fname, ex.message))
+        logger.error("%s: %s" % (fname, ex.message))
     except Exception as ex:
         logger.exception(ex.message)
 
@@ -547,7 +541,7 @@ def do_script():
         jobs = []
         for i, f in enumerate(files):
             jobs += [(f, i + 1)]
-        result = pool.map(process1, jobs)
+        pool.map(process1, jobs)
 
 
 if __name__ == '__main__':

From 443b049a0e305a8f6ad56edb3930e47aa23e9c07 Mon Sep 17 00:00:00 2001
From: ZoranPavlovic
Date: Sun, 7 Jan 2018 00:07:43 +0200
Subject: [PATCH 3/3] Fixed minor bug in parallel pool map argument passing

---
 ocropus-gpageseg | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ocropus-gpageseg b/ocropus-gpageseg
index c5cffcfd..4b43c741 100755
--- a/ocropus-gpageseg
+++ b/ocropus-gpageseg
@@ -480,13 +480,13 @@ def safe_process1(job, args):
         logger.exception(e)
 
 
-def process1(job, args):
-    fname, i = job
+def process1(job):
+    fname, i, args = job
     try:
         page_segmenter = GPageSeg(args)
-        page_segmenter.process1(job)
+        page_segmenter.process1((fname, i))
     except ProcessingException as ex:
         logger.error("%s: %s" % (fname, ex.message))
     except Exception as ex:
         logger.exception(ex.message)
 
@@ -540,7 +540,7 @@ def do_script():
         pool = Pool(processes=args.parallel)
         jobs = []
         for i, f in enumerate(files):
-            jobs += [(f, i + 1)]
+            jobs += [(f, i + 1, args)]
         pool.map(process1, jobs)
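
For reference, a minimal standalone sketch of the calling convention PATCH 3 settles on: multiprocessing.Pool.map delivers exactly one picklable argument to the worker, so the parsed options travel inside each job tuple on the parallel path, while the serial path passes them as a separate parameter. The Namespace fields and file names below are hypothetical stand-ins for the real parsed ocropus-gpageseg arguments.

    from argparse import Namespace
    from multiprocessing import Pool

    def process1(job):
        # Pool.map passes a single argument, so args rides along in the tuple.
        fname, i, args = job
        print("worker got %s (#%d, parallel=%d)" % (fname, i, args.parallel))

    if __name__ == '__main__':
        # Hypothetical stand-ins for the parsed command-line options and inputs.
        args = Namespace(parallel=2, quiet=0)
        files = ["0001.png", "0002.png"]
        jobs = [(f, i + 1, args) for i, f in enumerate(files)]
        pool = Pool(processes=args.parallel)
        pool.map(process1, jobs)

An alternative would be functools.partial(process1, args=args) or Pool.starmap, but packing args into the job tuple keeps process1 compatible with a plain map call, which matches the shape of the existing code.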