// Authors: Unknown. Please, if you are the author of this file, or if you // know who are the authors of this file, let us know, so we can give the // adequate credits and/or get the adequate authorizations. // WARNING: // This file implements an algorithm possibly linked to the patent // // David Lowe "Method and apparatus for identifying scale invariant // features in an image and use of same for locating an object in an // image", U.S. Patent 6,711,293. // // This file is made available for the exclusive aim of serving as // scientific tool to verify of the soundness and // completeness of the algorithm description. Compilation, // execution and redistribution of this file may violate exclusive // patents rights in certain countries. // The situation being different for every country and changing // over time, it is your responsibility to determine which patent // rights restrictions apply to you before you compile, use, // modify, or redistribute this file. A patent lawyer is qualified // to make this determination. // If and only if they don't conflict with any patent terms, you // can benefit from the following license terms attached to this // file. // // This program is provided for scientific and educational only: // you can use and/or modify it for these purposes, but you are // not allowed to redistribute this work or derivative works in // source or executable form. A license must be obtained from the // patent right holders for any other use. #include "demo_lib_sift.h" #define DEBUG 0 #define ABS(x) (((x) > 0) ? (x) : (-(x))) void default_sift_parameters(siftPar &par) { par.OctaveMax=100000; par.DoubleImSize = 0; par.order = 3; par.InitSigma = 1.6; par.BorderDist = 5; par.Scales = 3; par.PeakThresh = 255.0 * 0.04 / 3.0; par.EdgeThresh = 0.06; par.EdgeThresh1 = 0.08; par.OriBins = 36; par.OriSigma = 1.5; par.OriHistThresh = 0.8; par.MaxIndexVal = 0.2; par.MagFactor = 3; par.IndexSigma = 1.0; par.IgnoreGradSign = 0; // par.MatchRatio = 0.6; // par.MatchRatio = 0.75; // Guoshen Yu. Since l1 distance is used for matching instead of l2, a larger threshold is needed. par.MatchRatio = 0.73; // Guoshen Yu. Since l1 distance is used for matching instead of l2, a larger threshold is needed. par.MatchXradius = 1000000.0f; par.MatchYradius = 1000000.0f; par.noncorrectlylocalized = 0; }; ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// SIFT Keypoint detection ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void OctaveKeypoints(flimage & image, float octSize, keypointslist& keys,siftPar &par); void FindMaxMin( flimage* dogs, flimage* blur, float octSize ,keypointslist& keys,siftPar &par); bool LocalMax(float val, flimage& dog, int y0, int x0); bool LocalMin(float val, flimage& dog, int y0, int x0); bool LocalMaxMin(float val, const flimage& dog, int y0, int x0); int NotOnEdge( flimage& dog, int r, int c, float octSize,siftPar &par); float FitQuadratic(float offset[3], flimage* dogs, int s, int r, int c); void InterpKeyPoint( flimage* dogs, int s, int r, int c, const flimage& grad, const flimage& ori, flimage& map, float octSize, keypointslist& keys, int movesRemain,siftPar &par); void AssignOriHist( const flimage& grad, const flimage& ori, float octSize, float octScale, float octRow, float octCol, keypointslist& keys,siftPar &par); void SmoothHistogram( float* hist, int bins); float InterpPeak( float a, float b, float c); void MakeKeypoint( const flimage& grad, const flimage& ori, float octSize, float octScale, float octRow, float octCol, float angle, keypointslist& keys,siftPar &par); void MakeKeypointSample( keypoint& key, const flimage& grad, const flimage& ori, float scale, float row, float col,siftPar &par); void NormalizeVec( float* vec); void KeySampleVec( keypoint& key, const flimage& grad, const flimage& ori, float scale, float row, float col,siftPar &par); void KeySample( float index[IndexSize][IndexSize][OriSize], keypoint& key, const flimage& grad, const flimage& ori, float scale, float row, float col,siftPar &par); void AddSample( float index[IndexSize][IndexSize][OriSize], keypoint& key, const flimage& grad, const flimage& orim, float r, float c, float rpos, float cpos, float rx, float cx,siftPar &par); void PlaceInIndex( float index[IndexSize][IndexSize][OriSize], float mag, float ori, float rx, float cx,siftPar &par); void compute_sift_keypoints(float *input, keypointslist& keypoints, int width, int height, siftPar &par) { flimage image; /// Make zoom of image if necessary float octSize = 1.0; if (par.DoubleImSize){ //printf("... compute_sift_keypoints :: applying zoom\n"); // image.create(2*width, 2*height); // apply_zoom(input,image.getPlane(),2.0,par.order,width,height); // octSize *= 0.5; printf("Doulbe image size not allowed. Guoshen Yu\n"); exit(-1); } else { image.create(width,height,input); } // printf("Using initial Dog value: %f\n", par.PeakThresh); // printf("Double image size: %d\n", par.DoubleImSize); // printf("Interpolation order: %d\n", par.order); /// Apply initial smoothing to input image to raise its smoothing to par.InitSigma. /// We assume image from camera has smoothing of sigma = 0.5, which becomes sigma = 1.0 if image has been doubled. /// increase = sqrt(Init^2 - Current^2) float curSigma; if (par.DoubleImSize) curSigma = 1.0; else curSigma = 0.5; if (par.InitSigma > curSigma ) { if (DEBUG) printf("Convolving initial image to achieve std: %f \n", par.InitSigma); float sigma = (float) sqrt((double)(par.InitSigma * par.InitSigma - curSigma * curSigma)); gaussian_convolution( image.getPlane(), image.getPlane(), image.nwidth(), image.nheight(), sigma); } /// Convolve by par.InitSigma at each step inside OctaveKeypoints by steps of /// Subsample of factor 2 while reasonable image size /// Keep reducing image by factors of 2 until one dimension is /// smaller than minimum size at which a feature could be detected. int minsize = 2 * par.BorderDist + 2; int OctaveCounter = 0; //printf("... compute_sift_keypoints :: maximum number of scales : %d\n", par.OctaveMax); while (image.nwidth() > minsize && image.nheight() > minsize && OctaveCounter < par.OctaveMax) { if (DEBUG) printf("Calling OctaveKeypoints \n"); OctaveKeypoints(image, octSize, keypoints,par); // image is blurred inside OctaveKeypoints and therefore can be sampled flimage aux( (int)((float) image.nwidth() / 2.0f) , (int)((float) image.nheight() / 2.0f)); if (DEBUG) printf("Sampling initial image \n"); sample(image.getPlane(), aux.getPlane(), 2.0f, image.nwidth(), image.nheight()); image = aux; octSize *= 2.0; OctaveCounter++; } /* printf("sift:: %d keypoints\n", keypoints.size()); printf("sift:: plus non correctly localized: %d \n", par.noncorrectlylocalized);*/ } ///////////////////////////////////////////////// /// EXTREMA DETECTION IN ONE SCALE-SPACE OCTAVE: ///////////////////////////////////////////////// /// par.Scales determine how many steps we perform to pass from one scale to the next one: sigI --> 2*sigI /// At each step we pass from sigma_0 --> sigma0 * (1 + R) /// At the last step sigI * (1 + R)^par.Scales = 2 * sigI /// (1+R) = 2 ^(1 / par.Scales) it is called sigmaRatio /// It seems that blur[par.Scales+1] is compared in two succesive iterations void OctaveKeypoints(flimage & image, float octSize, keypointslist& keys,siftPar &par) { // Guoshen Yu, 2010.09.21, Windows version // flimage blur[par.Scales+3], dogs[par.Scales+2]; int size_blur = par.Scales+3; int size_dogs = par.Scales+2; flimage *blur = new flimage[size_blur]; flimage *dogs = new flimage[size_dogs]; float sigmaRatio = (float) pow(2.0, 1.0 / (double) par.Scales); /* Build array, blur, holding par.Scales+3 blurred versions of the image. */ blur[0] = flimage(image); /* First level is input to this routine. */ float prevSigma = par.InitSigma; /* Input image has par.InitSigma smoothing. */ /* Form each level by adding incremental blur from previous level. Increase in blur is from prevSigma to prevSigma * sigmaRatio, so increase^2 = (prevSigma * sigmaRatio)^2 - prevSigma^2 */ for (int i = 1; i < par.Scales + 3; i++) { if (DEBUG) printf("Convolving scale: %d \n", i); blur[i] = flimage(blur[i-1]); float increase = prevSigma*(float)sqrt((double)(sigmaRatio*sigmaRatio-1.0)); gaussian_convolution( blur[i].getPlane(), blur[i].getPlane(), blur[i].nwidth(), blur[i].nheight(), increase); prevSigma *= sigmaRatio; } /* Compute an array, dogs, of difference-of-Gaussian images by subtracting each image from its next blurred version. */ for (int i = 0; i < par.Scales + 2; i++) { dogs[i] = flimage(blur[i]); /// dogs[i] = dogs[i] - blur[i+1] combine(dogs[i].getPlane(),1.0f, blur[i+1].getPlane(),-1.0f, dogs[i].getPlane(), dogs[i].nwidth() * dogs[i].nheight()); } // Image with exact blur to be subsampled is blur[scales] image = blur[par.Scales]; /* Scale-space extrema detection in this octave */ if (DEBUG) printf("Looking for local maxima \n"); FindMaxMin(dogs, blur, octSize, keys,par); // Guoshen Yu, 2010.09.22, Windows version delete [] blur; delete [] dogs; } ///////////////////////////////////////////////// ///Find the local maxima and minima of the DOG images in scale space. Return the keypoints for these locations. ///////////////////////////////////////////////// /// For each point at each scale we decide if it is a local maxima: /// - |dogs(x,y,s)| > 0.8 * par.PeakThresh /// - Local maximum or minimum in s-1,s,s+1 /// - NotonEdge: ratio of the two principle curvatures of the DOG function at this point be below a threshold. /// blur[par.Scales+1] is not used in order to look for extrema /// while these could be computed using avalaible blur and dogs void FindMaxMin( flimage* dogs, flimage* blur, float octSize, keypointslist& keys,siftPar &par) { int width = dogs[0].nwidth(), height = dogs[0].nheight(); /* Create an image map in which locations that have a keypoint are marked with value 1.0, to prevent two keypoints being located at same position. This may seem an inefficient data structure, but does not add significant overhead. */ flimage map(width,height,0.0f); flimage grad(width,height,0.0f); flimage ori(width,height,0.0f); /* Search through each scale, leaving 1 scale below and 1 above. There are par.Scales+2 dog images. */ for (int s = 1; s < par.Scales+1; s++) { if (DEBUG) printf("************************scale: %d\n", s); //getchar(); /* For each intermediate image, compute gradient and orientation images to be used for keypoint description. */ compute_gradient_orientation(blur[s].getPlane(), grad.getPlane(), ori.getPlane(), blur[s].nwidth(), blur[s].nheight()); /* Only find peaks at least par.BorderDist samples from image border, as peaks centered close to the border will lack stability. */ assert(par.BorderDist >= 2); float val; int partialcounter = 0; for (int r = par.BorderDist; r < height - par.BorderDist; r++) for (int c = par.BorderDist; c < width - par.BorderDist; c++) { /* Pixel value at (c,r) position. */ val = dogs[s](c,r); /* DOG magnitude must be above 0.8 * par.PeakThresh threshold (precise threshold check will be done once peak interpolation is performed). Then check whether this point is a peak in 3x3 region at each level, and is not on an elongated edge. */ if (fabs(val) > 0.8 * par.PeakThresh) { /* // If local maxima if (LocalMax(val, dogs[s-1], r, c,par) && LocalMax(val, dogs[s], r, c, par) && LocalMax(val, dogs[s+1], r, c,par) && NotOnEdge(dogs[s], r, c, octSize,par)) { if (DEBUG) printf("Maximum Keypoint found (%d,%d,%d) val: %f\n",s,r,c,val); InterpKeyPoint( dogs, s, r, c, grad, ori, map, octSize, keys, 5,par); } else if (LocalMin(val, dogs[s-1], r, c,par) && LocalMin(val, dogs[s], r, c,par) && LocalMin(val, dogs[s+1], r, c,par) && NotOnEdge(dogs[s], r, c, octSize,par)) { if (DEBUG) printf("Minimum Keypoint found (%d,%d,%d) val: %f\n",s,r,c,val); InterpKeyPoint( dogs, s, r, c, grad, ori, map, octSize, keys, 5,par); } */ if (LocalMaxMin(val, dogs[s-1], r, c) && LocalMaxMin(val, dogs[s], r, c) && LocalMaxMin(val, dogs[s+1], r, c) && NotOnEdge(dogs[s], r, c, octSize,par)) { partialcounter++; if (DEBUG) printf("%d: (%d,%d,%d) val: %f\n",partialcounter, s,r,c,val); InterpKeyPoint( dogs, s, r, c, grad, ori, map, octSize, keys, 5,par); //getchar(); } } } } } //bool LocalMax(float val, flimage& dog, int y0, int x0, siftPar &par) bool LocalMax(float val, flimage& dog, int y0, int x0) { for (int x = x0 - 1; x <= x0 + 1; x++) for (int y = y0 - 1; y <= y0 + 1; y++){ //printf("%f \t", dog(x,y)); if (dog(x,y) > val) return 0; } return 1; } bool LocalMin(float val, flimage& dog, int y0, int x0) { for (int x = x0 - 1; x <= x0 + 1; x++) for (int y = y0 - 1; y <= y0 + 1; y++){ //printf("%f \t", dog(x,y)); if (dog(x,y) < val) return 0; } return 1; } /* Return TRUE iff val is a local maximum (positive value) or minimum (negative value) compared to the 3x3 neighbourhood that is centered at (row,col). */ bool LocalMaxMin(float val, const flimage& dog, int y0, int x0) { // For efficiency, use separate cases for maxima or minima, and // return as soon as possible if (val > 0.0) { for (int x = x0 - 1; x <= x0 + 1; x++) for (int y = y0 - 1; y <= y0 + 1; y++){ if (dog(x,y) > val) return false; } } else { for (int x = x0 - 1; x <= x0 + 1; x++) for (int y = y0 - 1; y <= y0 + 1; y++){ if (dog(x,y) < val) return false; } } return true; } /* Returns FALSE if this point on the DOG function lies on an edge. This test is done early because it is very efficient and eliminates many points. It requires that the ratio of the two principle curvatures of the DOG function at this point be below a threshold. Edge threshold is higher on the first scale where SNR is small in order to reduce the number of unstable keypoints. */ int NotOnEdge(flimage& dog, int r, int c, float octSize,siftPar &par) { /* Compute 2x2 Hessian values from pixel differences. */ float H00 = dog(c,r-1) - 2.0 * dog(c,r) + dog(c,r+1), /* AMIR: div by ? */ H11 = dog(c-1,r) - 2.0 * dog(c,r) + dog(c+1,r), H01 = ( (dog(c+1,r+1) - dog(c-1,r+1)) - (dog(c+1,r-1) - dog(c-1,r-1)) ) / 4.0; /* Compute determinant and trace of the Hessian. */ float det = H00 * H11 - H01 * H01, /// Det H = \prod l_i trace = H00 + H11; /// tr H = \sum l_i /// As we do not desire edges but only corners we demand l_max / l_min less than a threshold /// In practice if A = k B, A*B = k B^2 /// (A + B)^2 = (k+1)^2 * B^2 /// k B^2 > t * (k+1)^2 * B^2 sii k / (k+1)^2 > t /// This is a decreasing function for k > 1 and value 0.3 at k=1. /// Setting t = 0.08, means k<=10 /* To detect an edge response, we require the ratio of smallest to largest principle curvatures of the DOG function (eigenvalues of the Hessian) to be below a threshold. For efficiency, we use Harris' idea of requiring the determinant to be above par.EdgeThresh times the squared trace, as for eigenvalues A and B, det = AB, trace = A+B. So if A = 10B, then det = 10B**2, and trace**2 = (11B)**2 = 121B**2, so par.EdgeThresh = 10/121 = 0.08 to require ratio of eigenvalues less than 10. */ if (octSize <= 1) return (det > par.EdgeThresh1 * trace * trace); else return (det > par.EdgeThresh * trace * trace); } /* Create a keypoint at a peak near scale space location (s,r,c), where s is scale (index of DOGs image), and (r,c) is (row, col) location. Add to the list of keys with any new keys added. */ void InterpKeyPoint( flimage* dogs, int s, int r, int c, const flimage& grad, const flimage& ori, flimage& map, float octSize, keypointslist& keys, int movesRemain,siftPar &par) { /* Fit quadratic to determine offset and peak value. */ float offset[3]; float peakval = FitQuadratic(offset, dogs, s, r, c); if (DEBUG) printf("peakval: %f, of[0]: %f of[1]: %f of[2]: %f\n", peakval, offset[0], offset[1], offset[2]); /* Move to an adjacent (row,col) location if quadratic interpolation is larger than 0.6 units in some direction (we use 0.6 instead of 0.5 to avoid jumping back and forth near boundary). We do not perform move to adjacent scales, as it is seldom useful and we do not have easy access to adjacent scale structures. The movesRemain counter allows only a fixed number of moves to prevent possibility of infinite loops. */ int newr = r, newc = c; if (offset[1] > 0.6 && r < dogs[0].nheight() - 3) newr++; else if (offset[1] < -0.6 && r > 3) newr--; if (offset[2] > 0.6 && c < dogs[0].nwidth() - 3) newc++; else if (offset[2] < -0.6 && c > 3) newc--; if (movesRemain > 0 && (newr != r || newc != c)) { InterpKeyPoint( dogs, s, newr, newc, grad, ori, map, octSize, keys,movesRemain - 1,par); return; } /* Do not create a keypoint if interpolation still remains far outside expected limits, or if magnitude of peak value is below threshold (i.e., contrast is too low). */ if ( fabs(offset[0]) > 1.5 || fabs(offset[1]) > 1.5 || fabs(offset[2]) > 1.5 || fabs(peakval) < par.PeakThresh) { if (DEBUG) printf("Point not well localized by FitQuadratic\n"); par.noncorrectlylocalized++; return; } /* Check that no keypoint has been created at this location (to avoid duplicates). Otherwise, mark this map location. */ if (map(c,r) > 0.0) return; map(c,r) = 1.0; /* The scale relative to this octave is given by octScale. The scale units are in terms of sigma for the smallest of the Gaussians in the DOG used to identify that scale. */ // Guoshen Yu, 2010.09.21 Windows version // float octScale = par.InitSigma * pow(2.0, (s + offset[0]) / (float) par.Scales); float octScale = par.InitSigma * pow(2.0, (s + offset[0]) / (double) par.Scales); /// always use histogram of orientations //if (UseHistogramOri) AssignOriHist( grad, ori, octSize, octScale, r + offset[1], c + offset[2], keys, par); //else // AssignOriAvg( // grad, ori, octSize, octScale, // r + offset[1], c + offset[2], keys); } /* Apply the method developed by Matthew Brown (see BMVC 02 paper) to fit a 3D quadratic function through the DOG function values around the location (s,r,c), i.e., (scale,row,col), at which a peak has been detected. Return the interpolated peak position as a vector in "offset", which gives offset from position (s,r,c). The returned value is the interpolated DOG magnitude at this peak. */ float FitQuadratic(float offset[3], flimage* dogs, int s, int r, int c) { float g[3]; flimage *dog0, *dog1, *dog2; int i; //s = 1; r = 128; c = 128; float ** H = allocate_float_matrix(3, 3); /* Select the dog images at peak scale, dog1, as well as the scale below, dog0, and scale above, dog2. */ dog0 = &dogs[s-1]; dog1 = &dogs[s]; dog2 = &dogs[s+1]; /* Fill in the values of the gradient from pixel differences. */ g[0] = ((*dog2)(c,r) - (*dog0)(c,r)) / 2.0; g[1] = ((*dog1)(c,r+1) - (*dog1)(c,r-1)) / 2.0; g[2] = ((*dog1)(c+1,r) - (*dog1)(c-1,r)) / 2.0; /* Fill in the values of the Hessian from pixel differences. */ H[0][0] = (*dog0)(c,r) - 2.0 * (*dog1)(c,r) + (*dog2)(c,r); H[1][1] = (*dog1)(c,r-1) - 2.0 * (*dog1)(c,r) + (*dog1)(c,r+1); H[2][2] = (*dog1)(c-1,r) - 2.0 * (*dog1)(c,r) + (*dog1)(c+1,r); H[0][1] = H[1][0] = ( ((*dog2)(c,r+1) - (*dog2)(c,r-1)) - ((*dog0)(c,r+1) - (*dog0)(c,r-1)) ) / 4.0; H[0][2] = H[2][0] = ( ((*dog2)(c+1,r) - (*dog2)(c-1,r)) - ((*dog0)(c+1,r) - (*dog0)(c-1,r)) ) / 4.0; H[1][2] = H[2][1] = ( ((*dog1)(c+1,r+1) - (*dog1)(c-1,r+1)) - ((*dog1)(c+1,r-1) - (*dog1)(c-1,r-1)) ) / 4.0; /* Solve the 3x3 linear sytem, Hx = -g. Result, x, gives peak offset. Note that SolveLinearSystem destroys contents of H. */ offset[0] = - g[0]; offset[1] = - g[1]; offset[2] = - g[2]; // for(i=0; i < 3; i++){ // // for(j=0; j < 3; j++) printf("%f ", H[i][j]); // printf("\n"); // } // printf("\n"); // // for(i=0; i < 3; i++) printf("%f ", offset[i]); // printf("\n"); float solution[3]; lusolve(H, solution, offset,3); // printf("\n"); // for(i=0; i < 3; i++) printf("%f ", solution[i]); // printf("\n"); desallocate_float_matrix(H,3,3); delete[] H; /*memcheck*/ /* Also return value of DOG at peak location using initial value plus 0.5 times linear interpolation with gradient to peak position (this is correct for a quadratic approximation). */ for(i=0; i < 3; i++) offset[i] = solution[i]; return ((*dog1)(c,r) + 0.5 * (solution[0]*g[0]+solution[1]*g[1]+solution[2]*g[2])); } /// - Compute histogram of orientation in a neighborhood weighted by gradient and distance to center /// - Look for local (3-neighborhood) maximum with valuer larger or equal than par.OriHistThresh * maxval /* Assign an orientation to this keypoint. This is done by creating a Gaussian weighted histogram of the gradient directions in the region. The histogram is smoothed and the largest peak selected. The results are in the range of -PI to PI. */ void AssignOriHist( const flimage& grad, const flimage& ori, float octSize, float octScale, float octRow, float octCol,keypointslist& keys,siftPar &par) { int bin, prev, next; // Guoshen Yu, 2010.09.21 Windows version // float hist[par.OriBins], distsq, dif, gval, weight, angle, interp; float distsq, dif, gval, weight, angle, interp; int tmp_size = par.OriBins; float *hist = new float[tmp_size]; float radius2, sigma2; int row = (int) (octRow+0.5), col = (int) (octCol+0.5), rows = grad.nheight(), cols = grad.nwidth(); for (int i = 0; i < par.OriBins; i++) hist[i] = 0.0; /* Look at pixels within 3 sigma around the point and sum their Gaussian weighted gradient magnitudes into the histogram. */ float sigma = par.OriSigma * octScale; int radius = (int) (sigma * 3.0); int rmin = MAX(0,row-radius); int cmin = MAX(0,col-radius); int rmax = MIN(row+radius,rows-2); int cmax = MIN(col+radius,cols-2); radius2 = (float)(radius * radius); sigma2 = 2.0*sigma*sigma; for (int r = rmin; r <= rmax; r++) { for (int c = cmin; c <= cmax; c++) { gval = grad(c,r); dif = (r - octRow); distsq = dif*dif; dif = (c - octCol); distsq += dif*dif; if (gval > 0.0 && distsq < radius2 + 0.5) { weight = exp(- distsq / sigma2); /* Ori is in range of -PI to PI. */ angle = ori(c,r); bin = (int) (par.OriBins * (angle + PI + 0.001) / (2.0 * PI)); assert(bin >= 0 && bin <= par.OriBins); bin = MIN(bin, par.OriBins - 1); hist[bin] += weight * gval; } } } /* Apply smoothing 6 times for accurate Gaussian approximation. */ for (int i = 0; i < 6; i++) SmoothHistogram(hist, par.OriBins); /* Find maximum value in histogram. */ float maxval = 0.0; for (int i = 0; i < par.OriBins; i++) if (hist[i] > maxval) maxval = hist[i]; /* Look for each local peak in histogram. If value is within par.OriHistThresh of maximum value, then generate a keypoint. */ for (int i = 0; i < par.OriBins; i++) { prev = (i == 0 ? par.OriBins - 1 : i - 1); next = (i == par.OriBins - 1 ? 0 : i + 1); if ( hist[i] > hist[prev] && hist[i] > hist[next] && hist[i] >= par.OriHistThresh * maxval ) { /* Use parabolic fit to interpolate peak location from 3 samples. Set angle in range -PI to PI. */ interp = InterpPeak(hist[prev], hist[i], hist[next]); angle = 2.0 * PI * (i + 0.5 + interp) / par.OriBins - PI; assert(angle >= -PI && angle <= PI); if (DEBUG) printf("angle selected: %f \t location: (%f,%f)\n", angle, octRow, octCol); ; /* Create a keypoint with this orientation. */ MakeKeypoint( grad, ori, octSize, octScale, octRow, octCol, angle, keys,par); } } // Guoshen Yu, 2010.09.22, Windows version delete [] hist; } /* Smooth a histogram by using a [1/3 1/3 1/3] kernel. Assume the histogram is connected in a circular buffer. */ void SmoothHistogram(float* hist, int bins) { float prev, temp; prev = hist[bins - 1]; for (int i = 0; i < bins; i++) { temp = hist[i]; hist[i] = ( prev + hist[i] + hist[(i + 1 == bins) ? 0 : i + 1] ) / 3.0; prev = temp; } } /* Return a number in the range [-0.5, 0.5] that represents the location of the peak of a parabola passing through the 3 evenly spaced samples. The center value is assumed to be greater than or equal to the other values if positive, or less than if negative. */ float InterpPeak(float a, float b, float c) { if (b < 0.0) { a = -a; b = -b; c = -c; } assert(b >= a && b >= c); return 0.5 * (a - c) / (a - 2.0 * b + c); } /* Joan Pau: Add a new keypoint to a vector of keypoints Create a new keypoint and return list of keypoints with new one added. */ void MakeKeypoint( const flimage& grad, const flimage& ori, float octSize, float octScale, float octRow, float octCol, float angle, keypointslist& keys,siftPar &par) { keypoint newkeypoint; newkeypoint.x = octSize * octCol; /*x coordinate */ newkeypoint.y = octSize * octRow; /*y coordinate */ newkeypoint.scale = octSize * octScale; /* scale */ newkeypoint.angle = angle; /* orientation */ MakeKeypointSample(newkeypoint,grad,ori,octScale,octRow,octCol,par); keys.push_back(newkeypoint); } /* Use the parameters of this keypoint to sample the gradient images at a set of locations within a circular region around the keypoint. The (scale,row,col) values are relative to current octave sampling. The resulting vector is stored in the key. */ void MakeKeypointSample( keypoint& key, const flimage& grad, const flimage& ori, float scale, float row, float col,siftPar &par) { /* Produce sample vector. */ KeySampleVec(key, grad, ori, scale, row, col,par); /* Normalize vector. This should provide illumination invariance for planar lambertian surfaces (except for saturation effects). Normalization also improves nearest-neighbor metric by increasing relative distance for vectors with few features. It is also useful to implement a distance threshold and to allow conversion to integer format. */ NormalizeVec(key.vec); /* Now that normalization has been done, threshold elements of index vector to decrease emphasis on large gradient magnitudes. Admittedly, the large magnitude values will have affected the normalization, and therefore the threshold, so this is of limited value. */ bool changed = false; for (int i = 0; i < VecLength; i++) if (key.vec[i] > par.MaxIndexVal) { key.vec[i] = par.MaxIndexVal; changed = true; } if (changed) NormalizeVec(key.vec); /* Convert float vector to integer. Assume largest value in normalized vector is likely to be less than 0.5. */ /// QUESTION: why is the vector quantized to integer int intval; for (int i = 0; i < VecLength; i++) { intval = (int)(512.0 * key.vec[i]); key.vec[i] = (int) MIN(255, intval); } } /* Normalize length of vec to 1.0. */ void NormalizeVec(float* vec) { float val, fac; float sqlen = 0.0; for (int i = 0; i < VecLength; i++) { val = vec[i]; sqlen += val * val; } fac = 1.0 / sqrt(sqlen); for (int i = 0; i < VecLength; i++) vec[i] *= fac; } /* Create a 3D index array into which gradient values are accumulated. After filling array, copy values back into vec. */ void KeySampleVec( keypoint& key, const flimage& grad, const flimage& ori, float scale, float row, float col,siftPar &par) { float index[IndexSize][IndexSize][OriSize]; /* Initialize index array. */ for (int i = 0; i < IndexSize; i++) for (int j = 0; j < IndexSize; j++) for (int k = 0; k < OriSize; k++) index[i][j][k] = 0.0; KeySample(index, key, grad, ori, scale, row, col, par); /* Unwrap the 3D index values into 1D vec. */ int v = 0; for (int i = 0; i < IndexSize; i++) for (int j = 0; j < IndexSize; j++) for (int k = 0; k < OriSize; k++) key.vec[v++] = index[i][j][k]; } /* Add features to vec obtained from sampling the grad and ori images for a particular scale. Location of key is (scale,row,col) with respect to images at this scale. We examine each pixel within a circular region containing the keypoint, and distribute the gradient for that pixel into the appropriate bins of the index array. */ void KeySample( float index[IndexSize][IndexSize][OriSize], keypoint& key, const flimage& grad, const flimage& ori, float scale, float row, float col,siftPar &par) { float rpos, cpos, rx, cx; int irow = (int) (row + 0.5), icol = (int) (col + 0.5); float sine = (float) sin(key.angle), cosine = (float) cos(key.angle); /* The spacing of index samples in terms of pixels at this scale. */ float spacing = scale * par.MagFactor; /* Radius of index sample region must extend to diagonal corner of index patch plus half sample for interpolation. */ float radius = 1.414 * spacing * (IndexSize + 1) / 2.0; int iradius = (int) (radius + 0.5); /* Examine all points from the gradient image that could lie within the index square. */ for (int i = -iradius; i <= iradius; i++) { for (int j = -iradius; j <= iradius; j++) { /* Rotate sample offset to make it relative to key orientation. Uses (row,col) instead of (x,y) coords. Also, make subpixel correction as later image offset must be an integer. Divide by spacing to put in index units. */ /* Guoshen Yu, inverse the rotation */ rpos = ((cosine * i - sine * j) - (row - irow)) / spacing; cpos = ((sine * i + cosine * j) - (col - icol)) / spacing; /* rpos = ((cosine * i + sine * j) - (row - irow)) / spacing; cpos = ((- sine * i + cosine * j) - (col - icol)) / spacing;*/ /* Compute location of sample in terms of real-valued index array coordinates. Subtract 0.5 so that rx of 1.0 means to put full weight on index[1] (e.g., when rpos is 0 and IndexSize is 3. */ rx = rpos + IndexSize / 2.0 - 0.5; cx = cpos + IndexSize / 2.0 - 0.5; /* Test whether this sample falls within boundary of index patch. */ if ( rx > -1.0 && rx < (float) IndexSize && cx > -1.0 && cx < (float) IndexSize ) AddSample( index, key, grad, ori, irow + i, icol + j, rpos, cpos, rx, cx,par); } } } /* Given a sample from the image gradient, place it in the index array. */ void AddSample( float index[IndexSize][IndexSize][OriSize], keypoint& key, const flimage& grad, const flimage& orim, float r, float c, float rpos, float cpos, float rx, float cx,siftPar &par) { /* Clip at image boundaries. */ if (r < 0 || r >= grad.nheight() || c < 0 || c >= grad.nwidth()) return; /* Compute Gaussian weight for sample, as function of radial distance from center. Sigma is relative to half-width of index. */ float sigma = par.IndexSigma * 0.5 * IndexSize, weight = exp(- (rpos * rpos + cpos * cpos) / (2.0 * sigma * sigma)), // mag = weight * grad(c,r); mag = weight * grad((int)c,(int)r); // Guoshen Yu, explicitely cast to int to avoid warning /* Subtract keypoint orientation to give ori relative to keypoint. */ // float ori = orim(c,r) - key.angle; float ori = orim((int)c,(int)r) - key.angle; // Guoshen Yu, explicitely cast to int to avoid warning /* Put orientation in range [0, 2*PI]. If sign of gradient is to be ignored, then put in range [0, PI]. */ if (par.IgnoreGradSign) { while (ori > PI ) ori -= PI; while (ori < 0.0) ori += PI; } else { while (ori > 2.0*PI) ori -= 2.0*PI; while (ori < 0.0 ) ori += 2.0*PI; } PlaceInIndex(index, mag, ori, rx, cx,par); } /* Increment the appropriate locations in the index to incorporate this image sample. The location of the sample in the index is (rx,cx). */ void PlaceInIndex( float index[IndexSize][IndexSize][OriSize], float mag, float ori, float rx, float cx,siftPar &par) { int orr, rindex, cindex, oindex; float rweight, cweight, oweight; float *ivec; float oval = OriSize * ori / (par.IgnoreGradSign ? PI : 2.0*PI); // int ri = (rx >= 0.0) ? rx : rx - 1.0, /* Round down to next integer. */ // ci = (cx >= 0.0) ? cx : cx - 1.0, // oi = (oval >= 0.0) ? oval : oval - 1.0; int ri = (int)((rx >= 0.0) ? rx : rx - 1.0), /* Round down to next integer. */ // Guoshen Yu, explicitely cast to int to avoid warning ci = (int)((cx >= 0.0) ? cx : cx - 1.0), // Guoshen Yu, explicitely cast to int to avoid warning oi = (int)((oval >= 0.0) ? oval : oval - 1.0); // Guoshen Yu, explicitely cast to int to avoid warning float rfrac = rx - ri, /* Fractional part of location. */ cfrac = cx - ci, ofrac = oval - oi; assert( ri >= -1 && ri < IndexSize && oi >= 0 && oi <= OriSize && rfrac >= 0.0 && rfrac <= 1.0); /* Put appropriate fraction in each of 8 buckets around this point in the (row,col,ori) dimensions. This loop is written for efficiency, as it is the inner loop of key sampling. */ for (int r = 0; r < 2; r++) { rindex = ri + r; if (rindex >=0 && rindex < IndexSize) { rweight = mag * ((r == 0) ? 1.0 - rfrac : rfrac); for (int c = 0; c < 2; c++) { cindex = ci + c; if (cindex >=0 && cindex < IndexSize) { cweight = rweight * ((c == 0) ? 1.0 - cfrac : cfrac); ivec = index[rindex][cindex]; for (orr = 0; orr < 2; orr++) { oindex = oi + orr; if (oindex >= OriSize) /* Orientation wraps around at PI. */ oindex = 0; oweight = cweight * ((orr == 0) ? 1.0 - ofrac : ofrac); ivec[oindex] += oweight; } } } } } } ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// SIFT keypoint matching ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// float DistSquared(keypoint &k1,keypoint &k2, float tdist, siftPar &par) { float dif; float distsq = 0.0; // if (abs(k1.x - k2.x) > par.MatchXradius || abs(k1.y - k2.y) > par.MatchYradius) return tdist; if (ABS(k1.x - k2.x) > par.MatchXradius || ABS(k1.y - k2.y) > par.MatchYradius) return tdist; float *ik1 = k1.vec; float *ik2 = k2.vec; for (int i = 0; i < VecLength && distsq <= tdist; i++) { // for (int i = 0; i < VecLength ; i++) { dif = ik1[i] - ik2[i]; distsq += dif * dif; //distsq += ABS(dif); // distsq += ((ik1[i] > ik2[i]) ? (ik1[i] - ik2[i]) : (-ik1[i] + ik2[i])); } return distsq; } float DistSquared_short(keypoint_short &k1,keypoint_short &k2, float tdist, siftPar &par) { // For Mac/Linux compilation using make: vectorization is possible with short. unsigned short distsq = 0; // For Windows compilation using Intel C++ compiler: vectorization is possible with int. // int distsq = 0; if (ABS(k1.x - k2.x) > par.MatchXradius || ABS(k1.y - k2.y) > par.MatchYradius) return tdist; unsigned short *ik1 = k1.vec; unsigned short *ik2 = k2.vec; for (int i = 0; i < VecLength ; i++) { distsq += ((ik1[i] > ik2[i]) ? (ik1[i] - ik2[i]) : (-ik1[i] + ik2[i])); } return distsq; } /* This searches through the keypoints in klist for the two closest matches to key. It returns the ratio of the distance to key of the closest and next to closest keypoints in klist, while bestindex is the index of the closest keypoint. */ float CheckForMatch( keypoint& key, keypointslist& klist, int& min,siftPar &par) { int nexttomin = -1; float dsq, distsq1, distsq2; distsq1 = distsq2 = 1000000000000.0f; for (int j=0; j< (int) klist.size(); j++){ dsq = DistSquared(key, klist[j], distsq2,par); if (dsq < distsq1) { distsq2 = distsq1; distsq1 = dsq; nexttomin = min; min = j; } else if (dsq < distsq2) { distsq2 = dsq; nexttomin = j; } } return distsq1/distsq2 ; } float CheckForMatch_short( keypoint_short& key, keypointslist_short& klist, int& min,siftPar &par) { int nexttomin = -1; float dsq, distsq1, distsq2; distsq1 = distsq2 = 1000000000000.0f; for (int j=0; j< (int) klist.size(); j++){ dsq = DistSquared_short(key, klist[j], distsq2,par); if (dsq < distsq1) { distsq2 = distsq1; distsq1 = dsq; nexttomin = min; min = j; } else if (dsq < distsq2) { distsq2 = dsq; nexttomin = j; } } return distsq1/distsq2 ; } void compute_sift_matches( keypointslist& keys1, keypointslist& keys2, matchingslist& matchings,siftPar &par) { int imatch=0; float sqminratio = par.MatchRatio * par.MatchRatio, sqratio; // write the keypoint descriptors in char keypointslist_short keys1_short(keys1.size()); for (int i=0; i< (int) keys1.size(); i++) { keys1_short[i].x = keys1[i].x; keys1_short[i].y = keys1[i].y; keys1_short[i].scale = keys1[i].scale; keys1_short[i].angle = keys1[i].angle; for (int k=0; k < VecLength; k++) { keys1_short[i].vec[k] = (unsigned short) (keys1[i].vec[k]); } } keypointslist_short keys2_short(keys2.size()); for (int i=0; i< (int) keys2.size(); i++) { keys2_short[i].x = keys2[i].x; keys2_short[i].y = keys2[i].y; keys2_short[i].scale = keys2[i].scale; keys2_short[i].angle = keys2[i].angle; for (int k=0; k < VecLength; k++) { keys2_short[i].vec[k] = (unsigned short) (keys2[i].vec[k]); } } for (int i=0; i< (int) keys1.size(); i++) { // sqratio = CheckForMatch(keys1[i], keys2, imatch,par); sqratio = CheckForMatch_short(keys1_short[i], keys2_short, imatch,par); if (sqratio< sqminratio) matchings.push_back( matching(keys1[i],keys2[imatch] )); // matchings.push_back( matching_char(keys1_char[i],keys2_char[imatch] )); } }