qemu_img: is_not_zero() optimization
I ran qemu-img under a profiler and realized that most of the CPU time is consumed by the is_not_zero() function. I made a couple of optimizations to it and got the following output for `time qemu-img convert -O qcow2 volume.qcow2 snapshot.qcow2`: Original qemu-img: real 0m56.159s user 0m34.670s sys 0m12.079s Patched qemu-img: real 0m34.805s user 0m18.445s sys 0m12.552s Signed-off-by: Dmitry Konishchev <konishchev@gmail.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
		
							parent
							
								
									6f321e93ab
								
							
						
					
					
						commit
						f6a00aa150
					
				
							
								
								
									
										29
									
								
								qemu-img.c
									
									
									
									
									
								
							
							
						
						
									
										29
									
								
								qemu-img.c
									
									
									
									
									
								
							@ -496,14 +496,37 @@ static int img_commit(int argc, char **argv)
 | 
				
			|||||||
    return 0;
 | 
					    return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Checks whether the sector is not a zero sector.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Attention! The len must be a multiple of 4 * sizeof(long) due to
 | 
				
			||||||
 | 
					 * restriction of optimizations in this function.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
static int is_not_zero(const uint8_t *sector, int len)
 | 
					static int is_not_zero(const uint8_t *sector, int len)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
					     * Use long as the biggest available internal data type that fits into the
 | 
				
			||||||
 | 
					     * CPU register and unroll the loop to smooth out the effect of memory
 | 
				
			||||||
 | 
					     * latency.
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int i;
 | 
					    int i;
 | 
				
			||||||
    len >>= 2;
 | 
					    long d0, d1, d2, d3;
 | 
				
			||||||
    for(i = 0;i < len; i++) {
 | 
					    const long * const data = (const long *) sector;
 | 
				
			||||||
        if (((uint32_t *)sector)[i] != 0)
 | 
					
 | 
				
			||||||
 | 
					    len /= sizeof(long);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(i = 0; i < len; i += 4) {
 | 
				
			||||||
 | 
					        d0 = data[i + 0];
 | 
				
			||||||
 | 
					        d1 = data[i + 1];
 | 
				
			||||||
 | 
					        d2 = data[i + 2];
 | 
				
			||||||
 | 
					        d3 = data[i + 3];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (d0 || d1 || d2 || d3) {
 | 
				
			||||||
            return 1;
 | 
					            return 1;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return 0;
 | 
					    return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user