Merge branch 'dev' into beta_release

This commit is contained in:
Fei Xia 2017-10-24 13:04:54 -07:00 committed by GitHub
commit 73d35eec0a
6 changed files with 469 additions and 279 deletions

View File

@ -160,6 +160,13 @@ download_data () {
wget --quiet https://www.dropbox.com/s/msd32wg144eew5r/coord.npy
cd -
fi
# Download model.pth into ./realenv/core/render/ when it is not already present.
# The download runs in a subshell so the caller's working directory is never
# changed, and wget only runs if the cd succeeded; otherwise the file would be
# saved into whatever directory the script happened to be in.
if [ ! -f ./realenv/core/render/model.pth ]; then
    (
        cd ./realenv/core/render/ &&
            wget --quiet https://www.dropbox.com/s/e7far9okgv7oq8p/model.pth
    )
fi
if [ -f realenv/data/*.pkl ]; then
rm realenv/data/*.pkl
@ -209,4 +216,4 @@ case "$subcommand" in
default "$@"
exit 1
;;
esac
esac

View File

@ -62,7 +62,7 @@ if __name__ == '__main__':
if not done: continue
if restart_delay==0:
print("score=%0.2f in %i frames" % (score, frame))
restart_delay = 20 # 2 sec at 60 fps
restart_delay = 40 # frames to wait before restarting; NOTE(review): 40 frames is ~0.67 sec at 60 fps, not 2 sec -- confirm intended delay
else:
restart_delay -= 1
if restart_delay==0: break
@ -70,4 +70,4 @@ if __name__ == '__main__':
except KeyboardInterrupt:
env._end()
print("Program finished")
print("Program finished")

View File

@ -542,7 +542,6 @@ int main( int argc, char * argv[] )
int i = 0;
zmq::context_t context (1);
zmq::socket_t socket (context, ZMQ_REP);
socket.bind ("tcp://127.0.0.1:5555");
@ -628,207 +627,329 @@ int main( int argc, char * argv[] )
// create buffer, 3 channels per Pixel
float* dataBuffer = (float*)malloc(nByte);
// First let's create our buffer, 3 channels per Pixel
//float* dataBuffer = (float*)malloc(nByte);
//char* dataBuffer = (char*)malloc(nSize*sizeof(char));
float * dataBuffer_c = (float * ) malloc(windowWidth*windowHeight * sizeof(float));
if (!dataBuffer) return false;
if (!dataBuffer_c) return false;
//float * dataBuffer_c = (float * ) malloc(windowWidth*windowHeight * sizeof(float));
//if (!dataBuffer) return false;
//if (!dataBuffer_c) return false;
for (int k = 0; k < 6; k ++ )
// Render-mode switch: when true, six cube-map faces are rendered and projected
// to an equirectangular panorama; when false, a single pinhole view is rendered.
// Uses the C++ literal `false` (`False` only exists as a third-party macro).
bool pano = false;
if (pano)
{
for (int k = 0; k < 6; k ++ )
{
// Render to our framebuffer
// Clear the screen
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// Use our shader
glUseProgram(programID);
// Compute the MVP matrix from keyboard and mouse input
//computeMatricesFromInputs();
//computeMatricesFromFile(name_loc);
float fov = glm::radians(90.0f);
glm::mat4 ProjectionMatrix = glm::perspective(fov, 1.0f, 0.1f, 5000.0f); // near & far are not verified, but accuracy seems to work well
glm::mat4 ViewMatrix = getView(viewMat, k);
//glm::mat4 ViewMatrix = getViewMatrix();
glm::mat4 viewMatPose = glm::inverse(ViewMatrix);
// printf("View (pose) matrix for skybox %d\n", k);
// for (int i = 0; i < 4; ++i) {
// printf("\t %f %f %f %f\n", viewMatPose[0][i], viewMatPose[1][i], viewMatPose[2][i], viewMatPose[3][i]);
// //printf("\t %f %f %f %f\n", ViewMatrix[0][i], ViewMatrix[1][i], ViewMatrix[2][i], ViewMatrix[3][i]);
// }
glm::mat4 ModelMatrix = glm::mat4(1.0);
pose_idx ++;
//glm::mat4 tempMat = getViewMatrix();
//debug_mat(tempMat, "csv");
// glm::mat4 revertZ = glm::mat4();
// revertZ[2][2] = -1;
// glm::quat rotateZ_N90 = glm::quat(glm::vec3(0.0f, 0.0f, glm::radians(-90.0f)));
// glm::quat rotateX_90 = glm::quat(glm::vec3(glm::radians(-90.0f), 0.0f, 0.0f));
//glm::mat4 MVP = ProjectionMatrix * ViewMatrix * revertZ * ModelMatrix;
glm::mat4 MVP = ProjectionMatrix * ViewMatrix * ModelMatrix;
// Send our transformation to the currently bound shader,
// in the "MVP" uniform
glUniformMatrix4fv(MatrixID, 1, GL_FALSE, &MVP[0][0]);
glUniformMatrix4fv(ModelMatrixID, 1, GL_FALSE, &ModelMatrix[0][0]);
glUniformMatrix4fv(ViewMatrixID, 1, GL_FALSE, &ViewMatrix[0][0]);
glm::vec3 lightPos = glm::vec3(4,4,4);
glUniform3f(LightID, lightPos.x, lightPos.y, lightPos.z);
// Bind our texture in Texture Unit 0
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, Texture);
// Set our "myTextureSampler" sampler to use Texture Unit 0
glUniform1i(TextureID, 0);
// 1rst attribute buffer : vertices
glEnableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer);
glVertexAttribPointer(
0, // attribute
3, // size
GL_FLOAT, // type
GL_FALSE, // normalized?
0, // stride
(void*)0 // array buffer offset
);
// 2nd attribute buffer : UVs
glEnableVertexAttribArray(1);
glBindBuffer(GL_ARRAY_BUFFER, uvbuffer);
glVertexAttribPointer(
1, // attribute
2, // size
GL_FLOAT, // type
GL_FALSE, // normalized?
0, // stride
(void*)0 // array buffer offset
);
// 3rd attribute buffer : normals
glEnableVertexAttribArray(2);
glBindBuffer(GL_ARRAY_BUFFER, normalbuffer);
glVertexAttribPointer(
2, // attribute
3, // size
GL_FLOAT, // type
GL_FALSE, // normalized?
0, // stride
(void*)0 // array buffer offset
);
// Index buffer
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, elementbuffer);
// Draw the triangles !
glDrawElements(
GL_TRIANGLES, // mode
indices.size(), // count
GL_UNSIGNED_INT, // type
(void*)0 // element array buffer offset
);
glDisableVertexAttribArray(0);
glDisableVertexAttribArray(1);
glDisableVertexAttribArray(2);
/*
// Render to the screen
glBindFramebuffer(GL_FRAMEBUFFER, 0);
// Render on the whole framebuffer, complete from the lower left corner to the upper right
glViewport(0,0,windowWidth,windowHeight);
// Clear the screen
glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// Use our shader
glUseProgram(quad_programID);
// Bind our texture in Texture Unit 0
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, renderedTexture);
//glBindTexture(GL_TEXTURE_2D, depthTexture);
// Set our "renderedTexture" sampler to use Texture Unit 0
glUniform1i(texID, 0);
glUniform1f(timeID, (float)(glfwGetTime()*10.0f) );
// 1rst attribute buffer : vertices
glEnableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, quad_vertexbuffer);
glVertexAttribPointer(
0, // attribute 0. No particular reason for 0, but must match the layout in the shader.
3, // size
GL_FLOAT, // type
GL_FALSE, // normalized?
0, // stride
(void*)0 // array buffer offset
);
// Draw the triangles !
glDrawArrays(GL_TRIANGLES, 0, 6); // 2*3 indices starting at 0 -> 2 triangles
glDisableVertexAttribArray(0);
*/
/*
if (false) {
char buffer[100];
//printf("before: %s\n", buffer);
sprintf(buffer, "/home/jerry/Pictures/%s_mist.png", filename);
//printf("after: %s\n", buffer);
//printf("file name is %s\n", filename);
//printf("saving screenshot to %s\n", buffer);
save_screenshot(buffer, windowWidth, windowHeight, renderedTexture);
}
*/
// Swap buffers
//glfwSwapBuffers(window);
//glfwPollEvents();
// Let's fetch them from the backbuffer
// We request the pixels in GL_BGR format, thanks to Berzeger for the tip
//glReadPixels((GLint)0, (GLint)0,
// (GLint)windowWidth, (GLint)windowHeight,
// GL_BGR, GL_UNSIGNED_SHORT, dataBuffer);
//glReadPixels((GLint)0, (GLint)0,
// (GLint)windowWidth, (GLint)windowHeight,
// GL_BGR, GL_FLOAT, dataBuffer);
//glGetTextureImage(renderedTexture, 0, GL_RGB, GL_UNSIGNED_SHORT, nSize*sizeof(unsigned short), dataBuffer);
// float* loc = dataBuffer + windowWidth*windowHeight * k;
// glGetTextureImage(renderedTexture, 0, GL_BLUE, GL_FLOAT,
// (nSize/3)*sizeof(float), loc);
// Map the OpenGL texture buffer to CUDA memory space
checkCudaErrors(cudaGraphicsMapResources(1, &resource));
cudaArray_t writeArray;
checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&writeArray, resource, 0, 0));
// Copy the blue channel of the texture to the appropriate part of the cubemap that CUDA will use
fillBlue(cubeMapGpuBuffer, writeArray, windowWidth * windowHeight * k, windowWidth, windowHeight);
// Unmap the OpenGL texture so that it can be rewritten
checkCudaErrors(cudaGraphicsUnmapResources(1, &resource));
}
checkCudaErrors(cudaStreamSynchronize(0));
zmq::message_t reply (panoWidth*panoHeight*sizeof(float));
projectCubeMapToEquirectangular((float*)reply.data(), cubeMapGpuBuffer, d_cubeMapCoordToPanoCoord, cubeMapCoordToPanoCoord.size(), (size_t) nSize/3);
std::cout << "Render time: " << t.elapsed() << std::endl;
socket.send (reply);
//free(dataBuffer);
//free(dataBuffer_c);
}
else {
//Pinhole mode
// Render to our framebuffer
// Clear the screen
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// Clear the screen
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// Use our shader
glUseProgram(programID);
// Use our shader
glUseProgram(programID);
// Compute the MVP matrix from keyboard and mouse input
// computeMatricesFromInputs();
// computeMatricesFromFile(name_loc);
float fov = glm::radians(90.0f);
glm::mat4 ProjectionMatrix = glm::perspective(fov, 1.0f, 0.1f, 5000.0f);
// TODO: (hzyjerry) near & far are not verified, but accuracy seems to work well
glm::mat4 ViewMatrix = getView(viewMat, k);
//glm::mat4 ViewMatrix = getViewMatrix();
glm::mat4 viewMatPose = glm::inverse(ViewMatrix);
// Compute the MVP matrix from keyboard and mouse input
//computeMatricesFromInputs();
//computeMatricesFromFile(name_loc);
float fov = glm::radians(90.0f);
glm::mat4 ProjectionMatrix = glm::perspective(fov, 1.0f, 0.1f, 5000.0f); // near & far are not verified, but accuracy seems to work well
glm::mat4 ViewMatrix = getView(viewMat, 2);
glm::mat4 viewMatPose = glm::inverse(ViewMatrix);
glm::mat4 ModelMatrix = glm::mat4(1.0);
glm::mat4 MVP = ProjectionMatrix * ViewMatrix * ModelMatrix;
glm::mat4 ModelMatrix = glm::mat4(1.0);
// Send our transformation to the currently bound shader,
// in the "MVP" uniform
glUniformMatrix4fv(MatrixID, 1, GL_FALSE, &MVP[0][0]);
glUniformMatrix4fv(ModelMatrixID, 1, GL_FALSE, &ModelMatrix[0][0]);
glUniformMatrix4fv(ViewMatrixID, 1, GL_FALSE, &ViewMatrix[0][0]);
pose_idx ++;
glm::vec3 lightPos = glm::vec3(4,4,4);
glUniform3f(LightID, lightPos.x, lightPos.y, lightPos.z);
//glm::mat4 tempMat = getViewMatrix();
//debug_mat(tempMat, "csv");
// Bind our texture in Texture Unit 0
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, Texture);
// Set our "myTextureSampler" sampler to use Texture Unit 0
glUniform1i(TextureID, 0);
// glm::mat4 revertZ = glm::mat4();
// revertZ[2][2] = -1;
// glm::quat rotateZ_N90 = glm::quat(glm::vec3(0.0f, 0.0f, glm::radians(-90.0f)));
// glm::quat rotateX_90 = glm::quat(glm::vec3(glm::radians(-90.0f), 0.0f, 0.0f));
// 1rst attribute buffer : vertices
glEnableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer);
glVertexAttribPointer(
0, // attribute
3, // size
GL_FLOAT, // type
GL_FALSE, // normalized?
0, // stride
(void*)0 // array buffer offset
);
//glm::mat4 MVP = ProjectionMatrix * ViewMatrix * revertZ * ModelMatrix;
glm::mat4 MVP = ProjectionMatrix * ViewMatrix * ModelMatrix;
// 2nd attribute buffer : UVs
glEnableVertexAttribArray(1);
glBindBuffer(GL_ARRAY_BUFFER, uvbuffer);
glVertexAttribPointer(
1, // attribute
2, // size
GL_FLOAT, // type
GL_FALSE, // normalized?
0, // stride
(void*)0 // array buffer offset
);
// Send our transformation to the currently bound shader,
// in the "MVP" uniform
glUniformMatrix4fv(MatrixID, 1, GL_FALSE, &MVP[0][0]);
glUniformMatrix4fv(ModelMatrixID, 1, GL_FALSE, &ModelMatrix[0][0]);
glUniformMatrix4fv(ViewMatrixID, 1, GL_FALSE, &ViewMatrix[0][0]);
// 3rd attribute buffer : normals
glEnableVertexAttribArray(2);
glBindBuffer(GL_ARRAY_BUFFER, normalbuffer);
glVertexAttribPointer(
2, // attribute
3, // size
GL_FLOAT, // type
GL_FALSE, // normalized?
0, // stride
(void*)0 // array buffer offset
);
glm::vec3 lightPos = glm::vec3(4,4,4);
glUniform3f(LightID, lightPos.x, lightPos.y, lightPos.z);
// Index buffer
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, elementbuffer);
// Bind our texture in Texture Unit 0
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, Texture);
// Set our "myTextureSampler" sampler to use Texture Unit 0
glUniform1i(TextureID, 0);
// Draw the triangles !
glDrawElements(
GL_TRIANGLES, // mode
indices.size(), // count
GL_UNSIGNED_INT, // type
(void*)0 // element array buffer offset
);
// 1rst attribute buffer : vertices
glEnableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer);
glVertexAttribPointer(
0, // attribute
3, // size
GL_FLOAT, // type
GL_FALSE, // normalized?
0, // stride
(void*)0 // array buffer offset
);
glDisableVertexAttribArray(0);
glDisableVertexAttribArray(1);
glDisableVertexAttribArray(2);
zmq::message_t reply (windowWidth*windowHeight*sizeof(float));
float * reply_data_handle = (float*)reply.data();
glGetTextureImage(renderedTexture, 0, GL_BLUE, GL_FLOAT, windowWidth * windowHeight *sizeof(float), reply_data_handle);
std::cout << "Render time: " << t.elapsed() << std::endl;
float tmp;
for (int i = 0; i < windowHeight/2; i++)
for (int j = 0; j < windowWidth; j++) {
tmp = reply_data_handle[i * windowWidth + j];
reply_data_handle[i * windowWidth + j] = reply_data_handle[(windowHeight - 1 -i) * windowWidth + j];
reply_data_handle[(windowHeight - 1 -i) * windowWidth + j] = tmp;
}
socket.send (reply);
// 2nd attribute buffer : UVs
glEnableVertexAttribArray(1);
glBindBuffer(GL_ARRAY_BUFFER, uvbuffer);
glVertexAttribPointer(
1, // attribute
2, // size
GL_FLOAT, // type
GL_FALSE, // normalized?
0, // stride
(void*)0 // array buffer offset
);
// 3rd attribute buffer : normals
glEnableVertexAttribArray(2);
glBindBuffer(GL_ARRAY_BUFFER, normalbuffer);
glVertexAttribPointer(
2, // attribute
3, // size
GL_FLOAT, // type
GL_FALSE, // normalized?
0, // stride
(void*)0 // array buffer offset
);
// Index buffer
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, elementbuffer);
// Draw the triangles !
glDrawElements(
GL_TRIANGLES, // mode
indices.size(), // count
GL_UNSIGNED_INT, // type
(void*)0 // element array buffer offset
);
glDisableVertexAttribArray(0);
glDisableVertexAttribArray(1);
glDisableVertexAttribArray(2);
/*
// Render to the screen
glBindFramebuffer(GL_FRAMEBUFFER, 0);
// Render on the whole framebuffer, complete from the lower left corner to the upper right
glViewport(0,0,windowWidth,windowHeight);
// Clear the screen
glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// Use our shader
glUseProgram(quad_programID);
// Bind our texture in Texture Unit 0
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, renderedTexture);
//glBindTexture(GL_TEXTURE_2D, depthTexture);
// Set our "renderedTexture" sampler to use Texture Unit 0
glUniform1i(texID, 0);
glUniform1f(timeID, (float)(glfwGetTime()*10.0f) );
// 1rst attribute buffer : vertices
glEnableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, quad_vertexbuffer);
glVertexAttribPointer(
0, // attribute 0. No particular reason for 0, but must match the layout in the shader.
3, // size
GL_FLOAT, // type
GL_FALSE, // normalized?
0, // stride
(void*)0 // array buffer offset
);
// Draw the triangles !
glDrawArrays(GL_TRIANGLES, 0, 6); // 2*3 indices starting at 0 -> 2 triangles
glDisableVertexAttribArray(0);
*/
/*
if (false) {
char buffer[100];
//printf("before: %s\n", buffer);
sprintf(buffer, "/home/jerry/Pictures/%s_mist.png", filename);
//printf("after: %s\n", buffer);
//printf("file name is %s\n", filename);
//printf("saving screenshot to %s\n", buffer);
save_screenshot(buffer, windowWidth, windowHeight, renderedTexture);
}
*/
// Swap buffers
//glfwSwapBuffers(window);
//glfwPollEvents();
// Let's fetch them from the backbuffer
// We request the pixels in GL_BGR format, thanks to Berzeger for the tip
//glReadPixels((GLint)0, (GLint)0,
// (GLint)windowWidth, (GLint)windowHeight,
// GL_BGR, GL_UNSIGNED_SHORT, dataBuffer);
//glReadPixels((GLint)0, (GLint)0,
// (GLint)windowWidth, (GLint)windowHeight,
// GL_BGR, GL_FLOAT, dataBuffer);
//glGetTextureImage(renderedTexture, 0, GL_RGB, GL_UNSIGNED_SHORT, nSize*sizeof(unsigned short), dataBuffer);
// float* loc = dataBuffer + windowWidth*windowHeight * k;
// glGetTextureImage(renderedTexture, 0, GL_BLUE, GL_FLOAT,
// (nSize/3)*sizeof(float), loc);
// Map the OpenGL texture buffer to CUDA memory space
checkCudaErrors(cudaGraphicsMapResources(1, &resource));
cudaArray_t writeArray;
checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&writeArray, resource, 0, 0));
// Copy the blue channel of the texture to the appropriate part of the cubemap that CUDA will use
fillBlue(cubeMapGpuBuffer, writeArray, windowWidth * windowHeight * k, windowWidth, windowHeight);
// Unmap the OpenGL texture so that it can be rewritten
checkCudaErrors(cudaGraphicsUnmapResources(1, &resource));
}
checkCudaErrors(cudaStreamSynchronize(0));
zmq::message_t reply (panoWidth*panoHeight*sizeof(float));
projectCubeMapToEquirectangular((float*)reply.data(), cubeMapGpuBuffer, d_cubeMapCoordToPanoCoord, cubeMapCoordToPanoCoord.size(), (size_t) nSize/3);
std::cout << "Render time: " << t.elapsed() << std::endl;
socket.send (reply);
free(dataBuffer);
free(dataBuffer_c);
//free(dataBuffer);
//free(dataBuffer_c);
}

View File

@ -26,6 +26,7 @@ const int BLOCK_ROWS = 8;
#endif
const bool pano = false;
__global__ void copy_mem(unsigned char *source, unsigned char *render)
{
@ -114,6 +115,7 @@ __global__ void merge(unsigned char * img_all, unsigned char * img, float * sele
for (idx = 0; idx < n; idx ++) {
weight = selection[idx * stride + ((y+j)*width + x)] / (sum + 1e-4);
//weight = 0.25;
img[3*((y+j)*width + x)] += (unsigned char) (img_all[idx * stride * 3 + 3*((y+j)*width + x)] * weight);
img[3*((y+j)*width + x)+1] += (unsigned char) (img_all[idx * stride * 3 + 3*((y+j)*width + x) + 1] * weight);
@ -184,9 +186,17 @@ __global__ void transform2d(float *points3d_after)
float y = points3d_after[(ih * w + iw) * 3 + 1];
float z = points3d_after[(ih * w + iw) * 3 + 2];
points3d_after[(ih * w + iw) * 3 + 0] = sqrt(x * x + y * y + z * z);
points3d_after[(ih * w + iw) * 3 + 1] = atan2(y, x);
points3d_after[(ih * w + iw) * 3 + 2] = atan2(sqrt(x * x + y * y), z);
points3d_after[(ih * w + iw) * 3 + 0] = sqrt(x * x + y * y + z * z);
//points3d_after[(ih * w + iw) * 3 + 1] = atan2(y, x);
//points3d_after[(ih * w + iw) * 3 + 2] = atan2(sqrt(x * x + y * y), z);
if ((x > 0) && (y < x) && (y > -x) && (z < x) && (z > -x)) {
points3d_after[(ih * w + iw) * 3 + 1] = y / (x + 1e-5);
points3d_after[(ih * w + iw) * 3 + 2] = -z / (x + 1e-5);
}
else {
points3d_after[(ih * w + iw) * 3 + 1] = 0;
points3d_after[(ih * w + iw) * 3 + 2] = 0;
}
}
}
@ -263,7 +273,8 @@ __global__ void fill_with_average(unsigned char *img, int * nz, int * average, i
__global__ void render_final(float *points3d_polar, float * selection, float * depth_render, int * img, int * render, int s)
__global__ void render_final(float *points3d_polar, float * selection, float * depth_render, int * img, int * render, int oh, int ow)
{
int x = blockIdx.x * TILE_DIM + threadIdx.x;
int y = blockIdx.y * TILE_DIM + threadIdx.y;
@ -272,25 +283,32 @@ __global__ void render_final(float *points3d_polar, float * selection, float * d
for (int j = 0; j < TILE_DIM; j+= BLOCK_ROWS)
{
int iw = x;
int ih = y + j;
int tx = round((points3d_polar[(ih * w + iw) * 3 + 1] + M_PI)/(2*M_PI) * w * s - 0.5);
int ty = round((points3d_polar[(ih * w + iw) * 3 + 2])/M_PI * h * s - 0.5);
//int tx = round((points3d_polar[(ih * w + iw) * 3 + 1] + 1)/2 * ow - 0.5);
//int ty = round((points3d_polar[(ih * w + iw) * 3 + 2] + 1)/2 * oh - 0.5);
float tx_offset = ((points3d_polar[(ih * w + iw) * 3 + 1] + M_PI)/(2*M_PI) * w * s - 0.5);
float ty_offset = ((points3d_polar[(ih * w + iw) * 3 + 2])/M_PI * h * s - 0.5);
int tx = round((points3d_polar[(ih * w + iw) * 3 + 1] + 1)/2 * ow - 0.5);
int ty = round((points3d_polar[(ih * w + iw) * 3 + 2] + 1)/2 * oh - 0.5);
float tx_offset = ((points3d_polar[(ih * w + iw) * 3 + 1] + 1)/2 * ow - 0.5);
float ty_offset = ((points3d_polar[(ih * w + iw) * 3 + 2] + 1)/2 * oh - 0.5);
float tx00 = 0;
float ty00 = 0;
float tx01 = ((points3d_polar[(ih * w + iw + 1) * 3 + 1] + M_PI)/(2*M_PI) * w * s - 0.5) - tx_offset;
float ty01 = ((points3d_polar[(ih * w + iw + 1) * 3 + 2])/M_PI * h * s - 0.5) - ty_offset;
float tx01 = ((points3d_polar[(ih * w + iw + 1) * 3 + 1] + 1)/2 * ow - 0.5) - tx_offset;
float ty01 = ((points3d_polar[(ih * w + iw + 1) * 3 + 2] + 1)/2 * oh - 0.5) - ty_offset;
float tx10 = ((points3d_polar[((ih + 1) * w + iw) * 3 + 1] + M_PI)/(2*M_PI) * w * s - 0.5) - tx_offset;
float ty10 = ((points3d_polar[((ih + 1) * w + iw) * 3 + 2])/M_PI * h * s - 0.5) - ty_offset;
float tx10 = ((points3d_polar[((ih + 1) * w + iw) * 3 + 1] + 1)/2 * ow - 0.5) - tx_offset;
float ty10 = ((points3d_polar[((ih + 1) * w + iw) * 3 + 2] + 1)/2 * oh - 0.5) - ty_offset;
float tx11 = ((points3d_polar[((ih+1) * w + iw + 1) * 3 + 1] + M_PI)/(2*M_PI) * w * s - 0.5) - tx_offset;
float ty11 = ((points3d_polar[((ih+1) * w + iw + 1) * 3 + 2])/M_PI * h * s - 0.5) - ty_offset;
float tx11 = ((points3d_polar[((ih+1) * w + iw + 1) * 3 + 1] + 1)/2 * ow - 0.5) - tx_offset;
float ty11 = ((points3d_polar[((ih+1) * w + iw + 1) * 3 + 2] + 1)/2 * oh - 0.5) - ty_offset;
float t00 = 0 * (float)tx00 + (float)tx01 * -1.0/3 + (float)tx10 * 2.0/3 + (float)tx11 * 1.0/3;
float t01 = 0 * (float)ty00 + (float)ty01 * -1.0/3 + (float)ty10 * 2.0/3 + (float)ty11 * 1.0/3;
@ -311,10 +329,10 @@ __global__ void render_final(float *points3d_polar, float * selection, float * d
//printf("inverse %f %f %f %f\n", it00, it01, it10, it11);
int this_depth = (int)(12800/128 * points3d_polar[(ih * w + iw) * 3 + 0]);
int delta00 = (int)(12800/128 * points3d_polar[(ih * w + iw) * 3 + 0]) - (int)(100 * depth_render[(ty * w * s + tx)]);
int delta01 = (int)(12800/128 * points3d_polar[(ih * w + iw + 1) * 3 + 0]) - (int)(100 * depth_render[(ty * w * s + tx + 1)]);
int delta10 = (int)(12800/128 * points3d_polar[((ih + 1) * w + iw) * 3 + 0]) - (int)(100 * depth_render[((ty+1) * w * s + tx)]);
int delta11 = (int)(12800/128 * points3d_polar[((ih+1) * w + iw + 1) * 3 + 0]) - (int)(100 * depth_render[((ty+1) * w * s + tx + 1)]);
int delta00 = (int)(12800/128 * points3d_polar[(ih * w + iw) * 3 + 0]) - (int)(100 * depth_render[(ty * ow + tx)]);
int delta01 = (int)(12800/128 * points3d_polar[(ih * w + iw + 1) * 3 + 0]) - (int)(100 * depth_render[(ty * ow + tx + 1)]);
int delta10 = (int)(12800/128 * points3d_polar[((ih + 1) * w + iw) * 3 + 0]) - (int)(100 * depth_render[((ty+1) * ow + tx)]);
int delta11 = (int)(12800/128 * points3d_polar[((ih+1) * w + iw + 1) * 3 + 0]) - (int)(100 * depth_render[((ty+1) * ow + tx + 1)]);
int mindelta = min(min(delta00, delta01), min(delta10, delta11));
int maxdelta = max(max(delta00, delta01), max(delta10, delta11));
@ -329,9 +347,12 @@ __global__ void render_final(float *points3d_polar, float * selection, float * d
int r,g,b;
int itx, ity;
//render[(ty * ow + tx)] = img[ih * w + iw];
if ((y > h/8) && (y < (h*7)/8))
if ((mindelta > -10) && (maxdelta < 10) && (this_depth < 10000)) {
if ((txmax - txmin) * (tymax - tymin) < 100 * s * s)
if ((txmax - txmin) * (tymax - tymin) < 500)
{
for (itx = txmin; itx < txmax; itx ++)
for (ity = tymin; ity < tymax; ity ++)
@ -355,36 +376,40 @@ __global__ void render_final(float *points3d_polar, float * selection, float * d
if (r > 255) r = 255;
if (g > 255) g = 255;
if (b > 255) b = 255;
if ((ity > 0) && (ity < h * s) && (itx > 0) && (ity < w * s)) {
render[(ity * w * s + itx)] = r * 256 * 256 + g * 256 + b;
selection[(ity * w * s + itx)] = 1.0 / abs(det);
}
//printf("%f\n", selection[(ity * w * s + itx)]);
render[(ity * ow + itx)] = r * 256 * 256 + g * 256 + b;
selection[(ity * ow + itx)] = 1.0 / abs(det);
}
}
}
}
}
}
extern "C"{
void render(int n, int h,int w, int s, unsigned char * img, float * depth,float * pose, unsigned char * render, float * depth_render){
void render(int n, int h,int w, int oh, int ow, unsigned char * img, float * depth,float * pose, unsigned char * render, float * depth_render){
//int ih, iw, i, ic;
//printf("inside cuda code %d\n", depth);
printf("scale %d\n", s);
const int nx = w/s;
const int ny = h/s;
const size_t depth_mem_size = nx*ny*sizeof(float);
const size_t frame_mem_size = nx*ny*sizeof(unsigned char) * 3;
//printf("scale %d\n", s);
const int nx = w;
const int ny = h;
const size_t render_mem_size = nx * ny * s * s;
const int onx = ow;
const int ony = oh;
const size_t input_mem_size = nx*ny;
const size_t output_mem_size = onx * ony;
dim3 dimGrid(nx/TILE_DIM, ny/TILE_DIM, 1);
dim3 dimGrid2(nx * s/TILE_DIM, ny * s/TILE_DIM, 1);
dim3 dimGrid_out(onx/TILE_DIM, ony/TILE_DIM, 1);
dim3 dimBlock(TILE_DIM, BLOCK_ROWS, 1);
unsigned char *d_img, *d_render, *d_render_all;
@ -399,42 +424,46 @@ void render(int n, int h,int w, int s, unsigned char * img, float * depth,float
int *d_render2, *d_img2;
cudaMalloc((void **)&d_img, frame_mem_size);
cudaMalloc((void **)&d_render, render_mem_size * sizeof(unsigned char) * 3);
cudaMalloc((void **)&d_render_all, render_mem_size * sizeof(unsigned char) * 3 * n);
cudaMalloc((void **)&d_depth, depth_mem_size);
cudaMalloc((void **)&d_depth_render, render_mem_size * sizeof(float));
cudaMalloc((void **)&d_3dpoint, depth_mem_size * 4);
cudaMalloc((void **)&d_3dpoint_after, depth_mem_size * 4);
cudaMalloc((void **)&d_img, input_mem_size * sizeof(unsigned char) * 3);
cudaMalloc((void **)&d_img2, input_mem_size * sizeof(int));
cudaMalloc((void **)&d_render, output_mem_size * sizeof(unsigned char) * 3);
cudaMalloc((void **)&d_render_all, output_mem_size * sizeof(unsigned char) * 3 * n);
cudaMalloc((void **)&d_depth, input_mem_size * sizeof(float));
cudaMalloc((void **)&d_depth_render, output_mem_size * sizeof(float));
cudaMalloc((void **)&d_3dpoint, input_mem_size * sizeof(float) * 4);
cudaMalloc((void **)&d_3dpoint_after, input_mem_size * sizeof(float) * 4);
cudaMalloc((void **)&d_pose, sizeof(float) * 16);
cudaMalloc((void **)&d_render2, render_mem_size * sizeof(int));
cudaMalloc((void **)&d_img2, render_mem_size * sizeof(int));
cudaMalloc((void **)&d_selection, render_mem_size * sizeof(float) * n);
cudaMalloc((void **)&d_selection, output_mem_size * sizeof(float) * n);
cudaMalloc((void **)&d_render2, output_mem_size * sizeof(int));
cudaMalloc((void **)&nz, render_mem_size * sizeof(int));
cudaMalloc((void **)&average, render_mem_size * sizeof(int) * 3);
cudaMemcpy(d_depth_render, depth_render, render_mem_size * sizeof(float), cudaMemcpyHostToDevice);
cudaMemset(d_render_all, 0, render_mem_size * sizeof(unsigned char) * 3 * n);
cudaMemset(d_selection, 0, render_mem_size * sizeof(float) * n);
cudaMalloc((void **)&nz, output_mem_size * sizeof(int));
cudaMalloc((void **)&average, output_mem_size * sizeof(int) * 3);
cudaMemset(nz, 0, render_mem_size * sizeof(int));
cudaMemset(average, 0, render_mem_size * sizeof(int) * 3);
cudaMemset(nz, 0, output_mem_size * sizeof(int));
cudaMemset(average, 0, output_mem_size * sizeof(int) * 3);
cudaMemset(d_selection, 0, output_mem_size * sizeof(float) * n);
cudaMemcpy(d_depth_render, depth_render, output_mem_size * sizeof(float), cudaMemcpyHostToDevice);
cudaMemset(d_render_all, 0, output_mem_size * sizeof(unsigned char) * 3 * n);
int idx;
for (idx = 0; idx < n; idx ++) {
cudaMemcpy(d_pose, &(pose[idx * 16]), sizeof(float) * 16, cudaMemcpyHostToDevice);
cudaMemcpy(d_img, &(img[idx * nx * ny * 3]), frame_mem_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_depth, &(depth[idx * nx * ny]), depth_mem_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_img, &(img[idx * input_mem_size * 3]), input_mem_size * sizeof(unsigned char) * 3, cudaMemcpyHostToDevice);
cudaMemcpy(d_depth, &(depth[idx * input_mem_size]), input_mem_size * sizeof(float), cudaMemcpyHostToDevice);
cudaMemset(d_render, 0, render_mem_size * sizeof(unsigned char) * 3);
cudaMemset(d_render2, 0, render_mem_size * sizeof(int));
cudaMemset(d_render, 0, output_mem_size * sizeof(unsigned char) * 3);
cudaMemset(d_render2, 0, output_mem_size * sizeof(int));
cudaMemset(d_img2, 0, nx * ny * sizeof(int));
cudaMemset(d_3dpoint, 0, depth_mem_size * 4);
cudaMemset(d_3dpoint_after, 0, depth_mem_size * 4);
cudaMemset(d_img2, 0, input_mem_size * sizeof(int));
cudaMemset(d_3dpoint, 0, input_mem_size * sizeof(float) * 4);
cudaMemset(d_3dpoint_after, 0, input_mem_size * sizeof(float) * 4);
to3d_point<<< dimGrid, dimBlock >>>(d_depth, d_3dpoint);
transform<<< dimGrid, dimBlock >>>(d_3dpoint_after, d_3dpoint, d_pose);
@ -442,32 +471,31 @@ void render(int n, int h,int w, int s, unsigned char * img, float * depth,float
char_to_int <<< dimGrid, dimBlock >>> (d_img2, d_img);
render_final <<< dimGrid, dimBlock >>> (d_3dpoint_after, &(d_selection[idx * nx * ny * s * s]), d_depth_render, d_img2, d_render2, s);
render_final <<< dimGrid, dimBlock >>> (d_3dpoint_after, &(d_selection[idx * onx * ony]), d_depth_render, d_img2, d_render2, oh, ow);
//int_to_char <<< dimGrid2, dimBlock >>> (d_render2, d_render);
int_to_char <<< dimGrid2, dimBlock >>> (d_render2, &(d_render_all[idx * nx * ny * s * s * 3]));
//int_to_char <<< dimGrid_out, dimBlock >>> (d_render2, d_render);
int_to_char <<< dimGrid_out, dimBlock >>> (d_render2, &(d_render_all[idx * output_mem_size * 3]));
//fill <<< dimGrid2, dimBlock >>> (&(d_render_all[idx * nx * ny * s * s * 3]));
}
merge <<< dimGrid2, dimBlock >>> (d_render_all, d_render, d_selection, n, nx * ny * s * s);
merge <<< dimGrid_out, dimBlock >>> (d_render_all, d_render, d_selection, n, output_mem_size);
int fill_size[10] = {3, 5, 10, 20, 50, 75, 100, 200, 600, 1024};
int fill_size[10] = {3, 5, 10, 20, 50, 75, 100, 200, 400, 768};
//int fill_size[8] = {3, 5, 10, 20, 50, 100, 200};
for (int j = 0; j < 10; j++) {
cudaMemset(nz, 0, render_mem_size * sizeof(int));
cudaMemset(average, 0, render_mem_size * sizeof(int) * 3);
get_average <<< dimGrid2, dimBlock >>> (d_render, nz, average, fill_size[j]);
fill_with_average <<< dimGrid2, dimBlock >>> (d_render, nz, average, fill_size[j]);
cudaMemset(nz, 0, output_mem_size * sizeof(int));
cudaMemset(average, 0, output_mem_size * sizeof(int) * 3);
get_average <<< dimGrid_out, dimBlock >>> (d_render, nz, average, fill_size[j]);
fill_with_average <<< dimGrid_out, dimBlock >>> (d_render, nz, average, fill_size[j]);
}
/*
cudaMemset(nz, 0, render_mem_size * sizeof(int));
cudaMemset(average, 0, render_mem_size * sizeof(int) * 3);
get_average <<< dimGrid2, dimBlock >>> (d_render, nz, average, 3);
fill_with_average <<< dimGrid2, dimBlock >>> (d_render, nz, average, 3);
cudaMemset(nz, 0, output_mem_size * sizeof(int));
cudaMemset(average, 0, output_mem_size * sizeof(int) * 3);
get_average <<< dimGrid_out, dimBlock >>> (d_render, nz, average, 3);
fill_with_average <<< dimGrid_out, dimBlock >>> (d_render, nz, average, 3);
*/
cudaMemcpy(render, d_render, render_mem_size * sizeof(unsigned char) * 3 , cudaMemcpyDeviceToHost);
cudaMemcpy(render, d_render, output_mem_size * sizeof(unsigned char) * 3 , cudaMemcpyDeviceToHost);
cudaFree(d_img);
cudaFree(d_depth);

View File

@ -20,6 +20,8 @@ from multiprocessing.dummy import Process
from realenv.data.datasets import ViewDataSet3D
from realenv.core.render.completion import CompletionNet
from realenv.learn.completion2 import CompletionNet2
import torch.nn as nn
file_dir = os.path.dirname(os.path.abspath(__file__))
@ -82,6 +84,20 @@ class PCRenderer:
self.scale_up = scale_up
self.show = np.zeros((768, 768, 3),dtype='uint8')
self.show_rgb = np.zeros((768, 768 ,3),dtype='uint8')
comp = CompletionNet2(norm = nn.BatchNorm2d, nf = 24)
comp = torch.nn.DataParallel(comp).cuda()
comp.load_state_dict(torch.load(os.path.join(file_dir, "model.pth")))
self.model = comp.module
self.model.eval()
self.imgv = Variable(torch.zeros(1, 3 , 768, 768), volatile = True).cuda()
self.maskv = Variable(torch.zeros(1,2, 768, 768), volatile = True).cuda()
def _onmouse(self, *args):
if args[0] == cv2.EVENT_LBUTTONDOWN:
self.org_pitch, self.org_yaw, self.org_x, self.org_y, self.org_z =\
@ -200,45 +216,62 @@ class PCRenderer:
h = wo/3
w = 2*h
n = ho/3
opengl_arr = np.frombuffer(message, dtype=np.float32).reshape((h, w))
pano = False
if pano:
opengl_arr = np.frombuffer(message, dtype=np.float32).reshape((h, w))
else:
opengl_arr = np.frombuffer(message, dtype=np.float32).reshape((n, n))
def _render_depth(opengl_arr):
#with Profiler("Render Depth"):
cv2.imshow('target depth', opengl_arr/16.)
def _render_pc(opengl_arr):
#with Profiler("Render pointcloud"):
poses_after = [
pose.dot(np.linalg.inv(poses[i])).astype(np.float32)
for i in range(len(imgs))]
with Profiler("CUDA PC rendering"):
with Profiler("Render pointcloud cuda"):
poses_after = [
pose.dot(np.linalg.inv(poses[i])).astype(np.float32)
for i in range(len(imgs))]
#opengl_arr = np.zeros((h,w), dtype = np.float32)
cuda_pc.render(ct.c_int(len(imgs)),
ct.c_int(imgs[0].shape[0]),
ct.c_int(imgs[0].shape[1]),
ct.c_int(self.scale_up),
imgs.ctypes.data_as(ct.c_void_p),
depths.ctypes.data_as(ct.c_void_p),
np.asarray(poses_after, dtype = np.float32).ctypes.data_as(ct.c_void_p),
show.ctypes.data_as(ct.c_void_p),
opengl_arr.ctypes.data_as(ct.c_void_p)
)
ct.c_int(imgs[0].shape[0]),
ct.c_int(imgs[0].shape[1]),
ct.c_int(768),
ct.c_int(768),
imgs.ctypes.data_as(ct.c_void_p),
depths.ctypes.data_as(ct.c_void_p),
np.asarray(poses_after, dtype = np.float32).ctypes.data_as(ct.c_void_p),
show.ctypes.data_as(ct.c_void_p),
opengl_arr.ctypes.data_as(ct.c_void_p)
)
threads = [
Process(target=_render_pc, args=(opengl_arr,)),
Process(target=_render_depth, args=(opengl_arr,))]
[t.start() for t in threads]
[t.join() for t in threads]
if model:
if self.model:
tf = transforms.ToTensor()
#from IPython import embed; embed()
before = time.time()
source = tf(show)
source_depth = tf(np.expand_dims(target_depth, 2).astype(np.float32)/65536 * 255)
imgv.data.copy_(source)
maskv.data.copy_(source_depth)
mask = (torch.sum(source[:3,:,:],0)>0).float().unsqueeze(0)
source_depth = tf(np.expand_dims(opengl_arr, 2).astype(np.float32)/128.0 * 255)
print(mask.size(), source_depth.size())
mask = torch.cat([source_depth, mask], 0)
self.imgv.data.copy_(source)
self.maskv.data.copy_(mask)
print('Transfer time', time.time() - before)
before = time.time()
recon = model(imgv, maskv)
recon = model(self.imgv, self.maskv)
print('NNtime:', time.time() - before)
before = time.time()
show2 = recon.data.cpu().numpy()[0].transpose(1,2,0)
@ -311,7 +344,7 @@ class PCRenderer:
t1 = time.time()
t = t1-t0
self.fps = 1/t
cv2.putText(self.show_rgb,'pitch %.3f yaw %.2f roll %.3f x %.2f y %.2f z %.2f'%(self.pitch, self.yaw, self.roll, self.x, self.y, self.z),(15,self.showsz-15),0,0.5,(255,255,255))
cv2.putText(self.show_rgb,'pitch %.3f yaw %.2f roll %.3f x %.2f y %.2f z %.2f'%(self.pitch, self.yaw, self.roll, self.x, self.y, self.z),(15,768-15),0,0.5,(255,255,255))
cv2.putText(self.show_rgb,'fps %.1f'%(self.fps),(15,15),0,0.5,(255,255,255))
cv2.imshow('show3d',self.show_rgb)

View File

@ -52,6 +52,7 @@ def get_model_initial_pose(robot):
if MODEL_ID == "11HB6XZSh1Q":
return [0, 0, 3 * 3.14/2], [-3.38, -7, 1.4] ## living room open area
#return [0, 0, 3 * 3.14/2], [-5, -5, 1.9] ## living room kitchen table
if MODEL_ID == "BbxejD15Etk":
return [0, 0, 3 * 3.14/2], [-6.76, -12, 1.4] ## Gates Huang
else: