diff --git a/build.sh b/build.sh index 030d9b0ee..4d74e4119 100755 --- a/build.sh +++ b/build.sh @@ -160,6 +160,13 @@ download_data () { wget --quiet https://www.dropbox.com/s/msd32wg144eew5r/coord.npy cd - fi + + if [ ! -f ./realenv/core/render/model.pth ]; then + cd ./realenv/core/render/ + wget --quiet https://www.dropbox.com/s/e7far9okgv7oq8p/model.pth + cd - + fi + if [ -f realenv/data/*.pkl ]; then rm realenv/data/*.pkl @@ -209,4 +216,4 @@ case "$subcommand" in default "$@" exit 1 ;; -esac \ No newline at end of file +esac diff --git a/examples/agents/random_humanoid.py b/examples/agents/random_humanoid.py index 85ccc6afd..d9ceeec7a 100644 --- a/examples/agents/random_humanoid.py +++ b/examples/agents/random_humanoid.py @@ -62,7 +62,7 @@ if __name__ == '__main__': if not done: continue if restart_delay==0: print("score=%0.2f in %i frames" % (score, frame)) - restart_delay = 20 # 2 sec at 60 fps + restart_delay = 40 # 2 sec at 60 fps else: restart_delay -= 1 if restart_delay==0: break @@ -70,4 +70,4 @@ if __name__ == '__main__': except KeyboardInterrupt: env._end() - print("Program finished") \ No newline at end of file + print("Program finished") diff --git a/realenv/core/channels/depth_render/render.cpp b/realenv/core/channels/depth_render/render.cpp index d97d095cb..1e25c4a55 100755 --- a/realenv/core/channels/depth_render/render.cpp +++ b/realenv/core/channels/depth_render/render.cpp @@ -542,7 +542,6 @@ int main( int argc, char * argv[] ) int i = 0; - zmq::context_t context (1); zmq::socket_t socket (context, ZMQ_REP); socket.bind ("tcp://127.0.0.1:5555"); @@ -628,207 +627,329 @@ int main( int argc, char * argv[] ) // create buffer, 3 channels per Pixel float* dataBuffer = (float*)malloc(nByte); + // First let's create our buffer, 3 channels per Pixel + //float* dataBuffer = (float*)malloc(nByte); //char* dataBuffer = (char*)malloc(nSize*sizeof(char)); - float * dataBuffer_c = (float * ) malloc(windowWidth*windowHeight * sizeof(float)); - if (!dataBuffer) return false; - if (!dataBuffer_c) return false; + //float * dataBuffer_c = (float * ) malloc(windowWidth*windowHeight * sizeof(float)); + //if (!dataBuffer) return false; + //if (!dataBuffer_c) return false; - - for (int k = 0; k < 6; k ++ ) + bool pano = False; + + if (pano) { + + + for (int k = 0; k < 6; k ++ ) + { + // Render to our framebuffer + + // Clear the screen + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + // Use our shader + glUseProgram(programID); + + // Compute the MVP matrix from keyboard and mouse input + //computeMatricesFromInputs(); + //computeMatricesFromFile(name_loc); + float fov = glm::radians(90.0f); + glm::mat4 ProjectionMatrix = glm::perspective(fov, 1.0f, 0.1f, 5000.0f); // near & far are not verified, but accuracy seems to work well + glm::mat4 ViewMatrix = getView(viewMat, k); + //glm::mat4 ViewMatrix = getViewMatrix(); + glm::mat4 viewMatPose = glm::inverse(ViewMatrix); + // printf("View (pose) matrix for skybox %d\n", k); + // for (int i = 0; i < 4; ++i) { + // printf("\t %f %f %f %f\n", viewMatPose[0][i], viewMatPose[1][i], viewMatPose[2][i], viewMatPose[3][i]); + // //printf("\t %f %f %f %f\n", ViewMatrix[0][i], ViewMatrix[1][i], ViewMatrix[2][i], ViewMatrix[3][i]); + // } + + glm::mat4 ModelMatrix = glm::mat4(1.0); + + pose_idx ++; + + //glm::mat4 tempMat = getViewMatrix(); + //debug_mat(tempMat, "csv"); + + // glm::mat4 revertZ = glm::mat4(); + // revertZ[2][2] = -1; + // glm::quat rotateZ_N90 = glm::quat(glm::vec3(0.0f, 0.0f, glm::radians(-90.0f))); + // glm::quat rotateX_90 = glm::quat(glm::vec3(glm::radians(-90.0f), 0.0f, 0.0f)); + + //glm::mat4 MVP = ProjectionMatrix * ViewMatrix * revertZ * ModelMatrix; + glm::mat4 MVP = ProjectionMatrix * ViewMatrix * ModelMatrix; + + // Send our transformation to the currently bound shader, + // in the "MVP" uniform + glUniformMatrix4fv(MatrixID, 1, GL_FALSE, &MVP[0][0]); + glUniformMatrix4fv(ModelMatrixID, 1, GL_FALSE, &ModelMatrix[0][0]); + glUniformMatrix4fv(ViewMatrixID, 1, GL_FALSE, &ViewMatrix[0][0]); + + glm::vec3 lightPos = glm::vec3(4,4,4); + glUniform3f(LightID, lightPos.x, lightPos.y, lightPos.z); + + // Bind our texture in Texture Unit 0 + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, Texture); + // Set our "myTextureSampler" sampler to use Texture Unit 0 + glUniform1i(TextureID, 0); + + // 1rst attribute buffer : vertices + glEnableVertexAttribArray(0); + glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer); + glVertexAttribPointer( + 0, // attribute + 3, // size + GL_FLOAT, // type + GL_FALSE, // normalized? + 0, // stride + (void*)0 // array buffer offset + ); + + // 2nd attribute buffer : UVs + glEnableVertexAttribArray(1); + glBindBuffer(GL_ARRAY_BUFFER, uvbuffer); + glVertexAttribPointer( + 1, // attribute + 2, // size + GL_FLOAT, // type + GL_FALSE, // normalized? + 0, // stride + (void*)0 // array buffer offset + ); + + // 3rd attribute buffer : normals + glEnableVertexAttribArray(2); + glBindBuffer(GL_ARRAY_BUFFER, normalbuffer); + glVertexAttribPointer( + 2, // attribute + 3, // size + GL_FLOAT, // type + GL_FALSE, // normalized? + 0, // stride + (void*)0 // array buffer offset + ); + + // Index buffer + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, elementbuffer); + + // Draw the triangles ! + glDrawElements( + GL_TRIANGLES, // mode + indices.size(), // count + GL_UNSIGNED_INT, // type + (void*)0 // element array buffer offset + ); + + glDisableVertexAttribArray(0); + glDisableVertexAttribArray(1); + glDisableVertexAttribArray(2); + + /* + // Render to the screen + glBindFramebuffer(GL_FRAMEBUFFER, 0); + // Render on the whole framebuffer, complete from the lower left corner to the upper right + glViewport(0,0,windowWidth,windowHeight); + + // Clear the screen + glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + // Use our shader + glUseProgram(quad_programID); + + // Bind our texture in Texture Unit 0 + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, renderedTexture); + //glBindTexture(GL_TEXTURE_2D, depthTexture); + // Set our "renderedTexture" sampler to use Texture Unit 0 + glUniform1i(texID, 0); + + glUniform1f(timeID, (float)(glfwGetTime()*10.0f) ); + + // 1rst attribute buffer : vertices + glEnableVertexAttribArray(0); + glBindBuffer(GL_ARRAY_BUFFER, quad_vertexbuffer); + glVertexAttribPointer( + 0, // attribute 0. No particular reason for 0, but must match the layout in the shader. + 3, // size + GL_FLOAT, // type + GL_FALSE, // normalized? + 0, // stride + (void*)0 // array buffer offset + ); + + // Draw the triangles ! + glDrawArrays(GL_TRIANGLES, 0, 6); // 2*3 indices starting at 0 -> 2 triangles + + glDisableVertexAttribArray(0); + */ + + /* + if (false) { + char buffer[100]; + //printf("before: %s\n", buffer); + sprintf(buffer, "/home/jerry/Pictures/%s_mist.png", filename); + //printf("after: %s\n", buffer); + //printf("file name is %s\n", filename); + //printf("saving screenshot to %s\n", buffer); + save_screenshot(buffer, windowWidth, windowHeight, renderedTexture); + } + */ + + // Swap buffers + //glfwSwapBuffers(window); + //glfwPollEvents(); + + + // Let's fetch them from the backbuffer + // We request the pixels in GL_BGR format, thanks to Berzeger for the tip + + //glReadPixels((GLint)0, (GLint)0, + // (GLint)windowWidth, (GLint)windowHeight, + // GL_BGR, GL_UNSIGNED_SHORT, dataBuffer); + //glReadPixels((GLint)0, (GLint)0, + // (GLint)windowWidth, (GLint)windowHeight, + // GL_BGR, GL_FLOAT, dataBuffer); + + //glGetTextureImage(renderedTexture, 0, GL_RGB, GL_UNSIGNED_SHORT, nSize*sizeof(unsigned short), dataBuffer); + // float* loc = dataBuffer + windowWidth*windowHeight * k; + // glGetTextureImage(renderedTexture, 0, GL_BLUE, GL_FLOAT, + // (nSize/3)*sizeof(float), loc); + + // Map the OpenGL texture buffer to CUDA memory space + checkCudaErrors(cudaGraphicsMapResources(1, &resource)); + cudaArray_t writeArray; + checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&writeArray, resource, 0, 0)); + + // Copy the blue channel of the texture to the appropriate part of the cubemap that CUDA will use + fillBlue(cubeMapGpuBuffer, writeArray, windowWidth * windowHeight * k, windowWidth, windowHeight); + + // Unmap the OpenGL texture so that it can be rewritten + checkCudaErrors(cudaGraphicsUnmapResources(1, &resource)); + + + } + checkCudaErrors(cudaStreamSynchronize(0)); + zmq::message_t reply (panoWidth*panoHeight*sizeof(float)); + projectCubeMapToEquirectangular((float*)reply.data(), cubeMapGpuBuffer, d_cubeMapCoordToPanoCoord, cubeMapCoordToPanoCoord.size(), (size_t) nSize/3); + + std::cout << "Render time: " << t.elapsed() << std::endl; + socket.send (reply); + + //free(dataBuffer); + //free(dataBuffer_c); + } + else { + //Pinhole mode + // Render to our framebuffer - // Clear the screen - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + // Clear the screen + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - // Use our shader - glUseProgram(programID); + // Use our shader + glUseProgram(programID); - // Compute the MVP matrix from keyboard and mouse input - // computeMatricesFromInputs(); - // computeMatricesFromFile(name_loc); - float fov = glm::radians(90.0f); - glm::mat4 ProjectionMatrix = glm::perspective(fov, 1.0f, 0.1f, 5000.0f); - // TODO: (hzyjerry) near & far are not verified, but accuracy seems to work well - glm::mat4 ViewMatrix = getView(viewMat, k); - //glm::mat4 ViewMatrix = getViewMatrix(); - glm::mat4 viewMatPose = glm::inverse(ViewMatrix); + // Compute the MVP matrix from keyboard and mouse input + //computeMatricesFromInputs(); + //computeMatricesFromFile(name_loc); + float fov = glm::radians(90.0f); + glm::mat4 ProjectionMatrix = glm::perspective(fov, 1.0f, 0.1f, 5000.0f); // near & far are not verified, but accuracy seems to work well + glm::mat4 ViewMatrix = getView(viewMat, 2); + glm::mat4 viewMatPose = glm::inverse(ViewMatrix); + + glm::mat4 ModelMatrix = glm::mat4(1.0); + glm::mat4 MVP = ProjectionMatrix * ViewMatrix * ModelMatrix; - glm::mat4 ModelMatrix = glm::mat4(1.0); + // Send our transformation to the currently bound shader, + // in the "MVP" uniform + glUniformMatrix4fv(MatrixID, 1, GL_FALSE, &MVP[0][0]); + glUniformMatrix4fv(ModelMatrixID, 1, GL_FALSE, &ModelMatrix[0][0]); + glUniformMatrix4fv(ViewMatrixID, 1, GL_FALSE, &ViewMatrix[0][0]); - pose_idx ++; + glm::vec3 lightPos = glm::vec3(4,4,4); + glUniform3f(LightID, lightPos.x, lightPos.y, lightPos.z); - //glm::mat4 tempMat = getViewMatrix(); - //debug_mat(tempMat, "csv"); + // Bind our texture in Texture Unit 0 + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, Texture); + // Set our "myTextureSampler" sampler to use Texture Unit 0 + glUniform1i(TextureID, 0); - // glm::mat4 revertZ = glm::mat4(); - // revertZ[2][2] = -1; - // glm::quat rotateZ_N90 = glm::quat(glm::vec3(0.0f, 0.0f, glm::radians(-90.0f))); - // glm::quat rotateX_90 = glm::quat(glm::vec3(glm::radians(-90.0f), 0.0f, 0.0f)); + // 1rst attribute buffer : vertices + glEnableVertexAttribArray(0); + glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer); + glVertexAttribPointer( + 0, // attribute + 3, // size + GL_FLOAT, // type + GL_FALSE, // normalized? + 0, // stride + (void*)0 // array buffer offset + ); - //glm::mat4 MVP = ProjectionMatrix * ViewMatrix * revertZ * ModelMatrix; - glm::mat4 MVP = ProjectionMatrix * ViewMatrix * ModelMatrix; + // 2nd attribute buffer : UVs + glEnableVertexAttribArray(1); + glBindBuffer(GL_ARRAY_BUFFER, uvbuffer); + glVertexAttribPointer( + 1, // attribute + 2, // size + GL_FLOAT, // type + GL_FALSE, // normalized? + 0, // stride + (void*)0 // array buffer offset + ); - // Send our transformation to the currently bound shader, - // in the "MVP" uniform - glUniformMatrix4fv(MatrixID, 1, GL_FALSE, &MVP[0][0]); - glUniformMatrix4fv(ModelMatrixID, 1, GL_FALSE, &ModelMatrix[0][0]); - glUniformMatrix4fv(ViewMatrixID, 1, GL_FALSE, &ViewMatrix[0][0]); + // 3rd attribute buffer : normals + glEnableVertexAttribArray(2); + glBindBuffer(GL_ARRAY_BUFFER, normalbuffer); + glVertexAttribPointer( + 2, // attribute + 3, // size + GL_FLOAT, // type + GL_FALSE, // normalized? + 0, // stride + (void*)0 // array buffer offset + ); - glm::vec3 lightPos = glm::vec3(4,4,4); - glUniform3f(LightID, lightPos.x, lightPos.y, lightPos.z); + // Index buffer + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, elementbuffer); - // Bind our texture in Texture Unit 0 - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, Texture); - // Set our "myTextureSampler" sampler to use Texture Unit 0 - glUniform1i(TextureID, 0); + // Draw the triangles ! + glDrawElements( + GL_TRIANGLES, // mode + indices.size(), // count + GL_UNSIGNED_INT, // type + (void*)0 // element array buffer offset + ); - // 1rst attribute buffer : vertices - glEnableVertexAttribArray(0); - glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer); - glVertexAttribPointer( - 0, // attribute - 3, // size - GL_FLOAT, // type - GL_FALSE, // normalized? - 0, // stride - (void*)0 // array buffer offset - ); + glDisableVertexAttribArray(0); + glDisableVertexAttribArray(1); + glDisableVertexAttribArray(2); + + zmq::message_t reply (windowWidth*windowHeight*sizeof(float)); + float * reply_data_handle = (float*)reply.data(); + glGetTextureImage(renderedTexture, 0, GL_BLUE, GL_FLOAT, windowWidth * windowHeight *sizeof(float), reply_data_handle); + + std::cout << "Render time: " << t.elapsed() << std::endl; + + + float tmp; + + for (int i = 0; i < windowHeight/2; i++) + for (int j = 0; j < windowWidth; j++) { + tmp = reply_data_handle[i * windowWidth + j]; + reply_data_handle[i * windowWidth + j] = reply_data_handle[(windowHeight - 1 -i) * windowWidth + j]; + reply_data_handle[(windowHeight - 1 -i) * windowWidth + j] = tmp; + } + socket.send (reply); - // 2nd attribute buffer : UVs - glEnableVertexAttribArray(1); - glBindBuffer(GL_ARRAY_BUFFER, uvbuffer); - glVertexAttribPointer( - 1, // attribute - 2, // size - GL_FLOAT, // type - GL_FALSE, // normalized? - 0, // stride - (void*)0 // array buffer offset - ); - - // 3rd attribute buffer : normals - glEnableVertexAttribArray(2); - glBindBuffer(GL_ARRAY_BUFFER, normalbuffer); - glVertexAttribPointer( - 2, // attribute - 3, // size - GL_FLOAT, // type - GL_FALSE, // normalized? - 0, // stride - (void*)0 // array buffer offset - ); - - // Index buffer - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, elementbuffer); - - // Draw the triangles ! - glDrawElements( - GL_TRIANGLES, // mode - indices.size(), // count - GL_UNSIGNED_INT, // type - (void*)0 // element array buffer offset - ); - - glDisableVertexAttribArray(0); - glDisableVertexAttribArray(1); - glDisableVertexAttribArray(2); - - /* - // Render to the screen - glBindFramebuffer(GL_FRAMEBUFFER, 0); - // Render on the whole framebuffer, complete from the lower left corner to the upper right - glViewport(0,0,windowWidth,windowHeight); - - // Clear the screen - glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - - // Use our shader - glUseProgram(quad_programID); - - // Bind our texture in Texture Unit 0 - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, renderedTexture); - //glBindTexture(GL_TEXTURE_2D, depthTexture); - // Set our "renderedTexture" sampler to use Texture Unit 0 - glUniform1i(texID, 0); - - glUniform1f(timeID, (float)(glfwGetTime()*10.0f) ); - - // 1rst attribute buffer : vertices - glEnableVertexAttribArray(0); - glBindBuffer(GL_ARRAY_BUFFER, quad_vertexbuffer); - glVertexAttribPointer( - 0, // attribute 0. No particular reason for 0, but must match the layout in the shader. - 3, // size - GL_FLOAT, // type - GL_FALSE, // normalized? - 0, // stride - (void*)0 // array buffer offset - ); - - // Draw the triangles ! - glDrawArrays(GL_TRIANGLES, 0, 6); // 2*3 indices starting at 0 -> 2 triangles - - glDisableVertexAttribArray(0); - */ - - /* - if (false) { - char buffer[100]; - //printf("before: %s\n", buffer); - sprintf(buffer, "/home/jerry/Pictures/%s_mist.png", filename); - //printf("after: %s\n", buffer); - //printf("file name is %s\n", filename); - //printf("saving screenshot to %s\n", buffer); - save_screenshot(buffer, windowWidth, windowHeight, renderedTexture); - } - */ - - // Swap buffers - //glfwSwapBuffers(window); - //glfwPollEvents(); - - - // Let's fetch them from the backbuffer - // We request the pixels in GL_BGR format, thanks to Berzeger for the tip - - //glReadPixels((GLint)0, (GLint)0, - // (GLint)windowWidth, (GLint)windowHeight, - // GL_BGR, GL_UNSIGNED_SHORT, dataBuffer); - //glReadPixels((GLint)0, (GLint)0, - // (GLint)windowWidth, (GLint)windowHeight, - // GL_BGR, GL_FLOAT, dataBuffer); - - //glGetTextureImage(renderedTexture, 0, GL_RGB, GL_UNSIGNED_SHORT, nSize*sizeof(unsigned short), dataBuffer); - // float* loc = dataBuffer + windowWidth*windowHeight * k; - // glGetTextureImage(renderedTexture, 0, GL_BLUE, GL_FLOAT, - // (nSize/3)*sizeof(float), loc); - - // Map the OpenGL texture buffer to CUDA memory space - checkCudaErrors(cudaGraphicsMapResources(1, &resource)); - cudaArray_t writeArray; - checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&writeArray, resource, 0, 0)); - - // Copy the blue channel of the texture to the appropriate part of the cubemap that CUDA will use - fillBlue(cubeMapGpuBuffer, writeArray, windowWidth * windowHeight * k, windowWidth, windowHeight); - - // Unmap the OpenGL texture so that it can be rewritten - checkCudaErrors(cudaGraphicsUnmapResources(1, &resource)); - - } - checkCudaErrors(cudaStreamSynchronize(0)); - zmq::message_t reply (panoWidth*panoHeight*sizeof(float)); - projectCubeMapToEquirectangular((float*)reply.data(), cubeMapGpuBuffer, d_cubeMapCoordToPanoCoord, cubeMapCoordToPanoCoord.size(), (size_t) nSize/3); - - std::cout << "Render time: " << t.elapsed() << std::endl; - socket.send (reply); - - free(dataBuffer); - free(dataBuffer_c); + //free(dataBuffer); + //free(dataBuffer_c); + + } diff --git a/realenv/core/render/render_cuda_filter.cu b/realenv/core/render/render_cuda_filter.cu index c0464bc75..176a7b16b 100644 --- a/realenv/core/render/render_cuda_filter.cu +++ b/realenv/core/render/render_cuda_filter.cu @@ -26,6 +26,7 @@ const int BLOCK_ROWS = 8; #endif +const bool pano = false; __global__ void copy_mem(unsigned char *source, unsigned char *render) { @@ -114,6 +115,7 @@ __global__ void merge(unsigned char * img_all, unsigned char * img, float * sele for (idx = 0; idx < n; idx ++) { weight = selection[idx * stride + ((y+j)*width + x)] / (sum + 1e-4); + //weight = 0.25; img[3*((y+j)*width + x)] += (unsigned char) (img_all[idx * stride * 3 + 3*((y+j)*width + x)] * weight); img[3*((y+j)*width + x)+1] += (unsigned char) (img_all[idx * stride * 3 + 3*((y+j)*width + x) + 1] * weight); @@ -184,9 +186,17 @@ __global__ void transform2d(float *points3d_after) float y = points3d_after[(ih * w + iw) * 3 + 1]; float z = points3d_after[(ih * w + iw) * 3 + 2]; - points3d_after[(ih * w + iw) * 3 + 0] = sqrt(x * x + y * y + z * z); - points3d_after[(ih * w + iw) * 3 + 1] = atan2(y, x); - points3d_after[(ih * w + iw) * 3 + 2] = atan2(sqrt(x * x + y * y), z); + points3d_after[(ih * w + iw) * 3 + 0] = sqrt(x * x + y * y + z * z); + //points3d_after[(ih * w + iw) * 3 + 1] = atan2(y, x); + //points3d_after[(ih * w + iw) * 3 + 2] = atan2(sqrt(x * x + y * y), z); + if ((x > 0) && (y < x) && (y > -x) && (z < x) && (z > -x)) { + points3d_after[(ih * w + iw) * 3 + 1] = y / (x + 1e-5); + points3d_after[(ih * w + iw) * 3 + 2] = -z / (x + 1e-5); + } + else { + points3d_after[(ih * w + iw) * 3 + 1] = 0; + points3d_after[(ih * w + iw) * 3 + 2] = 0; + } } } @@ -263,7 +273,8 @@ __global__ void fill_with_average(unsigned char *img, int * nz, int * average, i -__global__ void render_final(float *points3d_polar, float * selection, float * depth_render, int * img, int * render, int s) + +__global__ void render_final(float *points3d_polar, float * selection, float * depth_render, int * img, int * render, int oh, int ow) { int x = blockIdx.x * TILE_DIM + threadIdx.x; int y = blockIdx.y * TILE_DIM + threadIdx.y; @@ -272,25 +283,32 @@ __global__ void render_final(float *points3d_polar, float * selection, float * d for (int j = 0; j < TILE_DIM; j+= BLOCK_ROWS) { + + + + int iw = x; int ih = y + j; - int tx = round((points3d_polar[(ih * w + iw) * 3 + 1] + M_PI)/(2*M_PI) * w * s - 0.5); - int ty = round((points3d_polar[(ih * w + iw) * 3 + 2])/M_PI * h * s - 0.5); + //int tx = round((points3d_polar[(ih * w + iw) * 3 + 1] + 1)/2 * ow - 0.5); + //int ty = round((points3d_polar[(ih * w + iw) * 3 + 2] + 1)/2 * oh - 0.5); - float tx_offset = ((points3d_polar[(ih * w + iw) * 3 + 1] + M_PI)/(2*M_PI) * w * s - 0.5); - float ty_offset = ((points3d_polar[(ih * w + iw) * 3 + 2])/M_PI * h * s - 0.5); + int tx = round((points3d_polar[(ih * w + iw) * 3 + 1] + 1)/2 * ow - 0.5); + int ty = round((points3d_polar[(ih * w + iw) * 3 + 2] + 1)/2 * oh - 0.5); + + float tx_offset = ((points3d_polar[(ih * w + iw) * 3 + 1] + 1)/2 * ow - 0.5); + float ty_offset = ((points3d_polar[(ih * w + iw) * 3 + 2] + 1)/2 * oh - 0.5); float tx00 = 0; float ty00 = 0; - float tx01 = ((points3d_polar[(ih * w + iw + 1) * 3 + 1] + M_PI)/(2*M_PI) * w * s - 0.5) - tx_offset; - float ty01 = ((points3d_polar[(ih * w + iw + 1) * 3 + 2])/M_PI * h * s - 0.5) - ty_offset; + float tx01 = ((points3d_polar[(ih * w + iw + 1) * 3 + 1] + 1)/2 * ow - 0.5) - tx_offset; + float ty01 = ((points3d_polar[(ih * w + iw + 1) * 3 + 2] + 1)/2 * oh - 0.5) - ty_offset; - float tx10 = ((points3d_polar[((ih + 1) * w + iw) * 3 + 1] + M_PI)/(2*M_PI) * w * s - 0.5) - tx_offset; - float ty10 = ((points3d_polar[((ih + 1) * w + iw) * 3 + 2])/M_PI * h * s - 0.5) - ty_offset; + float tx10 = ((points3d_polar[((ih + 1) * w + iw) * 3 + 1] + 1)/2 * ow - 0.5) - tx_offset; + float ty10 = ((points3d_polar[((ih + 1) * w + iw) * 3 + 2] + 1)/2 * oh - 0.5) - ty_offset; - float tx11 = ((points3d_polar[((ih+1) * w + iw + 1) * 3 + 1] + M_PI)/(2*M_PI) * w * s - 0.5) - tx_offset; - float ty11 = ((points3d_polar[((ih+1) * w + iw + 1) * 3 + 2])/M_PI * h * s - 0.5) - ty_offset; + float tx11 = ((points3d_polar[((ih+1) * w + iw + 1) * 3 + 1] + 1)/2 * ow - 0.5) - tx_offset; + float ty11 = ((points3d_polar[((ih+1) * w + iw + 1) * 3 + 2] + 1)/2 * oh - 0.5) - ty_offset; float t00 = 0 * (float)tx00 + (float)tx01 * -1.0/3 + (float)tx10 * 2.0/3 + (float)tx11 * 1.0/3; float t01 = 0 * (float)ty00 + (float)ty01 * -1.0/3 + (float)ty10 * 2.0/3 + (float)ty11 * 1.0/3; @@ -311,10 +329,10 @@ __global__ void render_final(float *points3d_polar, float * selection, float * d //printf("inverse %f %f %f %f\n", it00, it01, it10, it11); int this_depth = (int)(12800/128 * points3d_polar[(ih * w + iw) * 3 + 0]); - int delta00 = (int)(12800/128 * points3d_polar[(ih * w + iw) * 3 + 0]) - (int)(100 * depth_render[(ty * w * s + tx)]); - int delta01 = (int)(12800/128 * points3d_polar[(ih * w + iw + 1) * 3 + 0]) - (int)(100 * depth_render[(ty * w * s + tx + 1)]); - int delta10 = (int)(12800/128 * points3d_polar[((ih + 1) * w + iw) * 3 + 0]) - (int)(100 * depth_render[((ty+1) * w * s + tx)]); - int delta11 = (int)(12800/128 * points3d_polar[((ih+1) * w + iw + 1) * 3 + 0]) - (int)(100 * depth_render[((ty+1) * w * s + tx + 1)]); + int delta00 = (int)(12800/128 * points3d_polar[(ih * w + iw) * 3 + 0]) - (int)(100 * depth_render[(ty * ow + tx)]); + int delta01 = (int)(12800/128 * points3d_polar[(ih * w + iw + 1) * 3 + 0]) - (int)(100 * depth_render[(ty * ow + tx + 1)]); + int delta10 = (int)(12800/128 * points3d_polar[((ih + 1) * w + iw) * 3 + 0]) - (int)(100 * depth_render[((ty+1) * ow + tx)]); + int delta11 = (int)(12800/128 * points3d_polar[((ih+1) * w + iw + 1) * 3 + 0]) - (int)(100 * depth_render[((ty+1) * ow + tx + 1)]); int mindelta = min(min(delta00, delta01), min(delta10, delta11)); int maxdelta = max(max(delta00, delta01), max(delta10, delta11)); @@ -329,9 +347,12 @@ __global__ void render_final(float *points3d_polar, float * selection, float * d int r,g,b; int itx, ity; + //render[(ty * ow + tx)] = img[ih * w + iw]; + + if ((y > h/8) && (y < (h*7)/8)) if ((mindelta > -10) && (maxdelta < 10) && (this_depth < 10000)) { - if ((txmax - txmin) * (tymax - tymin) < 100 * s * s) + if ((txmax - txmin) * (tymax - tymin) < 500) { for (itx = txmin; itx < txmax; itx ++) for (ity = tymin; ity < tymax; ity ++) @@ -355,36 +376,40 @@ __global__ void render_final(float *points3d_polar, float * selection, float * d if (r > 255) r = 255; if (g > 255) g = 255; if (b > 255) b = 255; - if ((ity > 0) && (ity < h * s) && (itx > 0) && (ity < w * s)) { - render[(ity * w * s + itx)] = r * 256 * 256 + g * 256 + b; - selection[(ity * w * s + itx)] = 1.0 / abs(det); - } - //printf("%f\n", selection[(ity * w * s + itx)]); + + + render[(ity * ow + itx)] = r * 256 * 256 + g * 256 + b; + selection[(ity * ow + itx)] = 1.0 / abs(det); } } } } } + + + } extern "C"{ -void render(int n, int h,int w, int s, unsigned char * img, float * depth,float * pose, unsigned char * render, float * depth_render){ +void render(int n, int h,int w, int oh, int ow, unsigned char * img, float * depth,float * pose, unsigned char * render, float * depth_render){ //int ih, iw, i, ic; //printf("inside cuda code %d\n", depth); - printf("scale %d\n", s); - const int nx = w/s; - const int ny = h/s; - const size_t depth_mem_size = nx*ny*sizeof(float); - const size_t frame_mem_size = nx*ny*sizeof(unsigned char) * 3; + //printf("scale %d\n", s); + const int nx = w; + const int ny = h; - const size_t render_mem_size = nx * ny * s * s; + const int onx = ow; + const int ony = oh; + + const size_t input_mem_size = nx*ny; + const size_t output_mem_size = onx * ony; dim3 dimGrid(nx/TILE_DIM, ny/TILE_DIM, 1); - dim3 dimGrid2(nx * s/TILE_DIM, ny * s/TILE_DIM, 1); + dim3 dimGrid_out(onx/TILE_DIM, ony/TILE_DIM, 1); dim3 dimBlock(TILE_DIM, BLOCK_ROWS, 1); unsigned char *d_img, *d_render, *d_render_all; @@ -399,42 +424,46 @@ void render(int n, int h,int w, int s, unsigned char * img, float * depth,float int *d_render2, *d_img2; - cudaMalloc((void **)&d_img, frame_mem_size); - cudaMalloc((void **)&d_render, render_mem_size * sizeof(unsigned char) * 3); - cudaMalloc((void **)&d_render_all, render_mem_size * sizeof(unsigned char) * 3 * n); - cudaMalloc((void **)&d_depth, depth_mem_size); - cudaMalloc((void **)&d_depth_render, render_mem_size * sizeof(float)); - cudaMalloc((void **)&d_3dpoint, depth_mem_size * 4); - cudaMalloc((void **)&d_3dpoint_after, depth_mem_size * 4); + cudaMalloc((void **)&d_img, input_mem_size * sizeof(unsigned char) * 3); + cudaMalloc((void **)&d_img2, input_mem_size * sizeof(int)); + cudaMalloc((void **)&d_render, output_mem_size * sizeof(unsigned char) * 3); + cudaMalloc((void **)&d_render_all, output_mem_size * sizeof(unsigned char) * 3 * n); + cudaMalloc((void **)&d_depth, input_mem_size * sizeof(float)); + cudaMalloc((void **)&d_depth_render, output_mem_size * sizeof(float)); + cudaMalloc((void **)&d_3dpoint, input_mem_size * sizeof(float) * 4); + cudaMalloc((void **)&d_3dpoint_after, input_mem_size * sizeof(float) * 4); cudaMalloc((void **)&d_pose, sizeof(float) * 16); - cudaMalloc((void **)&d_render2, render_mem_size * sizeof(int)); - cudaMalloc((void **)&d_img2, render_mem_size * sizeof(int)); - cudaMalloc((void **)&d_selection, render_mem_size * sizeof(float) * n); + + + cudaMalloc((void **)&d_selection, output_mem_size * sizeof(float) * n); + + cudaMalloc((void **)&d_render2, output_mem_size * sizeof(int)); - cudaMalloc((void **)&nz, render_mem_size * sizeof(int)); - cudaMalloc((void **)&average, render_mem_size * sizeof(int) * 3); - cudaMemcpy(d_depth_render, depth_render, render_mem_size * sizeof(float), cudaMemcpyHostToDevice); - cudaMemset(d_render_all, 0, render_mem_size * sizeof(unsigned char) * 3 * n); - cudaMemset(d_selection, 0, render_mem_size * sizeof(float) * n); + cudaMalloc((void **)&nz, output_mem_size * sizeof(int)); + cudaMalloc((void **)&average, output_mem_size * sizeof(int) * 3); - cudaMemset(nz, 0, render_mem_size * sizeof(int)); - cudaMemset(average, 0, render_mem_size * sizeof(int) * 3); + + cudaMemset(nz, 0, output_mem_size * sizeof(int)); + cudaMemset(average, 0, output_mem_size * sizeof(int) * 3); + cudaMemset(d_selection, 0, output_mem_size * sizeof(float) * n); + cudaMemcpy(d_depth_render, depth_render, output_mem_size * sizeof(float), cudaMemcpyHostToDevice); + cudaMemset(d_render_all, 0, output_mem_size * sizeof(unsigned char) * 3 * n); int idx; for (idx = 0; idx < n; idx ++) { cudaMemcpy(d_pose, &(pose[idx * 16]), sizeof(float) * 16, cudaMemcpyHostToDevice); - cudaMemcpy(d_img, &(img[idx * nx * ny * 3]), frame_mem_size, cudaMemcpyHostToDevice); - cudaMemcpy(d_depth, &(depth[idx * nx * ny]), depth_mem_size, cudaMemcpyHostToDevice); + cudaMemcpy(d_img, &(img[idx * input_mem_size * 3]), input_mem_size * sizeof(unsigned char) * 3, cudaMemcpyHostToDevice); + cudaMemcpy(d_depth, &(depth[idx * input_mem_size]), input_mem_size * sizeof(float), cudaMemcpyHostToDevice); - cudaMemset(d_render, 0, render_mem_size * sizeof(unsigned char) * 3); - cudaMemset(d_render2, 0, render_mem_size * sizeof(int)); + cudaMemset(d_render, 0, output_mem_size * sizeof(unsigned char) * 3); + cudaMemset(d_render2, 0, output_mem_size * sizeof(int)); - cudaMemset(d_img2, 0, nx * ny * sizeof(int)); - cudaMemset(d_3dpoint, 0, depth_mem_size * 4); - cudaMemset(d_3dpoint_after, 0, depth_mem_size * 4); + cudaMemset(d_img2, 0, input_mem_size * sizeof(int)); + cudaMemset(d_3dpoint, 0, input_mem_size * sizeof(float) * 4); + cudaMemset(d_3dpoint_after, 0, input_mem_size * sizeof(float) * 4); to3d_point<<< dimGrid, dimBlock >>>(d_depth, d_3dpoint); transform<<< dimGrid, dimBlock >>>(d_3dpoint_after, d_3dpoint, d_pose); @@ -442,32 +471,31 @@ void render(int n, int h,int w, int s, unsigned char * img, float * depth,float char_to_int <<< dimGrid, dimBlock >>> (d_img2, d_img); - render_final <<< dimGrid, dimBlock >>> (d_3dpoint_after, &(d_selection[idx * nx * ny * s * s]), d_depth_render, d_img2, d_render2, s); + render_final <<< dimGrid, dimBlock >>> (d_3dpoint_after, &(d_selection[idx * onx * ony]), d_depth_render, d_img2, d_render2, oh, ow); - //int_to_char <<< dimGrid2, dimBlock >>> (d_render2, d_render); - int_to_char <<< dimGrid2, dimBlock >>> (d_render2, &(d_render_all[idx * nx * ny * s * s * 3])); + //int_to_char <<< dimGrid_out, dimBlock >>> (d_render2, d_render); + int_to_char <<< dimGrid_out, dimBlock >>> (d_render2, &(d_render_all[idx * output_mem_size * 3])); - //fill <<< dimGrid2, dimBlock >>> (&(d_render_all[idx * nx * ny * s * s * 3])); } - merge <<< dimGrid2, dimBlock >>> (d_render_all, d_render, d_selection, n, nx * ny * s * s); + merge <<< dimGrid_out, dimBlock >>> (d_render_all, d_render, d_selection, n, output_mem_size); - int fill_size[10] = {3, 5, 10, 20, 50, 75, 100, 200, 600, 1024}; + int fill_size[10] = {3, 5, 10, 20, 50, 75, 100, 200, 400, 768}; - //int fill_size[8] = {3, 5, 10, 20, 50, 100, 200}; for (int j = 0; j < 10; j++) { - cudaMemset(nz, 0, render_mem_size * sizeof(int)); - cudaMemset(average, 0, render_mem_size * sizeof(int) * 3); - get_average <<< dimGrid2, dimBlock >>> (d_render, nz, average, fill_size[j]); - fill_with_average <<< dimGrid2, dimBlock >>> (d_render, nz, average, fill_size[j]); + cudaMemset(nz, 0, output_mem_size * sizeof(int)); + cudaMemset(average, 0, output_mem_size * sizeof(int) * 3); + get_average <<< dimGrid_out, dimBlock >>> (d_render, nz, average, fill_size[j]); + fill_with_average <<< dimGrid_out, dimBlock >>> (d_render, nz, average, fill_size[j]); } /* - cudaMemset(nz, 0, render_mem_size * sizeof(int)); - cudaMemset(average, 0, render_mem_size * sizeof(int) * 3); - get_average <<< dimGrid2, dimBlock >>> (d_render, nz, average, 3); - fill_with_average <<< dimGrid2, dimBlock >>> (d_render, nz, average, 3); + cudaMemset(nz, 0, output_mem_size * sizeof(int)); + cudaMemset(average, 0, output_mem_size * sizeof(int) * 3); + get_average <<< dimGrid_out, dimBlock >>> (d_render, nz, average, 3); + fill_with_average <<< dimGrid_out, dimBlock >>> (d_render, nz, average, 3); */ - cudaMemcpy(render, d_render, render_mem_size * sizeof(unsigned char) * 3 , cudaMemcpyDeviceToHost); + cudaMemcpy(render, d_render, output_mem_size * sizeof(unsigned char) * 3 , cudaMemcpyDeviceToHost); + cudaFree(d_img); cudaFree(d_depth); diff --git a/realenv/core/render/show_3d2.py b/realenv/core/render/show_3d2.py index 654374a20..10c2b412c 100644 --- a/realenv/core/render/show_3d2.py +++ b/realenv/core/render/show_3d2.py @@ -20,6 +20,8 @@ from multiprocessing.dummy import Process from realenv.data.datasets import ViewDataSet3D from realenv.core.render.completion import CompletionNet +from realenv.learn.completion2 import CompletionNet2 +import torch.nn as nn file_dir = os.path.dirname(os.path.abspath(__file__)) @@ -82,6 +84,20 @@ class PCRenderer: self.scale_up = scale_up + + self.show = np.zeros((768, 768, 3),dtype='uint8') + self.show_rgb = np.zeros((768, 768 ,3),dtype='uint8') + + comp = CompletionNet2(norm = nn.BatchNorm2d, nf = 24) + comp = torch.nn.DataParallel(comp).cuda() + comp.load_state_dict(torch.load(os.path.join(file_dir, "model.pth"))) + self.model = comp.module + self.model.eval() + + self.imgv = Variable(torch.zeros(1, 3 , 768, 768), volatile = True).cuda() + self.maskv = Variable(torch.zeros(1,2, 768, 768), volatile = True).cuda() + + def _onmouse(self, *args): if args[0] == cv2.EVENT_LBUTTONDOWN: self.org_pitch, self.org_yaw, self.org_x, self.org_y, self.org_z =\ @@ -200,45 +216,62 @@ class PCRenderer: h = wo/3 w = 2*h n = ho/3 - opengl_arr = np.frombuffer(message, dtype=np.float32).reshape((h, w)) + + + pano = False + if pano: + opengl_arr = np.frombuffer(message, dtype=np.float32).reshape((h, w)) + else: + opengl_arr = np.frombuffer(message, dtype=np.float32).reshape((n, n)) def _render_depth(opengl_arr): #with Profiler("Render Depth"): cv2.imshow('target depth', opengl_arr/16.) def _render_pc(opengl_arr): - #with Profiler("Render pointcloud"): - poses_after = [ - pose.dot(np.linalg.inv(poses[i])).astype(np.float32) - for i in range(len(imgs))] - with Profiler("CUDA PC rendering"): + + with Profiler("Render pointcloud cuda"): + poses_after = [ + pose.dot(np.linalg.inv(poses[i])).astype(np.float32) + for i in range(len(imgs))] + #opengl_arr = np.zeros((h,w), dtype = np.float32) + + cuda_pc.render(ct.c_int(len(imgs)), - ct.c_int(imgs[0].shape[0]), - ct.c_int(imgs[0].shape[1]), - ct.c_int(self.scale_up), - imgs.ctypes.data_as(ct.c_void_p), - depths.ctypes.data_as(ct.c_void_p), - np.asarray(poses_after, dtype = np.float32).ctypes.data_as(ct.c_void_p), - show.ctypes.data_as(ct.c_void_p), - opengl_arr.ctypes.data_as(ct.c_void_p) - ) + ct.c_int(imgs[0].shape[0]), + ct.c_int(imgs[0].shape[1]), + ct.c_int(768), + ct.c_int(768), + imgs.ctypes.data_as(ct.c_void_p), + depths.ctypes.data_as(ct.c_void_p), + np.asarray(poses_after, dtype = np.float32).ctypes.data_as(ct.c_void_p), + show.ctypes.data_as(ct.c_void_p), + opengl_arr.ctypes.data_as(ct.c_void_p) + ) + threads = [ Process(target=_render_pc, args=(opengl_arr,)), Process(target=_render_depth, args=(opengl_arr,))] [t.start() for t in threads] [t.join() for t in threads] - if model: + + + if self.model: tf = transforms.ToTensor() + #from IPython import embed; embed() before = time.time() source = tf(show) - source_depth = tf(np.expand_dims(target_depth, 2).astype(np.float32)/65536 * 255) - imgv.data.copy_(source) - maskv.data.copy_(source_depth) + mask = (torch.sum(source[:3,:,:],0)>0).float().unsqueeze(0) + source_depth = tf(np.expand_dims(opengl_arr, 2).astype(np.float32)/128.0 * 255) + print(mask.size(), source_depth.size()) + mask = torch.cat([source_depth, mask], 0) + self.imgv.data.copy_(source) + self.maskv.data.copy_(mask) print('Transfer time', time.time() - before) before = time.time() - recon = model(imgv, maskv) + recon = model(self.imgv, self.maskv) print('NNtime:', time.time() - before) before = time.time() show2 = recon.data.cpu().numpy()[0].transpose(1,2,0) @@ -311,7 +344,7 @@ class PCRenderer: t1 = time.time() t = t1-t0 self.fps = 1/t - cv2.putText(self.show_rgb,'pitch %.3f yaw %.2f roll %.3f x %.2f y %.2f z %.2f'%(self.pitch, self.yaw, self.roll, self.x, self.y, self.z),(15,self.showsz-15),0,0.5,(255,255,255)) + cv2.putText(self.show_rgb,'pitch %.3f yaw %.2f roll %.3f x %.2f y %.2f z %.2f'%(self.pitch, self.yaw, self.roll, self.x, self.y, self.z),(15,768-15),0,0.5,(255,255,255)) cv2.putText(self.show_rgb,'fps %.1f'%(self.fps),(15,15),0,0.5,(255,255,255)) cv2.imshow('show3d',self.show_rgb) diff --git a/realenv/data/datasets.py b/realenv/data/datasets.py index 27147c488..9047f508b 100644 --- a/realenv/data/datasets.py +++ b/realenv/data/datasets.py @@ -52,6 +52,7 @@ def get_model_initial_pose(robot): if MODEL_ID == "11HB6XZSh1Q": return [0, 0, 3 * 3.14/2], [-3.38, -7, 1.4] ## living room open area #return [0, 0, 3 * 3.14/2], [-5, -5, 1.9] ## living room kitchen table + if MODEL_ID == "BbxejD15Etk": return [0, 0, 3 * 3.14/2], [-6.76, -12, 1.4] ## Gates Huang else: