#!/usr/bin/env python
# Ros libraries
import roslib
import rospy

# Ros Messages
from sensor_msgs.msg import CompressedImage, Image
from cv_bridge import CvBridge, CvBridgeError
from geometry_msgs.msg import Point, PoseStamped
from std_msgs.msg import Bool
#Tensorflow imports
import numpy as np
import os
import tensorflow as tf
import copy
import yaml
import cv2
import math
import tarfile
import six.moves.urllib as urllib
from tensorflow.core.framework import graph_pb2

# Protobuf compilation (only necessary once)
#os.system('protoc object_detection/protos/*.proto --python_out=.')

import time
import sys
# The scripts directory must be on sys.path before the local helper import below
sys.path.insert(0, '/home/pateman/catkin_ws/src/autonomous_offboard/src/scripts')

from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
from stuff.helper import FPS2, WebcamVideoStream, SessionWorker

## LOAD CONFIG PARAMS ##
config_path = '/home/nvidia/catkin_ws/src/autonomous_offboard/src/scripts/config.yml'
if os.path.isfile(config_path):
    with open(config_path, 'r') as ymlfile:
        cfg = yaml.safe_load(ymlfile)
else:
    with open("config.sample.yml", 'r') as ymlfile:
        cfg = yaml.safe_load(ymlfile)

video_input         = cfg['video_input']
visualize           = cfg['visualize']
vis_text            = cfg['vis_text']
max_frames          = cfg['max_frames']
fps_interval        = cfg['fps_interval']
allow_memory_growth = cfg['allow_memory_growth']
det_interval        = cfg['det_interval']
det_th              = cfg['det_th']
model_name          = cfg['model_name']
model_path          = cfg['model_path']
label_path          = cfg['label_path']
num_classes         = cfg['num_classes']
split_model         = cfg['split_model']
log_device          = cfg['log_device']
ssd_shape           = cfg['ssd_shape']
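
# A minimal illustrative config.yml sketch covering the keys read above
# (values are placeholders for documentation, not the ones used on the vehicle):
#
#   video_input: 0
#   visualize: false
#   vis_text: false
#   max_frames: 500
#   fps_interval: 5
#   allow_memory_growth: true
#   det_interval: 5
#   det_th: 0.5
#   model_name: 'ssd_mobilenet_v1'
#   model_path: 'models/ssd_mobilenet_v1/frozen_inference_graph.pb'
#   label_path: 'object_detection/data/label_map.pbtxt'
#   num_classes: 2
#   split_model: false
#   log_device: false
#   ssd_shape: 300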

class tensorflow_detection:
    def __init__(self):

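        # 3D corner coordinates (in meters) of the two planar targets passed to
        # solvePnP: the outer landing frame (~0.69 m square) and its center
        # marker (~0.2215 m square), both centered on the origin with z = 0.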
        self.quad_3d_frame = np.float32([[-0.345, -0.345, 0], [0.345, -0.345, 0], [0.345, 0.345, 0], [-0.345, 0.345, 0]])
        self.quad_3d_center = np.float32([[-0.11075, -0.11075, 0], [0.11075, -0.11075, 0], [0.11075, 0.11075, 0], [-0.11075, 0.11075, 0]])
        #----REAL---
        self.K = np.float64([[1168.084098028661, 0, 545.2679414486556],
                        [0, 1164.433590234314, 526.4386170901346],
                        [0.0, 0.0, 1.0]])
        # ----GAZEBO---
        #self.K = np.float64([[1472.512772, 0, 640.5],
        #                [0, 1472.512772, 480.5],
        #                [0.0, 0.0, 1.0]])
        self.dist_coef = np.float64([-0.06624560142145453, 0.05966170895627322, 0.002933236970742761, -0.01336570576300551, 0])
        self.detection_graph, score, expand = self.load_frozenmodel()
        self.category_index = self.load_labelmap()
        config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=log_device)
        config.gpu_options.allow_growth = allow_memory_growth
        self.target_reached = False
        self.local_pose = PoseStamped()
        cur_frames = 0
        self.write_flag = False
        self.write_flag_center = False
        self.countDet = 0
        self.countDet_center = 0
        # Keep the session alive for the lifetime of the node: a `with` block
        # here would close it as soon as __init__ returns, before any image
        # callback gets to call sess.run().
        self.sess = tf.Session(graph=self.detection_graph, config=config)
        self.cam_read = Image()
        # topic where we publish the annotated image
        self.image_pub = rospy.Publisher("/tensorflow_detection/image", Image, queue_size=10)
        self.bridge = CvBridge()
        # topics where the estimated coordinates go
        self.cam_pose_pub = rospy.Publisher("/tensorflow_detection/cam_point", Point, queue_size=1)
        self.cam_pose = Point()
        self.cam_pose_center_pub = rospy.Publisher("/tensorflow_detection/cam_point_center", Point, queue_size=1)
        self.cam_pose_center = Point()
        # subscribed topic (raw images from the mvBlueFOX camera)
        self.subscriber = rospy.Subscriber("/mv_29901972/image_raw", Image, self.callback, queue_size=1)
        #self.subscriber = rospy.Subscriber("/uav_camera/image_raw_down", Image, self.callback, queue_size=1)
        #rospy.Subscriber('/position_control/distance', Bool, self.distance_reached_cb)
        #rospy.Subscriber('/mavros/local_position/pose', PoseStamped, self._local_pose_callback)
        #self.positioning_file = open("/home/pateman/catkin_ws/src/autonomous_offboard/src/positioning_data.txt",
        #    "w")

        # minimum detection score accepted by the callback
        self.threshold = 0.6

    def distance_reached_cb(self, data):
        self.target_reached = data.data

    def _local_pose_callback(self, data):
        self.local_pose = data
    def callback(self, ros_data):
        try:
            cv_image = self.bridge.imgmsg_to_cv2(ros_data, "bgr8")
        except CvBridgeError as e:
            print(e)
            return
        height, width, _ = cv_image.shape

        # Define input and output tensors
        image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0')
        detection_scores = self.detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = self.detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = self.detection_graph.get_tensor_by_name('num_detections:0')
        image = cv_image  # video_stream.read()
        image_expanded = np.expand_dims(image, axis=0)
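        # one forward pass of the detector on this frame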
        boxes, scores, classes, num = self.sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: image_expanded})
        vis_util.visualize_boxes_and_labels_on_image_array(
            cv_image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            self.category_index,
            use_normalized_coordinates=True,
            line_thickness=8)
        frame_flag_detected = False
        center_flag_detected = False
        for i in range(int(num[0])):
            score = scores[0][i]
            if score < self.threshold:
                continue
            #if (self.target_reached == False):
            #    self.write_flag = False
            #    self.write_flag_center = False
            # convert the normalized box corners to pixel coordinates
            ymin = boxes[0][i][0] * height
            xmin = boxes[0][i][1] * width
            ymax = boxes[0][i][2] * height
            xmax = boxes[0][i][3] * width
            centerX = (xmin + xmax) / 2
            centerY = (ymin + ymax) / 2
            # the width is passed for both w and h since the targets are square
            # class 1: the whole landing frame was detected
            if int(classes[0][i]) == 1:
                frame_flag_detected = True
                self.find_relative_pose(centerX, centerY, (xmax - xmin), (xmax - xmin))
            # class 2: the center marker of the frame was detected
            elif int(classes[0][i]) == 2:
                center_flag_detected = True
                self.find_relative_pose_center(centerX, centerY, (xmax - xmin), (xmax - xmin))
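        # publish "infinity" when a target class was not seen this frame so
        # downstream nodes can tell the detection has been lost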
        if not frame_flag_detected:
            self.cam_pose = Point()
            self.cam_pose.x = float("inf")
            self.cam_pose.y = float("inf")
            self.cam_pose.z = float("inf")
            self.cam_pose_pub.publish(self.cam_pose)
        if not center_flag_detected:
            self.cam_pose_center = Point()
            self.cam_pose_center.x = float("inf")
            self.cam_pose_center.y = float("inf")
            self.cam_pose_center.z = float("inf")
            self.cam_pose_center_pub.publish(self.cam_pose_center)
        try:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(cv_image, "bgr8"))
        except CvBridgeError as e:
            print(e)
    def find_relative_pose(self, x, y, w, h):
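        # The detected bounding box is treated as the image of the known square
        # target, so solvePnP on the four corner correspondences yields the
        # target pose (rvec, tvec) in the camera frame; the camera position in
        # the target frame is then C = -R^T t.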
        #-----GAZEBO-----#
        #quad = np.float32(
        #    [[x - w / 2, y - h / 2], [x - w / 2, y + h / 2], [x + w / 2, y + h / 2], [x + w / 2, y - h / 2]])
        #-------REAL--------#
        quad = np.float32([[x - w / 2, y - h / 2], [x + w / 2, y - h / 2], [x + w / 2, y + h / 2], [x - w / 2, y + h / 2]])
        #dist_coef_zero = np.zeros(4)
        _ret, rvec, tvec = cv2.solvePnP(self.quad_3d_frame, quad, self.K, self.dist_coef)
        rmat = cv2.Rodrigues(rvec)[0]
        #self.rotationMatrixToEulerAngles(rmat)
        # camera position in the target frame: C = -R^T t
        cameraTranslatevector = -np.matrix(rmat).T * np.matrix(tvec)

        # homogeneous transform built from rmat and the camera translation
        T0 = np.zeros((4, 4))
        T0[:3, :3] = rmat
        T0[3, 3] = 1
        T0[:3, 3] = np.transpose(cameraTranslatevector)
        ####----REAL----####
        p0 = np.array([0.300 / 2, -0.345 / 2, 0, 1])
        ###-----GAZEBO----#
        #p0 = np.array([-0.345 / 2, -0.345 / 2, 0, 1])
        z0 = np.dot(T0, p0)

        self.cam_pose.x = -z0.item(0)
        self.cam_pose.y = z0.item(1)
        self.cam_pose.z = z0.item(2)
        #if (self.target_reached and self.write_flag == False):
        #    self.positioning_file.write(
        #        "Frame: Local position: [{}, {}, {}], Object position: [5, 5, 0], positioning: [{}, {}, {}], count: {}\n".format(
        #            self.local_pose.pose.position.x, self.local_pose.pose.position.y, self.local_pose.pose.position.z,
        #            self.cam_pose.x, self.cam_pose.y, self.cam_pose.z, self.countDet))
        #    self.write_flag = True
        #    self.countDet += 1
        self.cam_pose_pub.publish(self.cam_pose)
    def find_relative_pose_center(self, x, y, w, h):
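        # Same PnP procedure as find_relative_pose, but using the 3D model of
        # the smaller center marker (self.quad_3d_center).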
        # -------GAZEBO--------#
        #quad = np.float32(
        #    [[x - w / 2, y - h / 2], [x - w / 2, y + h / 2], [x + w / 2, y + h / 2], [x + w / 2, y - h / 2]])
        # -------REAL--------#
        quad = np.float32(
            [[x - w / 2, y - h / 2], [x + w / 2, y - h / 2], [x + w / 2, y + h / 2], [x - w / 2, y + h / 2]])
        #dist_coef_zero = np.zeros(4)
        _ret, rvec, tvec = cv2.solvePnP(self.quad_3d_center, quad, self.K, self.dist_coef)
        rmat = cv2.Rodrigues(rvec)[0]
        #self.rotationMatrixToEulerAngles(rmat)
        # camera position in the target frame: C = -R^T t
        cameraTranslatevector = -np.matrix(rmat).T * np.matrix(tvec)

        # homogeneous transform built from rmat and the camera translation
        T0 = np.zeros((4, 4))
        T0[:3, :3] = rmat
        T0[3, 3] = 1
        T0[:3, 3] = np.transpose(cameraTranslatevector)
        ####----REAL----####
        p0 = np.array([0.200 / 2, -0.100 / 2, 0, 1])
        ####----GAZEBO----####
        #p0 = np.array([-0.11075 / 2, -0.11075 / 2, 0, 1])
        z0 = np.dot(T0, p0)

        self.cam_pose_center.x = -z0.item(0)
        self.cam_pose_center.y = z0.item(1)
        self.cam_pose_center.z = z0.item(2)
        #if (self.target_reached and self.write_flag_center == False):
        #    self.positioning_file.write(
        #        "Center: Local position: [{}, {}, {}], Object position: [5, 5, 0], positioning: [{}, {}, {}], count: {}\n".format(
        #            self.local_pose.pose.position.x, self.local_pose.pose.position.y, self.local_pose.pose.position.z,
        #            self.cam_pose.x, self.cam_pose.y, self.cam_pose.z, self.countDet_center))
        #    self.write_flag_center = True
        #    self.countDet_center += 1
        self.cam_pose_center_pub.publish(self.cam_pose_center)

    def isRotationMatrix(self, R):
        Rt = np.transpose(R)
        shouldBeIdentity = np.dot(Rt, R)
        I = np.identity(3, dtype=R.dtype)
        n = np.linalg.norm(I - shouldBeIdentity)
        return n < 1e-6

    # Calculates rotation matrix to euler angles
    # The result is the same as MATLAB except the order
    # of the euler angles ( x and z are swapped ).
    def rotationMatrixToEulerAngles(self, R):

        assert (self.isRotationMatrix(R))

        sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])

        singular = sy < 1e-6

        if not singular:
            x = math.atan2(R[2, 1], R[2, 2])
            y = math.atan2(-R[2, 0], sy)
            z = math.atan2(R[1, 0], R[0, 0])
        else:
            x = math.atan2(-R[1, 2], R[1, 1])
            y = math.atan2(-R[2, 0], sy)
            z = 0
        print("Rotation x: {}, y: {}, z: {}".format(x, y, z))
        return np.array([x, y, z])

    def _node_name(self, n):
        if n.startswith("^"):
            return n[1:]
        else:
            return n.split(":")[0]

    # Load a (frozen) Tensorflow model into memory.
    def load_frozenmodel(self):
        print('Loading frozen model into memory')
        if not split_model:
            detection_graph = tf.Graph()
            with detection_graph.as_default():
                od_graph_def = tf.GraphDef()
                with tf.gfile.GFile(model_path, 'rb') as fid:
                    serialized_graph = fid.read()
                    od_graph_def.ParseFromString(serialized_graph)
                    tf.import_graph_def(od_graph_def, name='')
            return detection_graph, None, None
        else:
            # load a frozen Model and split it into GPU and CPU graphs
            # Hardcoded for ssd_mobilenet
            input_graph = tf.Graph()
            with tf.Session(graph=input_graph):
                if ssd_shape == 600:
                    shape = 7326
                else:
                    shape = 1917
                score = tf.placeholder(tf.float32, shape=(None, shape, num_classes),
                                       name="Postprocessor/convert_scores")
                expand = tf.placeholder(tf.float32, shape=(None, shape, 1, 4), name="Postprocessor/ExpandDims_1")
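                # The two placeholders recreate the split-point tensors by
                # name; their NodeDefs are captured below and prepended to the
                # CPU graph as its inputs.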
                for node in input_graph.as_graph_def().node:
                    if node.name == "Postprocessor/convert_scores":
                        score_def = node
                    if node.name == "Postprocessor/ExpandDims_1":
                        expand_def = node

            detection_graph = tf.Graph()
            with detection_graph.as_default():
                od_graph_def = tf.GraphDef()
                with tf.gfile.GFile(model_path, 'rb') as fid:
                    serialized_graph = fid.read()
                    od_graph_def.ParseFromString(serialized_graph)
                    dest_nodes = ['Postprocessor/convert_scores', 'Postprocessor/ExpandDims_1']

                    edges = {}
                    name_to_node_map = {}
                    node_seq = {}
                    seq = 0
                    for node in od_graph_def.node:
                        n = self._node_name(node.name)
                        name_to_node_map[n] = node
                        edges[n] = [self._node_name(x) for x in node.input]
                        node_seq[n] = seq
                        seq += 1

                    for d in dest_nodes:
                        assert d in name_to_node_map, "%s is not in graph" % d

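                    # Walk backwards from the two destination nodes, collecting
                    # every node they transitively depend on; these stay in the
                    # GPU graph, everything else goes to the CPU graph.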
                    nodes_to_keep = set()
                    next_to_visit = dest_nodes[:]
                    while next_to_visit:
                        n = next_to_visit[0]
                        del next_to_visit[0]
                        if n in nodes_to_keep:
                            continue
                        nodes_to_keep.add(n)
                        next_to_visit += edges[n]

                    nodes_to_keep_list = sorted(list(nodes_to_keep), key=lambda n: node_seq[n])

                    nodes_to_remove = set()
                    for n in node_seq:
                        if n in nodes_to_keep:
                            continue
                        nodes_to_remove.add(n)
                    nodes_to_remove_list = sorted(list(nodes_to_remove), key=lambda n: node_seq[n])

                    keep = graph_pb2.GraphDef()
                    for n in nodes_to_keep_list:
                        keep.node.extend([copy.deepcopy(name_to_node_map[n])])

                    remove = graph_pb2.GraphDef()
                    remove.node.extend([score_def])
                    remove.node.extend([expand_def])
                    for n in nodes_to_remove_list:
                        remove.node.extend([copy.deepcopy(name_to_node_map[n])])

                    with tf.device('/gpu:0'):
                        tf.import_graph_def(keep, name='')
                    with tf.device('/cpu:0'):
                        tf.import_graph_def(remove, name='')

            return detection_graph, score, expand

    def load_labelmap(self):
        print('Loading label map')
        label_map = label_map_util.load_labelmap(label_path)
        categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=num_classes,
                                                                    use_display_name=True)
        category_index = label_map_util.create_category_index(categories)
        return category_index


def main():
    '''Initialize the ROS node, start the detector, and clean up on shutdown.'''
    # init_node must run before the detector creates publishers/subscribers
    rospy.init_node('tensorflow_detection', anonymous=True)
    tensorflow_detection()
    try:
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down ROS Image tensorflow detector")
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()