#!/usr/bin/env python
# ROS libraries
import roslib
import rospy
# ROS messages
from sensor_msgs.msg import CompressedImage, Image
from cv_bridge import CvBridge, CvBridgeError
from geometry_msgs.msg import Point, PoseStamped
from std_msgs.msg import Bool
# TensorFlow and detection imports
import numpy as np
import os
import tensorflow as tf
import copy
import yaml
import cv2
import math
import tarfile
import six.moves.urllib as urllib
from tensorflow.core.framework import graph_pb2
# Protobuf compilation (only needed once):
#os.system('protoc object_detection/protos/*.proto --python_out=.')
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
import time
import sys
# Extend the path before importing the local helper module, so the
# import below can actually resolve it.
sys.path.insert(0, '/home/pateman/catkin_ws/src/autonomous_offboard/src/scripts')
from stuff.helper import FPS2, WebcamVideoStream, SessionWorker
## LOAD CONFIG PARAMS ##
if os.path.isfile('/home/nvidia/catkin_ws/src/autonomous_offboard/src/scripts/config.yml'):
    with open("/home/nvidia/catkin_ws/src/autonomous_offboard/src/scripts/config.yml", 'r') as ymlfile:
        cfg = yaml.safe_load(ymlfile)
else:
    with open("config.sample.yml", 'r') as ymlfile:
        cfg = yaml.safe_load(ymlfile)
video_input = cfg['video_input']
visualize = cfg['visualize']
vis_text = cfg['vis_text']
max_frames = cfg['max_frames']
fps_interval = cfg['fps_interval']
allow_memory_growth = cfg['allow_memory_growth']
det_interval = cfg['det_interval']
det_th = cfg['det_th']
model_name = cfg['model_name']
model_path = cfg['model_path']
label_path = cfg['label_path']
num_classes = cfg['num_classes']
split_model = cfg['split_model']
log_device = cfg['log_device']
ssd_shape = cfg['ssd_shape']
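# For reference, a minimal config.yml matching the keys read above could look
# like the sketch below (values are illustrative placeholders, not the
# settings actually used on the vehicle):
#
#   video_input: 0
#   visualize: false
#   vis_text: false
#   max_frames: 500
#   fps_interval: 5
#   allow_memory_growth: true
#   det_interval: 5
#   det_th: 0.5
#   model_name: 'ssd_mobilenet_v1_coco'
#   model_path: 'models/ssd_mobilenet_v1_coco/frozen_inference_graph.pb'
#   label_path: 'object_detection/data/label_map.pbtxt'
#   num_classes: 2
#   split_model: false
#   log_device: false
#   ssd_shape: 300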
class tensorflow_detection:
    def __init__(self):
        # 3D model points (in the target plane) of the full frame target and
        # of its centre marker, used as object points for solvePnP
        self.quad_3d_frame = np.float32([[-0.345, -0.345, 0], [0.345, -0.345, 0],
                                         [0.345, 0.345, 0], [-0.345, 0.345, 0]])
        self.quad_3d_center = np.float32([[-0.11075, -0.11075, 0], [0.11075, -0.11075, 0],
                                          [0.11075, 0.11075, 0], [-0.11075, 0.11075, 0]])
        #----REAL---
        self.K = np.float64([[1168.084098028661, 0, 545.2679414486556],
                             [0, 1164.433590234314, 526.4386170901346],
                             [0.0, 0.0, 1.0]])
        # ----GAZEBO---
        #self.K = np.float64([[1472.512772, 0, 640.5],
        #                     [0, 1472.512772, 480.5],
        #                     [0.0, 0.0, 1.0]])
        self.dist_coef = np.float64([-0.06624560142145453, 0.05966170895627322,
                                     0.002933236970742761, -0.01336570576300551, 0])
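        # Note on the calibration terms above: K uses the standard OpenCV
        # pinhole layout [[fx, 0, cx], [0, fy, cy], [0, 0, 1]], and dist_coef
        # is the OpenCV distortion vector (k1, k2, p1, p2, k3). The active
        # values presumably come from calibrating the real mvBlueFOX camera;
        # the commented-out block is the ideal model of the simulated Gazebo
        # camera.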
        self.detection_graph, score, expand = self.load_frozenmodel()
        self.category_index = self.load_labelmap()
        config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=log_device)
        config.gpu_options.allow_growth = allow_memory_growth
        self.target_reached = False
        self.local_pose = PoseStamped()
        cur_frames = 0
        self.write_flag = False
        self.write_flag_center = False
        self.countDet = 0
        self.countDet_center = 0
        # Keep a plain reference to the session instead of a `with` block,
        # so it stays open for the image callbacks after __init__ returns
        self.sess = tf.Session(graph=self.detection_graph, config=config)
        self.cam_read = Image()
        # topic where we publish the annotated image
        self.image_pub = rospy.Publisher("/tensorflow_detection/image", Image, queue_size=10)
        self.bridge = CvBridge()
        # topics where the estimated target coordinates go
        self.cam_pose_pub = rospy.Publisher("/tensorflow_detection/cam_point", Point, queue_size=1)
        self.cam_pose = Point()
        self.cam_pose_center_pub = rospy.Publisher("/tensorflow_detection/cam_point_center", Point, queue_size=1)
        self.cam_pose_center = Point()
        # subscribed topic: raw images from the mvBlueFOX camera
        self.subscriber = rospy.Subscriber("/mv_29901972/image_raw", Image, self.callback, queue_size=1)
        #self.subscriber = rospy.Subscriber("/uav_camera/image_raw_down", Image, self.callback, queue_size=1)
        #rospy.Subscriber('/position_control/distance', Bool, self.distance_reached_cb)
        #rospy.Subscriber('/mavros/local_position/pose', PoseStamped, self._local_pose_callback)
        #self.positioning_file = open("/home/pateman/catkin_ws/src/autonomous_offboard/src/positioning_data.txt",
        #                             "w")
        self.threshold = 0.6
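    # Topic interface, as wired above:
    #   subscribes: /mv_29901972/image_raw                  (sensor_msgs/Image)
    #   publishes:  /tensorflow_detection/image             (sensor_msgs/Image, annotated)
    #               /tensorflow_detection/cam_point         (geometry_msgs/Point)
    #               /tensorflow_detection/cam_point_center  (geometry_msgs/Point)
    # A Point of (inf, inf, inf) on the two pose topics means "target not
    # seen in this frame", so consumers must filter infinities out.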
    def distance_reached_cb(self, data):
        self.target_reached = data.data

    def _local_pose_callback(self, data):
        self.local_pose = data
    def callback(self, ros_data):
        try:
            cv_image = self.bridge.imgmsg_to_cv2(ros_data, "bgr8")
        except CvBridgeError as e:
            print(e)
            return
        height, width, _ = cv_image.shape
        # Define input and output tensors
        image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0')
        detection_scores = self.detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = self.detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = self.detection_graph.get_tensor_by_name('num_detections:0')
        image = cv_image  # video_stream.read()
        image_expanded = np.expand_dims(image, axis=0)
        boxes, scores, classes, num = self.sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: image_expanded})
        vis_util.visualize_boxes_and_labels_on_image_array(
            cv_image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            self.category_index,
            use_normalized_coordinates=True,
            line_thickness=8)
        frame_flag_detected = False
        center_flag_detected = False
        # num_detections comes back as a (1,)-shaped float array
        for i in range(int(num[0])):
            score = scores[0][i]
            if score >= self.threshold:
                #if (self.target_reached == False):
                #    self.write_flag = False
                #    self.write_flag_center = False
                #print(int(classes[0][i]))
                # class 1: the whole frame is detected
                if int(classes[0][i]) == 1:
                    frame_flag_detected = True
                    # boxes are normalized [ymin, xmin, ymax, xmax]
                    ymin = boxes[0][i][0] * height
                    xmin = boxes[0][i][1] * width
                    ymax = boxes[0][i][2] * height
                    xmax = boxes[0][i][3] * width
                    centerX = (xmin + xmax) / 2
                    centerY = (ymin + ymax) / 2
                    #print("ymin: {}, ymax: {}, xmin: {}, xmax: {}".format(ymin, ymax, xmin, xmax))
                    #print("center x: {}\ncenter y: {}\nwidth: {}\nheight: {}".format(centerX, centerY, (xmax - xmin),
                    #                                                                 (ymax - ymin)))
                    # The box width is passed for both dimensions, presumably
                    # because the physical target is square
                    self.find_relative_pose(centerX, centerY, (xmax - xmin), (xmax - xmin))
                # class 2: the centre marker of the frame is detected
                if int(classes[0][i]) == 2:
                    center_flag_detected = True
                    ymin = boxes[0][i][0] * height
                    xmin = boxes[0][i][1] * width
                    ymax = boxes[0][i][2] * height
                    xmax = boxes[0][i][3] * width
                    centerX = (xmin + xmax) / 2
                    centerY = (ymin + ymax) / 2
                    #print("ymin: {}, ymax: {}, xmin: {}, xmax: {}".format(ymin, ymax, xmin, xmax))
                    #print("center x: {}\ncenter y: {}\nwidth: {}\nheight: {}".format(centerX, centerY, (xmax - xmin),
                    #                                                                 (ymax - ymin)))
                    self.find_relative_pose_center(centerX, centerY, (xmax - xmin), (xmax - xmin))
        # Publish infinities as a "not detected" sentinel
        if not frame_flag_detected:
            self.cam_pose = Point()
            self.cam_pose.x = float("inf")
            self.cam_pose.y = float("inf")
            self.cam_pose.z = float("inf")
            self.cam_pose_pub.publish(self.cam_pose)
        if not center_flag_detected:
            self.cam_pose_center = Point()
            self.cam_pose_center.x = float("inf")
            self.cam_pose_center.y = float("inf")
            self.cam_pose_center.z = float("inf")
            self.cam_pose_center_pub.publish(self.cam_pose_center)
        try:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(cv_image, "bgr8"))
        except CvBridgeError as e:
            print(e)
    def find_relative_pose(self, x, y, w, h):
        #-----GAZEBO-----#
        #quad = np.float32(
        #    [[x - w / 2, y - h / 2], [x - w / 2, y + h / 2], [x + w / 2, y + h / 2], [x + w / 2, y - h / 2]])
        #-------REAL--------#
        quad = np.float32(
            [[x - w / 2, y - h / 2], [x + w / 2, y - h / 2], [x + w / 2, y + h / 2], [x - w / 2, y + h / 2]])
        #dist_coef_zero = np.zeros(4)
        # Recover the camera pose relative to the square frame target
        _ret, rvec, tvec = cv2.solvePnP(self.quad_3d_frame, quad, self.K, self.dist_coef)
        rmat = cv2.Rodrigues(rvec)[0]
        #self.rotationMatrixToEulerAngles(rmat)
        # Camera position expressed in target coordinates: -R^T * t
        cameraTranslatevector = -np.matrix(rmat).T * np.matrix(tvec)
        T0 = np.zeros((4, 4))
        T0[:3, :3] = rmat
        T0[:4, 3] = [0, 0, 0, 1]
        T0[:3, 3] = np.transpose(cameraTranslatevector)
        ####----REAL----####
        p0 = np.array([0.300 / 2, -0.345 / 2, 0, 1])
        ###-----GAZEBO----#
        #p0 = np.array([-0.345 / 2, -0.345 / 2, 0, 1])
        z0 = np.dot(T0, p0)
        self.cam_pose.x = -z0.item(0)
        self.cam_pose.y = z0.item(1)
        self.cam_pose.z = z0.item(2)
        #if (self.target_reached and self.write_flag == False):
        #    self.positioning_file.write(
        #        "Frame: Local position: [{}, {}, {}], Object position: [5, 5, 0], positioning: [{}, {}, {}], count: {}\n".format(
        #            self.local_pose.pose.position.x, self.local_pose.pose.position.y, self.local_pose.pose.position.z,
        #            self.cam_pose.x, self.cam_pose.y, self.cam_pose.z, self.countDet))
        #    self.write_flag = True
        #    self.countDet += 1
        self.cam_pose_pub.publish(self.cam_pose)
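    # How the pose recovery above works: solvePnP returns the rotation rvec
    # and translation tvec that map target-frame points into the camera
    # frame, so the camera's own position in target coordinates is
    # C = -R^T * t. T0 packs R and C into one 4x4 transform, which is applied
    # to the offset point p0 (half the target extents) to shift the reported
    # position from the target's corner model to a reference point; the sign
    # flip on x presumably adapts the result to the axis convention of the
    # downstream position controller. This reading of the offsets is our
    # interpretation, not documented in the original source.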
    def find_relative_pose_center(self, x, y, w, h):
        # -------GAZEBO--------#
        #quad = np.float32(
        #    [[x - w / 2, y - h / 2], [x - w / 2, y + h / 2], [x + w / 2, y + h / 2], [x + w / 2, y - h / 2]])
        # -------REAL--------#
        quad = np.float32(
            [[x - w / 2, y - h / 2], [x + w / 2, y - h / 2], [x + w / 2, y + h / 2], [x - w / 2, y + h / 2]])
        #dist_coef_zero = np.zeros(4)
        _ret, rvec, tvec = cv2.solvePnP(self.quad_3d_center, quad, self.K, self.dist_coef)
        rmat = cv2.Rodrigues(rvec)[0]
        #self.rotationMatrixToEulerAngles(rmat)
        cameraTranslatevector = -np.matrix(rmat).T * np.matrix(tvec)
        T0 = np.zeros((4, 4))
        T0[:3, :3] = rmat
        T0[:4, 3] = [0, 0, 0, 1]
        T0[:3, 3] = np.transpose(cameraTranslatevector)
        ####----REAL----####
        p0 = np.array([0.200 / 2, -0.100 / 2, 0, 1])
        ####----GAZEBO----####
        #p0 = np.array([-0.11075 / 2, -0.11075 / 2, 0, 1])
        z0 = np.dot(T0, p0)
        self.cam_pose_center.x = -z0.item(0)
        self.cam_pose_center.y = z0.item(1)
        self.cam_pose_center.z = z0.item(2)
        #if (self.target_reached and self.write_flag_center == False):
        #    self.positioning_file.write(
        #        "Center: Local position: [{}, {}, {}], Object position: [5, 5, 0], positioning: [{}, {}, {}], count: {}\n".format(
        #            self.local_pose.pose.position.x, self.local_pose.pose.position.y, self.local_pose.pose.position.z,
        #            self.cam_pose.x, self.cam_pose.y, self.cam_pose.z, self.countDet_center))
        #    self.write_flag_center = True
        #    self.countDet_center += 1
        self.cam_pose_center_pub.publish(self.cam_pose_center)
    def isRotationMatrix(self, R):
        Rt = np.transpose(R)
        shouldBeIdentity = np.dot(Rt, R)
        I = np.identity(3, dtype=R.dtype)
        n = np.linalg.norm(I - shouldBeIdentity)
        return n < 1e-6

    # Calculates Euler angles from a rotation matrix.
    # The result is the same as MATLAB except the order
    # of the Euler angles (x and z are swapped).
    def rotationMatrixToEulerAngles(self, R):
        assert (self.isRotationMatrix(R))
        sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
        singular = sy < 1e-6
        if not singular:
            x = math.atan2(R[2, 1], R[2, 2])
            y = math.atan2(-R[2, 0], sy)
            z = math.atan2(R[1, 0], R[0, 0])
        else:
            x = math.atan2(-R[1, 2], R[1, 1])
            y = math.atan2(-R[2, 0], sy)
            z = 0
        print("Rotation x: {}, y: {}, z: {}".format(x, y, z))
        return np.array([x, y, z])
    def _node_name(self, n):
        if n.startswith("^"):
            return n[1:]
        else:
            return n.split(":")[0]
    # Load a (frozen) TensorFlow model into memory.
    def load_frozenmodel(self):
        print('Loading frozen model into memory')
        if not split_model:
            detection_graph = tf.Graph()
            with detection_graph.as_default():
                od_graph_def = tf.GraphDef()
                with tf.gfile.GFile(model_path, 'rb') as fid:
                    serialized_graph = fid.read()
                    od_graph_def.ParseFromString(serialized_graph)
                    tf.import_graph_def(od_graph_def, name='')
            return detection_graph, None, None
        else:
            # Load a frozen model and split it into GPU and CPU graphs.
            # Hardcoded for ssd_mobilenet.
            input_graph = tf.Graph()
            with tf.Session(graph=input_graph):
                if ssd_shape == 600:
                    shape = 7326
                else:
                    shape = 1917
                score = tf.placeholder(tf.float32, shape=(None, shape, num_classes),
                                       name="Postprocessor/convert_scores")
                expand = tf.placeholder(tf.float32, shape=(None, shape, 1, 4),
                                        name="Postprocessor/ExpandDims_1")
                for node in input_graph.as_graph_def().node:
                    if node.name == "Postprocessor/convert_scores":
                        score_def = node
                    if node.name == "Postprocessor/ExpandDims_1":
                        expand_def = node
            detection_graph = tf.Graph()
            with detection_graph.as_default():
                od_graph_def = tf.GraphDef()
                with tf.gfile.GFile(model_path, 'rb') as fid:
                    serialized_graph = fid.read()
                    od_graph_def.ParseFromString(serialized_graph)
                    dest_nodes = ['Postprocessor/convert_scores', 'Postprocessor/ExpandDims_1']
                    edges = {}
                    name_to_node_map = {}
                    node_seq = {}
                    seq = 0
                    # Index every node and its input edges
                    for node in od_graph_def.node:
                        n = self._node_name(node.name)
                        name_to_node_map[n] = node
                        edges[n] = [self._node_name(x) for x in node.input]
                        node_seq[n] = seq
                        seq += 1
                    for d in dest_nodes:
                        assert d in name_to_node_map, "%s is not in graph" % d
                    # Walk backwards from the split points to collect
                    # everything they depend on
                    nodes_to_keep = set()
                    next_to_visit = dest_nodes[:]
                    while next_to_visit:
                        n = next_to_visit[0]
                        del next_to_visit[0]
                        if n in nodes_to_keep:
                            continue
                        nodes_to_keep.add(n)
                        next_to_visit += edges[n]
                    nodes_to_keep_list = sorted(list(nodes_to_keep), key=lambda n: node_seq[n])
                    nodes_to_remove = set()
                    for n in node_seq:
                        if n in nodes_to_keep_list:
                            continue
                        nodes_to_remove.add(n)
                    nodes_to_remove_list = sorted(list(nodes_to_remove), key=lambda n: node_seq[n])
                    # First half of the graph (feature extraction) ...
                    keep = graph_pb2.GraphDef()
                    for n in nodes_to_keep_list:
                        keep.node.extend([copy.deepcopy(name_to_node_map[n])])
                    # ... second half (postprocessing), fed by the placeholders
                    remove = graph_pb2.GraphDef()
                    remove.node.extend([score_def])
                    remove.node.extend([expand_def])
                    for n in nodes_to_remove_list:
                        remove.node.extend([copy.deepcopy(name_to_node_map[n])])
                    with tf.device('/gpu:0'):
                        tf.import_graph_def(keep, name='')
                    with tf.device('/cpu:0'):
                        tf.import_graph_def(remove, name='')
            return detection_graph, score, expand
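    # Background on the split-model branch above: the SSD graph is cut at
    # Postprocessor/convert_scores and Postprocessor/ExpandDims_1, so the
    # convolutional feature extraction runs on the GPU while the score
    # conversion and non-maximum suppression, which parallelise poorly, run
    # on the CPU. The hardcoded shape is the anchor-box count for the chosen
    # SSD input resolution (1917 for 300 px, 7326 for 600 px). The technique,
    # like the stuff.helper module imported above, appears to come from the
    # realtime_object_detection helper code this node builds on.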
    def load_labelmap(self):
        print('Loading label map')
        label_map = label_map_util.load_labelmap(label_path)
        categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=num_classes,
                                                                    use_display_name=True)
        category_index = label_map_util.create_category_index(categories)
        return category_index
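    # For reference, the file at label_path is a standard object_detection
    # label map (.pbtxt). With num_classes = 2 it would look roughly like the
    # sketch below; the class names here are illustrative, the real ones are
    # defined by the trained model:
    #   item { id: 1  name: 'frame' }
    #   item { id: 2  name: 'frame_center' }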
def main():
    '''Initializes and cleans up the ROS node'''
    # init_node must run before the class constructor creates any
    # publishers or subscribers
    rospy.init_node('tensorflow_detection', anonymous=True)
    ic = tensorflow_detection()
    #rospy.Rate(20)  # unused; spin() below drives the callbacks
    try:
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down ROS Image tensorflow detector")
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()
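# Example usage, assuming the script lives in the autonomous_offboard package
# (package name inferred from the paths above) and the mvBlueFOX driver is
# publishing on /mv_29901972/image_raw:
#   rosrun autonomous_offboard tensorflow_hw_mvBlueFox_detection.py
#   rostopic echo /tensorflow_detection/cam_point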