OpenShot Library | libopenshot  0.3.0
ObjectDetection.cpp
Go to the documentation of this file.
1 
10 // Copyright (c) 2008-2019 OpenShot Studios, LLC
11 //
12 // SPDX-License-Identifier: LGPL-3.0-or-later
13 
14 #include <fstream>
15 #include <iostream>
16 
18 #include "effects/Tracker.h"
19 #include "Exceptions.h"
20 #include "Timeline.h"
21 #include "objdetectdata.pb.h"
22 
23 #include <QImage>
24 #include <QPainter>
25 #include <QRectF>
26 using namespace std;
27 using namespace openshot;
28 
29 
31 ObjectDetection::ObjectDetection(std::string clipObDetectDataPath)
32 {
33  // Init effect properties
34  init_effect_details();
35 
36  // Tries to load the tracker data from protobuf
37  LoadObjDetectdData(clipObDetectDataPath);
38 
39  // Initialize the selected object index as the first object index
40  selectedObjectIndex = trackedObjects.begin()->first;
41 }
42 
43 // Default constructor
44 ObjectDetection::ObjectDetection()
45 {
46  // Init effect properties
47  init_effect_details();
48 
49  // Initialize the selected object index as the first object index
50  selectedObjectIndex = trackedObjects.begin()->first;
51 }
52 
53 // Init effect settings
54 void ObjectDetection::init_effect_details()
55 {
57  InitEffectInfo();
58 
60  info.class_name = "ObjectDetection";
61  info.name = "Object Detector";
62  info.description = "Detect objects through the video.";
63  info.has_audio = false;
64  info.has_video = true;
65  info.has_tracked_object = true;
66 }
67 
68 // This method is required for all derived classes of EffectBase, and returns a
69 // modified openshot::Frame object
70 std::shared_ptr<Frame> ObjectDetection::GetFrame(std::shared_ptr<Frame> frame, int64_t frame_number)
71 {
72  // Get the frame's image
73  cv::Mat cv_image = frame->GetImageCV();
74 
75  // Check if frame isn't NULL
76  if(cv_image.empty()){
77  return frame;
78  }
79 
80  // Initialize the Qt rectangle that will hold the positions of the bounding-box
81  std::vector<QRectF> boxRects;
82  // Initialize the image of the TrackedObject child clip
83  std::vector<std::shared_ptr<QImage>> childClipImages;
84 
85  // Check if track data exists for the requested frame
86  if (detectionsData.find(frame_number) != detectionsData.end()) {
87  float fw = cv_image.size().width;
88  float fh = cv_image.size().height;
89 
90  DetectionData detections = detectionsData[frame_number];
91  for(int i = 0; i<detections.boxes.size(); i++){
92 
93  // Does not show boxes with confidence below the threshold
94  if(detections.confidences.at(i) < confidence_threshold){
95  continue;
96  }
97  // Just display selected classes
98  if( display_classes.size() > 0 &&
99  std::find(display_classes.begin(), display_classes.end(), classNames[detections.classIds.at(i)]) == display_classes.end()){
100  continue;
101  }
102 
103  // Get the object id
104  int objectId = detections.objectIds.at(i);
105 
106  // Search for the object in the trackedObjects map
107  auto trackedObject_it = trackedObjects.find(objectId);
108 
109  // Cast the object as TrackedObjectBBox
110  std::shared_ptr<TrackedObjectBBox> trackedObject = std::static_pointer_cast<TrackedObjectBBox>(trackedObject_it->second);
111 
112  // Check if the tracked object has data for this frame
113  if (trackedObject->Contains(frame_number) &&
114  trackedObject->visible.GetValue(frame_number) == 1)
115  {
116  // Get the bounding-box of given frame
117  BBox trackedBox = trackedObject->GetBox(frame_number);
118  bool draw_text = !display_box_text.GetValue(frame_number);
119  std::vector<int> stroke_rgba = trackedObject->stroke.GetColorRGBA(frame_number);
120  int stroke_width = trackedObject->stroke_width.GetValue(frame_number);
121  float stroke_alpha = trackedObject->stroke_alpha.GetValue(frame_number);
122  std::vector<int> bg_rgba = trackedObject->background.GetColorRGBA(frame_number);
123  float bg_alpha = trackedObject->background_alpha.GetValue(frame_number);
124 
125  // Create a rotated rectangle object that holds the bounding box
126  // cv::RotatedRect box ( cv::Point2f( (int)(trackedBox.cx*fw), (int)(trackedBox.cy*fh) ),
127  // cv::Size2f( (int)(trackedBox.width*fw), (int)(trackedBox.height*fh) ),
128  // (int) (trackedBox.angle) );
129 
130  // DrawRectangleRGBA(cv_image, box, bg_rgba, bg_alpha, 1, true);
131  // DrawRectangleRGBA(cv_image, box, stroke_rgba, stroke_alpha, stroke_width, false);
132 
133 
134  cv::Rect2d box(
135  (int)( (trackedBox.cx-trackedBox.width/2)*fw),
136  (int)( (trackedBox.cy-trackedBox.height/2)*fh),
137  (int)( trackedBox.width*fw),
138  (int)( trackedBox.height*fh)
139  );
140 
141  // If the Draw Box property is off, then make the box invisible
142  if (trackedObject->draw_box.GetValue(frame_number) == 0)
143  {
144  bg_alpha = 1.0;
145  stroke_alpha = 1.0;
146  }
147 
148  drawPred(detections.classIds.at(i), detections.confidences.at(i),
149  box, cv_image, detections.objectIds.at(i), bg_rgba, bg_alpha, 1, true, draw_text);
150  drawPred(detections.classIds.at(i), detections.confidences.at(i),
151  box, cv_image, detections.objectIds.at(i), stroke_rgba, stroke_alpha, stroke_width, false, draw_text);
152 
153 
154  // Get the Detected Object's child clip
155  if (trackedObject->ChildClipId() != ""){
156  // Cast the parent timeline of this effect
157  Timeline* parentTimeline = (Timeline *) ParentTimeline();
158  if (parentTimeline){
159  // Get the Tracked Object's child clip
160  Clip* childClip = parentTimeline->GetClip(trackedObject->ChildClipId());
161 
162  if (childClip){
163  std::shared_ptr<Frame> f(new Frame(1, frame->GetWidth(), frame->GetHeight(), "#00000000"));
164  // Get the image of the child clip for this frame
165  std::shared_ptr<Frame> childClipFrame = childClip->GetFrame(f, frame_number);
166  childClipImages.push_back(childClipFrame->GetImage());
167 
168  // Set the Qt rectangle with the bounding-box properties
169  QRectF boxRect;
170  boxRect.setRect((int)((trackedBox.cx-trackedBox.width/2)*fw),
171  (int)((trackedBox.cy - trackedBox.height/2)*fh),
172  (int)(trackedBox.width*fw),
173  (int)(trackedBox.height*fh));
174  boxRects.push_back(boxRect);
175  }
176  }
177  }
178  }
179  }
180  }
181 
182  // Update Qt image with new Opencv frame
183  frame->SetImageCV(cv_image);
184 
185  // Set the bounding-box image with the Tracked Object's child clip image
186  if(boxRects.size() > 0){
187  // Get the frame image
188  QImage frameImage = *(frame->GetImage());
189  for(int i; i < boxRects.size();i++){
190  // Set a Qt painter to the frame image
191  QPainter painter(&frameImage);
192  // Draw the child clip image inside the bounding-box
193  painter.drawImage(boxRects[i], *childClipImages[i], QRectF(0, 0, frameImage.size().width(), frameImage.size().height()));
194  }
195  // Set the frame image as the composed image
196  frame->AddImage(std::make_shared<QImage>(frameImage));
197  }
198 
199  return frame;
200 }
201 
202 void ObjectDetection::DrawRectangleRGBA(cv::Mat &frame_image, cv::RotatedRect box, std::vector<int> color, float alpha,
203  int thickness, bool is_background){
204  // Get the bouding box vertices
205  cv::Point2f vertices2f[4];
206  box.points(vertices2f);
207 
208  // TODO: take a rectangle of frame_image by refencence and draw on top of that to improve speed
209  // select min enclosing rectangle to draw on a small portion of the image
210  // cv::Rect rect = box.boundingRect();
211  // cv::Mat image = frame_image(rect)
212 
213  if(is_background){
214  cv::Mat overlayFrame;
215  frame_image.copyTo(overlayFrame);
216 
217  // draw bounding box background
218  cv::Point vertices[4];
219  for(int i = 0; i < 4; ++i){
220  vertices[i] = vertices2f[i];}
221 
222  cv::Rect rect = box.boundingRect();
223  cv::fillConvexPoly(overlayFrame, vertices, 4, cv::Scalar(color[2],color[1],color[0]), cv::LINE_AA);
224  // add opacity
225  cv::addWeighted(overlayFrame, 1-alpha, frame_image, alpha, 0, frame_image);
226  }
227  else{
228  cv::Mat overlayFrame;
229  frame_image.copyTo(overlayFrame);
230 
231  // Draw bounding box
232  for (int i = 0; i < 4; i++)
233  {
234  cv::line(overlayFrame, vertices2f[i], vertices2f[(i+1)%4], cv::Scalar(color[2],color[1],color[0]),
235  thickness, cv::LINE_AA);
236  }
237 
238  // add opacity
239  cv::addWeighted(overlayFrame, 1-alpha, frame_image, alpha, 0, frame_image);
240  }
241 }
242 
243 void ObjectDetection::drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame, int objectNumber, std::vector<int> color,
244  float alpha, int thickness, bool is_background, bool display_text)
245 {
246 
247  if(is_background){
248  cv::Mat overlayFrame;
249  frame.copyTo(overlayFrame);
250 
251  //Draw a rectangle displaying the bounding box
252  cv::rectangle(overlayFrame, box, cv::Scalar(color[2],color[1],color[0]), cv::FILLED);
253 
254  // add opacity
255  cv::addWeighted(overlayFrame, 1-alpha, frame, alpha, 0, frame);
256  }
257  else{
258  cv::Mat overlayFrame;
259  frame.copyTo(overlayFrame);
260 
261  //Draw a rectangle displaying the bounding box
262  cv::rectangle(overlayFrame, box, cv::Scalar(color[2],color[1],color[0]), thickness);
263 
264  if(display_text){
265  //Get the label for the class name and its confidence
266  std::string label = cv::format("%.2f", conf);
267  if (!classNames.empty())
268  {
269  CV_Assert(classId < (int)classNames.size());
270  label = classNames[classId] + ":" + label;
271  }
272 
273  //Display the label at the top of the bounding box
274  int baseLine;
275  cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
276 
277  double left = box.x;
278  double top = std::max((int)box.y, labelSize.height);
279 
280  cv::rectangle(overlayFrame, cv::Point(left, top - round(1.025*labelSize.height)), cv::Point(left + round(1.025*labelSize.width), top + baseLine),
281  cv::Scalar(color[2],color[1],color[0]), cv::FILLED);
282  putText(overlayFrame, label, cv::Point(left+1, top), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0,0,0),1);
283  }
284  // add opacity
285  cv::addWeighted(overlayFrame, 1-alpha, frame, alpha, 0, frame);
286  }
287 }
288 
289 // Load protobuf data file
290 bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
291  // Create tracker message
292  pb_objdetect::ObjDetect objMessage;
293 
294  // Read the existing tracker message.
295  std::fstream input(inputFilePath, std::ios::in | std::ios::binary);
296  if (!objMessage.ParseFromIstream(&input)) {
297  std::cerr << "Failed to parse protobuf message." << std::endl;
298  return false;
299  }
300 
301  // Make sure classNames, detectionsData and trackedObjects are empty
302  classNames.clear();
303  detectionsData.clear();
304  trackedObjects.clear();
305 
306  // Seed to generate same random numbers
307  std::srand(1);
308  // Get all classes names and assign a color to them
309  for(int i = 0; i < objMessage.classnames_size(); i++)
310  {
311  classNames.push_back(objMessage.classnames(i));
312  classesColor.push_back(cv::Scalar(std::rand()%205 + 50, std::rand()%205 + 50, std::rand()%205 + 50));
313  }
314 
315  // Iterate over all frames of the saved message
316  for (size_t i = 0; i < objMessage.frame_size(); i++)
317  {
318  // Create protobuf message reader
319  const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);
320 
321  // Get frame Id
322  size_t id = pbFrameData.id();
323 
324  // Load bounding box data
325  const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();
326 
327  // Construct data vectors related to detections in the current frame
328  std::vector<int> classIds;
329  std::vector<float> confidences;
330  std::vector<cv::Rect_<float>> boxes;
331  std::vector<int> objectIds;
332 
333  // Iterate through the detected objects
334  for(int i = 0; i < pbFrameData.bounding_box_size(); i++)
335  {
336  // Get bounding box coordinates
337  float x = pBox.Get(i).x();
338  float y = pBox.Get(i).y();
339  float w = pBox.Get(i).w();
340  float h = pBox.Get(i).h();
341  // Get class Id (which will be assign to a class name)
342  int classId = pBox.Get(i).classid();
343  // Get prediction confidence
344  float confidence = pBox.Get(i).confidence();
345 
346  // Get the object Id
347  int objectId = pBox.Get(i).objectid();
348 
349  // Search for the object id on trackedObjects map
350  auto trackedObject = trackedObjects.find(objectId);
351  // Check if object already exists on the map
352  if (trackedObject != trackedObjects.end())
353  {
354  // Add a new BBox to it
355  trackedObject->second->AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
356  }
357  else
358  {
359  // There is no tracked object with that id, so insert a new one
360  TrackedObjectBBox trackedObj((int)classesColor[classId](0), (int)classesColor[classId](1), (int)classesColor[classId](2), (int)0);
361  trackedObj.AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
362 
363  std::shared_ptr<TrackedObjectBBox> trackedObjPtr = std::make_shared<TrackedObjectBBox>(trackedObj);
364  ClipBase* parentClip = this->ParentClip();
365  trackedObjPtr->ParentClip(parentClip);
366 
367  // Create a temp ID. This ID is necessary to initialize the object_id Json list
368  // this Id will be replaced by the one created in the UI
369  trackedObjPtr->Id(std::to_string(objectId));
370  trackedObjects.insert({objectId, trackedObjPtr});
371  }
372 
373  // Create OpenCV rectangle with the bouding box info
374  cv::Rect_<float> box(x, y, w, h);
375 
376  // Push back data into vectors
377  boxes.push_back(box);
378  classIds.push_back(classId);
379  confidences.push_back(confidence);
380  objectIds.push_back(objectId);
381  }
382 
383  // Assign data to object detector map
384  detectionsData[id] = DetectionData(classIds, confidences, boxes, id, objectIds);
385  }
386 
387  // Delete all global objects allocated by libprotobuf.
388  google::protobuf::ShutdownProtobufLibrary();
389 
390  return true;
391 }
392 
393 // Get the indexes and IDs of all visible objects in the given frame
394 std::string ObjectDetection::GetVisibleObjects(int64_t frame_number) const{
395 
396  // Initialize the JSON objects
397  Json::Value root;
398  root["visible_objects_index"] = Json::Value(Json::arrayValue);
399  root["visible_objects_id"] = Json::Value(Json::arrayValue);
400 
401  // Check if track data exists for the requested frame
402  if (detectionsData.find(frame_number) == detectionsData.end()){
403  return root.toStyledString();
404  }
405  DetectionData detections = detectionsData.at(frame_number);
406 
407  // Iterate through the tracked objects
408  for(int i = 0; i<detections.boxes.size(); i++){
409  // Does not show boxes with confidence below the threshold
410  if(detections.confidences.at(i) < confidence_threshold){
411  continue;
412  }
413 
414  // Just display selected classes
415  if( display_classes.size() > 0 &&
416  std::find(display_classes.begin(), display_classes.end(), classNames[detections.classIds.at(i)]) == display_classes.end()){
417  continue;
418  }
419 
420  int objectId = detections.objectIds.at(i);
421  // Search for the object in the trackedObjects map
422  auto trackedObject = trackedObjects.find(objectId);
423 
424  // Get the tracked object JSON properties for this frame
425  Json::Value trackedObjectJSON = trackedObject->second->PropertiesJSON(frame_number);
426 
427  if (trackedObjectJSON["visible"]["value"].asBool() &&
428  trackedObject->second->ExactlyContains(frame_number)){
429  // Save the object's index and ID if it's visible in this frame
430  root["visible_objects_index"].append(trackedObject->first);
431  root["visible_objects_id"].append(trackedObject->second->Id());
432  }
433  }
434 
435  return root.toStyledString();
436 }
437 
438 // Generate JSON string of this object
439 std::string ObjectDetection::Json() const {
440 
441  // Return formatted string
442  return JsonValue().toStyledString();
443 }
444 
445 // Generate Json::Value for this object
446 Json::Value ObjectDetection::JsonValue() const {
447 
448  // Create root json object
449  Json::Value root = EffectBase::JsonValue(); // get parent properties
450  root["type"] = info.class_name;
451  root["protobuf_data_path"] = protobuf_data_path;
452  root["selected_object_index"] = selectedObjectIndex;
453  root["confidence_threshold"] = confidence_threshold;
454  root["display_box_text"] = display_box_text.JsonValue();
455 
456  // Add tracked object's IDs to root
457  Json::Value objects;
458  for (auto const& trackedObject : trackedObjects){
459  Json::Value trackedObjectJSON = trackedObject.second->JsonValue();
460  // add object json
461  objects[trackedObject.second->Id()] = trackedObjectJSON;
462  }
463  root["objects"] = objects;
464 
465  // return JsonValue
466  return root;
467 }
468 
469 // Load JSON string into this object
470 void ObjectDetection::SetJson(const std::string value) {
471 
472  // Parse JSON string into JSON objects
473  try
474  {
475  const Json::Value root = openshot::stringToJson(value);
476  // Set all values that match
477  SetJsonValue(root);
478  }
479  catch (const std::exception& e)
480  {
481  // Error parsing JSON (or missing keys)
482  throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
483  }
484 }
485 
486 // Load Json::Value into this object
487 void ObjectDetection::SetJsonValue(const Json::Value root) {
488  // Set parent data
489  EffectBase::SetJsonValue(root);
490 
491  // Set data from Json (if key is found)
492  if (!root["protobuf_data_path"].isNull() && protobuf_data_path.size() <= 1){
493  protobuf_data_path = root["protobuf_data_path"].asString();
494 
495  if(!LoadObjDetectdData(protobuf_data_path)){
496  throw InvalidFile("Invalid protobuf data path", "");
497  protobuf_data_path = "";
498  }
499  }
500 
501  // Set the selected object index
502  if (!root["selected_object_index"].isNull())
503  selectedObjectIndex = root["selected_object_index"].asInt();
504 
505  if (!root["confidence_threshold"].isNull())
506  confidence_threshold = root["confidence_threshold"].asFloat();
507 
508  if (!root["display_box_text"].isNull())
509  display_box_text.SetJsonValue(root["display_box_text"]);
510 
511  if (!root["class_filter"].isNull()){
512  class_filter = root["class_filter"].asString();
513  std::stringstream ss(class_filter);
514  display_classes.clear();
515  while( ss.good() )
516  {
517  // Parse comma separated string
518  std::string substr;
519  std::getline( ss, substr, ',' );
520  display_classes.push_back( substr );
521  }
522  }
523 
524  if (!root["objects"].isNull()){
525  for (auto const& trackedObject : trackedObjects){
526  std::string obj_id = std::to_string(trackedObject.first);
527  if(!root["objects"][obj_id].isNull()){
528  trackedObject.second->SetJsonValue(root["objects"][obj_id]);
529  }
530  }
531  }
532 
533  // Set the tracked object's ids
534  if (!root["objects_id"].isNull()){
535  for (auto const& trackedObject : trackedObjects){
536  Json::Value trackedObjectJSON;
537  trackedObjectJSON["box_id"] = root["objects_id"][trackedObject.first].asString();
538  trackedObject.second->SetJsonValue(trackedObjectJSON);
539  }
540  }
541 }
542 
543 // Get all properties for a specific frame
544 std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const {
545 
546  // Generate JSON properties list
547  Json::Value root;
548 
549  Json::Value objects;
550  if(trackedObjects.count(selectedObjectIndex) != 0){
551  auto selectedObject = trackedObjects.at(selectedObjectIndex);
552  if (selectedObject){
553  Json::Value trackedObjectJSON = selectedObject->PropertiesJSON(requested_frame);
554  // add object json
555  objects[selectedObject->Id()] = trackedObjectJSON;
556  }
557  }
558  root["objects"] = objects;
559 
560  root["selected_object_index"] = add_property_json("Selected Object", selectedObjectIndex, "int", "", NULL, 0, 200, false, requested_frame);
561  root["id"] = add_property_json("ID", 0.0, "string", Id(), NULL, -1, -1, true, requested_frame);
562  root["position"] = add_property_json("Position", Position(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame);
563  root["layer"] = add_property_json("Track", Layer(), "int", "", NULL, 0, 20, false, requested_frame);
564  root["start"] = add_property_json("Start", Start(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame);
565  root["end"] = add_property_json("End", End(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame);
566  root["duration"] = add_property_json("Duration", Duration(), "float", "", NULL, 0, 1000 * 60 * 30, true, requested_frame);
567  root["confidence_threshold"] = add_property_json("Confidence Theshold", confidence_threshold, "float", "", NULL, 0, 1, false, requested_frame);
568  root["class_filter"] = add_property_json("Class Filter", 0.0, "string", class_filter, NULL, -1, -1, false, requested_frame);
569 
570  root["display_box_text"] = add_property_json("Draw Box Text", display_box_text.GetValue(requested_frame), "int", "", &display_box_text, 0, 1.0, false, requested_frame);
571  root["display_box_text"]["choices"].append(add_property_choice_json("Off", 1, display_box_text.GetValue(requested_frame)));
572  root["display_box_text"]["choices"].append(add_property_choice_json("On", 0, display_box_text.GetValue(requested_frame)));
573 
574  // Return formatted string
575  return root.toStyledString();
576 }
Header file for all Exception classes.
Header file for Object Detection effect class.
Header file for Timeline class.
Header file for Tracker effect class.
This abstract class is the base class, used by all clips in libopenshot.
Definition: ClipBase.h:33
This class represents a clip (used to arrange readers on the timeline)
Definition: Clip.h:90
std::shared_ptr< openshot::Frame > GetFrame(int64_t frame_number) override
Get an openshot::Frame object for a specific frame number of this clip. The image size and number of ...
Definition: Clip.cpp:378
This class represents a single frame of video (i.e. image & audio data)
Definition: Frame.h:91
Exception for files that can not be found or opened.
Definition: Exceptions.h:188
Exception for invalid JSON.
Definition: Exceptions.h:218
This class represents a timeline.
Definition: Timeline.h:150
openshot::Clip * GetClip(const std::string &id)
Look up a single clip by ID.
Definition: Timeline.cpp:408
This class contains the properties of a tracked object and functions to manipulate it.
void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) override
Add a BBox to the BoxVec map.
This namespace is the default namespace for all code in the openshot library.
Definition: Compressor.h:29
const Json::Value stringToJson(const std::string value)
Definition: Json.cpp:16
std::vector< cv::Rect_< float > > boxes
std::vector< float > confidences
std::vector< int > classIds
std::vector< int > objectIds
This struct holds the information of a bounding-box.
float cy
y-coordinate of the bounding box center
float height
bounding box height
float cx
x-coordinate of the bounding box center
float width
bounding box width