OpenShot Library | libopenshot  0.3.0
ObjectDetection.cpp
Go to the documentation of this file.
1 
10 // Copyright (c) 2008-2019 OpenShot Studios, LLC
11 //
12 // SPDX-License-Identifier: LGPL-3.0-or-later
13 
14 #include <fstream>
15 #include <iostream>
16 
18 #include "effects/Tracker.h"
19 #include "Exceptions.h"
20 #include "Timeline.h"
21 #include "objdetectdata.pb.h"
22 
23 #include <QImage>
24 #include <QPainter>
25 #include <QRectF>
26 using namespace std;
27 using namespace openshot;
28 
29 
31 ObjectDetection::ObjectDetection(std::string clipObDetectDataPath)
32 {
33  // Init effect properties
34  init_effect_details();
35 
36  // Tries to load the tracker data from protobuf
37  LoadObjDetectdData(clipObDetectDataPath);
38 
39  // Initialize the selected object index as the first object index
40  selectedObjectIndex = trackedObjects.begin()->first;
41 }
42 
43 // Default constructor
44 ObjectDetection::ObjectDetection()
45 {
46  // Init effect properties
47  init_effect_details();
48 
49  // Initialize the selected object index as the first object index
50  selectedObjectIndex = trackedObjects.begin()->first;
51 }
52 
53 // Init effect settings
54 void ObjectDetection::init_effect_details()
55 {
57  InitEffectInfo();
58 
60  info.class_name = "ObjectDetection";
61  info.name = "Object Detector";
62  info.description = "Detect objects through the video.";
63  info.has_audio = false;
64  info.has_video = true;
65  info.has_tracked_object = true;
66 }
67 
68 // This method is required for all derived classes of EffectBase, and returns a
69 // modified openshot::Frame object
70 std::shared_ptr<Frame> ObjectDetection::GetFrame(std::shared_ptr<Frame> frame, int64_t frame_number)
71 {
72  // Get the frame's image
73  cv::Mat cv_image = frame->GetImageCV();
74 
75  // Check if frame isn't NULL
76  if(cv_image.empty()){
77  return frame;
78  }
79 
80  // Initialize the Qt rectangle that will hold the positions of the bounding-box
81  std::vector<QRectF> boxRects;
82  // Initialize the image of the TrackedObject child clip
83  std::vector<std::shared_ptr<QImage>> childClipImages;
84 
85  // Check if track data exists for the requested frame
86  if (detectionsData.find(frame_number) != detectionsData.end()) {
87  float fw = cv_image.size().width;
88  float fh = cv_image.size().height;
89 
90  DetectionData detections = detectionsData[frame_number];
91  for(int i = 0; i<detections.boxes.size(); i++){
92 
93  // Does not show boxes with confidence below the threshold
94  if(detections.confidences.at(i) < confidence_threshold){
95  continue;
96  }
97  // Just display selected classes
98  if( display_classes.size() > 0 &&
99  std::find(display_classes.begin(), display_classes.end(), classNames[detections.classIds.at(i)]) == display_classes.end()){
100  continue;
101  }
102 
103  // Get the object id
104  int objectId = detections.objectIds.at(i);
105 
106  // Search for the object in the trackedObjects map
107  auto trackedObject_it = trackedObjects.find(objectId);
108 
109  // Cast the object as TrackedObjectBBox
110  std::shared_ptr<TrackedObjectBBox> trackedObject = std::static_pointer_cast<TrackedObjectBBox>(trackedObject_it->second);
111 
112  // Check if the tracked object has data for this frame
113  if (trackedObject->Contains(frame_number) &&
114  trackedObject->visible.GetValue(frame_number) == 1)
115  {
116  // Get the bounding-box of given frame
117  BBox trackedBox = trackedObject->GetBox(frame_number);
118  bool draw_text = !display_box_text.GetValue(frame_number);
119  std::vector<int> stroke_rgba = trackedObject->stroke.GetColorRGBA(frame_number);
120  int stroke_width = trackedObject->stroke_width.GetValue(frame_number);
121  float stroke_alpha = trackedObject->stroke_alpha.GetValue(frame_number);
122  std::vector<int> bg_rgba = trackedObject->background.GetColorRGBA(frame_number);
123  float bg_alpha = trackedObject->background_alpha.GetValue(frame_number);
124 
125  // Create a rotated rectangle object that holds the bounding box
126  // cv::RotatedRect box ( cv::Point2f( (int)(trackedBox.cx*fw), (int)(trackedBox.cy*fh) ),
127  // cv::Size2f( (int)(trackedBox.width*fw), (int)(trackedBox.height*fh) ),
128  // (int) (trackedBox.angle) );
129 
130  // DrawRectangleRGBA(cv_image, box, bg_rgba, bg_alpha, 1, true);
131  // DrawRectangleRGBA(cv_image, box, stroke_rgba, stroke_alpha, stroke_width, false);
132 
133 
134  cv::Rect2d box(
135  (int)( (trackedBox.cx-trackedBox.width/2)*fw),
136  (int)( (trackedBox.cy-trackedBox.height/2)*fh),
137  (int)( trackedBox.width*fw),
138  (int)( trackedBox.height*fh)
139  );
140 
141  // If the Draw Box property is off, then make the box invisible
142  if (trackedObject->draw_box.GetValue(frame_number) == 0)
143  {
144  bg_alpha = 1.0;
145  stroke_alpha = 1.0;
146  }
147 
148  drawPred(detections.classIds.at(i), detections.confidences.at(i),
149  box, cv_image, detections.objectIds.at(i), bg_rgba, bg_alpha, 1, true, draw_text);
150  drawPred(detections.classIds.at(i), detections.confidences.at(i),
151  box, cv_image, detections.objectIds.at(i), stroke_rgba, stroke_alpha, stroke_width, false, draw_text);
152 
153 
154  // Get the Detected Object's child clip
155  if (trackedObject->ChildClipId() != ""){
156  // Cast the parent timeline of this effect
157  Timeline* parentTimeline = (Timeline *) ParentTimeline();
158  if (parentTimeline){
159  // Get the Tracked Object's child clip
160  Clip* childClip = parentTimeline->GetClip(trackedObject->ChildClipId());
161 
162  if (childClip){
163  std::shared_ptr<Frame> f(new Frame(1, frame->GetWidth(), frame->GetHeight(), "#00000000"));
164  // Get the image of the child clip for this frame
165  std::shared_ptr<Frame> childClipFrame = childClip->GetFrame(f, frame_number);
166  childClipImages.push_back(childClipFrame->GetImage());
167 
168  // Set the Qt rectangle with the bounding-box properties
169  QRectF boxRect;
170  boxRect.setRect((int)((trackedBox.cx-trackedBox.width/2)*fw),
171  (int)((trackedBox.cy - trackedBox.height/2)*fh),
172  (int)(trackedBox.width*fw),
173  (int)(trackedBox.height*fh));
174  boxRects.push_back(boxRect);
175  }
176  }
177  }
178  }
179  }
180  }
181 
182  // Update Qt image with new Opencv frame
183  frame->SetImageCV(cv_image);
184 
185  // Set the bounding-box image with the Tracked Object's child clip image
186  if(boxRects.size() > 0){
187  // Get the frame image
188  QImage frameImage = *(frame->GetImage());
189  for(int i; i < boxRects.size();i++){
190  // Set a Qt painter to the frame image
191  QPainter painter(&frameImage);
192  // Draw the child clip image inside the bounding-box
193  painter.drawImage(boxRects[i], *childClipImages[i], QRectF(0, 0, frameImage.size().width(), frameImage.size().height()));
194  }
195  // Set the frame image as the composed image
196  frame->AddImage(std::make_shared<QImage>(frameImage));
197  }
198 
199  return frame;
200 }
201 
202 void ObjectDetection::DrawRectangleRGBA(cv::Mat &frame_image, cv::RotatedRect box, std::vector<int> color, float alpha,
203  int thickness, bool is_background){
204  // Get the bouding box vertices
205  cv::Point2f vertices2f[4];
206  box.points(vertices2f);
207 
208  // TODO: take a rectangle of frame_image by refencence and draw on top of that to improve speed
209  // select min enclosing rectangle to draw on a small portion of the image
210  // cv::Rect rect = box.boundingRect();
211  // cv::Mat image = frame_image(rect)
212 
213  if(is_background){
214  cv::Mat overlayFrame;
215  frame_image.copyTo(overlayFrame);
216 
217  // draw bounding box background
218  cv::Point vertices[4];
219  for(int i = 0; i < 4; ++i){
220  vertices[i] = vertices2f[i];}
221 
222  cv::Rect rect = box.boundingRect();
223  cv::fillConvexPoly(overlayFrame, vertices, 4, cv::Scalar(color[2],color[1],color[0]), cv::LINE_AA);
224  // add opacity
225  cv::addWeighted(overlayFrame, 1-alpha, frame_image, alpha, 0, frame_image);
226  }
227  else{
228  cv::Mat overlayFrame;
229  frame_image.copyTo(overlayFrame);
230 
231  // Draw bounding box
232  for (int i = 0; i < 4; i++)
233  {
234  cv::line(overlayFrame, vertices2f[i], vertices2f[(i+1)%4], cv::Scalar(color[2],color[1],color[0]),
235  thickness, cv::LINE_AA);
236  }
237 
238  // add opacity
239  cv::addWeighted(overlayFrame, 1-alpha, frame_image, alpha, 0, frame_image);
240  }
241 }
242 
243 void ObjectDetection::drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame, int objectNumber, std::vector<int> color,
244  float alpha, int thickness, bool is_background, bool display_text)
245 {
246 
247  if(is_background){
248  cv::Mat overlayFrame;
249  frame.copyTo(overlayFrame);
250 
251  //Draw a rectangle displaying the bounding box
252  cv::rectangle(overlayFrame, box, cv::Scalar(color[2],color[1],color[0]), cv::FILLED);
253 
254  // add opacity
255  cv::addWeighted(overlayFrame, 1-alpha, frame, alpha, 0, frame);
256  }
257  else{
258  cv::Mat overlayFrame;
259  frame.copyTo(overlayFrame);
260 
261  //Draw a rectangle displaying the bounding box
262  cv::rectangle(overlayFrame, box, cv::Scalar(color[2],color[1],color[0]), thickness);
263 
264  if(display_text){
265  //Get the label for the class name and its confidence
266  std::string label = cv::format("%.2f", conf);
267  if (!classNames.empty())
268  {
269  CV_Assert(classId < (int)classNames.size());
270  label = classNames[classId] + ":" + label;
271  }
272 
273  //Display the label at the top of the bounding box
274  int baseLine;
275  cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
276 
277  double left = box.x;
278  double top = std::max((int)box.y, labelSize.height);
279 
280  cv::rectangle(overlayFrame, cv::Point(left, top - round(1.025*labelSize.height)), cv::Point(left + round(1.025*labelSize.width), top + baseLine),
281  cv::Scalar(color[2],color[1],color[0]), cv::FILLED);
282  putText(overlayFrame, label, cv::Point(left+1, top), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0,0,0),1);
283  }
284  // add opacity
285  cv::addWeighted(overlayFrame, 1-alpha, frame, alpha, 0, frame);
286  }
287 }
288 
289 // Load protobuf data file
290 bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
291  // Create tracker message
292  pb_objdetect::ObjDetect objMessage;
293 
294  // Read the existing tracker message.
295  std::fstream input(inputFilePath, std::ios::in | std::ios::binary);
296  if (!objMessage.ParseFromIstream(&input)) {
297  std::cerr << "Failed to parse protobuf message." << std::endl;
298  return false;
299  }
300 
301  // Make sure classNames, detectionsData and trackedObjects are empty
302  classNames.clear();
303  detectionsData.clear();
304  trackedObjects.clear();
305 
306  // Seed to generate same random numbers
307  std::srand(1);
308  // Get all classes names and assign a color to them
309  for(int i = 0; i < objMessage.classnames_size(); i++)
310  {
311  classNames.push_back(objMessage.classnames(i));
312  classesColor.push_back(cv::Scalar(std::rand()%205 + 50, std::rand()%205 + 50, std::rand()%205 + 50));
313  }
314 
315  // Iterate over all frames of the saved message
316  for (size_t i = 0; i < objMessage.frame_size(); i++)
317  {
318  // Create protobuf message reader
319  const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);
320 
321  // Get frame Id
322  size_t id = pbFrameData.id();
323 
324  // Load bounding box data
325  const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();
326 
327  // Construct data vectors related to detections in the current frame
328  std::vector<int> classIds;
329  std::vector<float> confidences;
330  std::vector<cv::Rect_<float>> boxes;
331  std::vector<int> objectIds;
332 
333  // Iterate through the detected objects
334  for(int i = 0; i < pbFrameData.bounding_box_size(); i++)
335  {
336  // Get bounding box coordinates
337  float x = pBox.Get(i).x();
338  float y = pBox.Get(i).y();
339  float w = pBox.Get(i).w();
340  float h = pBox.Get(i).h();
341  // Get class Id (which will be assign to a class name)
342  int classId = pBox.Get(i).classid();
343  // Get prediction confidence
344  float confidence = pBox.Get(i).confidence();
345 
346  // Get the object Id
347  int objectId = pBox.Get(i).objectid();
348 
349  // Search for the object id on trackedObjects map
350  auto trackedObject = trackedObjects.find(objectId);
351  // Check if object already exists on the map
352  if (trackedObject != trackedObjects.end())
353  {
354  // Add a new BBox to it
355  trackedObject->second->AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
356  }
357  else
358  {
359  // There is no tracked object with that id, so insert a new one
360  TrackedObjectBBox trackedObj((int)classesColor[classId](0), (int)classesColor[classId](1), (int)classesColor[classId](2), (int)0);
361  trackedObj.AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
362 
363  std::shared_ptr<TrackedObjectBBox> trackedObjPtr = std::make_shared<TrackedObjectBBox>(trackedObj);
364  ClipBase* parentClip = this->ParentClip();
365  trackedObjPtr->ParentClip(parentClip);
366 
367  // Create a temp ID. This ID is necessary to initialize the object_id Json list
368  // this Id will be replaced by the one created in the UI
369  trackedObjPtr->Id(std::to_string(objectId));
370  trackedObjects.insert({objectId, trackedObjPtr});
371  }
372 
373  // Create OpenCV rectangle with the bouding box info
374  cv::Rect_<float> box(x, y, w, h);
375 
376  // Push back data into vectors
377  boxes.push_back(box);
378  classIds.push_back(classId);
379  confidences.push_back(confidence);
380  objectIds.push_back(objectId);
381  }
382 
383  // Assign data to object detector map
384  detectionsData[id] = DetectionData(classIds, confidences, boxes, id, objectIds);
385  }
386 
387  // Delete all global objects allocated by libprotobuf.
388  google::protobuf::ShutdownProtobufLibrary();
389 
390  return true;
391 }
392 
393 // Get the indexes and IDs of all visible objects in the given frame
394 std::string ObjectDetection::GetVisibleObjects(int64_t frame_number) const{
395 
396  // Initialize the JSON objects
397  Json::Value root;
398  root["visible_objects_index"] = Json::Value(Json::arrayValue);
399  root["visible_objects_id"] = Json::Value(Json::arrayValue);
400 
401  // Check if track data exists for the requested frame
402  if (detectionsData.find(frame_number) == detectionsData.end()){
403  return root.toStyledString();
404  }
405  DetectionData detections = detectionsData.at(frame_number);
406 
407  // Iterate through the tracked objects
408  for(int i = 0; i<detections.boxes.size(); i++){
409  // Does not show boxes with confidence below the threshold
410  if(detections.confidences.at(i) < confidence_threshold){
411  continue;
412  }
413 
414  // Just display selected classes
415  if( display_classes.size() > 0 &&
416  std::find(display_classes.begin(), display_classes.end(), classNames[detections.classIds.at(i)]) == display_classes.end()){
417  continue;
418  }
419 
420  int objectId = detections.objectIds.at(i);
421  // Search for the object in the trackedObjects map
422  auto trackedObject = trackedObjects.find(objectId);
423 
424  // Get the tracked object JSON properties for this frame
425  Json::Value trackedObjectJSON = trackedObject->second->PropertiesJSON(frame_number);
426 
427  if (trackedObjectJSON["visible"]["value"].asBool() &&
428  trackedObject->second->ExactlyContains(frame_number)){
429  // Save the object's index and ID if it's visible in this frame
430  root["visible_objects_index"].append(trackedObject->first);
431  root["visible_objects_id"].append(trackedObject->second->Id());
432  }
433  }
434 
435  return root.toStyledString();
436 }
437 
438 // Generate JSON string of this object
439 std::string ObjectDetection::Json() const {
440 
441  // Return formatted string
442  return JsonValue().toStyledString();
443 }
444 
445 // Generate Json::Value for this object
446 Json::Value ObjectDetection::JsonValue() const {
447 
448  // Create root json object
449  Json::Value root = EffectBase::JsonValue(); // get parent properties
450  root["type"] = info.class_name;
451  root["protobuf_data_path"] = protobuf_data_path;
452  root["selected_object_index"] = selectedObjectIndex;
453  root["confidence_threshold"] = confidence_threshold;
454  root["display_box_text"] = display_box_text.JsonValue();
455 
456  // Add tracked object's IDs to root
457  Json::Value objects;
458  for (auto const& trackedObject : trackedObjects){
459  Json::Value trackedObjectJSON = trackedObject.second->JsonValue();
460  // add object json
461  objects[trackedObject.second->Id()] = trackedObjectJSON;
462  }
463  root["objects"] = objects;
464 
465  // return JsonValue
466  return root;
467 }
468 
469 // Load JSON string into this object
470 void ObjectDetection::SetJson(const std::string value) {
471 
472  // Parse JSON string into JSON objects
473  try
474  {
475  const Json::Value root = openshot::stringToJson(value);
476  // Set all values that match
477  SetJsonValue(root);
478  }
479  catch (const std::exception& e)
480  {
481  // Error parsing JSON (or missing keys)
482  throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
483  }
484 }
485 
486 // Load Json::Value into this object
487 void ObjectDetection::SetJsonValue(const Json::Value root) {
488  // Set parent data
489  EffectBase::SetJsonValue(root);
490 
491  // Set data from Json (if key is found)
492  if (!root["protobuf_data_path"].isNull() && protobuf_data_path.size() <= 1){
493  protobuf_data_path = root["protobuf_data_path"].asString();
494 
495  if(!LoadObjDetectdData(protobuf_data_path)){
496  throw InvalidFile("Invalid protobuf data path", "");
497  protobuf_data_path = "";
498  }
499  }
500 
501  // Set the selected object index
502  if (!root["selected_object_index"].isNull())
503  selectedObjectIndex = root["selected_object_index"].asInt();
504 
505  if (!root["confidence_threshold"].isNull())
506  confidence_threshold = root["confidence_threshold"].asFloat();
507 
508  if (!root["display_box_text"].isNull())
509  display_box_text.SetJsonValue(root["display_box_text"]);
510 
511  if (!root["class_filter"].isNull()){
512  class_filter = root["class_filter"].asString();
513  std::stringstream ss(class_filter);
514  display_classes.clear();
515  while( ss.good() )
516  {
517  // Parse comma separated string
518  std::string substr;
519  std::getline( ss, substr, ',' );
520  display_classes.push_back( substr );
521  }
522  }
523 
524  if (!root["objects"].isNull()){
525  for (auto const& trackedObject : trackedObjects){
526  std::string obj_id = std::to_string(trackedObject.first);
527  if(!root["objects"][obj_id].isNull()){
528  trackedObject.second->SetJsonValue(root["objects"][obj_id]);
529  }
530  }
531  }
532 
533  // Set the tracked object's ids
534  if (!root["objects_id"].isNull()){
535  for (auto const& trackedObject : trackedObjects){
536  Json::Value trackedObjectJSON;
537  trackedObjectJSON["box_id"] = root["objects_id"][trackedObject.first].asString();
538  trackedObject.second->SetJsonValue(trackedObjectJSON);
539  }
540  }
541 }
542 
543 // Get all properties for a specific frame
544 std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const {
545 
546  // Generate JSON properties list
547  Json::Value root;
548 
549  Json::Value objects;
550  if(trackedObjects.count(selectedObjectIndex) != 0){
551  auto selectedObject = trackedObjects.at(selectedObjectIndex);
552  if (selectedObject){
553  Json::Value trackedObjectJSON = selectedObject->PropertiesJSON(requested_frame);
554  // add object json
555  objects[selectedObject->Id()] = trackedObjectJSON;
556  }
557  }
558  root["objects"] = objects;
559 
560  root["selected_object_index"] = add_property_json("Selected Object", selectedObjectIndex, "int", "", NULL, 0, 200, false, requested_frame);
561  root["id"] = add_property_json("ID", 0.0, "string", Id(), NULL, -1, -1, true, requested_frame);
562  root["position"] = add_property_json("Position", Position(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame);
563  root["layer"] = add_property_json("Track", Layer(), "int", "", NULL, 0, 20, false, requested_frame);
564  root["start"] = add_property_json("Start", Start(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame);
565  root["end"] = add_property_json("End", End(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame);
566  root["duration"] = add_property_json("Duration", Duration(), "float", "", NULL, 0, 1000 * 60 * 30, true, requested_frame);
567  root["confidence_threshold"] = add_property_json("Confidence Theshold", confidence_threshold, "float", "", NULL, 0, 1, false, requested_frame);
568  root["class_filter"] = add_property_json("Class Filter", 0.0, "string", class_filter, NULL, -1, -1, false, requested_frame);
569 
570  root["display_box_text"] = add_property_json("Draw Box Text", display_box_text.GetValue(requested_frame), "int", "", &display_box_text, 0, 1.0, false, requested_frame);
571  root["display_box_text"]["choices"].append(add_property_choice_json("Off", 1, display_box_text.GetValue(requested_frame)));
572  root["display_box_text"]["choices"].append(add_property_choice_json("On", 0, display_box_text.GetValue(requested_frame)));
573 
574  // Return formatted string
575  return root.toStyledString();
576 }
std::vector< cv::Rect_< float > > boxes
Header file for Tracker effect class.
float cy
y-coordinate of the bounding box center
This class represents a single frame of video (i.e. image & audio data)
Definition: Frame.h:90
float height
bounding box height
std::vector< int > classIds
Header file for Object Detection effect class.
openshot::Clip * GetClip(const std::string &id)
Look up a single clip by ID.
Definition: Timeline.cpp:408
const Json::Value stringToJson(const std::string value)
Definition: Json.cpp:16
Header file for Timeline class.
std::vector< int > objectIds
Header file for all Exception classes.
This class represents a clip (used to arrange readers on the timeline)
Definition: Clip.h:90
Exception for files that can not be found or opened.
Definition: Exceptions.h:187
float width
bounding box width
This abstract class is the base class, used by all clips in libopenshot.
Definition: ClipBase.h:33
This struct holds the information of a bounding-box.
This namespace is the default namespace for all code in the openshot library.
Definition: Compressor.h:28
float cx
x-coordinate of the bounding box center
std::vector< float > confidences
Exception for invalid JSON.
Definition: Exceptions.h:217
void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) override
Add a BBox to the BoxVec map.
std::shared_ptr< openshot::Frame > GetFrame(int64_t frame_number) override
Get an openshot::Frame object for a specific frame number of this clip. The image size and number of ...
Definition: Clip.cpp:378
This class contains the properties of a tracked object and functions to manipulate it...
This class represents a timeline.
Definition: Timeline.h:150