perf: Add more moves and optimize (#3845)

* Make slice constructor consistent

* Add more missing std::move for ref steals

* Add missing perfect forwarding for arg_v ctor

* Add missing move in arg_v constructor

* Revert "Add missing move in arg_v constructor"

This reverts commit 126fc7c524ea7a51b54720defd75de3470d69557.

* Add another missing move in cast.h

* Optimize object move ctor

* Don't do useless move

* Make move ctor same as nanobind (nb)

* Make obj move ctor same as nanobind (nb)

* Revert changes which break MSVC
diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
index d45b49c..07a56e7 100644
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -1243,8 +1243,8 @@
 private:
     template <typename T>
     arg_v(arg &&base, T &&x, const char *descr = nullptr)
-        : arg(base), value(reinterpret_steal<object>(
-                         detail::make_caster<T>::cast(x, return_value_policy::automatic, {}))),
+        : arg(base), value(reinterpret_steal<object>(detail::make_caster<T>::cast(
+                         std::forward<T>(x), return_value_policy::automatic, {}))),
           descr(descr)
 #if !defined(NDEBUG)
           ,
@@ -1491,7 +1491,7 @@
                                                         type_id<T>());
 #endif
         }
-        args_list.append(o);
+        args_list.append(std::move(o));
     }
 
     void process(list &args_list, detail::args_proxy ap) {
diff --git a/include/pybind11/numpy.h b/include/pybind11/numpy.h
index 7624c9f..d45fe42 100644
--- a/include/pybind11/numpy.h
+++ b/include/pybind11/numpy.h
@@ -640,9 +640,9 @@
 
         list names, formats, offsets;
         for (auto &descr : field_descriptors) {
-            names.append(descr.name);
-            formats.append(descr.format);
-            offsets.append(descr.offset);
+            names.append(std::move(descr.name));
+            formats.append(std::move(descr.format));
+            offsets.append(std::move(descr.offset));
         }
         return dtype(std::move(names), std::move(formats), std::move(offsets), itemsize);
     }
diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h
index 18cd715..ba0fda0 100644
--- a/include/pybind11/pytypes.h
+++ b/include/pybind11/pytypes.h
@@ -268,10 +268,7 @@
     /// Copy constructor; always increases the reference count
     object(const object &o) : handle(o) { inc_ref(); }
     /// Move constructor; steals the object from ``other`` and preserves its reference count
-    object(object &&other) noexcept {
-        m_ptr = other.m_ptr;
-        other.m_ptr = nullptr;
-    }
+    object(object &&other) noexcept : handle(other) { other.m_ptr = nullptr; }
     /// Destructor; automatically calls `handle::dec_ref()`
     ~object() { dec_ref(); }
 
@@ -1519,8 +1516,8 @@
 class slice : public object {
 public:
     PYBIND11_OBJECT_DEFAULT(slice, object, PySlice_Check)
-    slice(handle start, handle stop, handle step) {
-        m_ptr = PySlice_New(start.ptr(), stop.ptr(), step.ptr());
+    slice(handle start, handle stop, handle step)
+        : object(PySlice_New(start.ptr(), stop.ptr(), step.ptr()), stolen_t{}) {
         if (!m_ptr) {
             pybind11_fail("Could not allocate slice object!");
         }
diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h
index 3d1ca7a..51b57a9 100644
--- a/include/pybind11/stl.h
+++ b/include/pybind11/stl.h
@@ -79,7 +79,7 @@
         for (auto &&value : src) {
             auto value_ = reinterpret_steal<object>(
                 key_conv::cast(forward_like<T>(value), policy, parent));
-            if (!value_ || !s.add(value_)) {
+            if (!value_ || !s.add(std::move(value_))) {
                 return handle();
             }
         }