Find the smallest positive number missing from an unsorted array











up vote
4
down vote

favorite












this is the original question



https://www.geeksforgeeks.org/find-the-smallest-positive-number-missing-from-an-unsorted-array/



You are given an unsorted array with both positive and negative elements. You have to find the smallest positive number missing from the array in O(n) time using constant extra space. You can modify the original array.




Input: {2, 3, 7, 6, 8, -1, -10, 15}   Output: 1



Input: { 2, 3, -7, 6, 8, 1, -10, 15 }  Output: 4



Input: {1, 1, 0, -1, -2}                Output: 2




using System;
using System.Collections.Generic;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace ArrayQuestions
{

[TestClass]
public class FindTheSmallestNonnegativeNumberNoyInArray
{
[TestMethod]
public void FindTheSmallestNonnegativeNumberNoyInArrayTest()
{
int array = {2, 3, 7, 6, 8, -1, -10, 15};
int result = FindSmallestPositiveHash(array);
int expected = 1;
Assert.AreEqual(expected, result);
}


[TestMethod]
public void FindTheSmallestNonnegativeNumberNoyInArrayTest2()
{
int array = { 2, 3, -7, 6, 8, 1, -10, 15 };
int result = FindSmallestPositiveHash(array);
int expected = 4;
Assert.AreEqual(expected, result);
}
private int FindSmallestPositiveHash(int array)
{
HashSet<int> set = new HashSet<int>();
int maxPositive = 0;
for (int i = 0; i < array.Length; i++)
{
if (!set.Contains(array[i]))
{
maxPositive = Math.Max(maxPositive, array[i]);
set.Add(array[i]);
}
}
for (int i = 1; i < maxPositive; i++)
{
if (!set.Contains(i))
{
return i;
}
}
return maxPositive + 1;
}
}
}


I know there is a better solution, I tried to implement the hash based solution please comment or runtime and performance.
Thanks










share|improve this question




























    up vote
    4
    down vote

    favorite












    this is the original question



    https://www.geeksforgeeks.org/find-the-smallest-positive-number-missing-from-an-unsorted-array/



    You are given an unsorted array with both positive and negative elements. You have to find the smallest positive number missing from the array in O(n) time using constant extra space. You can modify the original array.




    Input: {2, 3, 7, 6, 8, -1, -10, 15}   Output: 1



    Input: { 2, 3, -7, 6, 8, 1, -10, 15 }  Output: 4



    Input: {1, 1, 0, -1, -2}                Output: 2




    using System;
    using System.Collections.Generic;
    using Microsoft.VisualStudio.TestTools.UnitTesting;

    namespace ArrayQuestions
    {

    [TestClass]
    public class FindTheSmallestNonnegativeNumberNoyInArray
    {
    [TestMethod]
    public void FindTheSmallestNonnegativeNumberNoyInArrayTest()
    {
    int array = {2, 3, 7, 6, 8, -1, -10, 15};
    int result = FindSmallestPositiveHash(array);
    int expected = 1;
    Assert.AreEqual(expected, result);
    }


    [TestMethod]
    public void FindTheSmallestNonnegativeNumberNoyInArrayTest2()
    {
    int array = { 2, 3, -7, 6, 8, 1, -10, 15 };
    int result = FindSmallestPositiveHash(array);
    int expected = 4;
    Assert.AreEqual(expected, result);
    }
    private int FindSmallestPositiveHash(int array)
    {
    HashSet<int> set = new HashSet<int>();
    int maxPositive = 0;
    for (int i = 0; i < array.Length; i++)
    {
    if (!set.Contains(array[i]))
    {
    maxPositive = Math.Max(maxPositive, array[i]);
    set.Add(array[i]);
    }
    }
    for (int i = 1; i < maxPositive; i++)
    {
    if (!set.Contains(i))
    {
    return i;
    }
    }
    return maxPositive + 1;
    }
    }
    }


    I know there is a better solution, I tried to implement the hash based solution please comment or runtime and performance.
    Thanks










    share|improve this question


























      up vote
      4
      down vote

      favorite









      up vote
      4
      down vote

      favorite











      this is the original question



      https://www.geeksforgeeks.org/find-the-smallest-positive-number-missing-from-an-unsorted-array/



      You are given an unsorted array with both positive and negative elements. You have to find the smallest positive number missing from the array in O(n) time using constant extra space. You can modify the original array.




      Input: {2, 3, 7, 6, 8, -1, -10, 15}   Output: 1



      Input: { 2, 3, -7, 6, 8, 1, -10, 15 }  Output: 4



      Input: {1, 1, 0, -1, -2}                Output: 2




      using System;
      using System.Collections.Generic;
      using Microsoft.VisualStudio.TestTools.UnitTesting;

      namespace ArrayQuestions
      {

      [TestClass]
      public class FindTheSmallestNonnegativeNumberNoyInArray
      {
      [TestMethod]
      public void FindTheSmallestNonnegativeNumberNoyInArrayTest()
      {
      int array = {2, 3, 7, 6, 8, -1, -10, 15};
      int result = FindSmallestPositiveHash(array);
      int expected = 1;
      Assert.AreEqual(expected, result);
      }


      [TestMethod]
      public void FindTheSmallestNonnegativeNumberNoyInArrayTest2()
      {
      int array = { 2, 3, -7, 6, 8, 1, -10, 15 };
      int result = FindSmallestPositiveHash(array);
      int expected = 4;
      Assert.AreEqual(expected, result);
      }
      private int FindSmallestPositiveHash(int array)
      {
      HashSet<int> set = new HashSet<int>();
      int maxPositive = 0;
      for (int i = 0; i < array.Length; i++)
      {
      if (!set.Contains(array[i]))
      {
      maxPositive = Math.Max(maxPositive, array[i]);
      set.Add(array[i]);
      }
      }
      for (int i = 1; i < maxPositive; i++)
      {
      if (!set.Contains(i))
      {
      return i;
      }
      }
      return maxPositive + 1;
      }
      }
      }


      I know there is a better solution, I tried to implement the hash based solution please comment or runtime and performance.
      Thanks










      share|improve this question















      this is the original question



      https://www.geeksforgeeks.org/find-the-smallest-positive-number-missing-from-an-unsorted-array/



      You are given an unsorted array with both positive and negative elements. You have to find the smallest positive number missing from the array in O(n) time using constant extra space. You can modify the original array.




      Input: {2, 3, 7, 6, 8, -1, -10, 15}   Output: 1



      Input: { 2, 3, -7, 6, 8, 1, -10, 15 }  Output: 4



      Input: {1, 1, 0, -1, -2}                Output: 2




      using System;
      using System.Collections.Generic;
      using Microsoft.VisualStudio.TestTools.UnitTesting;

      namespace ArrayQuestions
      {

      [TestClass]
      public class FindTheSmallestNonnegativeNumberNoyInArray
      {
      [TestMethod]
      public void FindTheSmallestNonnegativeNumberNoyInArrayTest()
      {
      int array = {2, 3, 7, 6, 8, -1, -10, 15};
      int result = FindSmallestPositiveHash(array);
      int expected = 1;
      Assert.AreEqual(expected, result);
      }


      [TestMethod]
      public void FindTheSmallestNonnegativeNumberNoyInArrayTest2()
      {
      int array = { 2, 3, -7, 6, 8, 1, -10, 15 };
      int result = FindSmallestPositiveHash(array);
      int expected = 4;
      Assert.AreEqual(expected, result);
      }
      private int FindSmallestPositiveHash(int array)
      {
      HashSet<int> set = new HashSet<int>();
      int maxPositive = 0;
      for (int i = 0; i < array.Length; i++)
      {
      if (!set.Contains(array[i]))
      {
      maxPositive = Math.Max(maxPositive, array[i]);
      set.Add(array[i]);
      }
      }
      for (int i = 1; i < maxPositive; i++)
      {
      if (!set.Contains(i))
      {
      return i;
      }
      }
      return maxPositive + 1;
      }
      }
      }


      I know there is a better solution, I tried to implement the hash based solution please comment or runtime and performance.
      Thanks







      c# programming-challenge interview-questions






      share|improve this question















      share|improve this question













      share|improve this question




      share|improve this question








      edited 11 hours ago









      t3chb0t

      33.6k744108




      33.6k744108










      asked 18 hours ago









      Gilad

      1,24731526




      1,24731526






















          1 Answer
          1






          active

          oldest

          votes

















          up vote
          6
          down vote













          There are a few things you can do to simplify your code:




          • There's no need to keep track of the maximum positive integer if you remove the condition from the second for loop: for (int i = 1; ; i++).

          • That means you don't need to check whether the hash already contains the given number: just add it right away.

          • If you don't mind a small performance hit you can use Linq's ToHashSet instead: var set = array.ToHashSet(); (Edit: or new HashSet<int>(array); if you're not using .NET Framework 4.7.2).


          • Edit: Alternately, if you expect a lot of negative values in your inputs, not adding those to the set can result in a fair speed improvement - I'm seeing a 30% improvement for inputs with 50% negative values.


          The first three changes don't really speed things up, and the last one is fairly input-specific. For a significant and more reliable performance improvement, replace the hash with an array of booleans:



          var presence = new bool[array.Length + 1];
          foreach (var value in array)
          {
          if (value > 0 && value < presence.Length)
          {
          presence[value] = true;
          }
          }

          for (int i = 1; i < presence.Length; i++)
          {
          if (!presence[i])
          {
          return i;
          }
          }
          return presence.Length;


          The maximum possible return value is array.Length + 1, so values larger than that can safely be ignored (just like negative values) because they won't affect the result. The input [1, 2, 3] produces 4. Replacing any of these numbers with a larger one will create a gap: [1, 99, 3] produces 2. Whether or not 99 is present is irrelevant: what matters is that 2 is not present.



          For large inputs this is about twice as fast, for small inputs it can be more than 10x faster. Hash set lookups are fast, but there is some overhead involved, so they won't beat array lookups.





          Edit: The proposed solution on their website is (re)using the input array as a lookup table. It's moving all positive numbers to the front and then marking the presence of numbers by making the value at that index negative - somewhat similar to the above approach. I guess the first step allows them to simplify the rest of the algorithm, but it does make things slower. That first step can be removed with careful swapping and negating, making it run faster, but it's still a bit slower as the array of boolean approach - and more difficult to understand.






          share|improve this answer



















          • 1




            .NET Framework 4.7.2 - that's why I've never seen ToHashSet before :-o I was wondering what you are talking about ;-)
            – t3chb0t
            11 hours ago













          Your Answer





          StackExchange.ifUsing("editor", function () {
          return StackExchange.using("mathjaxEditing", function () {
          StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
          StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
          });
          });
          }, "mathjax-editing");

          StackExchange.ifUsing("editor", function () {
          StackExchange.using("externalEditor", function () {
          StackExchange.using("snippets", function () {
          StackExchange.snippets.init();
          });
          });
          }, "code-snippets");

          StackExchange.ready(function() {
          var channelOptions = {
          tags: "".split(" "),
          id: "196"
          };
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function() {
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled) {
          StackExchange.using("snippets", function() {
          createEditor();
          });
          }
          else {
          createEditor();
          }
          });

          function createEditor() {
          StackExchange.prepareEditor({
          heartbeatType: 'answer',
          convertImagesToLinks: false,
          noModals: true,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: null,
          bindNavPrevention: true,
          postfix: "",
          imageUploader: {
          brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
          contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
          allowUrls: true
          },
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          });


          }
          });














           

          draft saved


          draft discarded


















          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f208218%2ffind-the-smallest-positive-number-missing-from-an-unsorted-array%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown

























          1 Answer
          1






          active

          oldest

          votes








          1 Answer
          1






          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes








          up vote
          6
          down vote













          There are a few things you can do to simplify your code:




          • There's no need to keep track of the maximum positive integer if you remove the condition from the second for loop: for (int i = 1; ; i++).

          • That means you don't need to check whether the hash already contains the given number: just add it right away.

          • If you don't mind a small performance hit you can use Linq's ToHashSet instead: var set = array.ToHashSet(); (Edit: or new HashSet<int>(array); if you're not using .NET Framework 4.7.2).


          • Edit: Alternately, if you expect a lot of negative values in your inputs, not adding those to the set can result in a fair speed improvement - I'm seeing a 30% improvement for inputs with 50% negative values.


          The first three changes don't really speed things up, and the last one is fairly input-specific. For a significant and more reliable performance improvement, replace the hash with an array of booleans:



          var presence = new bool[array.Length + 1];
          foreach (var value in array)
          {
          if (value > 0 && value < presence.Length)
          {
          presence[value] = true;
          }
          }

          for (int i = 1; i < presence.Length; i++)
          {
          if (!presence[i])
          {
          return i;
          }
          }
          return presence.Length;


          The maximum possible return value is array.Length + 1, so values larger than that can safely be ignored (just like negative values) because they won't affect the result. The input [1, 2, 3] produces 4. Replacing any of these numbers with a larger one will create a gap: [1, 99, 3] produces 2. Whether or not 99 is present is irrelevant: what matters is that 2 is not present.



          For large inputs this is about twice as fast, for small inputs it can be more than 10x faster. Hash set lookups are fast, but there is some overhead involved, so they won't beat array lookups.





          Edit: The proposed solution on their website is (re)using the input array as a lookup table. It's moving all positive numbers to the front and then marking the presence of numbers by making the value at that index negative - somewhat similar to the above approach. I guess the first step allows them to simplify the rest of the algorithm, but it does make things slower. That first step can be removed with careful swapping and negating, making it run faster, but it's still a bit slower as the array of boolean approach - and more difficult to understand.






          share|improve this answer



















          • 1




            .NET Framework 4.7.2 - that's why I've never seen ToHashSet before :-o I was wondering what you are talking about ;-)
            – t3chb0t
            11 hours ago

















          up vote
          6
          down vote













          There are a few things you can do to simplify your code:




          • There's no need to keep track of the maximum positive integer if you remove the condition from the second for loop: for (int i = 1; ; i++).

          • That means you don't need to check whether the hash already contains the given number: just add it right away.

          • If you don't mind a small performance hit you can use Linq's ToHashSet instead: var set = array.ToHashSet(); (Edit: or new HashSet<int>(array); if you're not using .NET Framework 4.7.2).


          • Edit: Alternately, if you expect a lot of negative values in your inputs, not adding those to the set can result in a fair speed improvement - I'm seeing a 30% improvement for inputs with 50% negative values.


          The first three changes don't really speed things up, and the last one is fairly input-specific. For a significant and more reliable performance improvement, replace the hash with an array of booleans:



          var presence = new bool[array.Length + 1];
          foreach (var value in array)
          {
          if (value > 0 && value < presence.Length)
          {
          presence[value] = true;
          }
          }

          for (int i = 1; i < presence.Length; i++)
          {
          if (!presence[i])
          {
          return i;
          }
          }
          return presence.Length;


          The maximum possible return value is array.Length + 1, so values larger than that can safely be ignored (just like negative values) because they won't affect the result. The input [1, 2, 3] produces 4. Replacing any of these numbers with a larger one will create a gap: [1, 99, 3] produces 2. Whether or not 99 is present is irrelevant: what matters is that 2 is not present.



          For large inputs this is about twice as fast, for small inputs it can be more than 10x faster. Hash set lookups are fast, but there is some overhead involved, so they won't beat array lookups.





          Edit: The proposed solution on their website is (re)using the input array as a lookup table. It's moving all positive numbers to the front and then marking the presence of numbers by making the value at that index negative - somewhat similar to the above approach. I guess the first step allows them to simplify the rest of the algorithm, but it does make things slower. That first step can be removed with careful swapping and negating, making it run faster, but it's still a bit slower as the array of boolean approach - and more difficult to understand.






          share|improve this answer



















          • 1




            .NET Framework 4.7.2 - that's why I've never seen ToHashSet before :-o I was wondering what you are talking about ;-)
            – t3chb0t
            11 hours ago















          up vote
          6
          down vote










          up vote
          6
          down vote









          There are a few things you can do to simplify your code:




          • There's no need to keep track of the maximum positive integer if you remove the condition from the second for loop: for (int i = 1; ; i++).

          • That means you don't need to check whether the hash already contains the given number: just add it right away.

          • If you don't mind a small performance hit you can use Linq's ToHashSet instead: var set = array.ToHashSet(); (Edit: or new HashSet<int>(array); if you're not using .NET Framework 4.7.2).


          • Edit: Alternately, if you expect a lot of negative values in your inputs, not adding those to the set can result in a fair speed improvement - I'm seeing a 30% improvement for inputs with 50% negative values.


          The first three changes don't really speed things up, and the last one is fairly input-specific. For a significant and more reliable performance improvement, replace the hash with an array of booleans:



          var presence = new bool[array.Length + 1];
          foreach (var value in array)
          {
          if (value > 0 && value < presence.Length)
          {
          presence[value] = true;
          }
          }

          for (int i = 1; i < presence.Length; i++)
          {
          if (!presence[i])
          {
          return i;
          }
          }
          return presence.Length;


          The maximum possible return value is array.Length + 1, so values larger than that can safely be ignored (just like negative values) because they won't affect the result. The input [1, 2, 3] produces 4. Replacing any of these numbers with a larger one will create a gap: [1, 99, 3] produces 2. Whether or not 99 is present is irrelevant: what matters is that 2 is not present.



          For large inputs this is about twice as fast, for small inputs it can be more than 10x faster. Hash set lookups are fast, but there is some overhead involved, so they won't beat array lookups.





          Edit: The proposed solution on their website is (re)using the input array as a lookup table. It's moving all positive numbers to the front and then marking the presence of numbers by making the value at that index negative - somewhat similar to the above approach. I guess the first step allows them to simplify the rest of the algorithm, but it does make things slower. That first step can be removed with careful swapping and negating, making it run faster, but it's still a bit slower as the array of boolean approach - and more difficult to understand.






          share|improve this answer














          There are a few things you can do to simplify your code:




          • There's no need to keep track of the maximum positive integer if you remove the condition from the second for loop: for (int i = 1; ; i++).

          • That means you don't need to check whether the hash already contains the given number: just add it right away.

          • If you don't mind a small performance hit you can use Linq's ToHashSet instead: var set = array.ToHashSet(); (Edit: or new HashSet<int>(array); if you're not using .NET Framework 4.7.2).


          • Edit: Alternately, if you expect a lot of negative values in your inputs, not adding those to the set can result in a fair speed improvement - I'm seeing a 30% improvement for inputs with 50% negative values.


          The first three changes don't really speed things up, and the last one is fairly input-specific. For a significant and more reliable performance improvement, replace the hash with an array of booleans:



          var presence = new bool[array.Length + 1];
          foreach (var value in array)
          {
          if (value > 0 && value < presence.Length)
          {
          presence[value] = true;
          }
          }

          for (int i = 1; i < presence.Length; i++)
          {
          if (!presence[i])
          {
          return i;
          }
          }
          return presence.Length;


          The maximum possible return value is array.Length + 1, so values larger than that can safely be ignored (just like negative values) because they won't affect the result. The input [1, 2, 3] produces 4. Replacing any of these numbers with a larger one will create a gap: [1, 99, 3] produces 2. Whether or not 99 is present is irrelevant: what matters is that 2 is not present.



          For large inputs this is about twice as fast, for small inputs it can be more than 10x faster. Hash set lookups are fast, but there is some overhead involved, so they won't beat array lookups.





          Edit: The proposed solution on their website is (re)using the input array as a lookup table. It's moving all positive numbers to the front and then marking the presence of numbers by making the value at that index negative - somewhat similar to the above approach. I guess the first step allows them to simplify the rest of the algorithm, but it does make things slower. That first step can be removed with careful swapping and negating, making it run faster, but it's still a bit slower as the array of boolean approach - and more difficult to understand.







          share|improve this answer














          share|improve this answer



          share|improve this answer








          edited 9 hours ago

























          answered 16 hours ago









          Pieter Witvoet

          4,926723




          4,926723








          • 1




            .NET Framework 4.7.2 - that's why I've never seen ToHashSet before :-o I was wondering what you are talking about ;-)
            – t3chb0t
            11 hours ago
















          • 1




            .NET Framework 4.7.2 - that's why I've never seen ToHashSet before :-o I was wondering what you are talking about ;-)
            – t3chb0t
            11 hours ago










          1




          1




          .NET Framework 4.7.2 - that's why I've never seen ToHashSet before :-o I was wondering what you are talking about ;-)
          – t3chb0t
          11 hours ago






          .NET Framework 4.7.2 - that's why I've never seen ToHashSet before :-o I was wondering what you are talking about ;-)
          – t3chb0t
          11 hours ago




















           

          draft saved


          draft discarded



















































           


          draft saved


          draft discarded














          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f208218%2ffind-the-smallest-positive-number-missing-from-an-unsorted-array%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown





















































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown

































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown







          Popular posts from this blog

          flock() on closed filehandle LOCK_FILE at /usr/bin/apt-mirror

          Mangá

          Eduardo VII do Reino Unido