0029399: Optimize reading of floating point values from text strings

Function Strtod() is reimplemented using open source (MIT-style license) code by David M. Gay instead of strtod() provided by standard run-time library. This improves its performance by 3-10 times. Functions Atof(), Strtod(), Printf(), Sprintf(), Fprintf() are declared as extern "C" to be usable from C programs. Strtod() is used in Interface_FileReaderData::Fastof() and in RWStl_Reader to accelerate their work. DRAW command QATestAtof and test perf fclasses strtod are added to check correctness and performance of Strtod(). Test perf draw restore is added to monitor performance of reading BREP files. Minor off-topic corrections: - method Standard_GUID::Assign (const Standard_UUID&) is implemented (was empty); - Precision.hxx is included in BRepMesh_Vertex.hxx that uses it.
2025-08-14 13:30:48 +03:00 · 2017-12-24 09:44:04 +03:00
parent 0edbf10564
commit 07bbde451a
16 changed files with 481 additions and 362 deletions
--- a/src/QANCollection/QANCollection_Test.cxx
+++ b/src/QANCollection/QANCollection_Test.cxx
@@ -22,6 +22,7 @@
 #include <gp_Pnt.hxx>

 #include <Precision.hxx>
+#include <Standard_Overflow.hxx>

 #include <NCollection_Vector.hxx>

@@ -863,6 +864,251 @@ static Standard_Integer QANColTestArrayMove (Draw_Interpretor& di, Standard_Inte
  return 0;
 }

+#include <math_BullardGenerator.hxx>
+#include <OSD_Timer.hxx>
+
+static inline double test_atof (const char* theStr) 
+{ 
+  return atof (theStr);
+}
+
+static inline double test_Atof (const char* theStr) 
+{ 
+  return Atof (theStr);
+}
+
+static inline double test_strtod (const char* theStr) 
+{ 
+  char *end;
+  return strtod (theStr, &end);
+}
+
+static inline double test_Strtod (const char* theStr) 
+{ 
+  char *end;
+  return Strtod (theStr, &end);
+}
+
+static inline double test_sscanf (const char* theStr) 
+{ 
+  double val = 0.;
+  sscanf (theStr, "%lf", &val);
+  return val; 
+}
+
+static int check_atof (const NCollection_Array2<char>& theStrings, const char* theFormat,
+                       double (*test_func)(const char*), Draw_Interpretor& di)
+{
+  int aNbErr = 0;
+  for (int i = 0; i < theStrings.UpperRow(); i++) 
+  {
+    const char *aStr= &theStrings(i,0);
+    char buff[256];
+    double aVal = test_func (aStr);
+    Sprintf (buff, theFormat, aVal);
+    if (strcasecmp (buff, &theStrings(i,0)))
+    {
+#if defined(_MSC_VER) && _MSC_VER < 1900
+      // MSVC < 2015 prints nan and inf as 1.#NAN or 1.INF, and noes not recognize nan or inf on read
+      if (strstr (aStr, "1.#") || strstr (aStr, "nan") || strstr (aStr, "inf") || 
+          strstr (aStr, "NAN") || strstr (aStr, "INF"))
+        continue;
+#endif
+      if (aNbErr < 5)
+      {
+        di << "Deviation parsing " << aStr << " and print back: " << buff << "\n";
+      }
+      aNbErr++;
+    }
+  }
+  return aNbErr;
+}
+
+// Test speed of standard and OCCT-specific (accelerated) functions to parse string to double
+static Standard_Integer QATestAtof (Draw_Interpretor& di, Standard_Integer argc, const char ** argv)
+{
+  int aNbToTest = Max (100, (argc > 1 ? Draw::Atoi(argv[1]) : 1000000));
+  int aNbDigits = (argc > 2 ? Draw::Atoi(argv[2]) : 10);
+  double aRangeMin = (argc > 3 ? Draw::Atof(argv[3]) : -1e9);
+  double aRangeMax = (argc > 4 ? Draw::Atof(argv[4]) : 1e9);
+
+  char aFormat[256];
+  Sprintf (aFormat, "%%.%dlg", Max (2, Min (20, aNbDigits)));
+
+  // prepare data
+  const int MAXLEN = 256;
+  NCollection_Array2<char> aValuesStr (0, aNbToTest - 1, 0, MAXLEN);
+  math_BullardGenerator aRandom;
+
+  if (aRangeMin < aRangeMax)
+  {
+    // random values within specified range
+//    std::default_random_engine aRandomEngine;
+//    std::uniform_real_distribution<double> aRandomDistr (aRangeMin, aRangeMax);
+    const uint64_t aMaxUInt64 = ~(uint64_t)0; // could be (not supported by old GCC): std::numeric_limits<uint64_t>::max()
+    for (int i = 0; i < aNbToTest; i++)
+    {
+//      double aVal = aRandomDistr (aRandomEngine);
+      uint64_t aIVal = ((uint64_t)aRandom.NextInt() << 32) + aRandom.NextInt();
+      double aVal = aRangeMin + (aIVal / (double)aMaxUInt64) * (aRangeMax - aRangeMin);
+      Sprintf(&aValuesStr(i,0), aFormat, aVal);
+    }
+  }
+  else
+  {
+    // special values
+    int i = 0;
+
+    strcpy (&aValuesStr(i++,0), "nan");
+    strcpy (&aValuesStr(i++,0), "nan(qnan)");
+    strcpy (&aValuesStr(i++,0), "NAN");
+    strcpy (&aValuesStr(i++,0), "-nan");
+    strcpy (&aValuesStr(i++,0), "-NAN");
+    strcpy (&aValuesStr(i++,0), "inf");
+    strcpy (&aValuesStr(i++,0), "INF");
+    strcpy (&aValuesStr(i++,0), "-inf");
+    strcpy (&aValuesStr(i++,0), "-INF");
+
+    strcpy (&aValuesStr(i++,0), "  ."); // standalone period should not be considered as a  number
+    strcpy (&aValuesStr(i++,0), "nanabcdef_128  xx"); // extra non-parenthised sequence after "nan"
+
+    strcpy (&aValuesStr(i++,0), "905791934.11394954"); // value that gets rounded in a wrong way by fast Strtod()
+    strcpy (&aValuesStr(i++,0), "9.343962790444495e+148"); // value where strtod() and Strtod() differ by 2 Epsilon
+
+    strcpy (&aValuesStr(i++,0), "     12345.67text"); // test for leading whitespaces and trailing text
+    strcpy (&aValuesStr(i++,0), "000.000"); // test for zero
+    strcpy (&aValuesStr(i++,0), "000.000e-0002"); // test for zero
+
+    strcpy (&aValuesStr(i++,0), "1000000000000000000000000000012345678901234567890"); // huge mantissa
+    strcpy (&aValuesStr(i++,0), "0000000000.00000000000000000012345678901234567890"); // leading zeros
+    strcpy (&aValuesStr(i++,0), "1.00000000000000000000000000012345678901234567890"); // long fractional part
+
+    strcpy (&aValuesStr(i++,0), "0.0000000001e318"); // large exponent but no overflow
+    strcpy (&aValuesStr(i++,0), "-1.7976931348623158e+308"); // -DBL_MAX 
+    strcpy (&aValuesStr(i++,0), "1.79769313486232e+308"); // overflow
+
+    strcpy (&aValuesStr(i++,0), "10000000000e-310"); // large negative exponent but no underflow
+    strcpy (&aValuesStr(i++,0), "1.1e-310"); // underflow
+    strcpy (&aValuesStr(i++,0), "0.000001e-310"); // underflow
+    strcpy (&aValuesStr(i++,0), "2.2250738585072014e-308"); // underflow, DBL_MIN
+    strcpy (&aValuesStr(i++,0), "2.2250738585e-308"); // underflow, value less than DBL_MIN
+
+    strcpy (&aValuesStr(i++,0), "2.2204460492503131e-016"); // DBL_EPSILON
+
+    // random binary data
+//    std::default_random_engine aRandomEngine;
+//    std::uniform_int_distribution<uint64_t> aRandomDistr (0, ~(uint64_t)0);
+    for (; i < aNbToTest; i++)
+    {
+      union {
+        uint64_t valint;
+        double valdbl;
+      } aVal;
+//      aVal.valint = aRandomDistr (aRandomEngine);
+      aVal.valint = ((uint64_t)aRandom.NextInt() << 32) + aRandom.NextInt();
+      Sprintf(&aValuesStr(i,0), aFormat, aVal.valdbl);
+    }
+  }
+
+  // test different methods
+#define TEST_ATOF(method) \
+  OSD_Timer aT_##method; aT_##method.Start(); \
+  double aRes_##method = 0.; \
+  for (int i = 0; i < aNbToTest; i++) { aRes_##method += test_##method (&aValuesStr(i,0)); } \
+  aT_##method.Stop()
+
+  TEST_ATOF(sscanf);
+  TEST_ATOF(strtod);
+  TEST_ATOF(atof);
+  TEST_ATOF(Strtod);
+  TEST_ATOF(Atof);
+#undef TEST_ATOF
+
+  // test different methods
+#define CHECK_ATOF(method) \
+  int aNbErr_##method = check_atof (aValuesStr, aFormat, test_##method, di); \
+  di << "Checking " << #method << ": " << aNbErr_##method << " deviations\n"
+
+  CHECK_ATOF(sscanf);
+  CHECK_ATOF(strtod);
+  CHECK_ATOF(atof);
+  CHECK_ATOF(Strtod);
+  CHECK_ATOF(Atof);
+#undef CHECK_ATOF
+
+/* compare results with atof */
+#ifdef _MSC_VER
+#define ISFINITE _finite
+#else
+#define ISFINITE std::isfinite
+#endif
+  int nbErr = 0;
+  for (int i = 0; i < aNbToTest; i++)
+  {
+    char *aStr = &aValuesStr(i,0), *anEndOCCT, *anEndStd;
+    double aRes = Strtod (aStr, &anEndOCCT);
+    double aRef = strtod (aStr, &anEndStd);
+    if (ISFINITE(aRes) != ISFINITE(aRef))
+    {
+      nbErr++;
+#if defined(_MSC_VER) && _MSC_VER < 1900
+      // MSVC < 2015 prints nan and inf as 1.#NAN or 1.INF, and noes not recognize nan or inf on read
+      if (strstr (aStr, "1.#") || strstr (aStr, "nan") || strstr (aStr, "inf") || 
+          strstr (aStr, "NAN") || strstr (aStr, "INF"))
+        continue;
+#endif
+      if (nbErr < 5)
+      {
+        char aBuff[256];
+        Sprintf (aBuff, "Error parsing %s: %.20lg / %.20lg\n", aStr, aRes, aRef);
+        di << aBuff;
+      }
+    }
+    else if (ISFINITE(aRef) && Abs (aRes - aRef) > Epsilon (aRef))
+    {
+      nbErr++;
+      if (nbErr < 5)
+      {
+        char aBuff[256];
+        Sprintf (aBuff, "Error parsing %s: %.20lg / %.20lg\n", aStr, aRes, aRef);
+        di << aBuff;
+        Sprintf (aBuff, "[Delta = %.8lg, Epsilon = %.8lg]\n", Abs (aRes - aRef), Epsilon (aRef));
+        di << aBuff;
+      }
+    }
+
+    // check that Strtod() and strtod() stop at the same place;
+    // this makes sense for reading special values such as "nan" and thus
+    // is not relevant for MSVC 2010 and earlier than do not support these
+#if ! defined(_MSC_VER) || _MSC_VER >= 1700
+    if (anEndOCCT != anEndStd)
+    {
+      nbErr++;
+      if (nbErr < 5)
+        di << "Error: different number of symbols parsed in " 
+           << aStr << ": " << (int)(anEndOCCT - aStr) << " / " << (int)(anEndStd - aStr) << "\n";
+    }
+#endif
+  }
+  di << "Total " << nbErr << " defiations from strtod() found\n"; 
+/* */
+
+  // print results
+  di << "Method\t      CPU\t  Elapsed   \t    Deviations \tChecksum\n";
+
+#define PRINT_RES(method) \
+  di << #method "\t" << aT_##method.UserTimeCPU() << "  \t" << aT_##method.ElapsedTime() << "\t" \
+  << aNbErr_##method << "\t" << aRes_##method << "\n"
+  PRINT_RES(sscanf);
+  PRINT_RES(strtod);
+  PRINT_RES(atof);
+  PRINT_RES(Strtod);
+  PRINT_RES(Atof);
+#undef PRINT_RES
+
+  return 0;
+}
+
 void QANCollection::CommandsTest(Draw_Interpretor& theCommands) {
  const char *group = "QANCollection";

@@ -877,5 +1123,6 @@ void QANCollection::CommandsTest(Draw_Interpretor& theCommands) {
  theCommands.Add("QANColTestList",           "QANColTestList",           __FILE__, QANColTestList,           group);  
  theCommands.Add("QANColTestSequence",       "QANColTestSequence",       __FILE__, QANColTestSequence,       group);  
  theCommands.Add("QANColTestVector",         "QANColTestVector",         __FILE__, QANColTestVector,         group);  
-  theCommands.Add("QANColTestArrayMove",      "QANColTestArrayMove (is expected to give error)",      __FILE__, QANColTestArrayMove,      group);  
+  theCommands.Add("QANColTestArrayMove",      "QANColTestArrayMove (is expected to give error)", __FILE__, QANColTestArrayMove, group);  
+  theCommands.Add("QATestAtof", "QATestAtof [nbvalues [nbdigits [min [max]]]]", __FILE__, QATestAtof, group);
 }