更新: 1万字を超えたため、元の回答と置き換えました。
わかりました。StringComparison
の問題でした(基本のはずなのですが失念しておりました^^;)
StringComparison 列挙型 (System) | Microsoft Learn
.NET での文字列の比較に関するベスト プラクティス | Microsoft Learn
StringComparison.Ordinal
を指定すれば、元のコードより100~1000倍ほど速くなりました。
それでもReadOnlySpan
のほうが速いことは確かなので、ROS
を使えるなら使ったほうがいいでしょう^^
| Method          | N    | Mean           | Error         | StdDev        | Ratio |
|-----------------|-----:|---------------:|--------------:|--------------:|------:|
| Original        | 100  | 693.671 us     | 13.8602 us    | 12.2867 us    | 1.000 |
| OriginalOrdinal | 100  | 6.695 us       | 0.0434 us     | 0.0385 us     | 0.010 |
| ROS             | 100  | 5.053 us       | 0.0245 us     | 0.0217 us     | 0.007 |
| Original        | 500  | 20,997.678 us  | 440.4404 us   | 1,298.6484 us | 1.000 |
| OriginalOrdinal | 500  | 47.985 us      | 0.9310 us     | 0.8253 us     | 0.002 |
| ROS             | 500  | 39.873 us      | 0.2115 us     | 0.1979 us     | 0.002 |
| Original        | 1000 | 139,483.870 us | 1,282.5145 us | 1,136.9157 us | 1.000 |
| OriginalOrdinal | 1000 | 131.865 us     | 1.2881 us     | 1.0757 us     | 0.001 |
| ROS             | 1000 | 117.974 us     | 2.3328 us     | 2.3957 us     | 0.001 |
cs
1 using BenchmarkDotNet . Attributes ;
2 using BenchmarkDotNet . Running ;
3
4
// Entry point: hand the assembly that contains Program to BenchmarkDotNet's runner.
var benchmarkAssembly = typeof(Program).Assembly;
BenchmarkRunner.Run(benchmarkAssembly);
6
7
/// <summary>
/// Benchmarks three ways of converting a (line, column) pair into a character
/// index inside a CRLF-separated text: a string search without an explicit
/// <see cref="StringComparison"/>, a string search with
/// <see cref="StringComparison.Ordinal"/>, and a <see cref="ReadOnlySpan{T}"/> search.
/// </summary>
[ReturnValueValidator(true)]
public class CharIndex
{
    /// <summary>Number of lines in the generated text; also drives the looked-up position.</summary>
    [Params(100, 500, 1000)]
    public int N;

    // Text under test, rebuilt by GlobalSetup.
    private string data;

    /// <summary>
    /// Builds <c>data</c> as N lines joined with "\r\n", where line x
    /// (x = 0 .. N-1) consists of x copies of 'a'.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        var lines = Enumerable.Range(0, N).Select(length => new string('a', length));
        data = string.Join("\r\n", lines);
    }

    /// <summary>Baseline: string search with no explicit comparison mode.</summary>
    [Benchmark(Baseline = true)]
    public int Original()
    {
        return GetCharIndex(data, N - 1, N - 1);
    }

    /// <summary>String search with <see cref="StringComparison.Ordinal"/>.</summary>
    [Benchmark]
    public int OriginalOrdinal()
    {
        return GetCharIndex2(data, N - 1, N - 1);
    }

    /// <summary>Span-based search.</summary>
    [Benchmark]
    public int ROS()
    {
        return GetCharIndex3(data, N - 1, N - 1);
    }

    /// <summary>
    /// Returns the index of column <paramref name="col"/> on line
    /// <paramref name="line"/> (both zero-based) of <paramref name="text"/>,
    /// or -1 when the text contains fewer than <paramref name="line"/> "\r\n"
    /// separators. <paramref name="col"/> is not range-checked.
    /// </summary>
    int GetCharIndex(string text, int line, int col)
    {
        const string separator = "\r\n";

        int position = 0;
        for (int remaining = line; remaining != 0; remaining--)
        {
            // Deliberately no StringComparison argument: this is the slow baseline.
            int found = text.IndexOf(separator, position);
            if (found == -1)
            {
                return -1;
            }

            // Resume one character past the match start (i.e. on the '\n').
            position = found + 1;
        }

        if (line != 0)
        {
            // Step over the rest of the last separator to reach the line start.
            position += separator.Length - 1;
        }

        return position + col;
    }

    /// <summary>
    /// Same contract as <c>GetCharIndex</c>, but searches for the separator
    /// with <see cref="StringComparison.Ordinal"/>.
    /// </summary>
    int GetCharIndex2(string text, int line, int col)
    {
        const string separator = "\r\n";

        int position = 0;
        for (int remaining = line; remaining != 0; remaining--)
        {
            int found = text.IndexOf(separator, position, StringComparison.Ordinal);
            if (found == -1)
            {
                return -1;
            }

            // Resume one character past the match start (i.e. on the '\n').
            position = found + 1;
        }

        if (line != 0)
        {
            // Step over the rest of the last separator to reach the line start.
            position += separator.Length - 1;
        }

        return position + col;
    }

    /// <summary>
    /// Same contract as <c>GetCharIndex</c>, but operates on spans and uses the
    /// start-index <c>IndexOf</c> extension declared in <c>Ex</c>.
    /// </summary>
    int GetCharIndex3(ReadOnlySpan<char> text, int line, int col)
    {
        ReadOnlySpan<char> separator = "\r\n".AsSpan();

        int position = 0;
        for (int remaining = line; remaining != 0; remaining--)
        {
            int found = text.IndexOf(separator, position);
            if (found == -1)
            {
                return -1;
            }

            // Resume one character past the match start (i.e. on the '\n').
            position = found + 1;
        }

        if (line != 0)
        {
            // Step over the rest of the last separator to reach the line start.
            position += separator.Length - 1;
        }

        return position + col;
    }
}
104
/// <summary>Span search helpers used by the benchmark.</summary>
static class Ex
{
    // [Add Span / ReadOnlySpan IndexOf extensions taking in a start index and a count · Issue #26982 · dotnet/runtime](https://github.com/dotnet/runtime/issues/26982)

    /// <summary>
    /// Searches <paramref name="span"/> for <paramref name="value"/>, starting
    /// at <paramref name="startIndex"/>.
    /// </summary>
    /// <param name="span">The span to search.</param>
    /// <param name="value">The sequence to look for.</param>
    /// <param name="startIndex">Zero-based position in <paramref name="span"/> where the search begins.</param>
    /// <returns>
    /// The index of the first match, relative to the start of
    /// <paramref name="span"/>, or -1 when there is no match.
    /// </returns>
    public static int IndexOf(this ReadOnlySpan<char> span, ReadOnlySpan<char> value, int startIndex)
    {
        ReadOnlySpan<char> tail = span.Slice(startIndex);
        int offset = tail.IndexOf(value);
        if (offset == -1)
        {
            return -1;
        }

        // Translate the slice-relative offset back into an index into the full span.
        return offset + startIndex;
    }
}
// * Summary *
BenchmarkDotNet=v0.13.2, OS=Windows 10 (10.0.19043.2006/21H1/May2021Update)
Intel Core i7 CPU 920 2.67GHz (Nehalem), 1 CPU, 8 logical and 4 physical cores
.NET SDK=7.0.100-rc.1.22431.12
[Host] : .NET 6.0.9 (6.0.922.41905), X64 RyuJIT SSE4.2
DefaultJob : .NET 6.0.9 (6.0.922.41905), X64 RyuJIT SSE4.2
| Method | N | Mean | Error | StdDev | Ratio |
|---------------- |----- |---------------:|--------------:|--------------:|------:|
| Original | 100 | 693.671 us | 13.8602 us | 12.2867 us | 1.000 |
| OriginalOrdinal | 100 | 6.695 us | 0.0434 us | 0.0385 us | 0.010 |
| ROS | 100 | 5.053 us | 0.0245 us | 0.0217 us | 0.007 |
| | | | | | |
| Original | 500 | 20,997.678 us | 440.4404 us | 1,298.6484 us | 1.000 |
| OriginalOrdinal | 500 | 47.985 us | 0.9310 us | 0.8253 us | 0.002 |
| ROS | 500 | 39.873 us | 0.2115 us | 0.1979 us | 0.002 |
| | | | | | |
| Original | 1000 | 139,483.870 us | 1,282.5145 us | 1,136.9157 us | 1.000 |
| OriginalOrdinal | 1000 | 131.865 us | 1.2881 us | 1.0757 us | 0.001 |
| ROS | 1000 | 117.974 us | 2.3328 us | 2.3957 us | 0.001 |
// * Warnings *
MultimodalDistribution
CharIndex.Original: Default -> It seems that the distribution is bimodal (mValue = 3.4)
// * Hints *
Outliers
CharIndex.Original: Default -> 1 outlier was removed (750.32 us)
CharIndex.OriginalOrdinal: Default -> 1 outlier was removed (7.19 us)
CharIndex.ROS: Default -> 1 outlier was removed (5.33 us)
CharIndex.OriginalOrdinal: Default -> 1 outlier was removed (51.15 us)
CharIndex.Original: Default -> 1 outlier was removed (143.96 ms)
CharIndex.OriginalOrdinal: Default -> 2 outliers were removed (137.41 us, 139.30 us)
CharIndex.ROS: Default -> 1 outlier was removed (127.26 us)
インデックスが大きくなっても処理速度が極端に遅くなったりすることがないような実装はありますか?
想像よりもかなり遅いですね。。。(1万行にしたら全然返ってこなくなりました^^;)
行数もわからずノーヒントだったら、頭から探していくしかないような。
Replace("\r\n", "\n")
して行数を求めて後ろ側だったら、LastIndexOf
するとか?
置換や後ろから探すオーバーヘッドをペイできるのかどうか...
Char
になるのはメリットかも?(IndexOf(String, Int32)
よりは、IndexOf(Char, Int32)
のほうが速そうな気はする)