質問編集履歴
13
文章の変更
test
CHANGED
File without changes
|
test
CHANGED
@@ -80,11 +80,11 @@
|
|
80
80
|
|
81
81
|
* %>s(HTTPステータス)そのまま
|
82
82
|
|
83
|
-
* %b(レスポンスのバイト数)そのまま
|
83
|
+
* %b(レスポンスのバイト数)そのまま
|
84
84
|
|
85
85
|
* %{Referer}i(リファラ)そのまま(スペースはそのまま取得)
|
86
86
|
|
87
|
-
* %{User-Agent}i(ユーザーエージェント)そのまま
|
87
|
+
* %{User-Agent}i(ユーザーエージェント)そのまま(スペースはそのまま取得)
|
88
88
|
|
89
89
|
|
90
90
|
|
12
内容の追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -66,6 +66,8 @@
|
|
66
66
|
|
67
67
|
**分割ルール(取得したいテキスト)**
|
68
68
|
|
69
|
+
* 全ての列の「"」はエスケープ
|
70
|
+
|
69
71
|
* %h(クライアントのホスト名)そのまま
|
70
72
|
|
71
73
|
* %l(リモートログ名)そのまま
|
@@ -74,13 +76,13 @@
|
|
74
76
|
|
75
77
|
* %t(リクエスト時刻)[中身]の中身を取得、[]は不要
|
76
78
|
|
77
|
-
* %r(リクエストの最初の行)HTTPメソッドとリクエストURLに分割、HTTP○○(HTTPS○○)は不要
|
79
|
+
* %r(リクエストの最初の行)HTTPメソッドとリクエストURL(スペースが含まれる場合にはエンコード)に分割、HTTP○○(HTTPS○○)は不要
|
78
80
|
|
79
81
|
* %>s(HTTPステータス)そのまま
|
80
82
|
|
81
|
-
* %b(レスポンスのバイト数)そのまま
|
83
|
+
* %b(レスポンスのバイト数)そのまま(スペースはそのまま取得)
|
82
84
|
|
83
|
-
* %{Referer}i(リファラ)そのまま
|
85
|
+
* %{Referer}i(リファラ)そのまま(スペースはそのまま取得)
|
84
86
|
|
85
87
|
* %{User-Agent}i(ユーザーエージェント)そのまま
|
86
88
|
|
11
pythonコードの更新
test
CHANGED
File without changes
|
test
CHANGED
@@ -146,6 +146,10 @@
|
|
146
146
|
|
147
147
|
while line:
|
148
148
|
|
149
|
+
#リクエストの最初の行(リクエストURL)に「"」が含まれている場合ダメ
|
150
|
+
|
151
|
+
#fline = re.sub(r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([A-Z]+) ([^ "]+)? (?:HTTP|HTTPS)/[0-9.]+" ([0-9]{3}) ([0-9]+|-) "([^\"]*)" "([^\"]*)"', r'\1~\2~\3~\4~\5~\6~\7~\8~\9~\10', line)
|
152
|
+
|
149
153
|
fline = conv_line(line)
|
150
154
|
|
151
155
|
check = re.search(r'/public/|/bot/|Googlebot|msnbot|bingbot|applebot|SemrushBot|AhrefsBot|MJ12bot|AdsBot|DotBot|istellabot|Twitterbot|YandexMobileBot|/bots|/bot.html|robots.txt', fline)
|
10
文章の変更
test
CHANGED
File without changes
|
test
CHANGED
@@ -60,7 +60,7 @@
|
|
60
60
|
|
61
61
|
|
62
62
|
|
63
|
-
|
63
|
+
```
|
64
64
|
|
65
65
|
|
66
66
|
|
9
内容の追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -41,6 +41,26 @@
|
|
41
41
|
|
42
42
|
|
43
43
|
```
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
**現在の表示結果**
|
48
|
+
|
49
|
+
```txt
|
50
|
+
|
51
|
+
#log
|
52
|
+
|
53
|
+
123.hoge.jp - - [01/Sep/2018:02:00:03 +0900] "GET /index.php?&q=1 HTTP/1.1" 200 622 "https://www.hoge.com" "Mozilla/5.0 (iPhone; CPU iPhone OS 11_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.0 Mobile/15E148 Safari/604.1"
|
54
|
+
|
55
|
+
↓
|
56
|
+
|
57
|
+
#tsv
|
58
|
+
|
59
|
+
123.hoge.jp~-~-~01/Sep/2018:02:00:03 +0900~GET~/index.php?&q=1~200~622~https://www.hoge.com~Mozilla/5.0~(iPhone;~CPU~iPhone~OS~11_4_1~like~Mac~OS~X)~AppleWebKit/605.1.15~(KHTML,~like~Gecko)~Version/11.0~Mobile/15E148~Safari/604.1"
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
'''
|
44
64
|
|
45
65
|
|
46
66
|
|
@@ -94,6 +114,24 @@
|
|
94
114
|
|
95
115
|
|
96
116
|
|
117
|
+
def conv_line(l):
|
118
|
+
|
119
|
+
cols = l.split(' ') # まずは無条件で空白で分割する
|
120
|
+
|
121
|
+
dst = cols[:3]
|
122
|
+
|
123
|
+
dst += [cols[3].lstrip('[')+' '+cols[4].rstrip(']')] # 04/Sep/2018:20:38:28 +0900
|
124
|
+
|
125
|
+
dst += [cols[5].lstrip('"')] # GET
|
126
|
+
|
127
|
+
dst += [cols[6].rstrip('"')] # /index.php?&q=type%3Apiyo
|
128
|
+
|
129
|
+
dst += [col.strip('"') for col in cols[8:]]
|
130
|
+
|
131
|
+
return '~'.join(dst)
|
132
|
+
|
133
|
+
|
134
|
+
|
97
135
|
fr = bz2.open(r'C:\Users\user\Desktop\bz2\access_log.bz2', 'rt')
|
98
136
|
|
99
137
|
fw = codecs.open(r'C:\Users\user\Desktop\bz2\access_log.bz2.tsv', 'w', encoding='utf-8')
|
@@ -108,7 +146,7 @@
|
|
108
146
|
|
109
147
|
while line:
|
110
148
|
|
111
|
-
fline =
|
149
|
+
fline = conv_line(line)
|
112
150
|
|
113
151
|
check = re.search(r'/public/|/bot/|Googlebot|msnbot|bingbot|applebot|SemrushBot|AhrefsBot|MJ12bot|AdsBot|DotBot|istellabot|Twitterbot|YandexMobileBot|/bots|/bot.html|robots.txt', fline)
|
114
152
|
|
8
文章の変更
test
CHANGED
File without changes
|
test
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
|
4
4
|
|
5
|
-
**
|
5
|
+
**解決したい問題**
|
6
6
|
|
7
7
|
%r(リクエストの最初の行)に「"」が含まれていても対応できる方法をご教示いただきたいです。
|
8
8
|
|
7
内容の追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -1,4 +1,10 @@
|
|
1
1
|
件名の通りです。対処方法をご教示いただけませんでしょうか?よろしくお願いいたします。
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
***解決したい問題***
|
6
|
+
|
7
|
+
%r(リクエストの最初の行)に「"」が含まれていても対応できる方法をご教示いただきたいです。
|
2
8
|
|
3
9
|
|
4
10
|
|
6
内容の追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -37,6 +37,26 @@
|
|
37
37
|
```
|
38
38
|
|
39
39
|
|
40
|
+
|
41
|
+
**分割ルール(取得したいテキスト)**
|
42
|
+
|
43
|
+
* %h(クライアントのホスト名)そのまま
|
44
|
+
|
45
|
+
* %l(リモートログ名)そのまま
|
46
|
+
|
47
|
+
* %u(Basic認証のユーザー名)そのまま
|
48
|
+
|
49
|
+
* %t(リクエスト時刻)[中身]の中身を取得、[]は不要
|
50
|
+
|
51
|
+
* %r(リクエストの最初の行)HTTPメソッドとリクエストURLに分割、HTTP○○(HTTPS○○)は不要
|
52
|
+
|
53
|
+
* %>s(HTTPステータス)そのまま
|
54
|
+
|
55
|
+
* %b(レスポンスのバイト数)そのまま
|
56
|
+
|
57
|
+
* %{Referer}i(リファラ)そのまま
|
58
|
+
|
59
|
+
* %{User-Agent}i(ユーザーエージェント)そのまま
|
40
60
|
|
41
61
|
|
42
62
|
|
5
pythonコードの修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -82,7 +82,7 @@
|
|
82
82
|
|
83
83
|
while line:
|
84
84
|
|
85
|
-
fline = re.sub(r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([A-Z]+) ([^ "]+)? HTTP/[0-9.]+" ([0-9]{3}) ([0-9]+|-) "([^\"]*)" "([^\"]*)"', r'\1~\2~\3~\4~\5~\6~\7~\8~\9~\10', line)
|
85
|
+
fline = re.sub(r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([A-Z]+) ([^ "]+)? (?:HTTP|HTTPS)/[0-9.]+" ([0-9]{3}) ([0-9]+|-) "([^\"]*)" "([^\"]*)"', r'\1~\2~\3~\4~\5~\6~\7~\8~\9~\10', line)
|
86
86
|
|
87
87
|
check = re.search(r'/public/|/bot/|Googlebot|msnbot|bingbot|applebot|SemrushBot|AhrefsBot|MJ12bot|AdsBot|DotBot|istellabot|Twitterbot|YandexMobileBot|/bots|/bot.html|robots.txt', fline)
|
88
88
|
|
4
pythonコードの修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -82,7 +82,7 @@
|
|
82
82
|
|
83
83
|
while line:
|
84
84
|
|
85
|
-
fline = re.sub(r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([A-Z]+) ([^ "]+)? HTTP
|
85
|
+
fline = re.sub(r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([A-Z]+) ([^ "]+)? HTTP/[0-9.]+" ([0-9]{3}) ([0-9]+|-) "([^\"]*)" "([^\"]*)"', r'\1~\2~\3~\4~\5~\6~\7~\8~\9~\10', line)
|
86
86
|
|
87
87
|
check = re.search(r'/public/|/bot/|Googlebot|msnbot|bingbot|applebot|SemrushBot|AhrefsBot|MJ12bot|AdsBot|DotBot|istellabot|Twitterbot|YandexMobileBot|/bots|/bot.html|robots.txt', fline)
|
88
88
|
|
3
pythonコードの修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -82,7 +82,7 @@
|
|
82
82
|
|
83
83
|
while line:
|
84
84
|
|
85
|
-
fline = re.sub(r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([A-Z]+) ([^ "]+)? HTTP|S/[0-9.]+" ([0-9]{3}) ([0-9]+|-) "([^\"]*)" "([^\"]*)"', r'\1~\2~\3~\4~\5~\6~\7~\8~\9~\10', line)
|
85
|
+
fline = re.sub(r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([A-Z]+) ([^ "]+)? HTTP|HTTPS/[0-9.]+" ([0-9]{3}) ([0-9]+|-) "([^\"]*)" "([^\"]*)"', r'\1~\2~\3~\4~\5~\6~\7~\8~\9~\10', line)
|
86
86
|
|
87
87
|
check = re.search(r'/public/|/bot/|Googlebot|msnbot|bingbot|applebot|SemrushBot|AhrefsBot|MJ12bot|AdsBot|DotBot|istellabot|Twitterbot|YandexMobileBot|/bots|/bot.html|robots.txt', fline)
|
88
88
|
|
2
pythonコードの修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -82,7 +82,7 @@
|
|
82
82
|
|
83
83
|
while line:
|
84
84
|
|
85
|
-
fline = re.sub(r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([A-Z]+) ([^ "]+)? HTTP/[0-9.]+" ([0-9]{3}) ([0-9]+|-) "([^\"]*)" "([^\"]*)"', r'\1~\2~\3~\4~\5~\6~\7~\8~\9~\10', line)
|
85
|
+
fline = re.sub(r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([A-Z]+) ([^ "]+)? HTTP|S/[0-9.]+" ([0-9]{3}) ([0-9]+|-) "([^\"]*)" "([^\"]*)"', r'\1~\2~\3~\4~\5~\6~\7~\8~\9~\10', line)
|
86
86
|
|
87
87
|
check = re.search(r'/public/|/bot/|Googlebot|msnbot|bingbot|applebot|SemrushBot|AhrefsBot|MJ12bot|AdsBot|DotBot|istellabot|Twitterbot|YandexMobileBot|/bots|/bot.html|robots.txt', fline)
|
88
88
|
|
1
内容の追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -21,6 +21,22 @@
|
|
21
21
|
|
22
22
|
|
23
23
|
```
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
**得たい結果**
|
28
|
+
|
29
|
+
```tsv
|
30
|
+
|
31
|
+
|
32
|
+
|
33
|
+
hoge.fuga.com~-~-~04/Sep/2018:20:38:28 +0900~GET~/index.php?&q=type%3Apiyo~200~144155~-~-
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
```
|
38
|
+
|
39
|
+
|
24
40
|
|
25
41
|
|
26
42
|
|
@@ -60,11 +76,13 @@
|
|
60
76
|
|
61
77
|
line = fr.readline()
|
62
78
|
|
79
|
+
line = line.replace('~', '')
|
80
|
+
|
63
81
|
|
64
82
|
|
65
83
|
while line:
|
66
84
|
|
67
|
-
fline = re.sub(r'^(\S+) (\S+) (\S+)
|
85
|
+
fline = re.sub(r'^(\S+) (\S+) (\S+) \[([^\]]+)\] "([A-Z]+) ([^ "]+)? HTTP/[0-9.]+" ([0-9]{3}) ([0-9]+|-) "([^\"]*)" "([^\"]*)"', r'\1~\2~\3~\4~\5~\6~\7~\8~\9~\10', line)
|
68
86
|
|
69
87
|
check = re.search(r'/public/|/bot/|Googlebot|msnbot|bingbot|applebot|SemrushBot|AhrefsBot|MJ12bot|AdsBot|DotBot|istellabot|Twitterbot|YandexMobileBot|/bots|/bot.html|robots.txt', fline)
|
70
88
|
|
@@ -73,6 +91,8 @@
|
|
73
91
|
fw.write(fline)
|
74
92
|
|
75
93
|
line = fr.readline()
|
94
|
+
|
95
|
+
line = line.replace('~', '')
|
76
96
|
|
77
97
|
fr.close()
|
78
98
|
|