回答編集履歴
2
サンプルコード追記
test
CHANGED
@@ -1,3 +1,101 @@
|
|
1
|
+
## 追記
|
2
|
+
|
3
|
+
なるべく質問コードの変数名を使ったサンプルコードです。
|
4
|
+
|
5
|
+
あとはご自分でアレンジしてみてください。
|
6
|
+
|
7
|
+
```Python3
|
8
|
+
|
9
|
+
import csv
|
10
|
+
|
11
|
+
import re # Regular expression
|
12
|
+
|
13
|
+
import pandas as pd
|
14
|
+
|
15
|
+
path_in = 'C:\\workspace\\Python\\memo\\test\\'  # a raw string cannot end with a backslash, so escape each one explicitly
|
16
|
+
|
17
|
+
file_in = r'test.csv'
|
18
|
+
|
19
|
+
f_in = path_in + file_in
|
20
|
+
|
21
|
+
reg = re.compile('[亜-熙ぁ-んァ-ヶ]+')
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
csv_ori = pd.read_csv(f_in, sep=",")
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
dst = []
|
30
|
+
|
31
|
+
for index in csv_ori.iterrows():
|
32
|
+
|
33
|
+
print(index[1][1])
|
34
|
+
|
35
|
+
re_csv = re.sub(reg, '', index[1][1])
|
36
|
+
|
37
|
+
print(re_csv)
|
38
|
+
|
39
|
+
dst.append([index[1][0],re_csv])
|
40
|
+
|
41
|
+
print(dst)
|
42
|
+
|
43
|
+
```
|
44
|
+
|
45
|
+
```
|
46
|
+
|
47
|
+
Hi
|
48
|
+
|
49
|
+
Hi
|
50
|
+
|
51
|
+
World
|
52
|
+
|
53
|
+
World
|
54
|
+
|
55
|
+
Hello
|
56
|
+
|
57
|
+
Hello
|
58
|
+
|
59
|
+
こんにちは
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
Python
|
64
|
+
|
65
|
+
Python
|
66
|
+
|
67
|
+
きょう
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
あゆむ
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
Sato
|
76
|
+
|
77
|
+
Sato
|
78
|
+
|
79
|
+
Flower
|
80
|
+
|
81
|
+
Flower
|
82
|
+
|
83
|
+
class
|
84
|
+
|
85
|
+
class
|
86
|
+
|
87
|
+
find
|
88
|
+
|
89
|
+
find
|
90
|
+
|
91
|
+
[[1, 'Hi'], [2, 'World'], [3, 'Hello'], [5, ''], [4, 'Python'], [2, ''], [1, ''], [3, 'Sato'], [5, 'Flower'], [4, 'class'], [3, 'find']]
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
```
|
96
|
+
|
97
|
+
|
98
|
+
|
1
99
|
「正規表現 日本語」で検索したら最初にヒットしましたので、ご参考まで。
|
2
100
|
|
3
101
|
|
1
サンプルコード追記
test
CHANGED
@@ -11,3 +11,53 @@
|
|
11
11
|
|
12
12
|
|
13
13
|
![イメージ説明](69ebd563536a14453f558de893086bdf.png)
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
同じデータから、日本語を含まない行だけをリストにするサンプルコードです。
|
18
|
+
|
19
|
+
```Python3
|
20
|
+
|
21
|
+
import re
|
22
|
+
|
23
|
+
path_in = 'C:\\workspace\\Python\\memo\\test\\'  # a raw string cannot end with a backslash, so escape each one explicitly
|
24
|
+
|
25
|
+
file_in = r'test.csv'
|
26
|
+
|
27
|
+
reg = re.compile('[亜-熙ぁ-んァ-ヶ]+')
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
with open(path_in+file_in, "r",encoding="utf-8") as open_file:
|
32
|
+
|
33
|
+
src = [line.split(',') for line in open_file.read().split("\n") if line.strip()]  # skip blank lines (e.g. after a trailing newline) so line[1] below cannot raise IndexError
|
34
|
+
|
35
|
+
print(src)
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
dst = []
|
40
|
+
|
41
|
+
for line in src:
|
42
|
+
|
43
|
+
m = re.search(reg,line[1])
|
44
|
+
|
45
|
+
if not m:
|
46
|
+
|
47
|
+
dst.append(line)
|
48
|
+
|
49
|
+
print(dst)
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
```
|
54
|
+
|
55
|
+
```
|
56
|
+
|
57
|
+
[['user_id', 'comment'], ['1', 'Hi'], ['2', 'World'], ['3', 'Hello'], ['5', 'こんにちは'], ['4', 'Python'], ['2', 'きょう'], ['1', 'あゆむ'], ['3', 'Sato'], ['5', 'Flower'], ['4', 'class'], ['3', 'find']]
|
58
|
+
|
59
|
+
[['user_id', 'comment'], ['1', 'Hi'], ['2', 'World'], ['3', 'Hello'], ['4', 'Python'], ['3', 'Sato'], ['5', 'Flower'], ['4', 'class'], ['3', 'find']]
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
```
|