質問編集履歴
5
エラーメッセージを追記しました
title
CHANGED
File without changes
|
body
CHANGED
@@ -1,12 +1,6 @@
|
|
1
1
|
フォルダに格納されたPDFファイル(test1~test10の10ファイル)のプロパティ(タイトルとファイル作成者)に、エクセルファイルに入力されているデータを書き込みたいと考えています。
|
2
2
|
|
3
|
-
以下のようなコードを用いることにより処理をしようと考えているのですが、
|
3
|
+
以下のようなコードを用いることにより処理をしようと考えているのですが、PDFファイルによっては以下のようなエラーが生じ、エラーが生じたファイルが壊れて(0KBとなり読み込めなくなって)しまいます。
|
4
|
-
|
5
|
-
問題1.PDFファイルが保存されていない場合(例えばtest2.pdfがフォルダに格納されていない場合)、エラーが生じるのではなく、その部分の処理をスキップしたい
|
6
|
-
|
7
|
-
問題2.return NameObject(name.decode('utf-8'))の箇所でエラーが生じてしまう(格納されたPDFファイルの中にutf-8とそうでないものとが混在している?)。
|
8
|
-
⇒セキュリティ設定に違いがなくても、ファイルによってはエラーが生じない。
|
9
|
-
|
10
4
|

|
11
5
|
```Python
|
12
6
|
import PyPDF2
|
@@ -30,4 +24,48 @@
|
|
30
24
|
|
31
25
|
with open('Desktop\テスト/'+path,'wb') as f:
|
32
26
|
dst_pdf.write(f)
|
33
|
-
```
|
27
|
+
```
|
28
|
+
|
29
|
+
エラーメッセージ
|
30
|
+
PdfReadError Traceback (most recent call last)
|
31
|
+
<ipython-input-22-2eb6e479cf31> in <module>
|
32
|
+
22
|
33
|
+
23 with open('Desktop\テスト/'+path,'wb') as f:
|
34
|
+
---> 24 dst_pdf.write(f)
|
35
|
+
|
36
|
+
~\Anaconda3\lib\site-packages\PyPDF2\pdf.py in write(self, stream)
|
37
|
+
480 self.stack = []
|
38
|
+
481 if debug: print(("ERM:", externalReferenceMap, "root:", self._root))
|
39
|
+
--> 482 self._sweepIndirectReferences(externalReferenceMap, self._root)
|
40
|
+
483 del self.stack
|
41
|
+
484
|
42
|
+
|
43
|
+
~\Anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
44
|
+
569 self.stack.append(data.idnum)
|
45
|
+
570 realdata = self.getObject(data)
|
46
|
+
--> 571 self._sweepIndirectReferences(externMap, realdata)
|
47
|
+
572 return data
|
48
|
+
573 else:
|
49
|
+
|
50
|
+
~\Anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
51
|
+
545 for key, value in list(data.items()):
|
52
|
+
546 origvalue = value
|
53
|
+
--> 547 value = self._sweepIndirectReferences(externMap, value)
|
54
|
+
548 if isinstance(value, StreamObject):
|
55
|
+
549 # a dictionary value is a stream. streams must be indirect
|
56
|
+
|
57
|
+
~\Anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
58
|
+
575 if newobj == None:
|
59
|
+
576 try:
|
60
|
+
--> 577 newobj = data.pdf.getObject(data)
|
61
|
+
578 self._objects.append(None) # placeholder
|
62
|
+
579 idnum = len(self._objects)
|
63
|
+
|
64
|
+
~\Anaconda3\lib\site-packages\PyPDF2\pdf.py in getObject(self, indirectReference)
|
65
|
+
1629 indirectReference.generation), utils.PdfReadWarning)
|
66
|
+
1630 #if self.strict:
|
67
|
+
-> 1631 raise utils.PdfReadError("Could not find object.")
|
68
|
+
1632 self.cacheIndirectObject(indirectReference.generation,
|
69
|
+
1633 indirectReference.idnum, retval)
|
70
|
+
|
71
|
+
PdfReadError: Could not find object.
|
4
問題2について修正しました
title
CHANGED
File without changes
|
body
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
問題1.PDFファイルが保存されていない場合(例えばtest2.pdfがフォルダに格納されていない場合)、エラーが生じるのではなく、その部分の処理をスキップしたい
|
6
6
|
|
7
7
|
問題2.return NameObject(name.decode('utf-8'))の箇所でエラーが生じてしまう(格納されたPDFファイルの中にutf-8とそうでないものとが混在している?)。
|
8
|
-
⇒
|
8
|
+
⇒セキュリティ設定に違いがなくても、ファイルによってはエラーが生じない。
|
9
9
|
|
10
10
|

|
11
11
|
```Python
|
3
問題2について解決した旨を追記
title
CHANGED
File without changes
|
body
CHANGED
@@ -5,6 +5,8 @@
|
|
5
5
|
問題1.PDFファイルが保存されていない場合(例えばtest2.pdfがフォルダに格納されていない場合)、エラーが生じるのではなく、その部分の処理をスキップしたい
|
6
6
|
|
7
7
|
問題2.return NameObject(name.decode('utf-8'))の箇所でエラーが生じてしまう(格納されたPDFファイルの中にutf-8とそうでないものとが混在している?)。
|
8
|
+
⇒解決。セキュリティのかかっているファイルが混在しているだけでした。
|
9
|
+
|
8
10
|

|
9
11
|
```Python
|
10
12
|
import PyPDF2
|
@@ -28,134 +30,4 @@
|
|
28
30
|
|
29
31
|
with open('Desktop\テスト/'+path,'wb') as f:
|
30
32
|
dst_pdf.write(f)
|
31
|
-
```
|
33
|
+
```
|
32
|
-
以下エラーメッセージになります。
|
33
|
-
UnicodeDecodeError Traceback (most recent call last)
|
34
|
-
~\anaconda3\lib\site-packages\PyPDF2\generic.py in readFromStream(stream, pdf)
|
35
|
-
483 try:
|
36
|
-
--> 484 return NameObject(name.decode('utf-8'))
|
37
|
-
485 except (UnicodeEncodeError, UnicodeDecodeError) as e:
|
38
|
-
|
39
|
-
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x82 in position 8: invalid start byte
|
40
|
-
|
41
|
-
During handling of the above exception, another exception occurred:
|
42
|
-
|
43
|
-
PdfReadError Traceback (most recent call last)
|
44
|
-
<ipython-input-95-1c3d6b0830f5> in <module>
|
45
|
-
18
|
46
|
-
19 with open('Desktop\テスト/'+path,'wb') as f:
|
47
|
-
---> 20 dst_pdf.write(f)
|
48
|
-
|
49
|
-
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in write(self, stream)
|
50
|
-
480 self.stack = []
|
51
|
-
481 if debug: print(("ERM:", externalReferenceMap, "root:", self._root))
|
52
|
-
--> 482 self._sweepIndirectReferences(externalReferenceMap, self._root)
|
53
|
-
483 del self.stack
|
54
|
-
484
|
55
|
-
|
56
|
-
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
57
|
-
569 self.stack.append(data.idnum)
|
58
|
-
570 realdata = self.getObject(data)
|
59
|
-
--> 571 self._sweepIndirectReferences(externMap, realdata)
|
60
|
-
572 return data
|
61
|
-
573 else:
|
62
|
-
|
63
|
-
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
64
|
-
545 for key, value in list(data.items()):
|
65
|
-
546 origvalue = value
|
66
|
-
--> 547 value = self._sweepIndirectReferences(externMap, value)
|
67
|
-
548 if isinstance(value, StreamObject):
|
68
|
-
549 # a dictionary value is a stream. streams must be indirect
|
69
|
-
|
70
|
-
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
71
|
-
584 externMap[data.pdf][data.generation] = {}
|
72
|
-
585 externMap[data.pdf][data.generation][data.idnum] = newobj_ido
|
73
|
-
--> 586 newobj = self._sweepIndirectReferences(externMap, newobj)
|
74
|
-
587 self._objects[idnum-1] = newobj
|
75
|
-
588 return newobj_ido
|
76
|
-
|
77
|
-
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
78
|
-
545 for key, value in list(data.items()):
|
79
|
-
546 origvalue = value
|
80
|
-
--> 547 value = self._sweepIndirectReferences(externMap, value)
|
81
|
-
548 if isinstance(value, StreamObject):
|
82
|
-
549 # a dictionary value is a stream. streams must be indirect
|
83
|
-
|
84
|
-
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
85
|
-
554 elif isinstance(data, ArrayObject):
|
86
|
-
555 for i in range(len(data)):
|
87
|
-
--> 556 value = self._sweepIndirectReferences(externMap, data[i])
|
88
|
-
557 if isinstance(value, StreamObject):
|
89
|
-
558 # an array value is a stream. streams must be indirect
|
90
|
-
|
91
|
-
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
92
|
-
584 externMap[data.pdf][data.generation] = {}
|
93
|
-
585 externMap[data.pdf][data.generation][data.idnum] = newobj_ido
|
94
|
-
--> 586 newobj = self._sweepIndirectReferences(externMap, newobj)
|
95
|
-
587 self._objects[idnum-1] = newobj
|
96
|
-
588 return newobj_ido
|
97
|
-
|
98
|
-
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
99
|
-
545 for key, value in list(data.items()):
|
100
|
-
546 origvalue = value
|
101
|
-
--> 547 value = self._sweepIndirectReferences(externMap, value)
|
102
|
-
548 if isinstance(value, StreamObject):
|
103
|
-
549 # a dictionary value is a stream. streams must be indirect
|
104
|
-
|
105
|
-
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
106
|
-
545 for key, value in list(data.items()):
|
107
|
-
546 origvalue = value
|
108
|
-
--> 547 value = self._sweepIndirectReferences(externMap, value)
|
109
|
-
548 if isinstance(value, StreamObject):
|
110
|
-
549 # a dictionary value is a stream. streams must be indirect
|
111
|
-
|
112
|
-
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
113
|
-
545 for key, value in list(data.items()):
|
114
|
-
546 origvalue = value
|
115
|
-
--> 547 value = self._sweepIndirectReferences(externMap, value)
|
116
|
-
548 if isinstance(value, StreamObject):
|
117
|
-
549 # a dictionary value is a stream. streams must be indirect
|
118
|
-
|
119
|
-
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
120
|
-
575 if newobj == None:
|
121
|
-
576 try:
|
122
|
-
--> 577 newobj = data.pdf.getObject(data)
|
123
|
-
578 self._objects.append(None) # placeholder
|
124
|
-
579 idnum = len(self._objects)
|
125
|
-
|
126
|
-
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in getObject(self, indirectReference)
|
127
|
-
1609 % (indirectReference.idnum, indirectReference.generation, idnum, generation))
|
128
|
-
1610 assert generation == indirectReference.generation
|
129
|
-
-> 1611 retval = readObject(self.stream, self)
|
130
|
-
1612
|
131
|
-
1613 # override encryption is used for the /Encrypt dictionary
|
132
|
-
|
133
|
-
~\anaconda3\lib\site-packages\PyPDF2\generic.py in readObject(stream, pdf)
|
134
|
-
64 stream.seek(-2, 1) # reset to start
|
135
|
-
65 if peek == b_('<<'):
|
136
|
-
---> 66 return DictionaryObject.readFromStream(stream, pdf)
|
137
|
-
67 else:
|
138
|
-
68 return readHexStringFromStream(stream)
|
139
|
-
|
140
|
-
~\anaconda3\lib\site-packages\PyPDF2\generic.py in readFromStream(stream, pdf)
|
141
|
-
577 tok = readNonWhitespace(stream)
|
142
|
-
578 stream.seek(-1, 1)
|
143
|
-
--> 579 value = readObject(stream, pdf)
|
144
|
-
580 if not data.get(key):
|
145
|
-
581 data[key] = value
|
146
|
-
|
147
|
-
~\anaconda3\lib\site-packages\PyPDF2\generic.py in readObject(stream, pdf)
|
148
|
-
58 if idx == 0:
|
149
|
-
59 # name object
|
150
|
-
---> 60 return NameObject.readFromStream(stream, pdf)
|
151
|
-
61 elif idx == 1:
|
152
|
-
62 # hexadecimal string OR dictionary
|
153
|
-
|
154
|
-
~\anaconda3\lib\site-packages\PyPDF2\generic.py in readFromStream(stream, pdf)
|
155
|
-
490 return NameObject(name)
|
156
|
-
491 else:
|
157
|
-
--> 492 raise utils.PdfReadError("Illegal character in Name Object")
|
158
|
-
493
|
159
|
-
494 readFromStream = staticmethod(readFromStream)
|
160
|
-
|
161
|
-
PdfReadError: Illegal character in Name Object
|
2
エラーメッセージを追記しました
title
CHANGED
File without changes
|
body
CHANGED
@@ -28,4 +28,134 @@
|
|
28
28
|
|
29
29
|
with open('Desktop\テスト/'+path,'wb') as f:
|
30
30
|
dst_pdf.write(f)
|
31
|
-
```
|
31
|
+
```
|
32
|
+
以下エラーメッセージになります。
|
33
|
+
UnicodeDecodeError Traceback (most recent call last)
|
34
|
+
~\anaconda3\lib\site-packages\PyPDF2\generic.py in readFromStream(stream, pdf)
|
35
|
+
483 try:
|
36
|
+
--> 484 return NameObject(name.decode('utf-8'))
|
37
|
+
485 except (UnicodeEncodeError, UnicodeDecodeError) as e:
|
38
|
+
|
39
|
+
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x82 in position 8: invalid start byte
|
40
|
+
|
41
|
+
During handling of the above exception, another exception occurred:
|
42
|
+
|
43
|
+
PdfReadError Traceback (most recent call last)
|
44
|
+
<ipython-input-95-1c3d6b0830f5> in <module>
|
45
|
+
18
|
46
|
+
19 with open('Desktop\テスト/'+path,'wb') as f:
|
47
|
+
---> 20 dst_pdf.write(f)
|
48
|
+
|
49
|
+
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in write(self, stream)
|
50
|
+
480 self.stack = []
|
51
|
+
481 if debug: print(("ERM:", externalReferenceMap, "root:", self._root))
|
52
|
+
--> 482 self._sweepIndirectReferences(externalReferenceMap, self._root)
|
53
|
+
483 del self.stack
|
54
|
+
484
|
55
|
+
|
56
|
+
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
57
|
+
569 self.stack.append(data.idnum)
|
58
|
+
570 realdata = self.getObject(data)
|
59
|
+
--> 571 self._sweepIndirectReferences(externMap, realdata)
|
60
|
+
572 return data
|
61
|
+
573 else:
|
62
|
+
|
63
|
+
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
64
|
+
545 for key, value in list(data.items()):
|
65
|
+
546 origvalue = value
|
66
|
+
--> 547 value = self._sweepIndirectReferences(externMap, value)
|
67
|
+
548 if isinstance(value, StreamObject):
|
68
|
+
549 # a dictionary value is a stream. streams must be indirect
|
69
|
+
|
70
|
+
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
71
|
+
584 externMap[data.pdf][data.generation] = {}
|
72
|
+
585 externMap[data.pdf][data.generation][data.idnum] = newobj_ido
|
73
|
+
--> 586 newobj = self._sweepIndirectReferences(externMap, newobj)
|
74
|
+
587 self._objects[idnum-1] = newobj
|
75
|
+
588 return newobj_ido
|
76
|
+
|
77
|
+
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
78
|
+
545 for key, value in list(data.items()):
|
79
|
+
546 origvalue = value
|
80
|
+
--> 547 value = self._sweepIndirectReferences(externMap, value)
|
81
|
+
548 if isinstance(value, StreamObject):
|
82
|
+
549 # a dictionary value is a stream. streams must be indirect
|
83
|
+
|
84
|
+
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
85
|
+
554 elif isinstance(data, ArrayObject):
|
86
|
+
555 for i in range(len(data)):
|
87
|
+
--> 556 value = self._sweepIndirectReferences(externMap, data[i])
|
88
|
+
557 if isinstance(value, StreamObject):
|
89
|
+
558 # an array value is a stream. streams must be indirect
|
90
|
+
|
91
|
+
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
92
|
+
584 externMap[data.pdf][data.generation] = {}
|
93
|
+
585 externMap[data.pdf][data.generation][data.idnum] = newobj_ido
|
94
|
+
--> 586 newobj = self._sweepIndirectReferences(externMap, newobj)
|
95
|
+
587 self._objects[idnum-1] = newobj
|
96
|
+
588 return newobj_ido
|
97
|
+
|
98
|
+
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
99
|
+
545 for key, value in list(data.items()):
|
100
|
+
546 origvalue = value
|
101
|
+
--> 547 value = self._sweepIndirectReferences(externMap, value)
|
102
|
+
548 if isinstance(value, StreamObject):
|
103
|
+
549 # a dictionary value is a stream. streams must be indirect
|
104
|
+
|
105
|
+
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
106
|
+
545 for key, value in list(data.items()):
|
107
|
+
546 origvalue = value
|
108
|
+
--> 547 value = self._sweepIndirectReferences(externMap, value)
|
109
|
+
548 if isinstance(value, StreamObject):
|
110
|
+
549 # a dictionary value is a stream. streams must be indirect
|
111
|
+
|
112
|
+
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
113
|
+
545 for key, value in list(data.items()):
|
114
|
+
546 origvalue = value
|
115
|
+
--> 547 value = self._sweepIndirectReferences(externMap, value)
|
116
|
+
548 if isinstance(value, StreamObject):
|
117
|
+
549 # a dictionary value is a stream. streams must be indirect
|
118
|
+
|
119
|
+
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in _sweepIndirectReferences(self, externMap, data)
|
120
|
+
575 if newobj == None:
|
121
|
+
576 try:
|
122
|
+
--> 577 newobj = data.pdf.getObject(data)
|
123
|
+
578 self._objects.append(None) # placeholder
|
124
|
+
579 idnum = len(self._objects)
|
125
|
+
|
126
|
+
~\anaconda3\lib\site-packages\PyPDF2\pdf.py in getObject(self, indirectReference)
|
127
|
+
1609 % (indirectReference.idnum, indirectReference.generation, idnum, generation))
|
128
|
+
1610 assert generation == indirectReference.generation
|
129
|
+
-> 1611 retval = readObject(self.stream, self)
|
130
|
+
1612
|
131
|
+
1613 # override encryption is used for the /Encrypt dictionary
|
132
|
+
|
133
|
+
~\anaconda3\lib\site-packages\PyPDF2\generic.py in readObject(stream, pdf)
|
134
|
+
64 stream.seek(-2, 1) # reset to start
|
135
|
+
65 if peek == b_('<<'):
|
136
|
+
---> 66 return DictionaryObject.readFromStream(stream, pdf)
|
137
|
+
67 else:
|
138
|
+
68 return readHexStringFromStream(stream)
|
139
|
+
|
140
|
+
~\anaconda3\lib\site-packages\PyPDF2\generic.py in readFromStream(stream, pdf)
|
141
|
+
577 tok = readNonWhitespace(stream)
|
142
|
+
578 stream.seek(-1, 1)
|
143
|
+
--> 579 value = readObject(stream, pdf)
|
144
|
+
580 if not data.get(key):
|
145
|
+
581 data[key] = value
|
146
|
+
|
147
|
+
~\anaconda3\lib\site-packages\PyPDF2\generic.py in readObject(stream, pdf)
|
148
|
+
58 if idx == 0:
|
149
|
+
59 # name object
|
150
|
+
---> 60 return NameObject.readFromStream(stream, pdf)
|
151
|
+
61 elif idx == 1:
|
152
|
+
62 # hexadecimal string OR dictionary
|
153
|
+
|
154
|
+
~\anaconda3\lib\site-packages\PyPDF2\generic.py in readFromStream(stream, pdf)
|
155
|
+
490 return NameObject(name)
|
156
|
+
491 else:
|
157
|
+
--> 492 raise utils.PdfReadError("Illegal character in Name Object")
|
158
|
+
493
|
159
|
+
494 readFromStream = staticmethod(readFromStream)
|
160
|
+
|
161
|
+
PdfReadError: Illegal character in Name Object
|
1
タイトルを修正しました
title
CHANGED
@@ -1,1 +1,1 @@
|
|
1
|
-
PDFファイルのプロパティにExcelファイルの情報を入力したい
|
1
|
+
PythonでPDFファイルのプロパティにExcelファイルの情報を入力したい
|
body
CHANGED
File without changes
|