質問編集履歴

2

コードの記載

2021/04/21 04:21

投稿

snake2009
snake2009

スコア12

test CHANGED
File without changes
test CHANGED
@@ -9,6 +9,152 @@
9
9
 
10
10
 
11
11
  https://cloud.google.com/vision/docs/pdf#vision_text_detection_pdf_gcs-python
12
+
13
+ ```python
14
+
15
+ def async_detect_document(gcs_source_uri, gcs_destination_uri):
16
+
17
+ """OCR with PDF/TIFF as source files on GCS"""
18
+
19
+ import json
20
+
21
+ import re
22
+
23
+ from google.cloud import vision
24
+
25
+ from google.cloud import storage
26
+
27
+
28
+
29
+ # Supported mime_types are: 'application/pdf' and 'image/tiff'
30
+
31
+ mime_type = 'application/pdf'
32
+
33
+
34
+
35
+ # How many pages should be grouped into each json output file.
36
+
37
+ batch_size = 2
38
+
39
+
40
+
41
+ client = vision.ImageAnnotatorClient()
42
+
43
+
44
+
45
+ feature = vision.Feature(
46
+
47
+ type_=vision.Feature.Type.DOCUMENT_TEXT_DETECTION)
48
+
49
+
50
+
51
+ gcs_source = vision.GcsSource(uri=gcs_source_uri)
52
+
53
+ input_config = vision.InputConfig(
54
+
55
+ gcs_source=gcs_source, mime_type=mime_type)
56
+
57
+
58
+
59
+ gcs_destination = vision.GcsDestination(uri=gcs_destination_uri)
60
+
61
+ output_config = vision.OutputConfig(
62
+
63
+ gcs_destination=gcs_destination, batch_size=batch_size)
64
+
65
+
66
+
67
+ async_request = vision.AsyncAnnotateFileRequest(
68
+
69
+ features=[feature], input_config=input_config,
70
+
71
+ output_config=output_config)
72
+
73
+
74
+
75
+ operation = client.async_batch_annotate_files(
76
+
77
+ requests=[async_request])
78
+
79
+
80
+
81
+ print('Waiting for the operation to finish.')
82
+
83
+ operation.result(timeout=420)
84
+
85
+
86
+
87
+ # Once the request has completed and the output has been
88
+
89
+ # written to GCS, we can list all the output files.
90
+
91
+ storage_client = storage.Client()
92
+
93
+
94
+
95
+ match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri)
96
+
97
+ bucket_name = match.group(1)  # ←ここでエラーになります。
98
+
99
+ prefix = match.group(2)
100
+
101
+
102
+
103
+ bucket = storage_client.get_bucket(bucket_name)
104
+
105
+
106
+
107
+ # List objects with the given prefix.
108
+
109
+ blob_list = list(bucket.list_blobs(prefix=prefix))
110
+
111
+ print('Output files:')
112
+
113
+ for blob in blob_list:
114
+
115
+ print(blob.name)
116
+
117
+
118
+
119
+ # Process the first output file from GCS.
120
+
121
+ # Since we specified batch_size=2, the first response contains
122
+
123
+ # the first two pages of the input file.
124
+
125
+ output = blob_list[0]
126
+
127
+
128
+
129
+ json_string = output.download_as_string()
130
+
131
+ response = json.loads(json_string)
132
+
133
+
134
+
135
+ # The actual response for the first page of the input file.
136
+
137
+ first_page_response = response['responses'][0]
138
+
139
+ annotation = first_page_response['fullTextAnnotation']
140
+
141
+
142
+
143
+ # Here we print the full text from the first page.
144
+
145
+ # The response contains more information:
146
+
147
+ # annotation/pages/blocks/paragraphs/words/symbols
148
+
149
+ # including confidence scores and bounding boxes
150
+
151
+ print('Full text:\n')
152
+
153
+ print(annotation['text'])
154
+
155
+ ```
156
+
157
+
12
158
 
13
159
  (エラー画面)
14
160
 

1

タグの追加

2021/04/21 04:21

投稿

snake2009
snake2009

スコア12

test CHANGED
File without changes
test CHANGED
File without changes