Package vision
Constants
const (
// Unknown break label type.
UnknownBreak = DetectedBreakType(pb.TextAnnotation_DetectedBreak_UNKNOWN)
// Regular space.
SpaceBreak = DetectedBreakType(pb.TextAnnotation_DetectedBreak_SPACE)
// Sure space (very wide).
SureSpaceBreak = DetectedBreakType(pb.TextAnnotation_DetectedBreak_SURE_SPACE)
// Line-wrapping break.
EOLSureSpaceBreak = DetectedBreakType(pb.TextAnnotation_DetectedBreak_EOL_SURE_SPACE)
// End-line hyphen that is not present in text; does not co-occur with SPACE, LEADER_SPACE, or LINE_BREAK.
HyphenBreak = DetectedBreakType(pb.TextAnnotation_DetectedBreak_HYPHEN)
// Line break that ends a paragraph.
LineBreak = DetectedBreakType(pb.TextAnnotation_DetectedBreak_LINE_BREAK)
)
const (
// LikelihoodUnknown means the likelihood is unknown.
LikelihoodUnknown = Likelihood(pb.Likelihood_UNKNOWN)
// VeryUnlikely means the image is very unlikely to belong to the feature specified.
VeryUnlikely = Likelihood(pb.Likelihood_VERY_UNLIKELY)
// Unlikely means the image is unlikely to belong to the feature specified.
Unlikely = Likelihood(pb.Likelihood_UNLIKELY)
// Possible means the image possibly belongs to the feature specified.
Possible = Likelihood(pb.Likelihood_POSSIBLE)
// Likely means the image is likely to belong to the feature specified.
Likely = Likelihood(pb.Likelihood_LIKELY)
// VeryLikely means the image is very likely to belong to the feature specified.
VeryLikely = Likelihood(pb.Likelihood_VERY_LIKELY)
)
Scope is the OAuth2 scope required by the Google Cloud Vision API.
const Scope = "https://www.googleapis.com/auth/cloud-platform"
type AnnotateRequest ¶
An AnnotateRequest specifies an image to annotate and the features to look for in that image.
type AnnotateRequest struct {
// Image is the image to annotate.
Image *Image
// MaxFaces is the maximum number of faces to detect in the image.
// Specifying a number greater than zero enables face detection.
MaxFaces int
// MaxLandmarks is the maximum number of landmarks to detect in the image.
// Specifying a number greater than zero enables landmark detection.
MaxLandmarks int
// MaxLogos is the maximum number of logos to detect in the image.
// Specifying a number greater than zero enables logo detection.
MaxLogos int
// MaxLabels is the maximum number of labels to detect in the image.
// Specifying a number greater than zero enables label detection.
MaxLabels int
// MaxTexts is the maximum number of separate pieces of text to detect in the
// image. Specifying a number greater than zero enables text detection.
MaxTexts int
// DocumentText specifies whether a dense text document OCR should be run
// on the image. When true, takes precedence over MaxTexts.
DocumentText bool
// SafeSearch specifies whether a safe-search detection should be run on the image.
SafeSearch bool
// ImageProps specifies whether image properties should be obtained for the image.
ImageProps bool
// Web specifies whether web annotations should be obtained for the image.
Web bool
// CropHints, if non-nil, requests that crop hints be computed for the image,
// using the given parameters.
CropHints *CropHintsParams
}
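For illustration, a sketch of a request that enables several features at once; img stands for an *Image obtained from NewImageFromReader or NewImageFromURI, and the package is assumed to be imported as vision:

req := &vision.AnnotateRequest{
	Image:      img,
	MaxFaces:   10,   // enables face detection, at most 10 faces
	MaxLabels:  5,    // enables label detection, at most 5 labels
	SafeSearch: true, // also run safe-search detection
}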
type Annotations ¶
Annotations contains all the annotations performed by the API on a single image. A nil field indicates either that the corresponding feature was not requested, or that annotation failed for that feature.
type Annotations struct {
// Faces holds the results of face detection.
Faces []*FaceAnnotation
// Landmarks holds the results of landmark detection.
Landmarks []*EntityAnnotation
// Logos holds the results of logo detection.
Logos []*EntityAnnotation
// Labels holds the results of label detection.
Labels []*EntityAnnotation
// Texts holds the results of text detection.
Texts []*EntityAnnotation
// FullText holds the results of full text (OCR) detection.
FullText *TextAnnotation
// SafeSearch holds the results of safe-search detection.
SafeSearch *SafeSearchAnnotation
// ImageProps contains properties of the annotated image.
ImageProps *ImageProps
// Web contains web annotations for the image.
Web *WebDetection
// CropHints contains crop hints for the image.
CropHints []*CropHint
// If non-nil, then one or more of the attempted annotations failed.
// Non-nil annotations are guaranteed to be correct, even if Error is
// non-nil.
Error error
}
type Block ¶
A Block is a logical element on the page.
type Block struct {
// Additional information detected for the block.
Properties *TextProperties
// The bounding box for the block.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertex order will still be (0, 1, 2, 3).
BoundingBox []image.Point
// List of paragraphs in this block (if this block is of type text).
Paragraphs []*Paragraph
// Detected block type (text, image etc) for this block.
BlockType BlockType
}
type BlockType ¶
A BlockType represents the kind of Block (text, image, etc.)
type BlockType int
const (
// Unknown block type.
UnknownBlock BlockType = BlockType(pb.Block_UNKNOWN)
// Regular text block.
TextBlock BlockType = BlockType(pb.Block_TEXT)
// Table block.
TableBlock BlockType = BlockType(pb.Block_TABLE)
// Image block.
PictureBlock BlockType = BlockType(pb.Block_PICTURE)
// Horizontal/vertical line box.
RulerBlock BlockType = BlockType(pb.Block_RULER)
// Barcode block.
BarcodeBlock BlockType = BlockType(pb.Block_BARCODE)
)
type Chin ¶
type Chin struct {
Left, Center, Right *r3.Vector
}
type Client ¶
Client is a Google Cloud Vision API client.
type Client struct {
// contains filtered or unexported fields
}
func NewClient ¶
func NewClient(ctx context.Context, opts ...option.ClientOption) (*Client, error)
NewClient creates a new vision client.
Example
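A minimal sketch, assuming the package is imported from cloud.google.com/go/vision and that default application credentials are available in the environment:

ctx := context.Background()
client, err := vision.NewClient(ctx)
if err != nil {
	// TODO: handle error.
}
defer client.Close()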
func (*Client) Annotate ¶
func (c *Client) Annotate(ctx context.Context, requests ...*AnnotateRequest) ([]*Annotations, error)
Annotate annotates multiple images, each with a potentially different set of features.
Example (OneImage)
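A sketch of annotating one image with multiple features, reusing ctx and client from the NewClient example; the GCS object is hypothetical. Each Annotations value should be checked for nil feature fields and a non-nil Error:

anns, err := client.Annotate(ctx, &vision.AnnotateRequest{
	Image:      vision.NewImageFromURI("gs://my-bucket/my-image.png"),
	MaxLogos:   10,
	MaxTexts:   10,
	SafeSearch: true,
})
if err != nil {
	// TODO: handle error.
}
ann := anns[0] // one Annotations per AnnotateRequest
if ann.Logos != nil {
	fmt.Println(ann.Logos)
}
if ann.Texts != nil {
	fmt.Println(ann.Texts)
}
if ann.SafeSearch != nil {
	fmt.Println(ann.SafeSearch)
}
if ann.Error != nil {
	fmt.Printf("at least one annotation failed: %v\n", ann.Error)
}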
func (*Client) Close ¶
func (c *Client) Close() error
Close closes the client.
func (*Client) CropHints ¶
func (c *Client) CropHints(ctx context.Context, img *Image, params *CropHintsParams) ([]*CropHint, error)
CropHints computes crop hints for the image.
func (*Client) DetectDocumentText ¶
func (c *Client) DetectDocumentText(ctx context.Context, img *Image) (*TextAnnotation, error)
DetectDocumentText performs full text (OCR) detection on the image.
func (*Client) DetectFaces ¶
func (c *Client) DetectFaces(ctx context.Context, img *Image, maxResults int) ([]*FaceAnnotation, error)
DetectFaces performs face detection on the image. At most maxResults results are returned.
Example
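A sketch that detects at most ten faces and prints two of the returned landmarks, reusing ctx, client and an *Image img:

faces, err := client.DetectFaces(ctx, img, 10)
if err != nil {
	// TODO: handle error.
}
for _, f := range faces {
	fmt.Println(f.Face.Nose.Tip)        // position of the nose tip
	fmt.Println(f.Face.Eyes.Left.Pupil) // position of the left pupil
}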
func (*Client) DetectImageProps ¶
func (c *Client) DetectImageProps(ctx context.Context, img *Image) (*ImageProps, error)
DetectImageProps computes properties of the image.
func (*Client) DetectLabels ¶
func (c *Client) DetectLabels(ctx context.Context, img *Image, maxResults int) ([]*EntityAnnotation, error)
DetectLabels performs label detection on the image. At most maxResults results are returned.
func (*Client) DetectLandmarks ¶
func (c *Client) DetectLandmarks(ctx context.Context, img *Image, maxResults int) ([]*EntityAnnotation, error)
DetectLandmarks performs landmark detection on the image. At most maxResults results are returned.
func (*Client) DetectLogos ¶
func (c *Client) DetectLogos(ctx context.Context, img *Image, maxResults int) ([]*EntityAnnotation, error)
DetectLogos performs logo detection on the image. At most maxResults results are returned.
func (*Client) DetectSafeSearch ¶
func (c *Client) DetectSafeSearch(ctx context.Context, img *Image) (*SafeSearchAnnotation, error)
DetectSafeSearch performs safe-search detection on the image.
func (*Client) DetectTexts ¶
func (c *Client) DetectTexts(ctx context.Context, img *Image, maxResults int) ([]*EntityAnnotation, error)
DetectTexts performs text detection on the image. At most maxResults results are returned.
func (*Client) DetectWeb ¶
func (c *Client) DetectWeb(ctx context.Context, img *Image) (*WebDetection, error)
DetectWeb computes a web annotation on the image.
type ColorInfo ¶
ColorInfo consists of a color's RGB channels, its score, and the fraction of the image that the color occupies.
type ColorInfo struct {
// RGB components of the color.
Color color.NRGBA64
// Score is the image-specific score for this color, in the range [0, 1].
Score float32
// PixelFraction is the fraction of pixels the color occupies in the image,
// in the range [0, 1].
PixelFraction float32
}
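For example, a sketch that prints an image's dominant colors via DetectImageProps, reusing ctx, client and img from the earlier examples:

props, err := client.DetectImageProps(ctx, img)
if err != nil {
	// TODO: handle error.
}
for _, c := range props.DominantColors {
	fmt.Printf("%v: score %.2f, pixel fraction %.2f\n", c.Color, c.Score, c.PixelFraction)
}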
type CropHint ¶
CropHint is a single crop hint that is used to generate a new crop when serving an image.
type CropHint struct {
// The bounding polygon for the crop region. The coordinates of the bounding
// box are in the original image's scale, as returned in ImageParams.
BoundingPoly []image.Point
// Confidence of this being a salient region. Range [0, 1].
Confidence float32
// Fraction of importance of this salient region with respect to the original
// image.
ImportanceFraction float32
}
type CropHintsParams ¶
CropHintsParams are parameters for a request for crop hints.
type CropHintsParams struct {
// Aspect ratios for desired crop hints, representing the ratio of the
// width to the height of the image. For example, if the desired aspect
// ratio is 4:3, the corresponding float value should be 1.33333. If not
// specified, the best possible crop is returned. The number of provided
// aspect ratios is limited to a maximum of 16; any aspect ratios provided
// after the 16th are ignored.
AspectRatios []float32
}
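For example, a 16:9 crop corresponds to an aspect ratio of 16.0/9.0 ≈ 1.77778. A sketch, reusing ctx, client and img:

hints, err := client.CropHints(ctx, img, &vision.CropHintsParams{
	AspectRatios: []float32{16.0 / 9.0},
})
if err != nil {
	// TODO: handle error.
}
for _, h := range hints {
	fmt.Println(h.BoundingPoly, h.Confidence, h.ImportanceFraction)
}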
type DetectedBreak ¶
DetectedBreak is the detected start or end of a structural component.
type DetectedBreak struct {
// The type of break.
Type DetectedBreakType
// True if break prepends the element.
IsPrefix bool
}
type DetectedBreakType ¶
A DetectedBreakType is the type of a DetectedBreak.
type DetectedBreakType int
type DetectedLanguage ¶
DetectedLanguage is a language detected for a structural component.
type DetectedLanguage struct {
// The BCP-47 language code, such as "en-US" or "sr-Latn".
Code language.Tag
// The confidence of the detected language, in the range [0, 1].
Confidence float32
}
type Ears ¶
type Ears struct {
Left, Right *r3.Vector
}
type EntityAnnotation ¶
An EntityAnnotation describes the results of a landmark, label, logo or text detection on an image.
type EntityAnnotation struct {
// ID is an opaque entity ID. Some IDs might be available in Knowledge Graph (KG).
// For more details on KG please see:
// https://developers.google.com/knowledge-graph/
ID string
// Locale is the language code for the locale in which the entity textual
// description (next field) is expressed.
Locale string
// Description is the entity textual description, expressed in the language of Locale.
Description string
// Score is the overall score of the result. Range [0, 1].
Score float32
// Confidence is the accuracy of the entity detection in an image.
// For example, for an image containing the Eiffel Tower, this field represents
// the confidence that there is a tower in the query image. Range [0, 1].
Confidence float32
// Topicality is the relevancy of the ICA (Image Content Annotation) label to the
// image. For example, the relevancy of 'tower' to an image containing
// 'Eiffel Tower' is likely higher than an image containing a distant towering
// building, though the confidence that there is a tower may be the same.
// Range [0, 1].
Topicality float32
// BoundingPoly is the image region to which this entity belongs. It is
// currently not filled in for label detection. For text detection, BoundingPolys
// are produced for the entire text detected in an image region, followed by
// BoundingPolys for each word within the detected text.
BoundingPoly []image.Point
// Locations contains the location information for the detected entity.
// Multiple LatLng structs can be present since one location may indicate the
// location of the scene in the query image, and another the location of the
// place where the query image was taken. Location information is usually
// present for landmarks.
Locations []LatLng
// Properties are additional optional Property fields.
// For example a different kind of score or string that qualifies the entity.
Properties []Property
}
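A sketch that prints label annotations, reusing ctx, client and img; DetectLandmarks, DetectLogos and DetectTexts yield the same type:

labels, err := client.DetectLabels(ctx, img, 5)
if err != nil {
	// TODO: handle error.
}
for _, l := range labels {
	fmt.Printf("%s (score %.2f)\n", l.Description, l.Score)
}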
type Eye ¶
type Eye struct {
Left, Right, Top, Bottom, Center, Pupil *r3.Vector
}
type Eyebrow ¶
type Eyebrow struct {
Top, Left, Right *r3.Vector
}
type Eyebrows ¶
type Eyebrows struct {
Left, Right Eyebrow
}
type Eyes ¶
type Eyes struct {
Left, Right Eye
}
type FaceAnnotation ¶
A FaceAnnotation describes the results of face detection on an image.
type FaceAnnotation struct {
// BoundingPoly is the bounding polygon around the face. The coordinates of
// the bounding box are in the original image's scale, as returned in
// ImageParams. The bounding box is computed to "frame" the face in
// accordance with human expectations. It is based on the landmarker
// results. Note that one or more x and/or y coordinates may not be
// generated in the BoundingPoly (the polygon will be unbounded) if only a
// partial face appears in the image to be annotated.
BoundingPoly []image.Point
// FDBoundingPoly is tighter than BoundingPoly, and
// encloses only the skin part of the face. Typically, it is used to
// eliminate the face from any image analysis that detects the "amount of
// skin" visible in an image. It is not based on the landmarker results, only
// on the initial face detection, hence the fd (face detection) prefix.
FDBoundingPoly []image.Point
// Landmarks are detected face landmarks.
Face FaceLandmarks
// RollAngle indicates the amount of clockwise/anti-clockwise rotation of
// the face relative to the image vertical, about the axis perpendicular to
// the face. Range [-180,180].
RollAngle float32
// PanAngle is the yaw angle: the leftward/rightward angle that the face is
// pointing, relative to the vertical plane perpendicular to the image. Range
// [-180,180].
PanAngle float32
// TiltAngle is the pitch angle: the upwards/downwards angle that the face is
// pointing relative to the image's horizontal plane. Range [-180,180].
TiltAngle float32
// DetectionConfidence is the detection confidence. The range is [0, 1].
DetectionConfidence float32
// LandmarkingConfidence is the face landmarking confidence. The range is [0, 1].
LandmarkingConfidence float32
// Likelihoods expresses the likelihood of various aspects of the face.
Likelihoods *FaceLikelihoods
}
type FaceLandmarks ¶
FaceLandmarks contains the positions of facial features detected by the service.
type FaceLandmarks struct {
Eyebrows Eyebrows
Eyes Eyes
Ears Ears
Nose Nose
Mouth Mouth
Chin Chin
Forehead *r3.Vector
}
type FaceLikelihoods ¶
FaceLikelihoods expresses the likelihood of various aspects of a face.
type FaceLikelihoods struct {
// Joy is the likelihood that the face expresses joy.
Joy Likelihood
// Sorrow is the likelihood that the face expresses sorrow.
Sorrow Likelihood
// Anger is the likelihood that the face expresses anger.
Anger Likelihood
// Surprise is the likelihood that the face expresses surprise.
Surprise Likelihood
// UnderExposed is the likelihood that the face is under-exposed.
UnderExposed Likelihood
// Blurred is the likelihood that the face is blurred.
Blurred Likelihood
// Headwear is the likelihood that the face has headwear.
Headwear Likelihood
}
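Because the Likelihood constants increase in value from LikelihoodUnknown through VeryLikely, they can be compared with ordinary operators. A sketch, reusing faces from the DetectFaces example:

for _, f := range faces {
	if f.Likelihoods.Joy >= vision.Likely {
		fmt.Println("the face likely expresses joy")
	}
}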
type Image ¶
An Image represents the contents of an image to run detection algorithms on, along with metadata. Images may be described by their raw bytes, or by a reference to a Google Cloud Storage (GCS) object.
type Image struct {
// Rect is a rectangle on the Earth's surface represented by the
// image. It is optional.
Rect *LatLngRect
// LanguageHints is a list of languages to use for text detection. In most
// cases, leaving this field nil yields the best results since it enables
// automatic language detection. For languages based on the Latin alphabet,
// setting LanguageHints is not needed. In rare cases, when the language of
// the text in the image is known, setting a hint will help get better
// results (although it will be a significant hindrance if the hint is
// wrong). Text detection returns an error if one or more of the specified
// languages is not one of the supported languages (See
// https://cloud.google.com/translate/v2/translate-reference#supported_languages).
LanguageHints []string
// contains filtered or unexported fields
}
func NewImageFromReader ¶
func NewImageFromReader(r io.ReadCloser) (*Image, error)
NewImageFromReader reads the bytes of an image from r, then closes r.
You may optionally set Rect and LanguageHints on the returned Image before using it.
func NewImageFromURI ¶
func NewImageFromURI(uri string) *Image
NewImageFromURI returns an image that refers to an object in Google Cloud Storage (when the uri is of the form "gs://BUCKET/OBJECT") or at a public URL.
You may optionally set Rect and LanguageHints on the returned Image before using it.
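A sketch of both constructors; the file path, bucket and object names are hypothetical:

f, err := os.Open("testdata/face.jpg")
if err != nil {
	// TODO: handle error.
}
img, err := vision.NewImageFromReader(f) // reads all of f, then closes it
if err != nil {
	// TODO: handle error.
}

gcsImg := vision.NewImageFromURI("gs://my-bucket/my-image.png")

Either value can then be passed to Annotate or any of the Detect methods.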
type ImageProps ¶
ImageProps describes properties of the image itself, like the dominant colors.
type ImageProps struct {
// DominantColors describes the dominant colors of the image.
DominantColors []*ColorInfo
}
type LatLng ¶
A LatLng is a point on the Earth's surface, represented with a latitude and longitude.
type LatLng struct {
// Lat is the latitude in degrees. It must be in the range [-90.0, +90.0].
Lat float64
// Lng is the longitude in degrees. It must be in the range [-180.0, +180.0].
Lng float64
}
type LatLngRect ¶
A LatLngRect is a rectangular area on the Earth's surface, represented by a minimum and maximum latitude and longitude.
type LatLngRect struct {
Min, Max LatLng
}
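For example, a rough bounding rectangle around Paris (the coordinates are approximate) that could be set as a location hint on an Image:

img.Rect = &vision.LatLngRect{
	Min: vision.LatLng{Lat: 48.81, Lng: 2.22},
	Max: vision.LatLng{Lat: 48.90, Lng: 2.47},
}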
type Likelihood ¶
A Likelihood is an approximate representation of a probability.
type Likelihood int
type Mouth ¶
type Mouth struct {
Left, Center, Right, UpperLip, LowerLip *r3.Vector
}
type Nose ¶
type Nose struct {
Left, Right, Top, Bottom, Tip *r3.Vector
}
type Page ¶
A Page is a page of text detected from OCR.
type Page struct {
// Additional information detected on the page.
Properties *TextProperties
// Page width in pixels.
Width int32
// Page height in pixels.
Height int32
// List of blocks of text, images etc on this page.
Blocks []*Block
}
type Paragraph ¶
A Paragraph is a structural unit of text representing a number of words in a certain order.
type Paragraph struct {
// Additional information detected for the paragraph.
Properties *TextProperties
// The bounding box for the paragraph.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertex order will still be (0, 1, 2, 3).
BoundingBox []image.Point
// List of words in this paragraph.
Words []*Word
}
type Property ¶
A Property is an arbitrary name-value pair.
type Property struct {
Name string
Value string
}
type SafeSearchAnnotation ¶
SafeSearchAnnotation describes the results of a SafeSearch detection on an image.
type SafeSearchAnnotation struct {
// Adult is the likelihood that the image contains adult content.
Adult Likelihood
// Spoof is the likelihood that an obvious modification was made to the
// image's canonical version to make it appear funny or offensive.
Spoof Likelihood
// Medical is the likelihood that this is a medical image.
Medical Likelihood
// Violence is the likelihood that this image represents violence.
Violence Likelihood
}
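A sketch that gates on the safe-search results, reusing ctx, client and img:

ss, err := client.DetectSafeSearch(ctx, img)
if err != nil {
	// TODO: handle error.
}
if ss.Adult >= vision.Possible || ss.Violence >= vision.Possible {
	fmt.Println("image flagged for manual review")
}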
type Symbol ¶
A Symbol is a symbol in a text document.
type Symbol struct {
// Additional information detected for the symbol.
Properties *TextProperties
// The bounding box for the symbol.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertex order will still be (0, 1, 2, 3).
BoundingBox []image.Point
// The actual UTF-8 representation of the symbol.
Text string
}
type TextAnnotation ¶
TextAnnotation contains a structured representation of OCR extracted text. The hierarchy of an OCR extracted text structure looks like:
TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
Each structural component, starting from Page, may further have its own properties. Properties describe detected languages, breaks etc.
type TextAnnotation struct {
// List of pages detected by OCR.
Pages []*Page
// UTF-8 text detected on the pages.
Text string
}
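A sketch of walking this hierarchy after DetectDocumentText, reusing ctx, client and img:

ta, err := client.DetectDocumentText(ctx, img)
if err != nil {
	// TODO: handle error.
}
fmt.Println(ta.Text) // the full flattened text
for _, page := range ta.Pages {
	for _, block := range page.Blocks {
		for _, para := range block.Paragraphs {
			for _, word := range para.Words {
				for _, sym := range word.Symbols {
					fmt.Print(sym.Text)
				}
				fmt.Print(" ")
			}
			fmt.Println()
		}
	}
}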
type TextProperties ¶
TextProperties contains additional information about an OCR structural component.
type TextProperties struct {
// A list of detected languages together with confidence.
DetectedLanguages []*DetectedLanguage
// Detected start or end of a text segment.
DetectedBreak *DetectedBreak
}
type WebDetection ¶
WebDetection contains relevant information for the image from the Internet.
type WebDetection struct {
// Deduced entities from similar images on the Internet.
WebEntities []*WebEntity
// Fully matching images from the Internet.
// They are definite near-duplicates, most often a copy of the query image
// with only a size change.
FullMatchingImages []*WebImage
// Partial matching images from the Internet.
// These images are similar enough to share some key-point features. For
// example, an original image will likely have partial matches for its crops.
PartialMatchingImages []*WebImage
// Web pages containing the matching images from the Internet.
PagesWithMatchingImages []*WebPage
}
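A sketch that prints web annotations, reusing ctx, client and img:

web, err := client.DetectWeb(ctx, img)
if err != nil {
	// TODO: handle error.
}
for _, e := range web.WebEntities {
	fmt.Printf("%s (score %.2f)\n", e.Description, e.Score)
}
for _, m := range web.FullMatchingImages {
	fmt.Println("full match:", m.URL)
}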
type WebEntity ¶
A WebEntity is an entity deduced from similar images on the Internet.
type WebEntity struct {
// Opaque entity ID.
ID string
// Overall relevancy score for the entity.
// Not normalized and not comparable across different image queries.
Score float32
// Canonical description of the entity, in English.
Description string
}
type WebImage ¶
WebImage contains metadata for online images.
type WebImage struct {
// The result image URL.
URL string
// Overall relevancy score for the image.
// Not normalized and not comparable across different image queries.
Score float32
}
type WebPage ¶
A WebPage contains metadata for web pages.
type WebPage struct {
// The result web page URL.
URL string
// Overall relevancy score for the web page.
// Not normalized and not comparable across different image queries.
Score float32
}
type Word ¶
A Word is a word in a text document.
type Word struct {
// Additional information detected for the word.
Properties *TextProperties
// The bounding box for the word.
// The vertices are in the order of top-left, top-right, bottom-right,
// bottom-left. When a rotation of the bounding box is detected the rotation
// is represented as around the top-left corner as defined when the text is
// read in the 'natural' orientation.
// For example:
// * when the text is horizontal it might look like:
// 0----1
// | |
// 3----2
// * when it's rotated 180 degrees around the top-left corner it becomes:
// 2----3
// | |
// 1----0
// and the vertex order will still be (0, 1, 2, 3).
BoundingBox []image.Point
// List of symbols in the word.
// The order of the symbols follows the natural reading order.
Symbols []*Symbol
}