Package vision
Constants
const (
    // Unknown break label type.
    UnknownBreak = DetectedBreakType(pb.TextAnnotation_DetectedBreak_UNKNOWN)
    // Regular space.
    SpaceBreak = DetectedBreakType(pb.TextAnnotation_DetectedBreak_SPACE)
    // Sure space (very wide).
    SureSpaceBreak = DetectedBreakType(pb.TextAnnotation_DetectedBreak_SURE_SPACE)
    // Line-wrapping break.
    EOLSureSpaceBreak = DetectedBreakType(pb.TextAnnotation_DetectedBreak_EOL_SURE_SPACE)
    // End-line hyphen that is not present in text; does not co-occur with
    // SPACE, LEADER_SPACE, or LINE_BREAK.
    HyphenBreak = DetectedBreakType(pb.TextAnnotation_DetectedBreak_HYPHEN)
    // Line break that ends a paragraph.
    LineBreak = DetectedBreakType(pb.TextAnnotation_DetectedBreak_LINE_BREAK)
)
const (
    // LikelihoodUnknown means the likelihood is unknown.
    LikelihoodUnknown = Likelihood(pb.Likelihood_UNKNOWN)
    // VeryUnlikely means the image is very unlikely to belong to the feature specified.
    VeryUnlikely = Likelihood(pb.Likelihood_VERY_UNLIKELY)
    // Unlikely means the image is unlikely to belong to the feature specified.
    Unlikely = Likelihood(pb.Likelihood_UNLIKELY)
    // Possible means the image possibly belongs to the feature specified.
    Possible = Likelihood(pb.Likelihood_POSSIBLE)
    // Likely means the image is likely to belong to the feature specified.
    Likely = Likelihood(pb.Likelihood_LIKELY)
    // VeryLikely means the image is very likely to belong to the feature specified.
    VeryLikely = Likelihood(pb.Likelihood_VERY_LIKELY)
)
Scope is the OAuth2 scope required by the Google Cloud Vision API.
const Scope = "https://www.googleapis.com/auth/cloud-platform"
type AnnotateRequest ¶
An AnnotateRequest specifies an image to annotate and the features to look for in that image.
type AnnotateRequest struct {
    // Image is the image to annotate.
    Image *Image
    // MaxFaces is the maximum number of faces to detect in the image.
    // Specifying a number greater than zero enables face detection.
    MaxFaces int
    // MaxLandmarks is the maximum number of landmarks to detect in the image.
    // Specifying a number greater than zero enables landmark detection.
    MaxLandmarks int
    // MaxLogos is the maximum number of logos to detect in the image.
    // Specifying a number greater than zero enables logo detection.
    MaxLogos int
    // MaxLabels is the maximum number of labels to detect in the image.
    // Specifying a number greater than zero enables label detection.
    MaxLabels int
    // MaxTexts is the maximum number of separate pieces of text to detect in the
    // image. Specifying a number greater than zero enables text detection.
    MaxTexts int
    // DocumentText specifies whether a dense text document OCR should be run
    // on the image. When true, takes precedence over MaxTexts.
    DocumentText bool
    // SafeSearch specifies whether a safe-search detection should be run on the image.
    SafeSearch bool
    // ImageProps specifies whether image properties should be obtained for the image.
    ImageProps bool
    // Web specifies whether web annotations should be obtained for the image.
    Web bool
    // CropHints specifies whether crop hints should be computed for the image.
    CropHints *CropHintsParams
}
type Annotations ¶
Annotations contains all the annotations performed by the API on a single image. A nil field indicates either that the corresponding feature was not requested, or that annotation failed for that feature.
type Annotations struct {
    // Faces holds the results of face detection.
    Faces []*FaceAnnotation
    // Landmarks holds the results of landmark detection.
    Landmarks []*EntityAnnotation
    // Logos holds the results of logo detection.
    Logos []*EntityAnnotation
    // Labels holds the results of label detection.
    Labels []*EntityAnnotation
    // Texts holds the results of text detection.
    Texts []*EntityAnnotation
    // FullText holds the results of full text (OCR) detection.
    FullText *TextAnnotation
    // SafeSearch holds the results of safe-search detection.
    SafeSearch *SafeSearchAnnotation
    // ImageProps contains properties of the annotated image.
    ImageProps *ImageProps
    // Web contains web annotations for the image.
    Web *WebDetection
    // CropHints contains crop hints for the image.
    CropHints []*CropHint
    // If non-nil, then one or more of the attempted annotations failed.
    // Non-nil annotations are guaranteed to be correct, even if Error is
    // non-nil.
    Error error
}
type Block ¶
A Block is a logical element on the page.
type Block struct {
    // Additional information detected for the block.
    Properties *TextProperties
    // The bounding box for the block.
    // The vertices are in the order of top-left, top-right, bottom-right,
    // bottom-left. When a rotation of the bounding box is detected, the rotation
    // is represented as around the top-left corner as defined when the text is
    // read in the 'natural' orientation.
    // For example:
    //   * when the text is horizontal it might look like:
    //       0----1
    //       |    |
    //       3----2
    //   * when it's rotated 180 degrees around the top-left corner it becomes:
    //       2----3
    //       |    |
    //       1----0
    //   and the vertex order will still be (0, 1, 2, 3).
    BoundingBox []image.Point
    // List of paragraphs in this block (if this block is of type text).
    Paragraphs []*Paragraph
    // Detected block type (text, image, etc.) for this block.
    BlockType BlockType
}
type BlockType ¶
A BlockType represents the kind of Block (text, image, etc.)
type BlockType int
const (
    // Unknown block type.
    UnknownBlock = BlockType(pb.Block_UNKNOWN)
    // Regular text block.
    TextBlock = BlockType(pb.Block_TEXT)
    // Table block.
    TableBlock = BlockType(pb.Block_TABLE)
    // Image block.
    PictureBlock = BlockType(pb.Block_PICTURE)
    // Horizontal/vertical line box.
    RulerBlock = BlockType(pb.Block_RULER)
    // Barcode block.
    BarcodeBlock = BlockType(pb.Block_BARCODE)
)
type Chin ¶
type Chin struct { Left, Center, Right *r3.Vector }
type Client ¶
Client is a Google Cloud Vision API client.
type Client struct {
// contains filtered or unexported fields
}
func NewClient ¶
func NewClient(ctx context.Context, opts ...option.ClientOption) (*Client, error)
NewClient creates a new vision client.
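A minimal sketch of typical use (assuming application default credentials are available in the environment; error handling is elided to comments):

    ctx := context.Background()
    client, err := vision.NewClient(ctx)
    if err != nil {
        // TODO: handle error.
    }
    defer client.Close()
    // Use the client.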
func (*Client) Annotate ¶
func (c *Client) Annotate(ctx context.Context, requests ...*AnnotateRequest) ([]*Annotations, error)
Annotate annotates multiple images, each with a potentially different set of features.
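For instance, one request can ask for several features of a single image at once. A sketch (ctx and client as in the NewClient example; the bucket and object names are placeholders):

    img := vision.NewImageFromURI("gs://my-bucket/my-image.jpg") // placeholder object
    anns, err := client.Annotate(ctx, &vision.AnnotateRequest{
        Image:      img,
        MaxLabels:  5,
        SafeSearch: true,
    })
    if err != nil {
        // TODO: handle error.
    }
    // One Annotations value is returned per request, in request order.
    for _, ann := range anns {
        if ann.Error != nil {
            // One or more features failed; any non-nil fields are still valid.
        }
        for _, label := range ann.Labels {
            fmt.Println(label.Description, label.Score)
        }
    }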
func (*Client) Close ¶
func (c *Client) Close() error
Close closes the client.
func (*Client) CropHints ¶
func (c *Client) CropHints(ctx context.Context, img *Image, params *CropHintsParams) ([]*CropHint, error)
CropHints computes crop hints for the image.
func (*Client) DetectDocumentText ¶
func (c *Client) DetectDocumentText(ctx context.Context, img *Image) (*TextAnnotation, error)
DetectDocumentText performs full text (OCR) detection on the image.
func (*Client) DetectFaces ¶
func (c *Client) DetectFaces(ctx context.Context, img *Image, maxResults int) ([]*FaceAnnotation, error)
DetectFaces performs face detection on the image. At most maxResults results are returned.
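A sketch of typical use (ctx, client, and img as in the earlier examples):

    faces, err := client.DetectFaces(ctx, img, 10)
    if err != nil {
        // TODO: handle error.
    }
    for _, face := range faces {
        fmt.Println(face.Likelihoods.Joy, face.DetectionConfidence)
    }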
func (*Client) DetectImageProps ¶
func (c *Client) DetectImageProps(ctx context.Context, img *Image) (*ImageProps, error)
DetectImageProps computes properties of the image.
func (*Client) DetectLabels ¶
func (c *Client) DetectLabels(ctx context.Context, img *Image, maxResults int) ([]*EntityAnnotation, error)
DetectLabels performs label detection on the image. At most maxResults results are returned.
func (*Client) DetectLandmarks ¶
func (c *Client) DetectLandmarks(ctx context.Context, img *Image, maxResults int) ([]*EntityAnnotation, error)
DetectLandmarks performs landmark detection on the image. At most maxResults results are returned.
func (*Client) DetectLogos ¶
func (c *Client) DetectLogos(ctx context.Context, img *Image, maxResults int) ([]*EntityAnnotation, error)
DetectLogos performs logo detection on the image. At most maxResults results are returned.
func (*Client) DetectSafeSearch ¶
func (c *Client) DetectSafeSearch(ctx context.Context, img *Image) (*SafeSearchAnnotation, error)
DetectSafeSearch performs safe-search detection on the image.
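Since the Likelihood constants run in increasing order from VeryUnlikely to VeryLikely, results can be compared ordinally. A sketch (ctx, client, and img as above; note that LikelihoodUnknown is the zero value and should be treated separately):

    ss, err := client.DetectSafeSearch(ctx, img)
    if err != nil {
        // TODO: handle error.
    }
    if ss.Adult >= vision.Likely || ss.Violence >= vision.Likely {
        // Treat the image as unsafe.
    }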
func (*Client) DetectTexts ¶
func (c *Client) DetectTexts(ctx context.Context, img *Image, maxResults int) ([]*EntityAnnotation, error)
DetectTexts performs text detection on the image. At most maxResults results are returned.
func (*Client) DetectWeb ¶
func (c *Client) DetectWeb(ctx context.Context, img *Image) (*WebDetection, error)
DetectWeb computes a web annotation on the image.
type ColorInfo ¶
ColorInfo describes the RGB channels of a color, its score, and the fraction of the image that the color occupies.
type ColorInfo struct {
    // RGB components of the color.
    Color color.NRGBA64
    // Score is the image-specific score for this color, in the range [0, 1].
    Score float32
    // PixelFraction is the fraction of pixels the color occupies in the image,
    // in the range [0, 1].
    PixelFraction float32
}
type CropHint ¶
CropHint is a single crop hint that is used to generate a new crop when serving an image.
type CropHint struct {
    // The bounding polygon for the crop region. The coordinates of the bounding
    // box are in the original image's scale, as returned in `ImageParams`.
    BoundingPoly []image.Point
    // Confidence of this being a salient region. Range [0, 1].
    Confidence float32
    // Fraction of importance of this salient region with respect to the original
    // image.
    ImportanceFraction float32
}
type CropHintsParams ¶
CropHintsParams are parameters for a request for crop hints.
type CropHintsParams struct {
    // Aspect ratios for desired crop hints, representing the ratio of the
    // width to the height of the image. For example, if the desired aspect
    // ratio is 4:3, the corresponding float value should be 1.33333. If not
    // specified, the best possible crop is returned. The number of provided
    // aspect ratios is limited to a maximum of 16; any aspect ratios provided
    // after the 16th are ignored.
    AspectRatios []float32
}
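For example, a 16:9 crop hint is requested as the single float ratio 16/9 ≈ 1.77778. A sketch (ctx, client, and img as in the earlier examples):

    hints, err := client.CropHints(ctx, img, &vision.CropHintsParams{
        AspectRatios: []float32{16.0 / 9.0},
    })
    if err != nil {
        // TODO: handle error.
    }
    for _, h := range hints {
        fmt.Println(h.BoundingPoly, h.Confidence, h.ImportanceFraction)
    }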
type DetectedBreak ¶
DetectedBreak is the detected start or end of a structural component.
type DetectedBreak struct {
    // The type of break.
    Type DetectedBreakType
    // True if break prepends the element.
    IsPrefix bool
}
type DetectedBreakType ¶
A DetectedBreakType represents the kind of break detected between text elements (space, hyphen, line break, etc.).

type DetectedBreakType int
type DetectedLanguage ¶
A DetectedLanguage is a language detected for a structural component.
type DetectedLanguage struct {
    // The BCP-47 language code, such as "en-US" or "sr-Latn".
    Code language.Tag
    // The confidence of the detected language, in the range [0, 1].
    Confidence float32
}
type Ears ¶
type Ears struct { Left, Right *r3.Vector }
type EntityAnnotation ¶
An EntityAnnotation describes the results of a landmark, label, logo or text detection on an image.
type EntityAnnotation struct {
    // ID is an opaque entity ID. Some IDs might be available in Knowledge Graph (KG).
    // For more details on KG please see:
    // https://developers.google.com/knowledge-graph/
    ID string
    // Locale is the language code for the locale in which the entity textual
    // description (next field) is expressed.
    Locale string
    // Description is the entity textual description, expressed in the language of Locale.
    Description string
    // Score is the overall score of the result. Range [0, 1].
    Score float32
    // Confidence is the accuracy of the entity detection in an image.
    // For example, for an image containing the Eiffel Tower, this field represents
    // the confidence that there is a tower in the query image. Range [0, 1].
    Confidence float32
    // Topicality is the relevancy of the ICA (Image Content Annotation) label to the
    // image. For example, the relevancy of 'tower' to an image containing
    // 'Eiffel Tower' is likely higher than an image containing a distant towering
    // building, though the confidence that there is a tower may be the same.
    // Range [0, 1].
    Topicality float32
    // BoundingPoly is the image region to which this entity belongs. Not filled currently
    // for label detection. For text detection, BoundingPolys
    // are produced for the entire text detected in an image region, followed by
    // BoundingPolys for each word within the detected text.
    BoundingPoly []image.Point
    // Locations contains the location information for the detected entity.
    // Multiple LatLng structs can be present since one location may indicate the
    // location of the scene in the query image, and another the location of the
    // place where the query image was taken. Location information is usually
    // present for landmarks.
    Locations []LatLng
    // Properties are additional optional Property fields.
    // For example a different kind of score or string that qualifies the entity.
    Properties []Property
}
type Eye ¶
type Eye struct { Left, Right, Top, Bottom, Center, Pupil *r3.Vector }
type Eyebrow ¶
type Eyebrow struct { Top, Left, Right *r3.Vector }
type Eyebrows ¶
type Eyebrows struct { Left, Right Eyebrow }
type Eyes ¶
type Eyes struct { Left, Right Eye }
type FaceAnnotation ¶
A FaceAnnotation describes the results of face detection on an image.
type FaceAnnotation struct {
    // BoundingPoly is the bounding polygon around the face. The coordinates of
    // the bounding box are in the original image's scale, as returned in
    // ImageParams. The bounding box is computed to "frame" the face in
    // accordance with human expectations. It is based on the landmarker
    // results. Note that one or more x and/or y coordinates may not be
    // generated in the BoundingPoly (the polygon will be unbounded) if only a
    // partial face appears in the image to be annotated.
    BoundingPoly []image.Point
    // FDBoundingPoly is tighter than BoundingPoly, and
    // encloses only the skin part of the face. Typically, it is used to
    // eliminate the face from any image analysis that detects the "amount of
    // skin" visible in an image. It is not based on the landmarker results, only
    // on the initial face detection, hence the fd (face detection) prefix.
    FDBoundingPoly []image.Point
    // Face holds the detected face landmarks.
    Face FaceLandmarks
    // RollAngle indicates the amount of clockwise/anti-clockwise rotation of
    // the face relative to the image vertical, about the axis perpendicular to
    // the face. Range [-180, 180].
    RollAngle float32
    // PanAngle is the yaw angle: the leftward/rightward angle that the face is
    // pointing, relative to the vertical plane perpendicular to the image. Range
    // [-180, 180].
    PanAngle float32
    // TiltAngle is the pitch angle: the upwards/downwards angle that the face is
    // pointing relative to the image's horizontal plane. Range [-180, 180].
    TiltAngle float32
    // DetectionConfidence is the detection confidence. The range is [0, 1].
    DetectionConfidence float32
    // LandmarkingConfidence is the face landmarking confidence. The range is [0, 1].
    LandmarkingConfidence float32
    // Likelihoods expresses the likelihood of various aspects of the face.
    Likelihoods *FaceLikelihoods
}
type FaceLandmarks ¶
FaceLandmarks contains the positions of facial features detected by the service.
type FaceLandmarks struct {
    Eyebrows Eyebrows
    Eyes     Eyes
    Ears     Ears
    Nose     Nose
    Mouth    Mouth
    Chin     Chin
    Forehead *r3.Vector
}
type FaceLikelihoods ¶
FaceLikelihoods expresses the likelihood of various aspects of a face.
type FaceLikelihoods struct {
    // Joy is the likelihood that the face expresses joy.
    Joy Likelihood
    // Sorrow is the likelihood that the face expresses sorrow.
    Sorrow Likelihood
    // Anger is the likelihood that the face expresses anger.
    Anger Likelihood
    // Surprise is the likelihood that the face expresses surprise.
    Surprise Likelihood
    // UnderExposed is the likelihood that the face is under-exposed.
    UnderExposed Likelihood
    // Blurred is the likelihood that the face is blurred.
    Blurred Likelihood
    // Headwear is the likelihood that the face has headwear.
    Headwear Likelihood
}
type Image ¶
An Image represents the contents of an image to run detection algorithms on, along with metadata. Images may be described by their raw bytes, or by a reference to a Google Cloud Storage (GCS) object.
type Image struct {
    // Rect is a rectangle on the Earth's surface represented by the
    // image. It is optional.
    Rect *LatLngRect
    // LanguageHints is a list of languages to use for text detection. In most
    // cases, leaving this field nil yields the best results since it enables
    // automatic language detection. For languages based on the Latin alphabet,
    // setting LanguageHints is not needed. In rare cases, when the language of
    // the text in the image is known, setting a hint will help get better
    // results (although it will be a significant hindrance if the hint is
    // wrong). Text detection returns an error if one or more of the specified
    // languages is not one of the supported languages (see
    // https://cloud.google.com/translate/v2/translate-reference#supported_languages).
    LanguageHints []string
    // contains filtered or unexported fields
}
func NewImageFromReader ¶
func NewImageFromReader(r io.ReadCloser) (*Image, error)
NewImageFromReader reads the bytes of an image from r, then closes r.
You may optionally set Rect and LanguageHints on the returned Image before using it.
func NewImageFromURI ¶
func NewImageFromURI(uri string) *Image
NewImageFromURI returns an image that refers to an object in Google Cloud Storage (when the uri is of the form "gs://BUCKET/OBJECT") or at a public URL.
You may optionally set Rect and LanguageHints on the returned Image before using it.
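A sketch of both constructors (the file path and bucket name are placeholders):

    // From local bytes: NewImageFromReader reads and closes the reader.
    f, err := os.Open("testdata/my-image.jpg") // placeholder path
    if err != nil {
        // TODO: handle error.
    }
    localImg, err := vision.NewImageFromReader(f)
    if err != nil {
        // TODO: handle error.
    }
    localImg.LanguageHints = []string{"en"} // optional, used only for text detection

    // From a GCS object or public URL: the constructor stores the reference;
    // no image bytes are read client-side.
    remoteImg := vision.NewImageFromURI("gs://my-bucket/my-image.jpg") // placeholder

    // Either image can now be passed to the Detect methods or Annotate.
    _ = remoteImg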
type ImageProps ¶
ImageProps describes properties of the image itself, like the dominant colors.
type ImageProps struct {
// DominantColors describes the dominant colors of the image.
DominantColors []*ColorInfo
}
type LatLng ¶
A LatLng is a point on the Earth's surface, represented with a latitude and longitude.
type LatLng struct {
    // Lat is the latitude in degrees. It must be in the range [-90.0, +90.0].
    Lat float64
    // Lng is the longitude in degrees. It must be in the range [-180.0, +180.0].
    Lng float64
}
type LatLngRect ¶
A LatLngRect is a rectangular area on the Earth's surface, represented by a minimum and maximum latitude and longitude.
type LatLngRect struct { Min, Max LatLng }
type Likelihood ¶
A Likelihood is an approximate representation of a probability.
type Likelihood int
type Mouth ¶
type Mouth struct { Left, Center, Right, UpperLip, LowerLip *r3.Vector }
type Nose ¶
type Nose struct { Left, Right, Top, Bottom, Tip *r3.Vector }
type Page ¶
A Page is a page of text detected from OCR.
type Page struct {
    // Additional information detected on the page.
    Properties *TextProperties
    // Page width in pixels.
    Width int32
    // Page height in pixels.
    Height int32
    // List of blocks of text, images, etc. on this page.
    Blocks []*Block
}
type Paragraph ¶
A Paragraph is a structural unit of text representing a number of words in certain order.
type Paragraph struct {
    // Additional information detected for the paragraph.
    Properties *TextProperties
    // The bounding box for the paragraph.
    // The vertices are in the order of top-left, top-right, bottom-right,
    // bottom-left. When a rotation of the bounding box is detected, the rotation
    // is represented as around the top-left corner as defined when the text is
    // read in the 'natural' orientation.
    // For example:
    //   * when the text is horizontal it might look like:
    //       0----1
    //       |    |
    //       3----2
    //   * when it's rotated 180 degrees around the top-left corner it becomes:
    //       2----3
    //       |    |
    //       1----0
    //   and the vertex order will still be (0, 1, 2, 3).
    BoundingBox []image.Point
    // List of words in this paragraph.
    Words []*Word
}
type Property ¶
A Property is an arbitrary name-value pair.
type Property struct {
    Name  string
    Value string
}
type SafeSearchAnnotation ¶
SafeSearchAnnotation describes the results of a SafeSearch detection on an image.
type SafeSearchAnnotation struct {
    // Adult is the likelihood that the image contains adult content.
    Adult Likelihood
    // Spoof is the likelihood that an obvious modification was made to the
    // image's canonical version to make it appear funny or offensive.
    Spoof Likelihood
    // Medical is the likelihood that this is a medical image.
    Medical Likelihood
    // Violence is the likelihood that this image represents violence.
    Violence Likelihood
}
type Symbol ¶
A Symbol is a symbol in a text document.
type Symbol struct {
    // Additional information detected for the symbol.
    Properties *TextProperties
    // The bounding box for the symbol.
    // The vertices are in the order of top-left, top-right, bottom-right,
    // bottom-left. When a rotation of the bounding box is detected, the rotation
    // is represented as around the top-left corner as defined when the text is
    // read in the 'natural' orientation.
    // For example:
    //   * when the text is horizontal it might look like:
    //       0----1
    //       |    |
    //       3----2
    //   * when it's rotated 180 degrees around the top-left corner it becomes:
    //       2----3
    //       |    |
    //       1----0
    //   and the vertex order will still be (0, 1, 2, 3).
    BoundingBox []image.Point
    // The actual UTF-8 representation of the symbol.
    Text string
}
type TextAnnotation ¶
TextAnnotation contains a structured representation of OCR extracted text. The hierarchy of an OCR extracted text structure looks like:
TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
Each structural component, starting from Page, may further have its own properties. Properties describe detected languages, breaks etc.
type TextAnnotation struct {
    // List of pages detected by OCR.
    Pages []*Page
    // UTF-8 text detected on the pages.
    Text string
}
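The hierarchy can be walked directly to recover words and symbols. A sketch (ctx, client, and img as in the earlier examples):

    ta, err := client.DetectDocumentText(ctx, img)
    if err != nil {
        // TODO: handle error.
    }
    for _, page := range ta.Pages {
        for _, block := range page.Blocks {
            for _, para := range block.Paragraphs {
                for _, word := range para.Words {
                    for _, sym := range word.Symbols {
                        fmt.Print(sym.Text)
                    }
                    fmt.Print(" ")
                }
            }
        }
    }
    fmt.Println()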
type TextProperties ¶
TextProperties contains additional information about an OCR structural component.
type TextProperties struct {
    // A list of detected languages together with confidence.
    DetectedLanguages []*DetectedLanguage
    // Detected start or end of a text segment.
    DetectedBreak *DetectedBreak
}
type WebDetection ¶
WebDetection contains relevant information for the image from the Internet.
type WebDetection struct {
    // Deduced entities from similar images on the Internet.
    WebEntities []*WebEntity
    // Fully matching images from the Internet.
    // They're definite near-duplicates, and most often a copy of the query image
    // with merely a size change.
    FullMatchingImages []*WebImage
    // Partial matching images from the Internet.
    // Those images are similar enough to share some key-point features. For
    // example, an original image will likely have partial matching for its crops.
    PartialMatchingImages []*WebImage
    // Web pages containing the matching images from the Internet.
    PagesWithMatchingImages []*WebPage
}
type WebEntity ¶
A WebEntity is an entity deduced from similar images on the Internet.
type WebEntity struct {
    // Opaque entity ID.
    ID string
    // Overall relevancy score for the entity.
    // Not normalized and not comparable across different image queries.
    Score float32
    // Canonical description of the entity, in English.
    Description string
}
type WebImage ¶
WebImage contains metadata for online images.
type WebImage struct {
    // The result image URL.
    URL string
    // Overall relevancy score for the image.
    // Not normalized and not comparable across different image queries.
    Score float32
}
type WebPage ¶
A WebPage contains metadata for web pages.
type WebPage struct {
    // The result web page URL.
    URL string
    // Overall relevancy score for the web page.
    // Not normalized and not comparable across different image queries.
    Score float32
}
type Word ¶
A Word is a word in a text document.
type Word struct {
    // Additional information detected for the word.
    Properties *TextProperties
    // The bounding box for the word.
    // The vertices are in the order of top-left, top-right, bottom-right,
    // bottom-left. When a rotation of the bounding box is detected, the rotation
    // is represented as around the top-left corner as defined when the text is
    // read in the 'natural' orientation.
    // For example:
    //   * when the text is horizontal it might look like:
    //       0----1
    //       |    |
    //       3----2
    //   * when it's rotated 180 degrees around the top-left corner it becomes:
    //       2----3
    //       |    |
    //       1----0
    //   and the vertex order will still be (0, 1, 2, 3).
    BoundingBox []image.Point
    // List of symbols in the word.
    // The order of the symbols follows the natural reading order.
    Symbols []*Symbol
}