+92 332 4229 857 99ProjectIdeas@Gmail.com

Extracting Text From Image (C#.net)



  
Extracting Text From Image
The following program shows how to extract text from an image. To do this you'll have to add a reference (under the COM tab) to the "Microsoft Office Document Imaging Library". If you can't find this reference, install Microsoft Office 2007, choose to customize the installation, open Office Tools, check "Microsoft Office Document Imaging", and install it. After that, open Add Reference again, add the reference from the COM tab, and use it.


Code
 
// Text recognized from the image by btnExtract_Click, with words separated by single spaces.
string extractedText = string.Empty;
// Path of the file picked in the open-file dialog; set in btnBrowse_Click and read by btnExtract_Click.
string getFileName;
    
//Browsing an image
 private void btnBrowse_Click(object sender, EventArgs e)
 {
           
    if(openFileDialog1.ShowDialog() == DialogResult.OK)
    
     {
    
      getFileName = openFileDialog1.FileName;
     
      Image targetImage = Image.FromFile(getFileName);
      
      targetImage = fitInPBox(targetImage);
     
      pBox.Image = targetImage;
       
    }
 }

 //This function fit the browsed image in picture box
 private Image fitInPBox(Image img)
 {     
   Bitmap image = new Bitmap(img, new Size(pBox.Size.Width,pBox.Size.Height));
       
   return (Image)image;
 }

    
//Extract button handler: the text is extracted from the browsed image
 private void btnExtract_Click(object sender, EventArgs e)
 {

   MODI.Document doc = new MODI.Document();
   doc.Create(getFileName);
   doc.OCR(MODI.MiLANGUAGES.miLANG_ENGLISH, true, true);
   MODI.Image img = (MODI.Image)doc.Images[0];
   MODI.Layout layout = img.Layout;
  
    for (int i = 0; i < layout.Words.Count; i++)
      {
           MODI.Word word = (MODI.Word)layout.Words[i];

                if (extractedText.Length > 0)
                {
                   extractedText += " ";
                }

                extractedText += word.Text;
                richTextBox1.Text = extractedText;
      }
         
  }



2 comments:

Anonymous said...

Thanks a lot....

David said...

Hello,

Very nice tutorial, but when I have a black letter on a yellow background, the OCR does not recognize that letter. Is there any way to solve this?
Thanks