⚠️ Warning: This is a draft ⚠️

This means it might contain formatting issues, incorrect code, conceptual problems, or other severe issues.

If you want to help to improve and eventually enable this page, please fork RosettaGit's repository and open a merge request on GitHub.

{{draft task}}[[Category:Text processing]][[Category:Networking and Web Interaction]][[Category:Sorting]][[Category:Rosetta Code related]]

Sort the most popular programming languages by their number of users on Rosetta Code. Show the languages with at least 100 users.

A way to solve the task:

Users of a language X are those listed on the page https://rosettacode.org/wiki/Category:X_User, or preferably https://rosettacode.org/mw/index.php?title=Category:X_User&redirect=no to avoid redirections. To find the list of such categories, first parse the entries of http://rosettacode.org/mw/index.php?title=Special:Categories&limit=5000. Then download and parse each language's user category page to count its users.

Sample output on 18 February 2019:

Language             Users
--------------------------
C                      391
Java                   276
C++                    275
Python                 262
JavaScript             238
Perl                   171
PHP                    167
SQL                    138
UNIX Shell             131
BASIC                  120
C sharp                118
Pascal                 116
Haskell                102

A Rosetta Code user usually declares using a language with the [[Template:Mylang|mylang]] template. This template is expected to appear on the User page. However, in some cases it appears on a user Talk page. It's not necessary to take this into account. For instance, at a time when the C category listed 373 users (the table above shows a later count), 3 of them were actually declared on a Talk page.

TOC

Go

package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "regexp"
    "sort"
    "strconv"
)

// Result pairs a language name with the user count extracted for its
// "Category:<language> User" entry in the API response.
type Result struct {
    lang  string // language name captured from the category title
    users int    // value parsed from the "size" field of the entry's categoryinfo
}

// main asks the Rosetta Code MediaWiki API for the members of
// "Category:Language users", pulls each category's size out of the JSON text
// with a regular expression, and prints every language that has at least
// `minimum` users, ordered by descending user count. Languages with equal
// counts share a rank; the second and later ones are flagged with "=".
//
// Any network, HTTP-status or read failure is reported and ends the program
// instead of being silently ignored (which previously led to a nil-pointer
// dereference on a failed request).
func main() {
    const minimum = 25 // smallest user count that is still listed

    // Submatch 1 is the language name, submatch 4 the category size.
    // Submatch 4 stays empty when the title is directly followed by "}",
    // i.e. the entry carries no categoryinfo.
    ex := `"Category:(.+?)( User)?"(\}|,"categoryinfo":\{"size":(\d+),)`
    re := regexp.MustCompile(ex)

    page := "http://rosettacode.org/mw/api.php?"
    action := "action=query"
    format := "format=json"
    fversion := "formatversion=2"
    generator := "generator=categorymembers"
    gcmTitle := "gcmtitle=Category:Language%20users"
    gcmLimit := "gcmlimit=500"
    prop := "prop=categoryinfo"
    rawContinue := "rawcontinue="
    page += fmt.Sprintf("%s&%s&%s&%s&%s&%s&%s&%s", action, format, fversion,
        generator, gcmTitle, gcmLimit, prop, rawContinue)

    resp, err := http.Get(page)
    if err != nil {
        fmt.Println("request failed:", err)
        return
    }
    // Close the body on every exit path, including the early returns below.
    defer resp.Body.Close()
    if resp.StatusCode != http.StatusOK {
        fmt.Println("unexpected HTTP status:", resp.Status)
        return
    }
    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        fmt.Println("reading response failed:", err)
        return
    }

    var results []Result
    for _, match := range re.FindAllStringSubmatch(string(body), -1) {
        // An entry without a size yields an empty submatch; skip it
        // explicitly rather than relying on Atoi's zero value.
        users, err := strconv.Atoi(match[4])
        if err != nil {
            continue
        }
        if users >= minimum {
            results = append(results, Result{match[1], users})
        }
    }

    // Highest user count first.
    sort.Slice(results, func(i, j int) bool {
        return results[j].users < results[i].users
    })

    fmt.Println("Rank  Users  Language")
    fmt.Println("----  -----  --------")
    lastUsers, lastRank := 0, 0
    for i, result := range results {
        rank, eq := i+1, " "
        if result.users == lastUsers {
            // Same count as the previous line: reuse its rank and flag the tie.
            rank, eq = lastRank, "="
        } else {
            lastUsers, lastRank = result.users, rank
        }
        fmt.Printf(" %-2d%s   %3d    %s\n", rank, eq, result.users, result.lang)
    }
}

{{out}}


Rank  Users  Language
----  -----  --------
 1     397    C
 2     278    C++
 2 =   278    Java
 4     266    Python
 5     240    JavaScript
 6     171    Perl
 7     168    PHP
 8     139    SQL
 9     131    UNIX Shell
 10    121    C sharp
 11    120    BASIC
 12    118    Pascal
 13    102    Haskell
 14     93    Ruby
 15     81    Fortran
 16     70    Visual Basic
 17     65    Prolog
 18     61    Scheme
 19     59    Common Lisp
 20     58    AWK
 20=    58    Lua
 22     52    HTML
 23     46    Batch File
 24     45    Assembly
 24=    45    X86 Assembly
 26     43    Bash
 27     40    Erlang
 27=    40    MATLAB
 29     39    Lisp
 30     38    Forth
 31     36    Visual Basic .NET
 31=    36    Delphi
 33     35    APL
 33=    35    J
 35     34    Tcl
 35=    34    Brainfuck
 37     33    Objective-C
 38     32    COBOL
 38=    32    R
 40     30    Go
 40=    30    Mathematica
 42     29    Perl 6
 43     27    Clojure
 44     25    OCaml
 44=    25    AutoHotkey
 44=    25    REXX

Perl

use strict;
use warnings;
use JSON;
use URI::Escape;
use LWP::UserAgent;

# HTTP client used for every API request; it identifies itself with a custom
# User-Agent string.
my $client = LWP::UserAgent->new( agent => "Rosettacode Perl task solver" );

# Script path of the Rosetta Code wiki; "/api.php" is appended when querying.
my $url = 'http://rosettacode.org/mw';

# User-count threshold applied when the table is printed.
my $minimum = 100;

# Build the query string for a MediaWiki API request.
#
# Takes a flat list of name => value pairs and returns a string that starts
# with the fixed action/format/formatversion parameters, followed by every
# pair as "name=<uri-escaped value>", all joined with '&'.
sub uri_query_string {
    my(%fields) = @_;
    # Emit the names in sorted order: Perl randomizes hash order, so without
    # the sort the same request would produce a different URL on every run.
    # Joining a single list also avoids a dangling '&' when %fields is empty.
    join '&',
        'action=query&format=json&formatversion=2',
        map { $_ . '=' . uri_escape($fields{$_}) } sort keys %fields;
}

# Run a MediaWiki API query and collect the user count of each language.
#
#   $site  - script path of the wiki; "/api.php" is appended
#   $type  - key under "query" in the JSON response that holds the result rows
#   %query - additional API parameters, encoded by uri_query_string()
#
# Returns a flat list of (language name => page count of its
# "Category:<language> User" category) pairs. Dies when an HTTP request fails.
#
# When the response carries a "query-continue" element (raw continuation, as
# requested by the caller's `rawcontinue` parameter) its parameters are added
# to the next request, so results beyond the server-side limit are fetched too.
sub mediawiki_query {
    my($site, $type, %query) = @_;
    my %languages;
    my %continue;    # continuation parameters; empty for the first request

    while (1) {
        my $url = "$site/api.php?" . uri_query_string(%query, %continue);
        my $response = $client->request( HTTP::Request->new( GET => $url ) );
        $response->is_success
            or die "Failed to GET '$url': ", $response->status_line;

        my $data = decode_json($response->content);
        for my $row ( @{ $data->{query}{$type} || [] } ) {
            next unless defined $row->{categoryinfo};
            # Skip titles that are not of the form "Category:<language> User"
            # instead of storing them under an undefined key.
            my($title) = $row->{title} =~ /Category:(.*?) User/ or next;
            $languages{$title} = $row->{categoryinfo}{pages};
        }

        # "query-continue" maps module names to the parameters that resume
        # them; flatten those into one parameter set for the next request.
        my $more = $data->{'query-continue'} or last;
        %continue = map { %$_ } values %$more;
    }
    %languages;
}

# Fetch the user count of every member of "Category:Language users".
my %table = mediawiki_query(
    $url, 'pages',
    ( generator   => 'categorymembers',
      gcmtitle    => 'Category:Language users',
      gcmlimit    => '999',
      prop        => 'categoryinfo',
      rawcontinue => '',
    )
);

# Print the languages from most to least users. The task asks for languages
# with *at least* $minimum users, so a language with exactly $minimum users is
# included. Equal counts are ordered by name to keep the listing stable.
for my $k (sort { $table{$b} <=> $table{$a} or $a cmp $b } keys %table) {
    # Counts are descending, so nothing after the first miss can qualify.
    last if $table{$k} < $minimum;
    printf "%4d %s\n", $table{$k}, $k;
}

{{out}}

 397 C
 278 Java
 278 C++
 266 Python
 240 JavaScript
 171 Perl
 168 PHP
 139 SQL
 131 UNIX Shell
 121 C sharp
 120 BASIC
 118 Pascal
 102 Haskell

Perl 6

{{works with|Rakudo|2017.11}} Use the mediawiki API rather than web scraping since it is much faster and less resource intensive. Show languages with at least 25 users since that is still a pretty short list and to demonstrate how tied rankings are handled. Change the '''$minimum''' parameter to adjust what the cut-off point will be.

This is all done in a single pass; ties are not detected until a language has the same count as a previous one, so ties are marked by a '''T''' next to the count indicating that '''this''' language has the same count as the '''previous'''.

use HTTP::UserAgent;
use URI::Escape;
use JSON::Fast;

# HTTP client used by mediawiki-query for every API request.
my $client = HTTP::UserAgent.new;

# Script path of the wiki; mediawiki-query appends "/api.php".
my $url = 'http://rosettacode.org/mw';

# Start instant for the elapsed-time report printed at the end.
my $start-time = now;

say "
### ===
 Generated: { DateTime.new(time) }
### ===
";

# State shared with display(), which updates it on every call.
my $lang = 1;       # running line number of the next language printed
my $rank = 0;       # rank of the most recent distinct count
my $last = 0;       # count of the previously displayed language
my $tie = ' ';      # 'T' when the current count equals the previous one
my $minimum = 25;   # output stops at the first count below this value

# Fetch the members of "Category:Language users", reduce each row to a
# count/lang pair, order by descending count and then by name, and print
# formatted lines until a count falls below $minimum.
.say for
    mediawiki-query(
        $url, 'pages',
        :generator<categorymembers>,
        :gcmtitle<Category:Language users>,
        :gcmlimit<350>,
        :rawcontinue(),
        :prop<categoryinfo>
    )

    .map({ %( count => .<categoryinfo><pages> || 0,
              lang  => .<title>.subst(/^'Category:' (.+) ' User'/, ->$/ {$0}) ) })

    .sort( { -.<count>, .<lang> } )

    .map( { last if .<count> < $minimum; display(.<count>, .<lang>) } );

say "
### ===
 elapsed: {(now - $start-time).round(.01)} seconds
### ===
";

# Format one output line for a language and advance the shared counters.
# A count equal to the one seen on the previous call keeps the previous rank
# and is flagged with 'T'; any other count takes the current line number as
# its rank and clears the flag.
sub display ($count, $which) {
    if $count == $last {
        $tie = 'T';
    }
    else {
        $last = $count;
        $rank = $lang;
        $tie  = ' ';
    }
    my $line-number = $lang++;
    sprintf "#%3d  Rank: %2d %s  with %-4s users:  %s",
            $line-number, $rank, $tie, $count, $which;
}

# Yield (via gather/take) the result rows of a MediaWiki API query, issuing
# follow-up requests for as long as the response contains a "query-continue"
# element.
#   $site  - script path of the wiki; "/api.php" is appended
#   $type  - key under "query" whose values are emitted
#   %query - additional API parameters (action, format and formatversion are
#            supplied here)
sub mediawiki-query ($site, $type, *%query) {
    my $url = "$site/api.php?" ~ uri-query-string(
        :action<query>, :format<json>, :formatversion<2>, |%query);
    my $continue = '';    # continuation parameters; empty for the first request

    gather loop {
        my $response = $client.get("$url&$continue");
        my $data = from-json($response.content);
        take $_ for $data.<query>.{$type}.values;
        # Turn the "query-continue" data into the query-string fragment for
        # the next request; when it is absent, `last` ends the loop.
        $continue = uri-query-string |($data.<query-continue>{*}».hash.hash or last);
    }
}

# Turn named arguments into a URL query string: each pair becomes
# "key=value" with the value URI-escaped, and the pairs are joined with '&'.
sub uri-query-string (*%fields) {
    %fields.map({ .key ~ '=' ~ uri-escape(.value) }).join('&')
}

{{out}}


### ===
 Generated: 2018-06-01T22:09:26Z
### ===

#  1  Rank:  1    with 380  users:  C
#  2  Rank:  2    with 269  users:  Java
#  3  Rank:  3    with 266  users:  C++
#  4  Rank:  4    with 251  users:  Python
#  5  Rank:  5    with 234  users:  JavaScript
#  6  Rank:  6    with 167  users:  Perl
#  7  Rank:  7    with 166  users:  PHP
#  8  Rank:  8    with 134  users:  SQL
#  9  Rank:  9    with 125  users:  UNIX Shell
# 10  Rank: 10    with 119  users:  BASIC
# 11  Rank: 11    with 116  users:  C sharp
# 12  Rank: 12    with 112  users:  Pascal
# 13  Rank: 13    with 99   users:  Haskell
# 14  Rank: 14    with 93   users:  Ruby
# 15  Rank: 15    with 74   users:  Fortran
# 16  Rank: 16    with 67   users:  Visual Basic
# 17  Rank: 17    with 62   users:  Prolog
# 18  Rank: 18    with 61   users:  Scheme
# 19  Rank: 19    with 58   users:  Common Lisp
# 20  Rank: 20    with 55   users:  Lua
# 21  Rank: 21    with 53   users:  AWK
# 22  Rank: 22    with 52   users:  HTML
# 23  Rank: 23    with 46   users:  Assembly
# 24  Rank: 24    with 44   users:  Batch File
# 25  Rank: 25    with 42   users:  Bash
# 26  Rank: 25 T  with 42   users:  X86 Assembly
# 27  Rank: 27    with 40   users:  Erlang
# 28  Rank: 28    with 38   users:  Forth
# 29  Rank: 29    with 37   users:  MATLAB
# 30  Rank: 30    with 36   users:  Lisp
# 31  Rank: 31    with 35   users:  J
# 32  Rank: 31 T  with 35   users:  Visual Basic .NET
# 33  Rank: 33    with 34   users:  Delphi
# 34  Rank: 34    with 33   users:  APL
# 35  Rank: 34 T  with 33   users:  Ada
# 36  Rank: 34 T  with 33   users:  Brainfuck
# 37  Rank: 34 T  with 33   users:  Objective-C
# 38  Rank: 34 T  with 33   users:  Tcl
# 39  Rank: 39    with 32   users:  R
# 40  Rank: 40    with 31   users:  COBOL
# 41  Rank: 41    with 30   users:  Go
# 42  Rank: 42    with 29   users:  Perl 6
# 43  Rank: 43    with 27   users:  Clojure
# 44  Rank: 43 T  with 27   users:  Mathematica
# 45  Rank: 45    with 25   users:  AutoHotkey

### ====== elapsed: 1.45 seconds ======

Phix

See [[Rosetta_Code/Rank_languages_by_popularity#Phix|Rank languages by popularity]], just set output_users to true. {{out}}


  1: 397 - C
  2: 278 - C++
  =: 278 - Java
  4: 266 - Python
  5: 240 - JavaScript
  6: 171 - Perl
  7: 168 - PHP
  8: 139 - SQL
  9: 131 - UNIX Shell
 10: 121 - C sharp
 11: 120 - BASIC
 12: 118 - Pascal
 13: 102 - Haskell
 14: 93 - Ruby
 15: 81 - Fortran
 16: 70 - Visual Basic
 17: 65 - Prolog
 18: 61 - Scheme
 19: 59 - Common Lisp
 20: 58 - AWK

Racket

Note: the implementation is very similar to [[Rosetta_Code/Rank_languages_by_popularity#Racket|Rank languages by popularity]].

#lang racket

(require racket/hash
         net/url
         json)

;; Maximum number of table rows printed.
(define limit 64)

;; Reduce a title of the form "Category:<language> User" to "<language>".
;; A title that does not have that shape is returned unchanged.
(define (replacer cat)
  (match (regexp-match #rx"^Category:(.*?) User$" cat)
    [(list _ language) language]
    [_ cat]))

;; Category whose members are queried.
(define category "Category:Language users")

;; Noun printed next to each count in the output.
(define entries "users")

;; Endpoint of the wiki's API, without any query parameters.
(define api-url (string->url "http://rosettacode.org/mw/api.php"))

;; Return a copy of api-url whose query asks for the members of `category`
;; together with their categoryinfo, resuming at the given gcmcontinue value.
(define (make-complete-url gcmcontinue)
  (define params
    (list (cons 'format "json")
          (cons 'action "query")
          (cons 'generator "categorymembers")
          (cons 'gcmtitle category)
          (cons 'gcmlimit "200")
          (cons 'gcmcontinue gcmcontinue)
          (cons 'continue "")
          (cons 'prop "categoryinfo")))
  (struct-copy url api-url [query params]))

;; Short alias for hash-ref, used to walk the decoded JSON.
(define @ hash-ref)

;; Mutable map from category title to the "size" value of its categoryinfo
;; (0 when a page has no categoryinfo or no size).
(define table (make-hash))

;; Request one batch of category members at a time and merge it into `table`;
;; keep going while the response contains a 'continue element.
(let fetch ([gcmcontinue ""])
  (define resp (read-json (get-pure-port (make-complete-url gcmcontinue))))
  (define pages (@ (@ resp 'query) 'pages))
  (hash-union! table
               (for/hash ([(k v) (in-hash pages)])
                 (define info (@ v 'categoryinfo (hash)))
                 (values (@ v 'title #f) (@ info 'size 0))))
  (define more (@ resp 'continue #f))
  (when more
    (fetch (@ more 'gcmcontinue))))

;; Table entries ordered from largest to smallest size.
(define by-size-desc (sort (hash->list table) > #:key cdr))

;; Print at most `limit` rows. The fold carries the previous row's size and
;; rank so that a row with the same size as its predecessor repeats the rank.
(for/fold ([prev-size #f] [prev-rank #f] #:result (void))
          ([entry (in-list by-size-desc)] [index (in-range limit)])
  (define title (car entry))
  (define size (cdr entry))
  (define rank (if (equal? prev-size size) prev-rank (add1 index)))
  (define rank-column (~a rank #:align 'right #:min-width 2))
  (define count-column
    (~a (format "(~a ~a)" size entries) #:align 'right #:min-width 14))
  (printf "Rank: ~a ~a ~a\n" rank-column count-column (replacer title))
  (values size rank))

{{out}}


Rank:  1    (402 users) C
Rank:  2    (283 users) Java
Rank:  3    (281 users) C++
Rank:  4    (270 users) Python
Rank:  5    (243 users) JavaScript
Rank:  6    (175 users) Perl
Rank:  7    (171 users) PHP
Rank:  8    (142 users) SQL
Rank:  9    (134 users) UNIX Shell
Rank: 10    (123 users) C sharp
Rank: 10    (123 users) BASIC
Rank: 12    (119 users) Pascal
Rank: 13    (105 users) Haskell
Rank: 14     (94 users) Ruby
Rank: 15     (83 users) Fortran
Rank: 16     (71 users) Visual Basic
Rank: 17     (67 users) Prolog
Rank: 18     (63 users) Scheme
Rank: 19     (61 users) Common Lisp
Rank: 20     (59 users) AWK
Rank: 20     (59 users) Lua
Rank: 22     (52 users) HTML
Rank: 23     (46 users) X86 Assembly
Rank: 23     (46 users) Batch File
Rank: 23     (46 users) Assembly
Rank: 26     (44 users) Bash
Rank: 27     (40 users) Erlang
Rank: 27     (40 users) MATLAB
Rank: 29     (39 users) Forth
Rank: 29     (39 users) Lisp
Rank: 31     (37 users) Visual Basic .NET
Rank: 32     (36 users) APL
Rank: 32     (36 users) Tcl
Rank: 32     (36 users) Delphi
Rank: 35     (35 users) J
Rank: 36     (34 users) Brainfuck
Rank: 37     (33 users) COBOL
Rank: 37     (33 users) Objective-C
Rank: 39     (32 users) Go
Rank: 39     (32 users) R
Rank: 41     (30 users) Mathematica
Rank: 42     (29 users) Perl 6
Rank: 43     (28 users) Clojure
Rank: 44     (25 users) OCaml
Rank: 44     (25 users) AutoHotkey
Rank: 44     (25 users) REXX
Rank: 47     (24 users) PostScript
Rank: 48     (23 users) Sed
Rank: 48     (23 users) Emacs Lisp
Rank: 48     (23 users) LaTeX
Rank: 51     (22 users) VBScript
Rank: 51     (22 users) CSS
Rank: 51     (22 users) MySQL
Rank: 51     (22 users) Scala
Rank: 55     (20 users) XSLT
Rank: 55     (20 users) Racket
Rank: 57     (19 users) 6502 Assembly
Rank: 58     (18 users) Z80 Assembly
Rank: 58     (18 users) Logo
Rank: 60     (17 users) Factor
Rank: 60     (17 users) Make
Rank: 60     (17 users) 8086 Assembly
Rank: 60     (17 users) F Sharp
Rank: 64     (16 users) PL/I

Stata

* Download the list of all wiki categories and keep only the lines that link
* to a "<language>_User" category.
* NOTE(review): delim("@") presumably makes each line load as one variable v1
* - confirm that "@" cannot occur in the page.
copy "http://rosettacode.org/mw/index.php?title=Special:Categories&limit=5000" categ.html, replace
import delimited categ.html, delim("@") enc("utf-8") clear
keep if ustrpos(v1,"/wiki/Category:") & ustrpos(v1,"_User")

* s: page title taken from the href attribute, i.e. the text between the
* quotes with the 6-character "/wiki/" prefix removed.
gen i = ustrpos(v1,"href=")
gen j = ustrpos(v1,char(34),i+1)
gen k = ustrpos(v1,char(34),j+1)
gen s = usubstr(v1,j+7,k-j-7)

* lang: link text from just after ">" up to, but not including, the blank
* that precedes "User". The length is k-j-1; k-j would keep that blank.
replace i = ustrpos(v1,"title=")
replace j = ustrpos(v1,">",i+1)
replace k = ustrpos(v1," User",j+1)
gen lang = usubstr(v1,j+1,k-j-1)
keep s lang
gen users=.

* For every category, download its page and count the lines that contain a
* link to a user page. preserve/restore keeps the category list intact while
* the downloaded page temporarily replaces the data in memory.
forval i=1/`c(N)' {
	preserve
	copy `"https://rosettacode.org/mw/index.php?title=`=s[`i']'&redirect=no"' `i'.html, replace
	import delimited `i'.html, delim("@") enc("utf-8") clear
	count if ustrpos(v1,"/wiki/User")
	local m `r(N)'
	restore
	replace users=`m' in `i'
	erase `i'.html
}

* Sort by descending user count, then by name, and show the first 50 rows.
* NOTE(review): leftalign is not a built-in Stata command; presumably a
* user-written helper that must be available - verify.
drop s
gsort -users lang
compress
leftalign
list in f/50
save rc_users, replace

'''Output''' (2019-02-18)

     +----------------------------+
     | lang                 users |
     |----------------------------|
  1. | C                      391 |
  2. | Java                   276 |
  3. | C++                    275 |
  4. | Python                 262 |
  5. | JavaScript             238 |
     |----------------------------|
  6. | Perl                   171 |
  7. | PHP                    167 |
  8. | SQL                    138 |
  9. | UNIX Shell             131 |
 10. | BASIC                  120 |
     |----------------------------|
 11. | C sharp                118 |
 12. | Pascal                 116 |
 13. | Haskell                102 |
 14. | Ruby                    93 |
 15. | Fortran                 79 |
     |----------------------------|
 16. | Visual Basic            68 |
 17. | Prolog                  65 |
 18. | Scheme                  61 |
 19. | Common Lisp             58 |
 20. | AWK                     57 |
     |----------------------------|
 21. | Lua                     57 |
 22. | HTML                    52 |
 23. | Assembly                45 |
 24. | Batch File              44 |
 25. | X86 Assembly            44 |
     |----------------------------|
 26. | Bash                    43 |
 27. | Erlang                  40 |
 28. | Lisp                    39 |
 29. | MATLAB                  39 |
 30. | Forth                   38 |
     |----------------------------|
 31. | Ada                     36 |
 32. | Visual Basic .NET       36 |
 33. | Delphi                  35 |
 34. | J                       35 |
 35. | APL                     34 |
     |----------------------------|
 36. | Brainfuck               34 |
 37. | Tcl                     34 |
 38. | Objective-C             33 |
 39. | Smalltalk               33 |
 40. | COBOL                   32 |
     |----------------------------|
 41. | R                       32 |
 42. | Go                      30 |
 43. | Mathematica             30 |
 44. | Perl 6                  29 |
 45. | Clojure                 27 |
     |----------------------------|
 46. | AutoHotkey              25 |
 47. | REXX                    25 |
 48. | LaTeX                   23 |
 49. | OCaml                   23 |
 50. | Sed                     23 |
     +----------------------------+

zkl

Uses libraries cURL and YAJL (yet another json library)

// Threshold used by rsGet: entries whose page count is below it are skipped.
const MIN_USERS=60;
// Handles to the cURL and YAJL (JSON) libraries used by rsGet.
var [const] CURL=Import("zklCurl"), YAJL=Import("zklYAJL")[0];

// Query the Rosetta Code API for the members of Category:Language users and
// return a list of (page count, language name) entries for every category
// whose page count is at least MIN_USERS. Requests are repeated with the
// returned gcmcontinue value while the response has a "query-continue" element.
fcn rsGet{
   continueValue,r,curl := "",List, CURL();
   do{	// eg 5 times
      // "%%" is a literal percent sign; "%s" receives the continuation parameter
      page:=("http://rosettacode.org/mw/api.php?action=query"
        "&generator=categorymembers&prop=categoryinfo"
	"&gcmtitle=Category%%3ALanguage%%20users"
	"&rawcontinue=&format=json&gcmlimit=350"
	"%s").fmt(continueValue);
      page=curl.get(page);
      // NOTE(review): presumably page[1] is the length of the header - confirm
      page=page[0].del(0,page[1]);  // get rid of HTML header
      json:=YAJL().write(page).close();
      json["query"]["pages"].pump(r.append,'wrap(x){ x=x[1];
         //("2708",Dictionary(title:Category:C User,...,categoryinfo:D(pages:373,size:373,...)))
	 // or title:SmartBASIC
	 if((pgs:=x.find("categoryinfo")) and (pgs=pgs.find("pages")) and
	    pgs>=MIN_USERS)
	   return(pgs,x["title"].replace("Category:","").replace(" User",""));
	   // reached only when the condition above is false
	   // NOTE(review): Void.Skip presumably makes pump append nothing - confirm
	   return(Void.Skip);
      });
      // an empty continueValue ends the loop
      if(continueValue=json.find("query-continue",""))
        continueValue=String("&gcmcontinue=",
	   continueValue["categorymembers"]["gcmcontinue"]);
   }while(continueValue);
   r
}

// Collect the (page count, language) entries and order them by count, largest first.
allLangs:=rsGet();
allLangs=allLangs.sort(fcn(a,b){ a[0]>b[0] });
println("
### ====
 ",Time.Date.prettyDay(),"
### ====
");
// Number the entries from 1 and print one line per language.
foreach n,pgnm in ([1..].zip(allLangs))
   { println("#%3d with %4s users: %s".fmt(n,pgnm.xplode())) }

{{out}}



### ====
 Wednesday, the 20th of December 2017
### ====

#  1 with  373 users: C
#  2 with  261 users: C++
#  3 with  257 users: Java
#  4 with  243 users: Python
#  5 with  228 users: JavaScript
#  6 with  163 users: PHP
#  7 with  162 users: Perl
#  8 with  131 users: SQL
#  9 with  120 users: UNIX Shell
# 10 with  118 users: BASIC
# 11 with  113 users: C sharp
# 12 with  109 users: Pascal
# 13 with   98 users: Haskell
# 14 with   91 users: Ruby
# 15 with   71 users: Fortran
# 16 with   65 users: Visual Basic
# 17 with   60 users: Scheme